Basically, pygame.surfarray.pixels3d returns an array of shape (672, 672, 3), and with it I get the error Resources exhausted, but when I pass a (6, 30, 30) array instead it works. Any help would be appreciated.
import numpy
import random
from DeepRTS import PyDeepRTS
from Algorithms.DQN2.DQN import DQN

# Start the game
g = PyDeepRTS('21x21-2v2.json')

# Add players
player1 = g.add_player()
player2 = g.add_player()
#player3 = g.add_player()
#player4 = g.add_player()

# Set FPS and UPS limits
g.set_max_fps(10000000)
g.set_max_ups(10000000)

# How often the state should be drawn
g.render_every(20)
# How often the capture function should return a state
g.capture_every(20)
# How often the game image should be drawn to the screen
g.view_every(20)

# Start the game (flag)
g.start()

actions = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
observation = numpy.ndarray(shape=(6, 30, 30), dtype=float)
flag = 0

player1.do_action(13)
player2.do_action(13)
player1.get_Score()

while flag == 0:
    g.tick()    # Update the game clock
    g.update()  # Process the game state
    g.render()  # Draw the game state to graphics
    state2 = g.capture()
    if state2 is not None:
        dqn = DQN(state2, len(actions))
        flag = 1

# Run forever
i = 0
while True:
    g.tick()    # Update the game clock
    g.update()  # Process the game state
    g.render()  # Draw the game state to graphics
    state2 = g.capture()  # Captures current state (returns None if .capture_every is set for some iterations)
    g.caption()  # Show window caption
    g.view()     # View the game state in the pygame window

    if state2 is not None and flag == 1:
        actionID = dqn.act()
        # If the game is in terminal state
        terminal = g.is_terminal()
        reward_ = player1.get_Score()
        player1.do_action(actionID)
        player2.do_action(numpy.random.randint(0, 19))
        dqn.train(actionID, reward_, terminal, state2)
        if g.is_terminal():
            print("finished")
            g.reset()
        print(actionID, " Reward", reward_)
    i += 1
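One way to act on the shape difference described above, before the frame ever reaches the DQN, is to downsample the capture. The helper below is only a sketch under my own assumptions: that g.capture() returns an RGB frame as a NumPy array of shape (672, 672, 3), and that a small channels-first shape is acceptable to the DQN class (which is built with K.set_image_dim_ordering('th')). The name preprocess_state and the 84x84 target size are hypothetical choices, not part of DeepRTS.

import numpy as np
from skimage.color import rgb2gray
from skimage.transform import resize

def preprocess_state(frame, out_hw=(84, 84)):
    """Downsample an RGB frame to one small grayscale channel, channels-first."""
    gray = rgb2gray(frame)                              # (H, W) floats in [0, 1]
    small = resize(gray, out_hw, anti_aliasing=True)    # (84, 84)
    return small[np.newaxis, ...].astype(np.float32)    # (1, 84, 84)

# Hypothetical usage in the loops above:
#   state2 = g.capture()
#   if state2 is not None:
#       dqn = DQN(preprocess_state(state2), len(actions))
#   ...
#   dqn.train(actionID, reward_, terminal, preprocess_state(state2))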
# This is the DQN algorithm
import os
import random
import numpy as np
import tensorflow as tf
from collections import deque
from skimage.color import rgb2gray
from skimage.transform import resize
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras import backend as K

K.set_image_dim_ordering('th')

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)


class DQN:
    def __init__(self,
                 initial_state,
                 num_actions,
                 initial_epsilon=1.0,
                 final_epsilon=0.1,
                 exploration_steps=10000,
                 initial_replay_size=10,
                 memory_size=400000,
                 batch_size=9,  # 32
                 learning_rate=0.0025,
                 momentum=0.95,
                 min_grad=0.01,
                 env_name="DeepRTS",
                 save_network_path="dqn2/saved_networks/",
                 save_summary_path="dqn2/summary/",
                 load_network=False,
                 gamma=0.99,
                 train_interval=40,
                 target_update_interval=1000,
                 save_interval=30000
                 ):
        self.state = initial_state
        self.sshape = initial_state.shape   # Shape of the state
        self.num_actions = num_actions      # Action space
        self.epsilon = initial_epsilon      # Epsilon-greedy start
        self.final_epsilon = final_epsilon  # Epsilon-greedy end
        self.epsilon_step = (self.epsilon - self.final_epsilon) / exploration_steps  # Epsilon decrease step
        self.initial_replay_size = initial_replay_size
        self.memory_size = memory_size
        self.exploration_steps = exploration_steps
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.min_grad = min_grad
        self.batch_size = batch_size
        self.gamma = gamma
        self.target_update_interval = target_update_interval
        self.save_interval = save_interval
        self.env_name = env_name
        self.save_network_path = save_network_path + self.env_name
        self.save_summary_path = save_summary_path + self.env_name
        self.load_network = load_network
        self.train_interval = train_interval
        self.t = 0  # TODO

        # Summary parameters
        self.total_reward = 0
        self.total_q_max = 0
        self.total_loss = 0
        self.duration = 0
        self.episode = 0

        # Replay memory
        self.replay_memory = deque()

        # Create Q network
        self.s, self.q_values, q_network = self.build_model()
        q_network_weights = q_network.trainable_weights

        # Create target network
        self.st, self.target_q_values, target_network = self.build_model()
        target_network_weights = target_network.trainable_weights

        # Define target network update operation
        self.update_target_network = [target_network_weights[i].assign(q_network_weights[i]) for i in range(len(target_network_weights))]

        # Define loss and gradient update operation
        self.a, self.y, self.loss, self.grads_update = self.build_functions(q_network_weights)

        self.sess = tf.InteractiveSession()
        self.saver = tf.train.Saver(q_network_weights)
        self.summary_placeholders, self.update_ops, self.summary_op = self.setup_summary()
        self.summary_writer = tf.summary.FileWriter(self.save_summary_path, self.sess.graph)

        if not os.path.exists(self.save_network_path):
            os.makedirs(self.save_network_path)

        self.sess.run(tf.global_variables_initializer())

        # Load network
        self.load()

        # Initialize target network
        self.sess.run(self.update_target_network)

    def build_model(self):
        model = Sequential()
        model.add(Conv2D(32, (1, 1), strides=(1, 1), activation='relu', input_shape=self.sshape))
        model.add(Conv2D(64, (1, 1), activation="relu", strides=(1, 1)))
        model.add(Conv2D(64, (1, 1), activation="relu", strides=(1, 1)))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(self.num_actions))

        s = tf.placeholder(tf.float32, [None, *self.sshape])
        q_values = model(s)

        return s, q_values, model

    def build_functions(self, q_network_weights):
        a = tf.placeholder(tf.int64, [None])
        y = tf.placeholder(tf.float32, [None])

        # Convert action to one-hot vector
        a_one_hot = tf.one_hot(a, self.num_actions, 1.0, 0.0)
        q_value = tf.reduce_sum(tf.multiply(self.q_values, a_one_hot), reduction_indices=1)

        # Clip the error: the loss is quadratic when the error is in (-1, 1), and linear outside of that region
        error = tf.abs(y - q_value)
        quadratic_part = tf.clip_by_value(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = tf.reduce_mean(0.5 * tf.square(quadratic_part) + linear_part)

        optimizer = tf.train.RMSPropOptimizer(self.learning_rate, momentum=self.momentum, epsilon=self.min_grad)
        grads_update = optimizer.minimize(loss, var_list=q_network_weights)

        return a, y, loss, grads_update

    def new_episode(self):
        pass

    def end_episode(self):
        pass

    def act(self):
        if self.epsilon >= random.random() or self.t < self.initial_replay_size:
            action = random.randrange(self.num_actions)
        else:
            action = np.argmax(self.q_values.eval(feed_dict={self.s: [np.float32(self.state)]}))

        # Anneal epsilon linearly over time
        if self.epsilon > self.final_epsilon and self.t >= self.initial_replay_size:
            self.epsilon -= self.epsilon_step

        return action

    def train_network(self):
        state_batch = []
        action_batch = []
        reward_batch = []
        next_state_batch = []
        terminal_batch = []
        y_batch = []

        # Sample random minibatch of transitions from replay memory
        minibatch = random.sample(self.replay_memory, self.batch_size)
        for data in minibatch:
            state_batch.append(data[0])
            action_batch.append(data[1])
            reward_batch.append(data[2])
            next_state_batch.append(data[3])
            terminal_batch.append(data[4])

        # Convert True to 1, False to 0
        terminal_batch = np.array(terminal_batch) + 0

        target_q_values_batch = self.target_q_values.eval(feed_dict={self.st: np.float32(np.array(next_state_batch))})
        y_batch = reward_batch + (1 - terminal_batch) * self.gamma * np.max(target_q_values_batch, axis=1)

        loss, _ = self.sess.run([self.loss, self.grads_update], feed_dict={
            self.s: np.float32(np.array(state_batch)),
            self.a: action_batch,
            self.y: y_batch
        })

        self.total_loss += loss

    def train(self, action, reward, terminal, observation):
        """
        action      - The performed action which led to this state
        reward      - The reward given in the state transition
        terminal    - Is state terminal? (Loss / Victory)
        observation - New state observation after action
        """
        next_state = np.append(self.state[1:, :, :], observation, axis=0)

        # Clip all positive rewards at 1 and all negative rewards at -1, leaving 0 rewards unchanged
        reward = np.clip(reward, -1, 1)

        # Store transition in replay memory
        self.replay_memory.append((self.state, action, reward, self.state, terminal))
        if len(self.replay_memory) > self.memory_size:
            self.replay_memory.popleft()

        if self.t >= self.initial_replay_size:
            # Train network
            if self.t % self.train_interval == 0:
                self.train_network()

            # Update target network
            if self.t % self.target_update_interval == 0:
                self.sess.run(self.update_target_network)

            # Save network
            if self.t % self.save_interval == 0:
                save_path = self.saver.save(self.sess, self.save_network_path + '/' + self.env_name, global_step=self.t)
                print('Successfully saved: ' + save_path)

        self.total_reward += reward
        self.total_q_max += np.max(self.q_values.eval(feed_dict={self.s: [np.float32(self.state)]}))
        self.duration += 1

        if terminal:
            # Write summary
            if self.t >= self.initial_replay_size:
                stats = [self.total_reward, self.total_q_max / float(self.duration), self.duration, self.total_loss / (float(self.duration) / float(self.train_interval))]
                for i in range(len(stats)):
                    self.sess.run(self.update_ops[i], feed_dict={self.summary_placeholders[i]: float(stats[i])})
                summary_str = self.sess.run(self.summary_op)
                self.summary_writer.add_summary(summary_str, self.episode + 1)

            # Debug
            if self.t < self.initial_replay_size:
                mode = 'random'
            elif self.initial_replay_size <= self.t < self.initial_replay_size + self.exploration_steps:
                mode = 'explore'
            else:
                mode = 'exploit'
            print('EPISODE: {0:6d} / TIMESTEP: {1:8d} / DURATION: {2:5d} / EPSILON: {3:.5f} / TOTAL_REWARD: {4:3.0f} / AVG_MAX_Q: {5:2.4f} / AVG_LOSS: {6:.5f} / MODE: {7}'.format(
                self.episode + 1, self.t, self.duration, self.epsilon, self.total_reward,
                self.total_q_max / float(self.duration),
                self.total_loss / (float(self.duration) / float(self.train_interval)), mode))

            self.total_reward = 0
            self.total_q_max = 0
            self.total_loss = 0
            self.duration = 0
            self.episode += 1

        self.t += 1

    def iterate(self):
        pass

    def load(self):
        checkpoint = tf.train.get_checkpoint_state(self.save_network_path)
        if self.load_network and checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.sess, checkpoint.model_checkpoint_path)
            print('Successfully loaded: ' + checkpoint.model_checkpoint_path)
        else:
            print('Training new network...')

    def setup_summary(self):
        episode_total_reward = tf.Variable(0.)
        tf.summary.scalar(self.env_name + '/Total Reward/Episode', episode_total_reward)
        episode_avg_max_q = tf.Variable(0.)
        tf.summary.scalar(self.env_name + '/Average Max Q/Episode', episode_avg_max_q)
        episode_duration = tf.Variable(0.)
        tf.summary.scalar(self.env_name + '/Duration/Episode', episode_duration)
        episode_avg_loss = tf.Variable(0.)
        tf.summary.scalar(self.env_name + '/Average Loss/Episode', episode_avg_loss)

        summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss]
        summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))]
        update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))]
        summary_op = tf.summary.merge_all()

        return summary_placeholders, update_ops, summary_op
Error:
2019-07-07 02:58:55.652029: W tensorflow/core/common_runtime/bfc_allocator.cc:319] *******************************************************************************____________________*
2019-07-07 02:58:55.652085: W tensorflow/core/framework/op_kernel.cc:1502] OP_REQUIRES failed at assign_op.h:117 : Resource exhausted: OOM when allocating tensor with shape[409600,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
Traceback (most recent call last):
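A rough back-of-the-envelope estimate (my own arithmetic, not part of the original log): the tensor the allocator gives up on, shape [409600, 512] in 32-bit floats, is about 800 MB on its own, and the Q-network, the target network, and the RMSProp optimizer slots each hold a copy of that Dense-layer weight matrix.

# Size of one copy of the [409600, 512] float32 tensor named in the OOM message
rows, cols, bytes_per_float32 = 409600, 512, 4
print(rows * cols * bytes_per_float32 / (1024 ** 2), "MB")   # 800.0 MB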
Best answer
When using CNNs and you run into an Out Of Memory (OOM) error, you can try the steps below:
- Reduce the size of the mini-batch, as Priyanka Chaudhary mentioned above (see the sketch after this answer).
- Replace 32-bit floats with 16-bit floats (if the values fit in that range).
- Reduce the input dimensions/shape, which cuts the number of parameters and therefore the RAM consumed.

Regarding "python - How to change the image dimensions so that my convolution algorithm works properly", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/56916896/
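To make the first two suggestions concrete, here is a minimal sketch, assuming the DQN class above is used unchanged; batch_size=4 is an arbitrary smaller value rather than a tuned one, and switching the Keras backend to 16-bit floats must happen before the model is built:

from keras import backend as K

# Use 16-bit floats for weights and activations (set this *before* DQN() builds its models);
# roughly halves memory at the cost of numerical precision
K.set_floatx('float16')

# Use a smaller mini-batch so fewer transitions are processed on the GPU per training step
dqn = DQN(state2, len(actions), batch_size=4)   # the default above is 9 (commented 32)

The third suggestion, shrinking the input shape, is what the preprocess_state sketch after the game loop above illustrates.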