
javascript - Computing the gradient of the loss with Tensorflow.js


I am trying to compute the gradient of the loss with respect to a network's trainable weights using Tensorflow.js, so that I can then apply these gradients to my network's weights. In Python this is easily done with the tf.gradients() function, which takes two minimal inputs representing dx and dy. However, I cannot reproduce that behaviour in Tensorflow.js. I am not sure whether my understanding of the gradient of the loss with respect to the weights is wrong, or whether my code contains a bug.

I have spent some time going through the core code of the tfjs-node package to understand how this is done when we call tf.model.fit(), but with little success so far.

let model = build_model(); //Two stacked dense layers followed by two parallel dense layers for the output
let loss = compute_loss(...); //This function returns a tf.Tensor of shape [1] containing the mean loss for the batch.
const f = () => loss;
const grad = tf.variableGrads(f);
grad(model.getWeights());

The model.getWeights() function returns an array of tf.variable()s, so I assumed this call would compute dL/dW for each layer, which I could later apply to my network's weights. However, that is not quite what happens: I get this error instead:

Error: Cannot compute gradient of y=f(x) with respect to x. Make sure that the f you passed encloses all operations that lead from x to y.

I don't quite understand what this error means. So how should I compute the gradient of the loss (analogous to tf.gradients() in Python) with Tensorflow.js?

Edit: Here is the function that computes the loss:

function compute_loss(done, new_state, memory, agent, gamma=0.99) {
  // Bootstrap value: 0 for a terminal state, otherwise the critic's estimate
  // of the new state's value.
  let reward_sum = 0.;
  if(done) {
    reward_sum = 0.;
  } else {
    reward_sum = agent.call(tf.oneHot(new_state, 12).reshape([1, 9, 12]))
      .values.flatten().get(0);
  }

  // Discounted returns, computed backwards through the stored rewards.
  let discounted_rewards = [];
  let memory_reward_rev = memory.rewards;
  for(let reward of memory_reward_rev.reverse()) {
    reward_sum = reward + gamma * reward_sum;
    discounted_rewards.push(reward_sum);
  }
  discounted_rewards.reverse();

  // One-hot encode the stored states and stack them into a single batch.
  let onehot_states = [];
  for(let state of memory.states) {
    onehot_states.push(tf.oneHot(state, 12));
  }
  let init_onehot = onehot_states[0];

  for(let i=1; i<onehot_states.length; i++) {
    init_onehot = init_onehot.concat(onehot_states[i]);
  }

  // Forward pass: the model returns both value estimates and policy logits.
  let log_val = agent.call(
    init_onehot.reshape([memory.states.length, 9, 12])
  );

  // Advantage and value loss.
  let disc_reward_tensor = tf.tensor(discounted_rewards);
  let advantage = disc_reward_tensor.reshapeAs(log_val.values).sub(log_val.values);
  let value_loss = advantage.square();
  log_val.values.print();

  // Entropy term of the policy.
  let policy = tf.softmax(log_val.logits);
  let logits_cpy = log_val.logits.clone();

  let entropy = policy.mul(logits_cpy.mul(tf.scalar(-1)));
  entropy = entropy.sum();

  // Policy loss: cross-entropy between the taken actions and the logits.
  let memory_actions = [];
  for(let i=0; i< memory.actions.length; i++) {
    memory_actions.push(new Array(2000).fill(0));
    memory_actions[i][memory.actions[i]] = 1;
  }
  memory_actions = tf.tensor(memory_actions);
  let policy_loss = tf.losses.softmaxCrossEntropy(memory_actions.reshape([memory.actions.length, 2000]), log_val.logits);

  // Total loss: 0.5 * value loss + policy loss - 0.01 * entropy.
  let value_loss_copy = value_loss.clone();
  let entropy_mul = (entropy.mul(tf.scalar(0.01))).mul(tf.scalar(-1));
  let total_loss_1 = value_loss_copy.mul(tf.scalar(0.5, dtype='float32'));

  let total_loss_2 = total_loss_1.add(policy_loss);
  let total_loss = total_loss_2.add(entropy_mul);
  total_loss.print();
  return total_loss.mean();
}

Edit 2:

I managed to get compute_loss working as the loss function specified in model.compile(). However, it is required to take exactly two inputs (predictions, labels), which does not suit me because I want to feed in several parameters.
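
For reference, here is a minimal sketch of the two-argument contract being described (the model, optimizer and mean-squared-error loss below are placeholders, not part of the original code):

const model = tf.sequential({
  layers: [tf.layers.dense({units: 1, inputShape: [4]})]
});

// model.compile() only accepts a loss of the form (yTrue, yPred) => Tensor,
// so extra arguments such as memory or done cannot be passed in directly.
model.compile({
  optimizer: 'adam',
  loss: (yTrue, yPred) => tf.losses.meanSquaredError(yTrue, yPred)
});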

I am completely lost on this.

Best Answer

The error says it all. Your issue has to do with tf.variableGrads. The loss should be a scalar computed using only the available tf tensor operators; it should not return a tensor the way the function in the question does.

Here is an example of what the loss should look like:

const a = tf.variable(tf.tensor1d([3, 4]));
const b = tf.variable(tf.tensor1d([5, 6]));
const x = tf.tensor1d([1, 2]);

const f = () => a.mul(x.square()).add(b.mul(x)).sum(); // f is a function
// df/da = x ^ 2, df/db = x
const {value, grads} = tf.variableGrads(f); // gradient of f as respect of each variable

Object.keys(grads).forEach(varName => grads[varName].print());

/!\ Note that the gradients are computed with respect to variables created with tf.variable.
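
As a small illustration of that remark (the names w and c are made up for this sketch): only the tensor created with tf.variable receives an entry in grads; the plain tensor does not.

const w = tf.variable(tf.scalar(2)); // trainable variable -> gets a gradient entry
const c = tf.scalar(3);              // plain tensor -> no gradient entry

const {value, grads} = tf.variableGrads(() => w.mul(c).square().asScalar());
Object.keys(grads).forEach(name => grads[name].print()); // d/dw (w*c)^2 = 2*c^2*w = 36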

Update:

You are not computing the gradients the way they should be computed. Here is the fix.

function compute_loss(done, new_state, memory, agent, gamma=0.99) {
  const f = () => {
    let reward_sum = 0.;
    if(done) {
      reward_sum = 0.;
    } else {
      reward_sum = agent.call(tf.oneHot(new_state, 12).reshape([1, 9, 12]))
        .values.flatten().get(0);
    }

    let discounted_rewards = [];
    let memory_reward_rev = memory.rewards;
    for(let reward of memory_reward_rev.reverse()) {
      reward_sum = reward + gamma * reward_sum;
      discounted_rewards.push(reward_sum);
    }
    discounted_rewards.reverse();

    let onehot_states = [];
    for(let state of memory.states) {
      onehot_states.push(tf.oneHot(state, 12));
    }
    let init_onehot = onehot_states[0];

    for(let i=1; i<onehot_states.length; i++) {
      init_onehot = init_onehot.concat(onehot_states[i]);
    }

    let log_val = agent.call(
      init_onehot.reshape([memory.states.length, 9, 12])
    );

    let disc_reward_tensor = tf.tensor(discounted_rewards);
    let advantage = disc_reward_tensor.reshapeAs(log_val.values).sub(log_val.values);
    let value_loss = advantage.square();
    log_val.values.print();

    let policy = tf.softmax(log_val.logits);
    let logits_cpy = log_val.logits.clone();

    let entropy = policy.mul(logits_cpy.mul(tf.scalar(-1)));
    entropy = entropy.sum();

    let memory_actions = [];
    for(let i=0; i< memory.actions.length; i++) {
      memory_actions.push(new Array(2000).fill(0));
      memory_actions[i][memory.actions[i]] = 1;
    }
    memory_actions = tf.tensor(memory_actions);
    let policy_loss = tf.losses.softmaxCrossEntropy(memory_actions.reshape([memory.actions.length, 2000]), log_val.logits);

    let value_loss_copy = value_loss.clone();
    let entropy_mul = (entropy.mul(tf.scalar(0.01))).mul(tf.scalar(-1));
    let total_loss_1 = value_loss_copy.mul(tf.scalar(0.5, dtype='float32'));

    let total_loss_2 = total_loss_1.add(policy_loss);
    let total_loss = total_loss_2.add(entropy_mul);
    total_loss.print();
    return total_loss.mean().asScalar();
  };

  return tf.variableGrads(f);
}

Note that you will quickly run into memory-consumption issues. It is recommended to wrap the function being differentiated with tf.tidy so that intermediate tensors are disposed.
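
A hedged sketch of that advice, assuming the same done, new_state, memory and agent as in the question (the Adam optimizer and learning rate are illustrative, not part of the original answer):

const optimizer = tf.train.adam(1e-3);

// tf.tidy disposes every intermediate tensor created inside the callback
// and keeps only the tensors contained in the returned object.
const {value, grads} = tf.tidy(() => compute_loss(done, new_state, memory, agent));

optimizer.applyGradients(grads); // update the tf.variable weights in place
value.print();                   // the scalar loss
tf.dispose(grads);               // free the gradient tensors once applied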

Regarding javascript - Computing the gradient of the loss with Tensorflow.js, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/54728772/
