在 tensorflow 中计算 Hessian 非常简单:
# A single 3-element variable holds all three inputs of f.
x = tf.Variable([1., 1., 1.], dtype=tf.float32, name="x")
# Scalar function of the three components of x.
f = (x[0] + x[1] ** 2 + x[0] * x[1] + x[2]) ** 2
# Hessian of f with respect to x.
hessian = tf.hessians(f, x)
这会正确返回
[[ 8., 20., 4.],
[20., 34., 6.],
[ 4., 6., 2.]]
在我的真实案例中,我需要将它分成两个变量,而不是使用一个变量 x
来保存三个值:x
(保存前两个)和 y
(保存最后一个)。
# Same inputs split across two variables: x holds the first two values,
# y holds the last one.
x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
y = tf.Variable([1.], dtype=tf.float32, name="y")
# y has shape [1], so the scalar terms broadcast and f has shape [1].
f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
我尝试了一种最直接的做法:
# Passing a list yields one Hessian per variable; the result reported for this
# call contains no mixed x/y second derivatives.
hessian = tf.hessians(f, [x, y])
但我得到:[[ 8., 20.], [20., 34.]], [[2.]]
我也试过:
# Join x and y into a single 3-element tensor.
xy = tf.concat([x, y], axis=-1)
但是在定义 Hessian 时
# This call raises: the traceback shows the gradient handed to reshape is None.
# NOTE(review): presumably because f was built from x and y, not from xy, so
# f is not connected to xy in the graph.
hessian = tf.hessians(f, xy)
我得到一个非常严重的错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
510 as_ref=input_arg.is_ref,
--> 511 preferred_dtype=default_dtype)
512 except TypeError as err:
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
1174 if ret is None:
-> 1175 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1176
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
303 _ = as_ref
--> 304 return constant(v, dtype=dtype, name=name)
305
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
244 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245 allow_broadcast=True)
246
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
282 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283 allow_broadcast=allow_broadcast))
284 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
453 if values is None:
--> 454 raise ValueError("None values not supported.")
455 # if dtype is provided, forces numpy array to be the type
ValueError: None values not supported.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
524 observed = ops.internal_convert_to_tensor(
--> 525 values, as_ref=input_arg.is_ref).dtype.name
526 except ValueError as err:
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
1174 if ret is None:
-> 1175 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1176
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
303 _ = as_ref
--> 304 return constant(v, dtype=dtype, name=name)
305
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
244 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245 allow_broadcast=True)
246
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
282 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283 allow_broadcast=allow_broadcast))
284 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
453 if values is None:
--> 454 raise ValueError("None values not supported.")
455 # if dtype is provided, forces numpy array to be the type
ValueError: None values not supported.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-358-70bce7e5d400> in <module>
3 f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
4 xy = tf.concat([x, y], axis=-1)
----> 5 hessian = tf.hessians(f, xy)
~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gradients_impl.py in hessians(ys, xs, name, colocate_gradients_with_ops, gate_gradients, aggregation_method)
1405 for gradient, x in zip(_gradients, xs):
1406 # change shape to one-dimension without graph branching
-> 1407 gradient = array_ops.reshape(gradient, [-1])
1408
1409 # Declare an iterator and tensor array loop variables for the gradients.
~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py in reshape(tensor, shape, name)
7178 try:
7179 _, _, _op = _op_def_lib._apply_op_helper(
-> 7180 "Reshape", tensor=tensor, shape=shape, name=name)
7181 except (TypeError, ValueError):
7182 result = _dispatch.dispatch(
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
527 raise ValueError(
528 "Tried to convert '%s' to a tensor and failed. Error: %s" %
--> 529 (input_name, err))
530 prefix = ("Input '%s' of '%s' Op has type %s that does not match" %
531 (input_name, op_type_name, observed))
ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported.
1
编辑:这是一个更充实的解决方案,本质上是相同的,但适用于任意数量的变量。此外,我还添加了为雅可比矩阵使用 Python 或 TensorFlow 循环的选项。请注意,代码假定所有变量都是一维张量。
from itertools import combinations, count
import tensorflow as tf
def jacobian(y, x, tf_loop=False):
    """Build the Jacobian of a 1-D tensor ``y`` with respect to ``x``.

    Row ``i`` of the result is the gradient of ``y[i]`` with respect to ``x``,
    so the result has shape ``[size(y)] + x.shape``.

    Args:
        y: Tensor indexed along its first axis; the surrounding write-up
            assumes it is 1-D.
        x: Tensor or variable to differentiate with respect to.
        tf_loop: If true, stack the rows with a ``tf.while_loop``; otherwise
            use a Python loop that adds one gradient op per element of ``y``.
            The TensorFlow loop is always used when the static shape of ``y``
            is not fully defined, because the Python loop needs a concrete
            element count.

    Returns:
        A tensor whose ``i``-th slice along axis 0 is ``d y[i] / d x``.
    """

    def _gradient_row(i):
        # tf.gradients returns None when y[i] is not connected to x in the
        # graph. The mathematically correct row is then all zeros; passing
        # None on to tf.stack / TensorArray.write would raise instead.
        grad = tf.gradients(y[i], x)[0]
        return tf.zeros_like(x) if grad is None else grad

    n_rows = y.shape.num_elements()
    if tf_loop or n_rows is None:
        i = tf.constant(0, dtype=tf.int32)
        y_size = tf.size(y)
        rows = tf.TensorArray(dtype=y.dtype, size=y_size, element_shape=x.shape)
        _, rows = tf.while_loop(
            lambda i, rows: i < y_size,
            lambda i, rows: [i + 1, rows.write(i, _gradient_row(i))],
            [i, rows])
        return rows.stack()
    else:
        return tf.stack([_gradient_row(i) for i in range(n_rows)], axis=0)
def hessian_multivar(ys, xs, tf_loop=False):
    """Assemble the full Hessian of ``ys`` over several variables.

    The matrix is built block by block: the diagonal blocks come from
    ``tf.hessians`` and every off-diagonal block is the Jacobian of one
    variable's first-order gradient with respect to another variable.

    Args:
        ys: Tensor to differentiate twice.
        xs: List of variables; the surrounding write-up assumes each one is a
            1-D tensor, so every block is a matrix.
        tf_loop: Forwarded to ``jacobian`` to choose between a TensorFlow
            loop and a Python loop.

    Returns:
        The blocks concatenated into one matrix, ordered as in ``xs``.
    """
    n_vars = len(xs)
    # blocks[r][c] holds the second derivatives of ys w.r.t. xs[r] and xs[c].
    blocks = [[None] * n_vars for _ in range(n_vars)]

    # Diagonal blocks: the per-variable Hessians.
    per_variable = tf.hessians(ys, xs)
    for k, block in enumerate(per_variable):
        blocks[k][k] = block

    # First-order gradients, paired with the variable each belongs to.
    grads = tf.gradients(ys, xs)
    paired = list(zip(xs, grads))

    # Off-diagonal blocks: fill both orders for every unordered pair.
    for r, c in combinations(range(len(paired)), 2):
        x_r, g_r = paired[r]
        x_c, g_c = paired[c]
        blocks[r][c] = jacobian(g_r, x_c, tf_loop=tf_loop)
        blocks[c][r] = jacobian(g_c, x_r, tf_loop=tf_loop)

    # Glue each row of blocks side by side, then stack the rows.
    block_rows = [tf.concat(row, axis=1) for row in blocks]
    return tf.concat(block_rows, axis=0)
# Demo: full Hessian over three separate variables (2 + 1 + 2 = 5 inputs).
with tf.Graph().as_default():
    x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
    y = tf.Variable([1.], dtype=tf.float32, name="y")
    z = tf.Variable([1., 1.], dtype=tf.float32, name="z")
    # x * y * z has two elements, so f has two elements as well.
    f = (x[0] + x[1] ** 2 + x[0] * x[1] + y + x * y * z) ** 2
    variables = [x, y, z]
    hessian = hessian_multivar(f, variables)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        # Variables must be initialized before the Hessian can be evaluated.
        sess.run(init_op)
        hessian_value = sess.run(hessian)
        print(hessian_value)
输出:
[[26. 54. 30. 16. 4.]
[54. 90. 38. 6. 18.]
[30. 38. 16. 14. 14.]
[16. 6. 14. 2. 0.]
[ 4. 18. 14. 0. 2.]]
我不确定当前的 API 是否有一种“好的”方式来做到这一点。显然,您可以自己计算 Hessian 矩阵元素......它不是很优雅,也可能不是最快的解决方案,但在您的示例中可以这样做:
import tensorflow as tf
# Two separate variables: x holds the first two inputs, y the last one.
x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
y = tf.Variable([1.], dtype=tf.float32, name="y")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2

# Diagonal blocks of the Hessian (x/x and y/y second derivatives).
hx, hy = tf.hessians(f, [x, y])
# First-order derivatives with respect to x and y.
gx, gy = tf.gradients(f, [x, y])

# The remaining (mixed) elements of the Hessian are Jacobians of the
# first-order derivatives. TensorFlow does not implement a Jacobian op
# (https://github.com/tensorflow/tensorflow/issues/675), so it is built by
# hand: one gradient per element of gx, concatenated into a vector.
n_x = x.shape.num_elements()
hxy_rows = []
for i in range(n_x):
    hxy_rows.append(tf.gradients(gx[i], y)[0])
hxy = tf.concat(hxy_rows, axis=0)
# y has a single element, so one gradient call gives the whole y/x block.
[hyx] = tf.gradients(gy, x)

# Assemble the blocks: [hx | hxy] on top, [hyx | hy] at the bottom.
top_rows = tf.concat([hx, tf.expand_dims(hxy, 1)], axis=1)
bottom_row = tf.concat([tf.expand_dims(hyx, 0), hy], axis=1)
hessian = tf.concat([top_rows, bottom_row], axis=0)

# Evaluate the result.
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    print(sess.run(hessian))
输出:
[[ 8. 20. 4.]
[20. 34. 6.]
[ 4. 6. 2.]]
我是一名优秀的程序员,十分优秀!