gpt4 book ai didi

python - 以函数为核的卷积

转载 作者:太空宇宙 更新时间:2023-11-04 02:16:57 26 4
gpt4 key购买 nike

我需要试验卷积,其中内核不是常数,而是取决于输入的补丁(我将其称为过滤器以区分)并由函数 f 计算。

所以我需要的是:

conv2d ::
  R^(batch x height x width x in_channels) x
  (f : R^(filter_height x filter_width x in_channels)
     -> R^(filter_height x filter_width x in_channels x out_channels))
  -> out

我不想要的只是在卷积的每个补丁上评估 f:f 只为输入的每个补丁生成过滤器,然后再将该过滤器应用于对应的补丁,这一点对我的应用程序至关重要。

这是否可以通过 tf 以某种方式破解,如果不能,是否有合理的方法来扩展 tf 以提供我所需的功能?我从未写过 CUDA,但我并不回避它。

最佳答案

编辑:

正如 OP 所建议的,考虑到操作的复杂性,使用 tf.map_fn 可能会变得更容易处理:

import tensorflow as tf
import numpy as np

mode_same = True # True to make output same size as input
BATCH_SIZE = 10  # number of images per batch
HEIGHT = 100  # input image height
WIDTH = 200  # input image width
IN_CHANNELS = 3  # channels per input image
FILTER_HEIGHT = 10  # height of each per-pixel filter
FILTER_WIDTH = 7  # width of each per-pixel filter
OUT_CHANNELS = 5  # channels produced by each filter

def make_img_filters(img):
    """Dummy stand-in for the user-supplied filter function ``f``.

    Given one image of shape (height, width, channels), return one filter
    bank per pixel, i.e. a tensor of shape
    (height, width, FILTER_HEIGHT, FILTER_WIDTH, channels, OUT_CHANNELS).
    Here it just returns zeros; replace with the real per-patch filter
    computation.

    NOTE(review): the original snippet had its indentation stripped by the
    web scraper; it is restored here.
    """
    img_shape = tf.shape(img)
    img_height = img_shape[0]
    img_width = img_shape[1]
    img_ch = img_shape[2]
    filters_shape = (img_height, img_width, FILTER_HEIGHT, FILTER_WIDTH, img_ch, OUT_CHANNELS)
    return tf.zeros(filters_shape, dtype=img.dtype)

def filter_img(img, mode_same=True):
    """Apply a per-pixel filter bank to a single image.

    Args:
        img: tensor of shape (height, width, in_channels).
        mode_same: if True, pad the image so the output keeps the input's
            spatial size ("same"); if False, crop the filters to the valid
            region ("valid").

    Returns:
        Tensor of shape (out_height, out_width, OUT_CHANNELS), where the
        spatial size equals the input's for ``mode_same=True`` and is reduced
        by (FILTER_HEIGHT-1, FILTER_WIDTH-1) otherwise.

    NOTE(review): indentation was stripped by the scraper and is restored.
    """
    img_filters = make_img_filters(img)
    # Dynamic shapes (graph-mode tensors).
    img_shape = tf.shape(img)
    img_height = img_shape[0]
    img_width = img_shape[1]
    filters_shape = tf.shape(img_filters)
    filter_height = filters_shape[2]
    filter_width = filters_shape[3]
    # Margins to pad (same) or crop (valid) on each side.
    margin_bottom = filter_height // 2
    margin_top = tf.maximum(filter_height - margin_bottom - 1, 0)
    margin_right = filter_width // 2
    margin_left = tf.maximum(filter_width - margin_right - 1, 0)
    # Pad the image or crop the filters depending on the mode.
    img_pad = img
    img_filters_crop = img_filters
    if mode_same:
        img_pad = tf.pad(img, [[margin_top, margin_bottom], [margin_left, margin_right], [0, 0]])
        img_height += margin_top + margin_bottom
        img_width += margin_left + margin_right
    else:
        img_filters_crop = img_filters[margin_top:img_height - margin_bottom,
                                       margin_left:img_width - margin_right]
    # Build the tensor of image patches: (H, W, FH, FW, C).
    # The Python loops need static sizes, hence the FILTER_* constants
    # rather than the dynamic filter_height/filter_width tensors above.
    # This could be replaced with tf.while_loop and tf.TensorArray.
    img_extend = tf.stack([img_pad[i:(img_height - (FILTER_HEIGHT - i - 1))]
                           for i in range(FILTER_HEIGHT)], axis=2)
    img_extend = tf.stack([img_extend[:, i:(img_width - (FILTER_WIDTH - i - 1))]
                           for i in range(FILTER_WIDTH)], axis=3)
    # Contract each patch against its per-pixel filter.
    # BUG FIX: the original computed this twice (an einsum whose result was
    # immediately overwritten by an equivalent multiply-and-reduce_sum);
    # keep a single computation.
    img_result = tf.einsum('hwpqc,hwpqcd->hwd', img_extend, img_filters_crop)
    return img_result

# Input placeholder (TF1-style graph mode): a batch of images.
imgs = tf.placeholder(tf.float32, [None, HEIGHT, WIDTH, IN_CHANNELS])
# BUG FIX: the original also declared a `filters` placeholder and fed it,
# but `result` never consumed it (filter_img builds its own filters via
# make_img_filters), so that dead placeholder and its feed are removed.
# Compute the "convolution" by mapping the single-image op over the batch.
result = tf.map_fn(lambda img: filter_img(img, mode_same), imgs)

# Smoke test.  NOTE(review): indentation restored after scraper stripping.
with tf.Session() as sess:
    imgs_random = np.random.random((BATCH_SIZE, HEIGHT, WIDTH, IN_CHANNELS))
    value = sess.run(result, feed_dict={imgs: imgs_random})
    print(value.shape)
    # (10, 91, 194, 5) with mode_same=False, (10, 100, 200, 5) with mode_same=True

如果我对你的理解正确,这应该可以完成你想要的操作。它可能不是最有效的方法,但我不确定使用标准 TensorFlow 操作是否可以更快地完成它。

import tensorflow as tf
import numpy as np

# Fully batched variant: the per-pixel filter banks are fed in as one big
# placeholder instead of being generated per image.
# NOTE(review): the scraper stripped all indentation from this snippet;
# it is restored here without changing the logic.
mode_same = False  # True to make output same size as input
BATCH_SIZE = 10
HEIGHT = 100
WIDTH = 200
IN_CHANNELS = 3
FILTER_HEIGHT = 10
FILTER_WIDTH = 7
OUT_CHANNELS = 5

# Inputs: a batch of images and one filter bank per output pixel.
imgs = tf.placeholder(tf.float32, [None, HEIGHT, WIDTH, IN_CHANNELS])
filters = tf.placeholder(tf.float32, [None, HEIGHT, WIDTH, FILTER_HEIGHT, FILTER_WIDTH, IN_CHANNELS, OUT_CHANNELS])

# Dynamic shapes (graph-mode tensors).
imgs_shape = tf.shape(imgs)
img_height = imgs_shape[1]
img_width = imgs_shape[2]
filters_shape = tf.shape(filters)
filter_height = filters_shape[3]
filter_width = filters_shape[4]

# Margins to pad (same) or crop (valid) on each side.
margin_bottom = filter_height // 2
margin_top = tf.maximum(filter_height - margin_bottom - 1, 0)
margin_right = filter_width // 2
margin_left = tf.maximum(filter_width - margin_right - 1, 0)

# Pad the images or crop the filters depending on the mode.
imgs_pad = imgs
filters_crop = filters
if mode_same:
    imgs_pad = tf.pad(imgs, [[0, 0], [margin_top, margin_bottom], [margin_left, margin_right], [0, 0]])
    img_height += margin_top + margin_bottom
    img_width += margin_left + margin_right
else:
    filters_crop = filters[:, margin_top:img_height - margin_bottom,
                           margin_left:img_width - margin_right]

# Build the tensor of image patches: (batch, H, W, FH, FW, C).
# The Python loops need static sizes, hence the FILTER_* constants.
# This could be replaced with tf.while_loop and tf.TensorArray.
imgs_extend = tf.stack([imgs_pad[:, i:(img_height - (FILTER_HEIGHT - i - 1))]
                        for i in range(FILTER_HEIGHT)], axis=3)
imgs_extend = tf.stack([imgs_extend[:, :, i:(img_width - (FILTER_WIDTH - i - 1))]
                        for i in range(FILTER_WIDTH)], axis=4)

# Contract each patch against its per-pixel filter.
result = tf.einsum('ahwpqc,ahwpqcd->ahwd', imgs_extend, filters_crop)

# Smoke test.
with tf.Session() as sess:
    imgs_random = np.random.random((BATCH_SIZE, HEIGHT, WIDTH, IN_CHANNELS))
    filters_random = np.random.random((BATCH_SIZE, HEIGHT, WIDTH, FILTER_HEIGHT, FILTER_WIDTH, IN_CHANNELS, OUT_CHANNELS))
    value = sess.run(result, feed_dict={imgs: imgs_random, filters: filters_random})
    print(value.shape)
    # (10, 91, 194, 5) with mode_same=False, (10, 100, 200, 5) with mode_same=True

关于python - 以函数为核的卷积,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52445630/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com