gpt4 book ai didi

python - 在 Python 中创建快速 RGB 查找表

转载 作者:行者123 更新时间:2023-12-02 16:13:42 25 4
gpt4 key购买 nike

我有一个函数,我将调用 'rgb2something' 将 RGB 数据 [1x1x3] 转换为单个值(概率),循环输入 RGB 数据中的每个像素结果相当慢。

我尝试了以下方法来加快转换速度。要生成 LUT(查找表):

import numpy as np

# Build a flat lookup table holding one entry per possible 8-bit RGB colour.
levels = 256
levels2 = levels**2
# The entry for (r, g, b) lives at index r + g * levels + b * levels2.
lut = [0] * (levels ** 3)

levels_range = range(0, levels)

# NOTE: this makes levels**3 (about 16.7 million) Python-level calls to
# rgb2something, which is what makes the table so slow to build.
for r in levels_range:
    for g in levels_range:
        for b in levels_range:
            lut[r + (g * levels) + (b * levels2)] = rgb2something(r, g, b)

并将 RGB 转换为转换后的概率图像:
# Per-pixel flat index into the table: r + g * 256 + b * 65536.
result = np.take(lut, r_channel + (g_channel * 256) + (b_channel * 65536))

然而,生成 LUT 和计算结果仍然很慢。在 2 维中它相当快,但是在 3 维(r、g 和 b)中它很慢。我怎样才能提高这个性能?

编辑
rgb2something(r, g, b)看起来像这样:
def rgb2something(r, g, b):
    """Return a Mahalanobis-style distance between one RGB pixel and the model.

    Args:
        r, g, b: Channel values of a single pixel.

    Returns:
        The square root of the quadratic form of the mean difference with the
        inverse pooled covariance.
    """
    # A single-row sample: its mean is the pixel itself, so the centred data
    # and therefore y_cov are all zeros.
    y = np.array([[r, g, b]])
    y_mean = np.mean(y, axis=0)
    y_centered = y - y_mean
    y_cov = y_centered.T.dot(y_centered) / len(y_centered)
    m = len(Consts.x)
    n = len(y)
    q = m + n
    # NOTE(review): x_cov is not defined in the visible code; presumably it is
    # the model covariance (perhaps Consts.x_cov) -- confirm.
    pool_cov = (m / q * x_cov) + (n / q * y_cov)
    inv_pool_cov = np.linalg.inv(pool_cov)
    # Use a dedicated name instead of overwriting the `g` channel parameter.
    mean_diff = Consts.x_mean - y_mean
    mah = mean_diff.T.dot(inv_pool_cov).dot(mean_diff) ** 0.5
    return mah

编辑2:

下面是我想要实现的完整可运行代码示例。我正在使用 OpenCV,因此欢迎任何 OpenCV 方法(例如应用 LUT),C/C++ 方法同样欢迎:
import matplotlib.pyplot as plt
import numpy as np
import cv2

class Model:
    """Reference colour model used for Mahalanobis-distance computations.

    All attributes are computed once, at class-definition time, from the
    sample matrix ``x`` (one RGB sample per row).
    """

    # Three points in 3-D always produce a rank-deficient covariance matrix,
    # which cannot be meaningfully inverted. A fourth sample that is not
    # coplanar with the other three keeps the covariance invertible.
    x = np.array([
        [6, 5, 2],
        [2, 5, 7],
        [6, 3, 1],
        [1, 8, 3],
    ])
    x_mean = np.mean(x, axis=0)
    x_centered = x - x_mean
    x_covariance = x_centered.T.dot(x_centered) / len(x_centered)
    m = len(x)
    n = 1  # Only ever comparing to a single pixel
    q = m + n
    # The pixel's own term, (n / q * y_cov), is always 0 for a single point,
    # so only the model term remains.
    pooled_covariance = (m / q * x_covariance)
    inverse_pooled_covariance = np.linalg.inv(pooled_covariance)

def rgb2something(r, g, b):
    """Calculate the Mahalanobis distance between one pixel and model X.

    Args:
        r, g, b: Channel values of a single pixel.

    Returns:
        sqrt(d . inverse_pooled_covariance . d), where d is the model mean
        minus the pixel.
    """
    # A one-row sample, so its mean is just the pixel itself.
    y = np.array([[r, g, b]])
    y_mean = np.mean(y, axis=0)
    # Use a dedicated name instead of overwriting the `g` channel parameter.
    mean_diff = Model.x_mean - y_mean
    mah = mean_diff.T.dot(Model.inverse_pooled_covariance).dot(mean_diff) ** 0.5
    return mah

def generate_lut():
    """Build the full RGB lookup table by calling rgb2something per colour.

    Returns:
        A flat list with 256**3 entries; the value for (r, g, b) is stored at
        index ``r + g * 256 + b * 65536``.
    """
    levels = 256
    levels2 = levels**2
    lut = [0] * (levels ** 3)

    levels_range = range(0, levels)

    # One Python-level call per colour (about 16.7 million in total): slow.
    for r in levels_range:
        for g in levels_range:
            for b in levels_range:
                lut[r + (g * levels) + (b * levels2)] = rgb2something(r, g, b)

    return lut

def calculate_distance(lut, input_image):
    """Map every pixel of an image through a flat RGB lookup table.

    Args:
        lut: Flat table where the entry for channels (c0, c1, c2) is stored at
            index ``c0 + c1 * 256 + c2 * 65536``.
        input_image: Integer array whose last axis holds the three channels.

    Returns:
        Array with the image's leading shape holding the looked-up values.
    """
    # Widen before the index arithmetic: with uint8 input, multiplying by 256
    # or 65536 cannot be represented in the channel dtype (NumPy 2 raises
    # OverflowError; older versions only worked through value-based promotion).
    pixels = np.asarray(input_image, dtype=np.intp)
    flat_index = pixels[..., 0] + (pixels[..., 1] * 256) + (pixels[..., 2] * 65536)
    return np.take(lut, flat_index)

# Build the table once (slow), then map a random test image through it.
lut = generate_lut()
# randint's upper bound is exclusive, so 256 is needed to cover values 0..255.
rgb = np.random.randint(256, size=(1080, 1920, 3), dtype=np.uint8)
result = calculate_distance(lut, rgb)

cv2.imshow("Example", rgb)
# NOTE(review): result holds floating-point distances rather than 8-bit
# intensities; confirm the display scaling is what is intended.
cv2.imshow("Result", result)
cv2.waitKey(0)

最佳答案

更新:添加了 blas 优化

有几个直接且非常有效的优化:

(1) 向量化,向量化!对这段代码中的所有内容进行矢量化并不难。见下文。

(2) 使用正确的查找,即花哨的索引,而不是 np.take
(3) 使用 Cholesky 分解。借助 BLAS 的 dtrmm,我们可以利用其三角结构。

这是代码。只需将其添加到 OP 代码的末尾(在 EDIT 2 下)。除非您非常有耐心,否则您可能还想注释掉 lut = generate_lut() 和 result = calculate_distance(lut, rgb) 这两行,以及对 cv2 的所有引用。我还在 x 中添加了一个随机行,使其协方差矩阵非奇异。

class Full_Model(Model):
    """Model extended with a Cholesky factor of the inverse pooled covariance."""

    # Lower-triangular factor: ch @ ch.T equals the inverse pooled covariance,
    # which therefore has to be symmetric positive definite.
    ch = np.linalg.cholesky(Model.inverse_pooled_covariance)
    # Model mean already projected through the factor, so callers can subtract
    # it after projecting their pixels.
    chx = Model.x_mean @ ch

def rgb2something_vectorized(rgb):
    """Compute the distance to the model for every pixel at once.

    Args:
        rgb: Array whose last axis holds the three channel values.

    Returns:
        Array with the leading shape of ``rgb``: the Euclidean norm of each
        centred pixel after projection through ``Full_Model.ch``.
    """
    centered = rgb - Full_Model.x_mean
    # Since ch @ ch.T is the inverse covariance, the squared norm of the
    # projected vector equals the Mahalanobis quadratic form.
    projected = centered @ Full_Model.ch
    return np.sqrt(np.sum(projected ** 2, axis=-1))

from scipy.linalg import blas

def rgb2something_blas(rgb):
    """Variant of rgb2something_vectorized using the BLAS dtrmm routine.

    Args:
        rgb: Array whose last axis holds the channel values.

    Returns:
        Array with the leading shape of ``rgb`` holding the per-pixel distance.
    """
    *shp, nchan = rgb.shape
    # Flatten to one pixel per column, as the triangular product expects.
    pixels_t = rgb.reshape(-1, nchan).T
    # NOTE(review): the trailing positional zeros are presumably side, lower,
    # trans_a, diag and overwrite_b -- confirm against the SciPy BLAS docs.
    projected = blas.dtrmm(1, Full_Model.ch.T, pixels_t, 0, 0, 0, 0, 0).T
    # Subtract the pre-projected model mean instead of centring every pixel.
    diff = projected - Full_Model.chx
    # Row-wise dot product of diff with itself, i.e. the squared norm.
    return np.sqrt(np.einsum('...i,...i', diff, diff)).reshape(shp)

def generate_lut_vectorized():
    """Build the full 256**3 lookup table in one vectorized call.

    Returns:
        Array of shape (256, 256, 256) indexed as ``lut[b, g, r]``.
    """
    # np.indices gives a (3, 256, 256, 256) grid; transposing reverses the
    # axes, so element [b, g, r] of the result is the triple (r, g, b).
    all_colours = np.transpose(np.indices((256, 256, 256)))
    return rgb2something_vectorized(all_colours)

def generate_lut_blas():
    """Build the full 256**3 lookup table through rgb2something_blas.

    Returns:
        Array of shape (256, 256, 256) indexed as ``lut[b, g, r]``.
    """
    rng = np.arange(256)
    # arr[b, g, r] will hold the triple (r, g, b) as floats.
    arr = np.empty((256, 256, 256, 3))
    # Fill the first slab: channel 0 varies along the last spatial axis,
    # channel 1 along the middle one.
    arr[0, ..., 0] = rng
    arr[0, ..., 1] = rng[:, None]
    # Replicate that slab into all the others ...
    arr[1:, ...] = arr[0]
    # ... then set channel 2, which varies along the first axis. This must
    # come last because the copy above also overwrote channel 2.
    arr[..., 2] = rng[:, None, None]
    return rgb2something_blas(arr)

def calculate_distance_vectorized(lut, input_image):
    """Look up every pixel in a 3-D table using fancy indexing.

    Args:
        lut: Array indexed as ``lut[c2, c1, c0]``.
        input_image: Integer array whose last axis holds (c0, c1, c2).

    Returns:
        Array with the image's leading shape holding the looked-up values.
    """
    # The table's axes are in reverse channel order, hence 2, 1, 0.
    return lut[input_image[..., 2], input_image[..., 1], input_image[..., 0]]

# test code

def random_check_lut(lut):
    """Spot-check a lookup table against the scalar rgb2something.

    Because the original lut generator is excruciatingly slow,
    we only compare a random sample, using the original code.

    Args:
        lut: Table whose flattened layout stores the value for (r, g, b) at
            index ``r + g * 256 + b * 65536``.

    Raises:
        AssertionError: If a sampled entry is not close to the value computed
            by ``rgb2something(r, g, b)``.
    """
    levels = 256
    levels2 = levels**2
    lut = lut.ravel()

    for r, g, b in np.random.randint(0, 256, (1000, 3)):
        assert np.isclose(lut[r + (g * levels) + (b * levels2)], rgb2something(r, g, b))

import time

# Each entry is (timestamp taken just before a step, label of that step); the
# last entry only marks the end of the final step. perf_counter is used because
# it is a monotonic, high-resolution clock intended for measuring intervals.
td = []
td.append((time.perf_counter(), 'create lut vectorized'))
lutv = generate_lut_vectorized()
td.append((time.perf_counter(), 'create lut using blas'))
lutb = generate_lut_blas()
td.append((time.perf_counter(), 'lookup using np.take'))
res = calculate_distance(lutv, rgb)
td.append((time.perf_counter(), 'process on the fly (no lookup)'))
resotf = rgb2something_vectorized(rgb)
td.append((time.perf_counter(), 'process on the fly (blas)'))
resbla = rgb2something_blas(rgb)
td.append((time.perf_counter(), 'lookup using fancy indexing'))
resv = calculate_distance_vectorized(lutv, rgb)
td.append((time.perf_counter(), None))

# All approaches must agree with each other and with the scalar reference.
print("sanity checks ... ", end='')
assert np.allclose(res, resotf) and np.allclose(res, resv) \
    and np.allclose(res, resbla) and np.allclose(lutv, lutb)
random_check_lut(lutv)
print('all ok\n')

# Consecutive timestamp differences pair up with the label recorded at the
# start of each step; zip stops before the trailing None label.
t, d = zip(*td)
for ti, di in zip(np.diff(t), d):
    print(f'{di:32s} {ti:10.3f} seconds')

sample 运行:
sanity checks ... all ok

create lut vectorized 1.116 seconds
create lut using blas 0.917 seconds
lookup using np.take 0.398 seconds
process on the fly (no lookup) 0.127 seconds
process on the fly (blas) 0.069 seconds
lookup using fancy indexing 0.064 seconds

我们可以看到,最佳查找胜过最佳即时计算。也就是说,该示例可能高估了查找成本,因为随机像素可能不如自然图像对缓存友好。

原始答案(也许对某些人仍然有用)

如果 rgb2something 不能被矢量化,并且你想处理一幅典型的图像,那么可以使用 np.unique 获得不错的加速。

如果 rgb2something 很昂贵并且必须处理多个图像,那么 unique 可以与缓存结合使用,使用 functools.lru_cache 可以方便地完成——唯一的(次要)绊脚石是:参数必须是可散列的。事实证明,这种被迫的代码修改(将 rgb 数组转换为 3 字节字符串)恰好有益于性能。

仅当您拥有覆盖大多数色调的大量像素时,才值得使用完整的查找表。在这种情况下,最快的方法是使用 numpy 花式索引进行实际查找。
import numpy as np
import time
import functools

def rgb2something(rgb):
    """Stand-in for an expensive per-pixel function: return the channel mean.

    Args:
        rgb: NumPy array holding one pixel's channel values.

    Returns:
        The mean of the channel values.
    """
    # waste some time: the result is discarded on purpose, it only makes the
    # call more expensive for the benchmark.
    np.exp(0.1*rgb)
    return rgb.mean()

@functools.lru_cache(None)
def rgb2something_lru(rgb):
    """Cached variant of rgb2something keyed by the pixel's raw bytes.

    Args:
        rgb: Bytes object holding the three channel bytes of one pixel. Keys
            shorter than three bytes are accepted because items read from a
            NumPy 'S3' array have their trailing NUL bytes stripped.

    Returns:
        The mean of the three channel values.
    """
    # Restore any stripped trailing zero channels so that e.g. (6, 0, 0) is
    # averaged over three values instead of one, and black is not empty.
    rgb = np.frombuffer(rgb.ljust(3, b'\x00'), np.uint8)
    # waste some time: the result is discarded on purpose.
    np.exp(0.1*rgb)
    return rgb.mean()

def apply_to_img(img):
    """Apply rgb2something to every pixel with a plain Python loop.

    Args:
        img: Array of shape (height, width, channels).

    Returns:
        Array of shape (height, width) with one value per pixel.
    """
    shp = img.shape
    # Flatten to one row per pixel, map, then restore the 2-D layout.
    return np.reshape([rgb2something(x) for x in img.reshape(-1, shp[-1])], shp[:2])

def apply_to_img_lru(img):
    """Apply the cached rgb2something_lru to every pixel.

    Args:
        img: Array of shape (height, width, 3); viewing it as 3-byte strings
            assumes one byte per channel (e.g. uint8) -- TODO confirm callers.

    Returns:
        Array of shape (height, width) with one value per pixel.
    """
    shp = img.shape
    # Viewing the flat bytes as 'S3' turns each pixel into a hashable key.
    # NumPy strips trailing NUL bytes from such items, so pixels whose last
    # channels are zero yield keys shorter than three bytes.
    return np.reshape([rgb2something_lru(x) for x in img.ravel().view('S3')], shp[:2])

def apply_to_img_smart(img, print_stats=True):
    """Apply rgb2something once per distinct colour and scatter the results.

    Args:
        img: Array of shape (height, width, channels).
        print_stats: When true, print the total and unique pixel counts.

    Returns:
        Array of shape (height, width) with one value per pixel.
    """
    shp = img.shape
    # unq holds the distinct pixels; bck maps every pixel to its row in unq.
    unq, bck = np.unique(img.reshape(-1, shp[-1]), return_inverse=True, axis=0)
    if print_stats:
        print('total no pixels', shp[0]*shp[1], '\nno unique pixels', len(unq))
    return np.array([rgb2something(x) for x in unq])[bck].reshape(shp[:2])

def apply_to_img_smarter(img, print_stats=True):
    """Combine the unique-colour trick with the cached rgb2something_lru.

    Args:
        img: Array of shape (height, width, 3); viewing it as 3-byte strings
            assumes one byte per channel (e.g. uint8) -- TODO confirm callers.
        print_stats: When true, print the total and unique pixel counts.

    Returns:
        Array of shape (height, width) with one value per pixel.
    """
    shp = img.shape
    # Each pixel becomes one 3-byte string, so unique runs on a 1-D array.
    unq, bck = np.unique(img.ravel().view('S3'), return_inverse=True)
    if print_stats:
        print('total no pixels', shp[0]*shp[1], '\nno unique pixels', len(unq))
    return np.array([rgb2something_lru(x) for x in unq])[bck].reshape(shp[:2])

def make_full_lut():
    """Build the complete 256**3 table by calling rgb2something per colour.

    Returns:
        Array of shape (256, 256, 256) indexed by the three channel values in
        order. Very slow: one Python-level call per colour.
    """
    x = np.empty((3,), np.uint8)
    # The comprehension's loop targets are the elements of x itself, so each
    # iteration writes the current channel values into the same scratch array
    # that is then passed to rgb2something.
    return np.reshape([rgb2something(x) for x[0] in range(256)
                       for x[1] in range(256) for x[2] in range(256)],
                      (256, 256, 256))

def make_full_lut_cheat(levels=256):  # for quicker testing lookup
    """Build the channel-mean lookup table directly with broadcasting.

    Args:
        levels: Number of values per channel; the default of 256 gives the
            full 8-bit table.

    Returns:
        Array of shape (levels, levels, levels) whose entry [i, j, k] is
        ``(i + j + k) / 3``.
    """
    # Open grids broadcast against each other, so no full index array is built.
    i, j, k = np.ogrid[:levels, :levels, :levels]
    return (i + j + k) / 3

def apply_to_img_full_lut(img, lut):
    """Look up every pixel of an image in a full 3-D table.

    Args:
        img: Integer array whose last axis holds the three channel values.
        lut: Table indexed as ``lut[c0, c1, c2]``.

    Returns:
        Array with the image's leading shape holding the looked-up values.
    """
    # Moving the channel axis to the front and unpacking it gives one index
    # array per table axis, i.e. plain fancy indexing.
    channels = np.moveaxis(img, -1, 0)
    return lut[tuple(channels)]

# The sample image moved from scipy.misc to scipy.datasets and was later
# removed from scipy.misc; try the new location first.
try:
    from scipy.datasets import face
except ImportError:
    from scipy.misc import face

# Time each strategy on the same sample image.
t0 = time.perf_counter()
bw = apply_to_img(face())
t1 = time.perf_counter()
print('naive                 ', t1-t0, 'seconds')

# First cached run fills the cache ...
t0 = time.perf_counter()
bw = apply_to_img_lru(face())
t1 = time.perf_counter()
print('lru first time        ', t1-t0, 'seconds')

# ... the second one is served from it.
t0 = time.perf_counter()
bw = apply_to_img_lru(face())
t1 = time.perf_counter()
print('lru second time       ', t1-t0, 'seconds')

t0 = time.perf_counter()
bw = apply_to_img_smart(face(), False)
t1 = time.perf_counter()
print('using unique:         ', t1-t0, 'seconds')

# Start the combined unique + cache measurement from a cold cache.
rgb2something_lru.cache_clear()

t0 = time.perf_counter()
bw = apply_to_img_smarter(face(), False)
t1 = time.perf_counter()
print('unique and lru first: ', t1-t0, 'seconds')

t0 = time.perf_counter()
bw = apply_to_img_smarter(face(), False)
t1 = time.perf_counter()
print('unique and lru second:', t1-t0, 'seconds')

# The broadcasting shortcut builds the table here; it stands in for the much
# slower make_full_lut.
t0 = time.perf_counter()
lut = make_full_lut_cheat()
t1 = time.perf_counter()
print('creating full lut:    ', t1-t0, 'seconds')

t0 = time.perf_counter()
bw = apply_to_img_full_lut(face(), lut)
t1 = time.perf_counter()
print('using full lut:       ', t1-t0, 'seconds')

print()
apply_to_img_smart(face())

# Pillow exposes the module as PIL.Image; the bare top-level name only exists
# in the long-obsolete original PIL distribution.
try:
    from PIL import Image
except ImportError:
    import Image
Image.fromarray(bw.astype(np.uint8)).save('bw.png')

sample 运行:
naive                  6.8886632949870545 seconds
lru first time 1.7458112589956727 seconds
lru second time 0.4085628940083552 seconds
using unique: 2.0951434450107627 seconds
unique and lru first: 2.0168916099937633 seconds
unique and lru second: 0.3118703299842309 seconds
creating full lut: 151.17599205300212 seconds
using full lut: 0.12164952099556103 seconds

total no pixels 786432
no unique pixels 134105

关于python - 在 Python 中创建快速 RGB 查找表,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52486840/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com