gpt4 book ai didi

python - Cython 中的水稻编码

转载 作者:太空宇宙 更新时间:2023-11-04 06:04:49 25 4
gpt4 key购买 nike

这是著名的 Rice 编码(= Golomb 编码,M = 2^k http://en.wikipedia.org/wiki/Golomb_coding)的一个实现,广泛用于压缩算法,在 Python 中。

不幸的是它相当慢。这种低速的原因可能是什么? (StringIO?数据是逐字节写入的吗?)

为了加快编码速度,您会推荐使用什么? 您会使用什么技巧来使用 Cython 加速它?

import struct
import StringIO

def put_bit(f, b):
global buff, filled
buff = buff | (b << (7-filled))
if (filled == 7):
f.write(struct.pack('B',buff))
buff = 0
filled = 0
else:
filled += 1

def rice_code(f, x, k):
q = x / (1 << k)
for i in range(q):
put_bit(f, 1)
put_bit(f, 0)
for i in range(k-1, -1, -1):
put_bit(f, (x >> i) & 1)

def compress(L, k):
f = StringIO.StringIO()
global buff, filled
buff = 0
filled = 0
for x in L: # encode all numbers
rice_code(f, x, k)
for i in range(8-filled): # write the last byte (if necessary pad with 1111...)
put_bit(f, 1)
return f.getvalue()

if __name__ == '__main__':
print struct.pack('BBB', 0b00010010, 0b00111001, 0b01111111) #see http://fr.wikipedia.org/wiki/Codage_de_Rice#Exemples
print compress([1,2,3,10],k = 3)

PS:这个问题应该移到https://codereview.stackexchange.com/吗? ?

最佳答案

在构建压缩结果时,我会使用 C 风格的缓冲区而不是 StringIO,并且我会尝试在编码循环中仅使用 C 风格的临时缓冲区。我还注意到您可以预先初始化您的缓冲区以填充设置位(“1”位),这将使具有大商的编码值更快,因为您可以简单地跳过输出缓冲区中的那些位。我考虑到这些事情重写了压缩函数,并测量了结果的速度,看起来我的版本比你的编码器快十倍以上,但生成的代码可读性较差。

这是我的版本:


cimport cpython.string
cimport libc.stdlib
cimport libc.string
import struct

cdef int BUFFER_SIZE = 4096

def compress(L, k):
result = ''

cdef unsigned cvalue
cdef char *position
cdef int bit, nbit
cdef unsigned q, r
cdef unsigned ck = k
cdef unsigned mask = (1 << ck) - 1

cdef char *buff = <char *>libc.stdlib.malloc(BUFFER_SIZE)
if buff is NULL:
raise MemoryError

try:
# Initialize the buffer space is assumed to contain all set bits
libc.string.memset(buff, 0xFF, BUFFER_SIZE)

position = buff
bit = 7

for value in L:
cvalue = value
q = cvalue >> ck
r = cvalue & mask

# Skip ahead some number of pre-set one bits for the quotient
position += q / 8
bit -= q % 8
if bit < 0:
bit += 8
position += 1

# If we have gone off the end of the buffer, extract
# the result and reset buffer pointers
while position - buff >= BUFFER_SIZE:
block = cpython.string.PyString_FromStringAndSize(
buff, BUFFER_SIZE)
result = result + block

libc.string.memset(buff, 0xFF, BUFFER_SIZE)
position = position - BUFFER_SIZE

# Clear the final bit to indicate the end of the quotient
position[0] = position[0] ^ (1 << bit)
if bit > 0:
bit = bit - 1
else:
position += 1
bit = 7

# Check for buffer overflow
if position - buff >= BUFFER_SIZE:
block = cpython.string.PyString_FromStringAndSize(
buff, BUFFER_SIZE)
result = result + block

libc.string.memset(buff, 0xFF, BUFFER_SIZE)
position = buff

# Encode the remainder bits one by one
for nbit in xrange(k - 1, -1, -1):
position[0] = (position[0] & ~(1 << bit)) | \
(((r >> nbit) & 1) << bit)

if bit > 0:
bit = bit - 1
else:
position += 1
bit = 7

# Check for buffer overflow
if position - buff >= BUFFER_SIZE:
block = cpython.string.PyString_FromStringAndSize(
buff, BUFFER_SIZE)
result = result + block

libc.string.memset(buff, 0xFF, BUFFER_SIZE)
position = buff

# Advance if we have partially used the last byte
if bit < 7:
position = position + 1

# Extract the used portion of the buffer
block = cpython.string.PyString_FromStringAndSize(
buff, position - buff)
result = result + block

return result

finally:
libc.stdlib.free(buff)


def test():
a = struct.pack('BBB', 0b00010010, 0b00111001, 0b01111111) #see http://fr.wikipedia.org/wiki/Codage_de_Rice#Exemples
b = compress([1,2,3,10],k = 3)

assert a == b

关于python - Cython 中的水稻编码,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/22737856/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com