- c - 在位数组中找到第一个零
- linux - Unix 显示有关匹配两种模式之一的文件的信息
- 正则表达式替换多个文件
- linux - 隐藏来自 xtrace 的命令
我有一个基于 LZW 压缩/解压缩算法的实现,并且大部分都将其平方。但是,我在测试的其中一个文件中遇到了问题。以下是所述文件的文本
#include "bits.h"
int check_endianness(){
int i = 1;
我的实现遇到的问题是 int i = 1;
之前的空格组 下面我分别包含了我的压缩和解压缩循环以及它们的相关调试输出。
压缩循环
i=0;
while(i < input_len && ret == LZW_ERR_OK){
//get next byte
char curChar = input_char(f_input, &io_flags);
i++;
//not necessary to check for stream end here since the while condition does that
if(io_flags == STREAM_ERR_READ){
ret = LZW_ERR_READ;
break;
}
seqset(&temp, &curChar, 1);
//add bytes to temp until a sequence is found that is not in lookup
while(i < input_len && dictionary_has_entry(lookup, temp)){
char curChar = input_char(f_input, &io_flags);
i++;
//check for errors / end of file
if(io_flags != STREAM_ERR_OK){
if(io_flags == STREAM_ERR_READ)
ret = LZW_ERR_READ;
break;
}
seqadd(&temp, &curChar, 1);
}
if(temp.length > 1){
#ifdef DEBUG
printf("Adding entry %d: ", lookup->next_value);
for(int j = 0; j < temp.length; j++)
printf("%.4x ", temp.data[j]);
printf("\n");
#endif // DEBUG
dictionary_set_entry(lookup, temp, DICT_SET_FOREVER);
temp.length--; //if temp.length == 1, then the EOF was probably reached
i--; //This is important so that the entire file is read
}
fseek(f_input, -1, SEEK_CUR);
output_short(f_output, dictionary_get_entry_byKey(lookup, temp)->value, STREAM_USE_ENDIAN);
#ifdef DEBUG
printf("Writing code: %d\n", dictionary_get_entry_byKey(lookup, temp)->value);
#endif // DEBUG
}
压缩输出
Adding entry 297: 007b 000d
Writing code: 123
Adding entry 298: 000d 000a 0020
Writing code: 273
Adding entry 299: 0020 0020
Writing code: 32
Adding entry 300: 0020 0020 0020
Writing code: 299
Adding entry 301: 0020 0069
Writing code: 32
Adding entry 302: 0069 006e 0074 0020
减压循环
i=0;
while(i < input_len && ret == LZW_ERR_OK){
short code;
entry *e;
code = input_short(f_input, &io_flags, STREAM_USE_ENDIAN);
if(io_flags == STREAM_ERR_READ){
ret = LZW_ERR_READ;
break;
}
i += 2;
//e should never be NULL
printf("Reading code: %d\n", code);
e = dictionary_get_entry_byValue(lookup, code);
assert(e != NULL);
seqset(&temp, e->key.data, e->key.length);
//requires a slightly different approach to the lookup loop in lzw_encode
while(i < input_len && e != NULL){
code = input_short(f_input, &io_flags, STREAM_USE_ENDIAN);
//check for errors / end of file
if(io_flags != STREAM_ERR_OK){
if(io_flags == STREAM_ERR_READ)
ret = LZW_ERR_READ;
break;
}
i += 2;
printf("Reading code: %d\n", code);
//e should never be NULL
e = dictionary_get_entry_byValue(lookup, code);
assert(e != NULL); <------------ This is where it is failing
//start adding bytes to temp
for(size_t j = 0; j < e->key.length; j++){
seqadd(&temp, &e->key.data[j], 1);
if(dictionary_get_entry_byKey(lookup, temp) == NULL){
//sequence not found, add it to dictionary
printf("Adding entry %d: ", lookup->next_value);
dictionary_set_entry(lookup, temp, DICT_SET_FOREVER);
for(int k = 0; k < temp.length; k++)
printf("%.4x ", temp.data[k]);
printf("\n");
e = NULL; //to escape from while
break;
}
}
}
//if more than one code was read go back by two bytes
if(e == NULL){
i -= 2;
fseek(f_input, -2, SEEK_CUR);
}
//only write up to the characters that made a known sequence
temp.length--;
for(size_t j = 0; j < temp.length; j++){
output_char(f_output, temp.data[j]);
#ifdef DEBUG
//printf("%c", temp.data[j]);
#endif // DEBUG
}
}
解压输出
Reading code: 123
Reading code: 273
Adding entry 297: 007b 000d
Reading code: 273
Reading code: 32
Adding entry 298: 000d 000a 0020
Reading code: 32
Reading code: 299
299, 299 <----error output from dictionary (code asked > next value)
Assertion failed: e != NULL, file lzw.c, line 212
如有任何帮助,我们将不胜感激。
最佳答案
您遇到了 Lempel Ziv Welch 减压算法中臭名昭著的 KwKwK 问题。
来自原始文章,A Technique for High-Performance Data Compression ,Terry A. Welch,IEEE 计算机,1984 年 6 月,第 8-19 页:
The abnormal case occurs whenever an input character string contains the sequence KwKwK, where Kw already appears in the compressor string table. The compressor will parse out Kw, send CODE(Kw), and add KwK to its string table. Next it will parse out KwK and send the just-generated CODE(KwK). The decompressor, on receiving CODE(KwK), will not yet have added that code to its string table because it does not yet know an extension character for the prior string.
文章解释了如何解决这个问题。
关于c - Lempel-Ziv-Welch 减压不存在指数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42130786/
我是一名优秀的程序员,十分优秀!