gpt4 book ai didi

c - 为什么 Horspool 不适用于二进制文件?

转载 作者:塔克拉玛干 更新时间:2023-11-03 05:52:03 25 4
gpt4 key购买 nike

我正在尝试用 C 编写一个快速、简单的签名检测程序。它应该读取二进制文件(.exe、ELF、库等),并在其中搜索二进制数据(有时是字符串,有时是字节序列)。

我有一个简单的 C 测试程序:

#include <stdio.h>
#include <unistd.h>

const char *str = "TestingOneTwoThree";

/* Emit the marker string together with this process's PID once per second, forever. */
int main()
{
    for (;;) {
        fprintf(stdout, "%s %ld\n", str, (long)getpid());
        sleep(1);
    }
}

这是我正在使用的 Horspool 算法实现。它是我直接根据维基百科上的伪代码改编的:https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define HORSPOOL_COUNT 256
#define BLOCK_SIZE 1024
#define MAX(a, b) a > b ? a : b

/*
 * Search the first buflen bytes of buf for the first occurrence of the
 * egglen-byte pattern egg, using a per-byte skip table (adapted from the
 * Boyer-Moore-Horspool pseudocode).
 *
 * Returns the offset of the match within buf, or -1 if the loop ends
 * without finding one.
 */
ssize_t horspool_find(const char *buf, size_t buflen, const char *egg, size_t egglen)
{
int table[HORSPOOL_COUNT];
// NOTE: tmp is declared here but never used in this function.
ssize_t shift = 0, i, tmp;

// Default skip distance for every table slot: the full pattern length.
for(i = 0; i < HORSPOOL_COUNT; ++i)
{
table[i] = (int)egglen;
}

// For each pattern byte except the last, store its distance to the pattern end.
// NOTE(review): egg is `const char *`; where plain char is signed, byte values
// above 0x7F are negative after the (int) cast and index outside table.
for(i = 0; i < egglen - 1; ++i)
{
table[(int)egg[i]] = egglen - i - 1;
}

// shift is the candidate match offset in buf.
// NOTE(review): buflen - egglen is computed in size_t, so it wraps around
// to a huge value when egglen > buflen.
while(shift <= buflen - egglen)
{
// Compare the pattern against the window from its last byte backwards.
i = egglen - 1;
while(buf[shift + i] == egg[i])
{
if(i == 0)
{
return shift;
}
i--;
}
// Advance by the table entry for the last byte of the current window.
// NOTE(review): same signed-char indexing concern as above, this time on buf.
shift += MAX(1, table[(int)buf[shift + egglen - 1]]);
}
return -1;
}

/*
 * Read the whole of `filename` into a heap buffer.
 *
 * filename: path of the file; it is opened in binary mode ("rb").
 * size:     optional out-parameter (may be NULL). Receives the number of
 *           bytes read on success and 0 on any failure.
 *
 * Returns a buffer the caller must free(), or NULL on failure after the
 * failing call has been reported with perror(). An empty file yields a
 * valid (1-byte) allocation with *size == 0, so NULL always means failure.
 */
char *readfile(const char *filename, size_t *size)
{
    int ch;
    size_t used = 0, allocated = 0;
    char *buf = NULL, *tmp = NULL;
    FILE *f;

    /* Pessimistic default so every early return leaves *size at 0. */
    if (size) *size = 0;

    if ((f = fopen(filename, "rb")) == NULL)
    {
        perror("fopen");
        return NULL;
    }

    while ((ch = fgetc(f)) != EOF)
    {
        if (used >= allocated)
        {
            /* Grow through a temporary so buf stays valid if realloc fails. */
            tmp = realloc(buf, allocated + BLOCK_SIZE);
            if (tmp == NULL)
            {
                /* Report first: free()/fclose() may overwrite errno. */
                perror("realloc");
                free(buf);
                fclose(f);
                return NULL;
            }
            buf = tmp;
            allocated += BLOCK_SIZE;
        }
        buf[used++] = (char)ch;
    }

    /* fgetc() returns EOF for read errors too; tell the two apart. */
    if (ferror(f))
    {
        perror("fgetc");
        free(buf);
        fclose(f);
        return NULL;
    }
    fclose(f);

    if (used == 0)
    {
        /* Empty file: avoid realloc(NULL, 0), whose result is implementation-defined. */
        buf = malloc(1);
        if (buf == NULL)
        {
            perror("malloc");
            return NULL;
        }
    }
    else if (used < allocated)
    {
        /* Trim the slack; if shrinking fails the larger buffer is still valid. */
        tmp = realloc(buf, used);
        if (tmp != NULL)
        {
            buf = tmp;
        }
    }

    if (size) *size = used;
    return buf;
}

/*
 * Brute-force search for the first occurrence of the findlen-byte pattern
 * `find` inside the first buflen bytes of `buf`.
 *
 * Bytes are compared as-is, so embedded NULs and high-bit bytes are fine.
 * Returns the offset of the first match, or -1 if there is none (which
 * includes the case where the pattern is longer than the buffer). An empty
 * pattern matches at offset 0.
 */
ssize_t naivealg_find(const char *buf, size_t buflen, const char *find, size_t findlen)
{
    size_t i, j, last;

    /* Guard first: buflen - findlen would wrap around in size_t otherwise. */
    if (findlen > buflen)
    {
        return -1;
    }

    /* Last offset at which the pattern still fits; it must be tried too. */
    last = buflen - findlen;
    for (i = 0; i <= last; ++i)
    {
        for (j = 0; j < findlen; ++j)
        {
            if (buf[i + j] != find[j])
            {
                break;
            }
        }
        if (j == findlen)
        {
            return (ssize_t)i;
        }
    }
    return -1;
}

/*
 * Load ./a.out, look for the marker string with both search routines, and
 * print the two offsets side by side so they can be compared.
 */
int main()
{
    const char *pat = "TestingOneTwoThree";
    size_t patlen = strlen(pat);
    size_t size;
    ssize_t pos1, pos2;
    char *buf = readfile("./a.out", &size);

    /* readfile() returns NULL on failure; never hand that to the searchers. */
    if (buf == NULL)
    {
        return EXIT_FAILURE;
    }

    pos1 = horspool_find(buf, size, pat, patlen);
    pos2 = naivealg_find(buf, size, pat, patlen);
    fprintf(stdout, "Offsets: %zd ~ %zd\n", pos1, pos2);

    free(buf);
    return 0;
}

输出是这样的:

Offsets: -1 ~ 2052

注意事项:

  • 相同的缓冲区和“egg”与朴素搜索实现一起工作。
  • 当 buf 和 egg 参数都是普通字符串时,horspool 实现似乎可以正常工作。

最佳答案

代码使用了有符号的 char(signed char),因此在处理二进制数据时,有时会用负数下标错误地索引数组。

// table[(int)buf[shift + egglen - 1]]
table[(unsigned char )buf[shift + egglen - 1]]

这个问题也存在于egg模式。

// table[(int) egg[i]] = egglen - i - 1;
table[(unsigned char) egg[i]] = egglen - i - 1;

当 buflen < egglen 时,还会出现其他与符号(有符号/无符号)相关的问题:

// while (shift <= buflen - egglen)
// change to avoid underflow
while (shift + egglen <= buflen)

还可以考虑以二进制方式打开文件,并且:

ssize_t shift,i; --> size_t shift,i;

int table[HORSPOOL_COUNT]; --> size_t table[HORSPOOL_COUNT];

给宏定义加上括号:#define MAX(a, b) (((a) > (b)) ? (a) : (b))

关于c - 为什么 Horspool 不适用于二进制文件?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47723032/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com