gpt4 book ai didi

c - 优化 btwoc()

转载 作者:太空宇宙 更新时间:2023-11-04 04:04:59 25 4
gpt4 key购买 nike

根据 OpenID Authentication 2.0 , 第 4.2 节,

Arbitrary precision integers MUST be encoded as big-endian signed two's complement binary strings. Henceforth, "btwoc" is a function that takes an arbitrary precision integer and returns its shortest big-endian two's complement representation. All integers that are used with Diffie-Hellman Key Exchange are positive. This means that the left-most bit of the two's complement representation MUST be zero. If it is not, implementations MUST add a zero byte at the front of the string.

Non-normative example:

Base 10 number | btwoc string representation
---------------+----------------------------
0 | "\x00"
127 | "\x7F"
128 | "\x00\x80"
255 | "\x00\xFF"
32768 | "\x00\x80\x00"

我已经尝试用 C 编写我自己的 btwoc 实现,这就是我所拥有的:

typedef struct {
uint8_t *data;
uintmax_t length;
} oid_data;

oid_data *oid_btwoc_r(uintmax_t value, oid_data *ret) {
unsigned fnz = sizeof(uintmax_t) + 1,
i = sizeof(uintmax_t) * 8;
while (--fnz && (!(value >> (i -= 8) & 0xFF)));
/*
If `value' is non-zero, `fnz' now contains the index of the first
non-zero byte in `value', where 1 refers to the least-significant byte.
`fnz' will therefore be in the range [1 .. sizeof(uintmax_t)]. If
`value' is zero, then `fnz' is zero.
*/
if (!value) {
/* The value is zero */
ret->length = 1;
ret->data[0] = 0;
} else if (value >> ((fnz - 1) * 8 + 7)) {
/* The most significant bit of the first non-zero byte is 1 */
ret->length = fnz + 1;
ret->data[0] = 0;
for (i = 1; i <= fnz; i++)
ret->data[1 + fnz - i] =
value >> ((i - 1) * 8);
} else {
/* The most significant bit of the first non-zero byte is 0 */
ret->length = fnz;
for (i = 1; i <= fnz; i++)
ret->data[fnz - i] =
value >> ((i - 1) * 8);
}
return ret;
}

ret->data 应该指向至少 sizeof(uintmax_t) + 1 字节的有效内存。

它运行良好,我还没有发现实现中的任何错误,但是它可以优化吗?

最佳答案

如果您将零作为一种特殊情况,您一定要在 while 循环之前处理它。 (个人厌恶:我不喜欢 (!value) 因为 (value == 0)。)


我还没有尝试计时,但这段代码产生的答案与你的相同(至少在我测试的值上;见下文)并且对我来说看起来更简单,尤其是因为它没有加倍处理高位设置在最高有效字节的情况的代码:

void oid_btwoc_r2(uintmax_t value, oid_data *ret)
{
if (value == 0)
{
ret->data[0] = 0;
ret->length = 1;
return;
}

uintmax_t v0 = value;
uintmax_t v1 = v0;
unsigned n = 0;

while (v0 != 0)
{
n++;
v1 = v0;
v0 >>= 8;
}
//printf("Value: 0x%" PRIXMAX ", v1 = 0x%" PRIXMAX "\n", value, v1);

assert(v1 < 0x100 && v1 != 0);
if (v1 > 0x7F)
n++;
//printf("N = %u\n", n);

for (unsigned i = n; i != 0; i--)
{
ret->data[i-1] = (value & 0xFF);
value >>= 8;
}
ret->length = n;
}

我用来针对您的版本进行测试的代码是:

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

#define DIM(x) (sizeof(x) / sizeof(*(x)))

static void dump_oid_data(FILE *fp, const char *tag, const oid_data *data)
{
fprintf(fp, "%s: length %" PRIuMAX ":", tag, data->length);
for (uintmax_t i = 0; i < data->length; i++)
fprintf(fp, " 0x%02X", data->data[i]);
fputc('\n', fp);
}

int main(void)
{
uintmax_t list[] = { 0, 0x7F, 0x80, 0xFF, 0x100, 0x7FFF, 0x8000, 0xFFFF,
0x10000, 0x7FFFFF, 0x800000, 0xFFFFFF };

for (size_t i = 0; i < DIM(list); i++)
{
uint8_t b1[sizeof(uintmax_t) + 1];
uint8_t b2[sizeof(uintmax_t) + 1];
oid_data v1 = { b1, sizeof(b1) };
oid_data v2 = { b2, sizeof(b2) };
oid_btwoc_r(list[i], &v1);
oid_btwoc_r2(list[i], &v2);
printf("Value: 0x%" PRIXMAX ": ", list[i]);
if (v1.length != v2.length)
printf("Lengths differ (%" PRIuMAX " vs %" PRIuMAX ")\n", v1.length, v2.length);
else if (memcmp(v1.data, v2.data, v1.length) != 0)
{
printf("Data differs!\n");
dump_oid_data(stdout, "oid_btwoc_r1()", &v1);
dump_oid_data(stdout, "oid_btwoc_r2()", &v2);
}
else
{
printf("Values are the same\n");
dump_oid_data(stdout, "oid_btwoc_rN()", &v2);
}
}
return(0);
}

是的;代码的早期版本需要转储功能来查看出了什么问题!

时机

我把原来的oid_btwoc_r()改成了一个返回void的函数(没有final return)并重命名为oid_btwoc_r1() 。然后我运行了计时测试(在运行 MacOS X Lion 10.7.1 的 Mac Mini 上),并得到了计时结果:

oid_btwoc_r1(): 4.925386
oid_btwoc_r2(): 4.022604
oid_btwoc_r1(): 4.930649
oid_btwoc_r2(): 4.004344
oid_btwoc_r1(): 4.927602
oid_btwoc_r2(): 4.005756
oid_btwoc_r1(): 4.923356
oid_btwoc_r2(): 4.007910
oid_btwoc_r1(): 4.984037
oid_btwoc_r2(): 4.202986
oid_btwoc_r1(): 5.015747
oid_btwoc_r2(): 4.067265
oid_btwoc_r1(): 4.982333
oid_btwoc_r2(): 4.019807
oid_btwoc_r1(): 4.957866
oid_btwoc_r2(): 4.074712
oid_btwoc_r1(): 4.993991
oid_btwoc_r2(): 4.042422
oid_btwoc_r1(): 4.970930
oid_btwoc_r2(): 4.077203

因此,oid_btwoc_r2() 函数似乎比原来的函数快了大约 20% - 至少在测试数据上是这样。更大的数字改变了平衡,有利于 oid_btwoc_r(),然后 `oid_btwoc_r1() 快了大约 20%:

oid_btwoc_r1(): 3.671201
oid_btwoc_r2(): 4.605171
oid_btwoc_r1(): 3.669026
oid_btwoc_r2(): 4.575745
oid_btwoc_r1(): 3.673729
oid_btwoc_r2(): 4.659433
oid_btwoc_r1(): 3.684662
oid_btwoc_r2(): 4.671654
oid_btwoc_r1(): 3.730757
oid_btwoc_r2(): 4.645485
oid_btwoc_r1(): 3.764600
oid_btwoc_r2(): 4.673244
oid_btwoc_r1(): 3.669582
oid_btwoc_r2(): 4.610177
oid_btwoc_r1(): 3.664248
oid_btwoc_r2(): 4.813711
oid_btwoc_r1(): 3.675927
oid_btwoc_r2(): 4.630148
oid_btwoc_r1(): 3.681798
oid_btwoc_r2(): 4.614129

由于在这种情况下大数字可能比小数字更有可能,oid_btwoc_r1() - 或原始 oid_btwoc_r() - 可以说是更好的选择。

测试代码如下。未注释的 for 循环是“大量”版本,它显示 oid_btwoc_r1()oid_btwoc_r2() 工作得更快;注释掉的 for 循环是“小数字”版本,它显示 oid_btwoc_r2()oid_btowc_r1() 工作得更快。

static void test_converter(const char *tag, void (*function)(uintmax_t, oid_data *))
{
Clock clk;
clk_init(&clk);
clk_start(&clk);
for (uintmax_t i = 0x100000000; i < 0x1000000000000000; i += 0x170000000)
//for (uintmax_t i = 0; i < 0x100000000; i += 17)
{
uint8_t b1[sizeof(uintmax_t) + 1];
oid_data v1 = { b1, sizeof(b1) };
(*function)(i, &v1);
}
clk_stop(&clk);
char buffer[32];
printf("%s: %s\n", tag, clk_elapsed_us(&clk, buffer, sizeof(buffer)));
}

int main(void)
{
for (int i = 0; i < 10; i++)
{
test_converter("oid_btwoc_r1()", oid_btwoc_r1);
test_converter("oid_btwoc_r2()", oid_btwoc_r2);
}
return(0);
}

关于c - 优化 btwoc(),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/7219114/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com