gpt4 book ai didi

c++ - 循环和if的优化

转载 作者:搜寻专家 更新时间:2023-10-30 23:57:11 24 4
gpt4 key购买 nike

我有一个程序如下所示:

/* Placeholder for the first per-byte step; the body is intentionally empty. */
void Process1(unsigned char* data)
{
    (void)data; /* argument unused in this stub */
}
/* Placeholder for the second per-byte step; the body is intentionally empty. */
void Process2(unsigned char* data)
{
    (void)data; /* argument unused in this stub */
}
/* Placeholder for the third per-byte step; the body is intentionally empty. */
void Process3(unsigned char* data)
{
    (void)data; /* argument unused in this stub */
}

/* Bit masks tested by ProcessData to decide which ProcessN steps to run. */
/* Numbering starts at bit 1, so bit 0 of `flags` is not assigned to any step. */
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)

/*
 * Hands every byte of `data` to each enabled processing step, in the
 * order Process1, Process2, Process3.
 *
 * data  - start of the buffer; each step receives a pointer to one byte
 * bytes - number of bytes to visit
 * flags - bitwise OR of FLAG1 / FLAG2 / FLAG3; a set flag enables the
 *         step with the same number
 *
 * The flag tests are evaluated once before the loop, but every iteration
 * still branches on the three cached results.
 */
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
    const bool b1 = (flags & FLAG1) != 0;
    const bool b2 = (flags & FLAG2) != 0;
    const bool b3 = (flags & FLAG3) != 0;

    unsigned int i = 0;
    while (i < bytes)
    {
        unsigned char* const cur = data + i;
        if (b1) { Process1(cur); }
        if (b2) { Process2(cur); }
        if (b3) { Process3(cur); }
        ++i;
    }
}

可以看到,flags & FLAG1(也就是 b1)在整个循环期间都不会改变,但我们仍然必须在每次迭代中对它进行分支判断。我想知道是否有办法动态地避免这种不必要的分支。

这是 Lundin 解决方案的演示。

#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Timing state for the benchmark:
 *   ls - counter value captured by START_CLOCK()
 *   le - counter value captured by END_CLOCK()
 *   ll - counter frequency, filled in by QueryPerformanceFrequency() in main
 */
LARGE_INTEGER ls, le, ll;

/* Record the starting counter value. */
#define START_CLOCK() QueryPerformanceCounter(&ls)

/*
 * Record the ending counter value and print the elapsed time.
 *
 * ticks / ticks-per-second is seconds, and scaling by 1,000,000 gives
 * MICROseconds, so the value is labelled "us". Output recorded while the
 * label still read "ns" shows the same microsecond figures.
 *
 * Wrapped in do { } while (0), with no trailing semicolon, so that
 * `END_CLOCK();` behaves as exactly one statement.
 */
#define END_CLOCK() \
    do { \
        QueryPerformanceCounter(&le); \
        printf("%.0f us\n", \
               (double)(le.QuadPart - ls.QuadPart) / (double)ll.QuadPart * 1000000.0); \
    } while (0)


/* Adds one to the byte at `data`; the result is reduced back to unsigned char. */
void Process1(unsigned char* data)
{
    *data = (unsigned char)(*data + 1);
}
/* Subtracts one from the byte at `data`; the result is reduced back to unsigned char. */
void Process2(unsigned char* data)
{
    *data = (unsigned char)(*data - 1);
}
/* Replaces the byte at `data` with its square, truncated to unsigned char. */
void Process3(unsigned char* data)
{
    const unsigned int value = *data;
    *data = (unsigned char)(value * value);
}

/* Bit masks tested by ProcessData / ProcessData_x to decide which ProcessN steps to run. */
/* Numbering starts at bit 1, so bit 0 of `flags` is not assigned to any step. */
#define FLAG1 (1 << 1)
#define FLAG2 (1 << 2)
#define FLAG3 (1 << 3)

/*
 * Baseline variant: applies each enabled step to every byte of `data`,
 * in the order Process1, Process2, Process3.
 *
 * data  - start of the buffer; each step receives a pointer to one byte
 * bytes - number of bytes to visit
 * flags - bitwise OR of FLAG1 / FLAG2 / FLAG3; a set flag enables the
 *         step with the same number
 *
 * The flag tests are evaluated once before the loop, but every iteration
 * still branches on the three cached results.
 */
void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
    const bool b1 = (flags & FLAG1) != 0;
    const bool b2 = (flags & FLAG2) != 0;
    const bool b3 = (flags & FLAG3) != 0;

    unsigned int i = 0;
    while (i < bytes)
    {
        unsigned char* const cur = data + i;
        if (b1) { Process1(cur); }
        if (b2) { Process2(cur); }
        if (b3) { Process3(cur); }
        ++i;
    }
}


/* Signature shared by every per-byte processing step. */
typedef void (*proc_t)(unsigned char*);

/* No-op step with the proc_t signature, used in place of a disabled step. */
static inline void do_nothing(unsigned char* ptr)
{
    (void)ptr; /* deliberately unused */
}

void ProcessData_x(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = (flags & FLAG1) != 0; // de-obfuscate the boolean logic
bool b2 = (flags & FLAG2) != 0;
bool b3 = (flags & FLAG3) != 0;

proc_t p1 = b1 ? Process1 : do_nothing;
proc_t p2 = b2 ? Process2 : do_nothing;
proc_t p3 = b3 ? Process3 : do_nothing;

for (unsigned int i = 0; i<bytes; i++)
{
p1(data + i);
p2(data + i);
p3(data + i);
}
}

/*
 * Benchmark driver: times ProcessData against ProcessData_x with the same
 * random flag set and identical input bytes.
 *
 * Returns 1 if the performance counter frequency cannot be queried,
 * 0 otherwise.
 */
int main()
{
    if (!QueryPerformanceFrequency(&ll)) return 1;

    const unsigned int bytes = 0xffff;
    srand((unsigned int)time(NULL));

    /*
     * Choose a random subset of the three defined flags. The mask is built
     * from the flag bits themselves: a literal 0x7 would cover bits 0-2, so
     * FLAG3 (bit 3) could never be selected and bit 0 would carry no meaning.
     */
    const unsigned int flags = (unsigned int)rand() & (FLAG1 | FLAG2 | FLAG3);

    /*
     * Each variant gets its own buffer holding the same random bytes, so the
     * second measurement does not run on data already modified by the first.
     */
    unsigned char* data = new unsigned char[bytes];
    unsigned char* data_x = new unsigned char[bytes];
    for (unsigned int i = 0; i < bytes; i++)
    {
        const unsigned char value = (unsigned char)(rand() & 0xff);
        data[i] = value;
        data_x[i] = value;
    }

    START_CLOCK();

    ProcessData(data, bytes, flags);

    END_CLOCK();

    START_CLOCK();

    ProcessData_x(data_x, bytes, flags);

    END_CLOCK();

    /* Release both buffers; previously the allocation was never freed. */
    delete[] data_x;
    delete[] data;
    return 0;
}

这是输出:

134 ns
272 ns

我运行了好几次,没想到函数指针版本竟然耗费了更多的时间 :( (使用 VS2010、Release、x86 配置编译)

最佳答案

首先,如果不考虑特定系统就谈论优化是没有任何意义的......

话虽如此,我会通过以下方式把循环中的分支优化掉:

/* Signature shared by every per-byte processing step. */
typedef void (*proc_t)(unsigned char*);

/* No-op step with the proc_t signature, used in place of a disabled step. */
static inline void do_nothing(unsigned char* ptr)
{
    (void)ptr; /* deliberately unused */
}

...

void ProcessData(unsigned char* data, unsigned int bytes, unsigned int flags)
{
bool b1 = (flags & FLAG1) != 0; // de-obfuscate the boolean logic
bool b2 = (flags & FLAG2) != 0;
bool b3 = (flags & FLAG3) != 0;

proc_t p1 = b1 ? Process1 : do_nothing;
proc_t p2 = b2 ? Process2 : do_nothing;
proc_t p3 = b3 ? Process3 : do_nothing;

for (unsigned int i = 0; i<bytes; i++)
{
p1(data + i);
p2(data + i);
p3(data + i);
}
}

关于c++ - 循环和if的优化,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/25862137/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com