gpt4 book ai didi

c - SSE 中浮点到 uchar 的转换问题

转载 作者:行者123 更新时间:2023-11-30 17:27:24 26 4
gpt4 key购买 nike

嗨,
关于我之前的帖子,我解决了 SSE 中的比较操作。
但是在获得输出后,我观察到我的输出是浮点形式,而我的预期输出是 uchar 。
例如,我期望输出为 8,但实际得到的是浮点格式的 8.0(32 位浮点数)。把这个值当作 1 字节无符号数读取后,结果与 8 相差很大。
请见下方(PFB,Please Find Below)我的 C 原始代码及其对应的 SSE 代码:

C 代码:

unsigned char *destination_buff = (unsigned char *)malloc(sizeof(unsigned char)*height*width);
float *d1 = inputbuffer;
float *d2 = d1 + width;
float *d3 = d2 + width;

for(int i=1;i<height;i++)
{
for(int j=1;j<width;j++)
{
int val = d2[j];
int temp1 = 0x00FF;
int temp2 = 0;
if(val <= d1[j-1]) temp2 += 0x80;
if(val <= d1[j]) temp2 += 0x40;
if(val <= d1[j+1]) temp2 += 0x20; }
if(val <= d2[j-1]) temp2 += 0x10;}
if(val <= d2[j+1]) temp2 += 0x08;
if(val <= d3[j-1]) temp2 += 0x04;
if(val <= d3[j]) temp2 += 0x02;
if(val <= d3[j+1]) temp2 ++;
temp1 &= (~temp2);
destination_buff[j-1] = temp1;
}
d1 += width;
d2 += width;
d3 += width;

destination_buff += (width);
}

这是我的 SSE 代码:

/*
 * SSE version of the 8-neighbour "centre <= neighbour" comparison, handling
 * four horizontally adjacent centre pixels per inner-loop iteration.
 *
 * Relies on `m_dat`, `outputbuffer`, `height` and `width` from the
 * surrounding code.
 *
 * NOTE(review): as written, the result is produced as 32-bit float lanes in
 * a scratch buffer and is never converted to, or stored as, unsigned char.
 * The individual problems are flagged inline below.
 */
/* Scratch buffer of height*width floats; it is not freed in this snippet. */
float *destination_buff = (float *)malloc(sizeof(float)*height*width);

uchar *dst_d = outputbuffer; //Pointer to the destination buffer which is already present and need to fill the output data in this
/* Three consecutive input rows: above, centre, below. */
float *CT_image_0 = m_dat;
float *CT_image_1 = CT_image_0 + width;
float *CT_image_2 = CT_image_1 + width;

/* NOTE(review): CT_image_2 is row i+1, so the last iteration (i == height-1)
   reads one row beyond a height*width image -- confirm the buffer size. */
for(int i=1;i<height;++i)
{
/* NOTE(review): each iteration loads columns j-1 .. j+4; near the end of a
   row this reads past column width-1 -- confirm padding or add a tail loop. */
for(int j=1;j<width;j+=4)
{
/* Unaligned loads of four floats starting at column j-1 (left neighbours). */
__m128 CT_current_00 = _mm_loadu_ps((CT_image_0+j-1));
__m128 CT_current_10 = _mm_loadu_ps((CT_image_1+j-1));
__m128 CT_current_20 = _mm_loadu_ps((CT_image_2+j-1));

/* Four floats starting at column j (same column as the centre pixels). */
__m128 CT_current_01 = _mm_loadu_ps(((CT_image_0+1)+j-1));
__m128 CT_current_11 = _mm_loadu_ps(((CT_image_1+1)+j-1));
__m128 CT_current_21 = _mm_loadu_ps(((CT_image_2+1)+j-1));

/* Four floats starting at column j+1 (right neighbours). */
__m128 CT_current_02 = _mm_loadu_ps(((CT_image_0+2)+j-1));
__m128 CT_current_12 = _mm_loadu_ps(((CT_image_1+2)+j-1));
__m128 CT_current_22 = _mm_loadu_ps(((CT_image_2+2)+j-1));

/* Centre pixels: centre row, columns j .. j+3. */
__m128 val = CT_current_11;

/* _mm_set1_ps takes a float, so each integer constant is converted first
   (0x80 becomes 128.0f). The lanes therefore hold IEEE-754 bit patterns of
   those floats, not the integer bit masks 0x80, 0x40, ... */
__m128 t1 = _mm_set1_ps(0x80);
__m128 t2 = _mm_set1_ps(0x40);
__m128 t3 = _mm_set1_ps(0x20);
__m128 t4 = _mm_set1_ps(0x10);
__m128 t5 = _mm_set1_ps(0x08);
__m128 t6 = _mm_set1_ps(0x04);
__m128 t7 = _mm_set1_ps(0x02);
__m128 t8 = _mm_set1_ps(0x01);

__m128 out = _mm_setzero_ps(); // init output flags to all zeroes


/* Pattern for each neighbour: _mm_cmple_ps yields an all-ones / all-zeros
   lane mask, the AND keeps the constant's bit pattern where the comparison
   is true, and the OR merges it into `out` bit by bit.
   NOTE(review): OR-ing float bit patterns is not addition. For example the
   patterns of 128.0f (0x43000000) and 64.0f (0x42800000) OR to 0x43800000,
   which is 256.0f rather than 192.0f, so `out` is not the flag sum.
   Consider reinterpreting the compare mask with _mm_castps_si128 and
   combining integer constants (_mm_set1_epi32) with _mm_and_si128 /
   _mm_or_si128 instead. */
/* Row above: left, centre, right. */
__m128 sample = _mm_cmple_ps(val,CT_current_00);
sample = _mm_and_ps(sample,t1);
out = _mm_or_ps(out,sample);
sample = _mm_cmple_ps(val,CT_current_01);
sample = _mm_and_ps(sample,t2);
out = _mm_or_ps(out,sample);
sample = _mm_cmple_ps(val,CT_current_02);
sample = _mm_and_ps(sample,t3);
out = _mm_or_ps(out,sample);

/* Centre row: left and right (the centre itself is skipped). */
sample = _mm_cmple_ps(val,CT_current_10);
sample = _mm_and_ps(sample,t4);
out = _mm_or_ps(out,sample);
sample = _mm_cmple_ps(val,CT_current_12);
sample = _mm_and_ps(sample,t5);
out = _mm_or_ps(out,sample);

/* Row below: left, centre, right. */
sample = _mm_cmple_ps(val,CT_current_20);
sample = _mm_and_ps(sample,t6);
out = _mm_or_ps(out,sample);
sample = _mm_cmple_ps(val,CT_current_21);
sample = _mm_and_ps(sample,t7);
out = _mm_or_ps(out,sample);
sample = _mm_cmple_ps(val,CT_current_22);
sample = _mm_and_ps(sample,t8);
out = _mm_or_ps(out,sample);

/* Stores four 32-bit floats at columns j-1 .. j+2 of the scratch buffer.
   NOTE(review): destination_buff is never advanced in this snippet, so
   every image row overwrites the same offsets. */
_mm_storeu_ps((destination_buff+(j-1)),out);
/* NOTE(review): this re-points dst_d at the float scratch buffer; it only
   reinterprets the pointer and converts nothing. From here on dst_d no
   longer refers to outputbuffer, which is never written in this snippet.
   To emit bytes, the four 32-bit lanes would need narrowing (for example
   _mm_packs_epi32 followed by _mm_packus_epi16 on integer lanes) before
   being stored through dst_d. */
dst_d = (uchar *)destination_buff;

}

/* Slide the three-row window down by one row. */
CT_image_0 += width;
CT_image_1 += width;
CT_image_2 += width;

/* NOTE(review): this advance is discarded whenever the inner loop runs
   again, because dst_d is reassigned inside it. */
dst_d += (width);

}

所有存储操作都在 float 和 __m128i 上。如何将结果存储到 uchar 中?

最佳答案

您可以先做打包比较(packed compare)得到掩码,然后用整数运算处理该掩码。代码中出现 _mm_set1_ps(0x80) 说明做法有些奇怪:您大概不应该把 2 的幂位掩码转换成浮点数,因为用 _mm_add_ps 把它们相加,要比用 _mm_or_si128 把它们组合起来慢得多。

对于其中一部分带偏移的加载(offset load),您也可以考虑改用 palignr,从而让代码的压力在加载端口和 ALU 端口之间更均衡。

关于c - SSE 中浮点到 uchar 的转换问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/26406449/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com