gpt4 book ai didi

c++ - 使用 SSE 通过 uint64[] 进行线性搜索

转载 作者:太空狗 更新时间:2023-10-29 20:25:42 24 4
gpt4 key购买 nike

我正在尝试使用 SSE 指令实现对 uint64 数组的线性搜索。uint16 和 uint32 的版本已经可以正常工作,但 uint64 的代码出现了编译器错误(Linux、gcc,环境信息见文末)。

我正在尝试比较 2x2 个 64 位数字,然后以某种方式把比较结果转换为数组中的索引。这种做法对 uint32 是有效的(感谢 http://schani.wordpress.com/2010/04/30/linear-vs-binary-search/ ):

#include <xmmintrin.h>
#include <smmintrin.h>

typedef ham_u64_t vec2uint64 __attribute__ ((vector_size (16)));
typedef ham_u32_t vec4uint32 __attribute__ ((vector_size (16)));
typedef float vec4float __attribute__ ((vector_size (16)));
typedef ham_u16_t vec8uint16 __attribute__ ((vector_size (16)));
typedef ham_u8_t vec16uint8 __attribute__ ((vector_size (16)));

// ...

vec4uint32 v1 = _mm_loadu_si128((const __m128i *)&data[start + i + 0]);
vec4uint32 v2 = _mm_loadu_si128((const __m128i *)&data[start + i + 4]);
vec4uint32 v3 = _mm_loadu_si128((const __m128i *)&data[start + i + 8]);
vec4uint32 v4 = _mm_loadu_si128((const __m128i *)&data[start + i + 12]);

vec4uint32 cmp0 = _mm_cmpeq_epi32(key4, v1);
vec4uint32 cmp1 = _mm_cmpeq_epi32(key4, v2);
vec4uint32 cmp2 = _mm_cmpeq_epi32(key4, v3);
vec4uint32 cmp3 = _mm_cmpeq_epi32(key4, v4);

vec8uint16 pack01 = __builtin_ia32_packssdw128(cmp0, cmp1);
vec8uint16 pack23 = __builtin_ia32_packssdw128(cmp2, cmp3);
vec16uint8 pack0123 = __builtin_ia32_packsswb128(pack01, pack23);

int res = __builtin_ia32_pmovmskb128(pack0123);
if (res > 0) {
int czt = __builtin_ctz(~res + 1);
return (start + i + czt);
}

这是我目前为 uint64 写出的代码。比较部分可以工作,但我不知道该如何处理比较结果,而且 __builtin_ia32_packssdw() 调用无法通过编译:

vec2uint64 v1 = _mm_loadu_si128((const __m128i *)&data[start + i + 0]);
vec2uint64 v2 = _mm_loadu_si128((const __m128i *)&data[start + i + 2]);

vec2uint64 cmp0 = _mm_cmpeq_epi64(key2, v1);
vec2uint64 cmp1 = _mm_cmpeq_epi64(key2, v2);

vec4uint32 pack01 = __builtin_ia32_packssdw(cmp0, cmp1); // error
vec4uint32 pack23 = _mm_set1_epi32(0);
vec16uint8 pack0123 = __builtin_ia32_packsswb128(pack01, pack23);

int res = __builtin_ia32_pmovmskb128(pack0123);
if (res > 0) {
int czt = __builtin_ctz(~res + 1);
return (start + i + czt);
}

错误信息如下:

error: cannot convert 'vec2uint64 {aka __vector(2) long unsigned int}'
to '__vector(2) int' for argument '1' to '__vector(4) short int
__builtin_ia32_packssdw(__vector(2) int, __vector(2) int)'

(vec2uint64 的类型定义在顶部,在 uint32 的代码中。)

我的环境:

Linux ws4484 3.5.0-48-generic #72~precise1-Ubuntu SMP Tue Mar 11 20:09:08 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux

gcc version 4.6.3 (Ubuntu/Linaro 4.6.3-1ubuntu5)

我的问题不仅是如何修复这个编译器错误,还想知道是否有人有更好的办法来获得匹配元素的数组索引,也许可以完全不用这一整套打包(pack)操作?

提前致谢!

最佳答案

我建议不要使用 GCC 内置函数(__builtin_ia32_*)和隐式 vector 类型。只有在你完全不使用非 GCC 专有的内在函数(例如 _mm_cmpeq_epi32)并且只打算使用 GCC 时,这样做才有意义。你可以像下面这样实现你想要的功能:

__m128i key2 = _mm_set1_epi64x(key);
__m128i v1 = _mm_loadu_si128((const __m128i *)&data[start + i + 0]);
__m128i v2 = _mm_loadu_si128((const __m128i *)&data[start + i + 2]);

__m128i cmp0 = _mm_cmpeq_epi64(key2, v1);
__m128i cmp1 = _mm_cmpeq_epi64(key2, v2);

__m128i low2 = _mm_shuffle_epi32(cmp0,0xD8);
__m128i high2 = _mm_shuffle_epi32(cmp1,0xD8);
__m128i pack = _mm_unpacklo_epi64(low2,high2);

__m128i pack01 = _mm_packs_epi32(pack, _mm_setzero_si128());
__m128i pack0123 = _mm_packs_epi16(pack01, _mm_setzero_si128());

int res = _mm_movemask_epi8(pack0123);

您也许能找到一个避免打包操作的更高效版本,但那样就必须使用 __builtin_ctz 以外的函数来计算索引。

对于 32 位整数,我建议

__m128i key4 = _mm_set1_epi32(key);
__m128i v1 = _mm_loadu_si128((const __m128i *)&data[start + i + 0]);
__m128i v2 = _mm_loadu_si128((const __m128i *)&data[start + i + 4]);
__m128i v3 = _mm_loadu_si128((const __m128i *)&data[start + i + 8]);
__m128i v4 = _mm_loadu_si128((const __m128i *)&data[start + i + 12]);

__m128i cmp0 = _mm_cmpeq_epi32(key4, v1);
__m128i cmp1 = _mm_cmpeq_epi32(key4, v2);
__m128i cmp2 = _mm_cmpeq_epi32(key4, v3);
__m128i cmp3 = _mm_cmpeq_epi32(key4, v4);

__m128i pack01 = _mm_packs_epi32(cmp0, cmp1);
__m128i pack23 = _mm_packs_epi32(cmp2, cmp3);
__m128i pack0123 = _mm_packs_epi16(pack01, pack23);

int res = _mm_movemask_epi8(pack0123);

关于c++ - 使用 SSE 通过 uint64[] 进行线性搜索,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23077025/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com