gpt4 book ai didi

simd - 将 MMX/SSE 指令移植到 AltiVec

转载 作者:行者123 更新时间:2023-12-04 18:25:50 25 4
gpt4 key购买 nike

先说明一下……我对汇编(ASM)的经验极其有限,对 SIMD 的经验就更少了。

但碰巧我有以下 MMX/SSE 优化代码,我想将其移植到 AltiVec 指令以在 PPC/Cell 处理器上使用。

这个请求可能有点过分……虽然它只有几行代码,但我费了很大的劲也没能弄清楚这里到底发生了什么。

原函数:

/*
 * Dot product of two arrays of 16-bit signed samples, accelerated with MMX.
 *
 * a, b : input arrays, each holding at least n elements.
 * n    : number of elements to process; values <= 0 yield 0.
 *
 * Returns the sum of a[i] * b[i] for i in [0, n), accumulated in int.
 */
static inline int convolve(const short *a, const short *b, int n)
{
    /* Lets the two 32-bit partial sums held in the MMX value be read back. */
    union {
        __m64 m64;
        int i32[2];
    } acc;
    int out;

    acc.i32[0] = 0;
    acc.i32[1] = 0;

    /*
     * Four samples per iteration: _mm_madd_pi16 multiplies the four 16-bit
     * pairs and adds adjacent products, giving two 32-bit sums that
     * _mm_add_pi32 folds into the running accumulator.
     */
    while (n >= 4) {
        acc.m64 = _mm_add_pi32(acc.m64,
                               _mm_madd_pi16(*(const __m64 *)a,
                                             *(const __m64 *)b));
        a += 4;
        b += 4;
        n -= 4;
    }

    out = acc.i32[0] + acc.i32[1];

    /* Leave MMX state so later x87 floating-point code is not disturbed. */
    _mm_empty();

    /*
     * Scalar tail for the remaining 0..3 elements. The "> 0" test keeps a
     * negative n from turning this into a runaway out-of-bounds loop.
     */
    while (n-- > 0)
        out += (*a++) * (*b++);

    return out;
}

有没有关于如何用 AltiVec 指令重写它的提示?

我的第一次尝试(错得很离谱)如下所示……但它并不完全正确(甚至可以说相差甚远)。
/*
 * First attempt at porting convolve() to AltiVec; the surrounding text
 * describes it as incorrect. Intended result: the sum of a[i] * b[i] for
 * i in [0, n).
 *
 * NOTE(review): the issues flagged below are observations from this snippet
 * alone and should be confirmed on a real AltiVec toolchain.
 */
static inline int convolve_altivec(const short *a, const short *b, int n)
{
int out = 0;
/*
 * NOTE(review): the integer view has only two elements, while `zero` below
 * (same vector type) is initialised with four lanes, so two lanes of m128
 * are never zeroed through i64[] nor included in the final sum.
 */
union {
vector unsigned int m128;
int i64[2];
} tmp;

vector unsigned int zero = {0, 0, 0, 0};

tmp.i64[0] = 0;
tmp.i64[1] = 0;
/* Vector loop: consumes eight shorts from each input per iteration. */
while (n >= 8) {
/*
 * NOTE(review): the inputs are `const short`, but they are reinterpreted as
 * unsigned vectors here (and the const qualifier is cast away).
 */
tmp.m128 = vec_add(tmp.m128,
vec_msum(*((vector unsigned short *)a),
*((vector unsigned short *)b), zero));

a += 8;
b += 8;
n -= 8;
}
/* Only the first two int elements of the union are added together. */
out = tmp.i64[0] + tmp.i64[1];
/* NOTE(review): no matching #if is visible for this #endif in the snippet. */
#endif
/* Scalar tail: multiply-accumulate whatever the vector loop left over. */
while (n --)
out += (*(a++)) * (*(b++));
return out;
}

最佳答案

你离正确答案已经不远了——我修复了几个小问题,稍微整理了代码,并添加了一个测试程序,现在它看起来可以正常工作了:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <altivec.h>

/*
 * Scalar reference implementation: returns the sum of a[i] * b[i] for
 * i in [0, n). A non-positive n yields 0.
 */
static int convolve_ref(const short *a, const short *b, int n)
{
    const short *pa = a;
    const short *pb = b;
    int acc = 0;
    int remaining;

    /* Walk both arrays in lockstep, counting down the elements left. */
    for (remaining = n; remaining > 0; --remaining)
    {
        acc += (*pa++) * (*pb++);
    }

    return acc;
}

/*
 * Dot product of two arrays of 16-bit signed samples using AltiVec.
 *
 * a, b : input arrays, each holding at least n elements; both pointers
 *        must be 16-byte aligned (checked with assert).
 * n    : number of elements to process; values <= 0 yield 0.
 *
 * Returns the sum of a[i] * b[i] for i in [0, n), accumulated in int.
 */
static inline int convolve_altivec(const short *a, const short *b, int n)
{
    int out = 0;
    /* Lets the four 32-bit partial sums in the vector be read back as ints. */
    union {
        vector signed int m128;
        int i32[4];
    } tmp;

    const vector signed int zero = {0, 0, 0, 0};

    /* The vector loads below dereference a and b as 16-byte vectors. */
    assert(((unsigned long)a & 15) == 0);
    assert(((unsigned long)b & 15) == 0);

    tmp.m128 = zero;

    /*
     * Eight samples per iteration: vec_msum multiplies the eight halfword
     * pairs, sums each adjacent pair of products and adds the result to the
     * matching 32-bit lane of the accumulator passed as third argument.
     */
    while (n >= 8)
    {
        tmp.m128 = vec_msum(*((const vector signed short *)a),
                            *((const vector signed short *)b), tmp.m128);

        a += 8;
        b += 8;
        n -= 8;
    }

    /* Horizontal add of the four lane sums. */
    out = tmp.i32[0] + tmp.i32[1] + tmp.i32[2] + tmp.i32[3];

    /*
     * Scalar tail for the remaining 0..7 elements. The "> 0" test keeps a
     * negative n from turning this into a runaway out-of-bounds loop.
     */
    while (n-- > 0)
        out += (*(a++)) * (*(b++));

    return out;
}

/*
 * Test harness: fills two buffers with rand() values, runs the scalar
 * reference and the AltiVec implementation over them, prints both sums and
 * reports PASS when they are equal.
 *
 * NOTE(review): a product of two shorts can reach 2^30, so the sum of 100
 * such products may not fit in int; confirm whether wrap-around is relied on.
 */
int main(void)
{
    enum { COUNT = 100 };

    /*
     * Backing storage is declared as vector arrays, presumably so that the
     * short views meet convolve_altivec's 16-byte alignment asserts.
     */
    vector signed short vec_a[COUNT / 8 + 1];
    vector signed short vec_b[COUNT / 8 + 1];

    short *a = (short *)vec_a;
    short *b = (short *)vec_b;

    int expected;
    int actual;
    const char *verdict;
    int k = 0;

    /* Fill a and b alternately so the rand() call order is a0, b0, a1, ... */
    while (k < COUNT)
    {
        a[k] = rand();
        b[k] = rand();
        ++k;
    }

    expected = convolve_ref(a, b, COUNT);
    actual = convolve_altivec(a, b, COUNT);

    printf("sum_ref = %d\n", expected);
    printf("sum_test = %d\n", actual);

    verdict = (expected == actual) ? "PASS" : "FAIL";
    printf("%s\n", verdict);

    return 0;
}

关于simd - 将 MMX/SSE 指令移植到 AltiVec,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/4351383/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com