gpt4 book ai didi

c++ - NEON 中与 _mm_hadd_ps 等效的指令是什么?

转载 作者:行者123 更新时间:2023-12-01 15:11:09 27 4
gpt4 key购买 nike

我正在尝试将以下代码从SSE转换为Apple的64位iOS设备的NEON:

// Multiplies a 4x4 matrix by a 4-component vector using SSE/SSE3 intrinsics.
//
// vec      - the four input components.
// matTrans - matrix whose `m` member is read as four consecutive groups of four floats.
// out      - receives four floats; out[ i ] is the dot product of vec with the four
//            floats starting at matTrans.m[ 4 * i ].
void Matrix::TransformPoint( const float vec[ 4 ], const Matrix& matTrans, float out[ 4 ] )
{
// Copy the input into a 16-byte-aligned local so the aligned load below is valid
// regardless of how the caller's array is aligned.
alignas( 16 ) float v4[ 4 ] = { vec[ 0 ], vec[ 1 ], vec[ 2 ], vec[ 3 ] };

__m128 vec4 = _mm_load_ps( v4 );
// NOTE(review): _mm_load_ps requires 16-byte-aligned addresses; this assumes
// matTrans.m is declared with that alignment -- confirm against the Matrix definition.
__m128 row1 = _mm_load_ps( &matTrans.m[ 0 ] );
__m128 row2 = _mm_load_ps( &matTrans.m[ 4 ] );
__m128 row3 = _mm_load_ps( &matTrans.m[ 8 ] );
__m128 row4 = _mm_load_ps( &matTrans.m[ 12 ] );

// Lane-wise products; summing the four lanes of rN gives the N-th dot product.
__m128 r1 = _mm_mul_ps( row1, vec4 );
__m128 r2 = _mm_mul_ps( row2, vec4 );
__m128 r3 = _mm_mul_ps( row3, vec4 );
__m128 r4 = _mm_mul_ps( row4, vec4 );

// _mm_hadd_ps( a, b ) yields { a0+a1, a2+a3, b0+b1, b2+b3 }, so two rounds of it
// reduce the four product vectors to { sum(r1), sum(r2), sum(r3), sum(r4) }.
__m128 sum_01 = _mm_hadd_ps( r1, r2 );
__m128 sum_23 = _mm_hadd_ps( r3, r4 );
__m128 result = _mm_hadd_ps( sum_01, sum_23 );
// NOTE(review): _mm_store_ps also requires a 16-byte-aligned destination, but `out`
// is caller-supplied with no visible alignment guarantee -- confirm, or consider
// _mm_storeu_ps.
_mm_store_ps( out, result );
}

这是我到目前为止所拥有的:
// Work-in-progress NEON port of the SSE routine: copy the input into an aligned local,
// load it together with four 4-float groups of the matrix, and multiply each group
// lane-wise by the vector.
// NOTE(review): this fragment reads `mat`, while the SSE version's parameter is named
// `matTrans` -- confirm which name is intended.
alignas( 16 ) float v4[ 4 ] = { vec[ 0 ], vec[ 1 ], vec[ 2 ], vec[ 3 ] };

float32x4_t vec4 = vld1q_f32( v4 );
float32x4_t row1 = vld1q_f32( &mat.m[ 0 ] );
float32x4_t row2 = vld1q_f32( &mat.m[ 4 ] );
float32x4_t row3 = vld1q_f32( &mat.m[ 8 ] );
float32x4_t row4 = vld1q_f32( &mat.m[ 12 ] );

float32x4_t r1 = vmulq_f32( row1, vec4 );
float32x4_t r2 = vmulq_f32( row2, vec4 );
float32x4_t r3 = vmulq_f32( row3, vec4 );
float32x4_t r4 = vmulq_f32( row4, vec4 );

// Still to be written: the three horizontal-add steps that the SSE version performs
// with _mm_hadd_ps. The placeholders below are intentionally left unresolved.
float32x4_t sum_01 = ??? <-- How to write this?
float32x4_t sum_23 = ??? <-- How to write this?
float32x4_t result = ??? <-- How to write this?
vst1q_f32( out, result );

如何替换 _mm_hadd_ps?

最佳答案

这是典型的矩阵-向量乘法。最好先对矩阵进行转置,然后执行一系列“向量乘以标量”的乘加(multiply-accumulate)运算,从而避免耗时的水平加法运算:

// Matrix-vector product without horizontal adds: load the matrix de-interleaved, then
// accumulate each de-interleaved vector scaled by one component of the input vector.
// NOTE(review): pMat, pVec and pDst are not declared in this snippet; presumably they
// point to 16, 4 and 4 floats respectively -- confirm at the call site.
float32x4x4_t mat;
float32x4_t vec4, result;

// vld4q_f32 de-interleaves with stride 4: mat.val[j] holds pMat[j], pMat[j+4],
// pMat[j+8], pMat[j+12], which amounts to loading the 4x4 matrix transposed.
mat = vld4q_f32(pMat);
vec4 = vld1q_f32(pVec);

// The *_lane_f32 forms take a 2-lane vector, so components 0 and 1 come from the low
// half of vec4 and components 2 and 3 from the high half.
// Net effect: result[i] = sum over j of pMat[4*i + j] * pVec[j].
result = vmulq_lane_f32(mat.val[0], vget_low_f32(vec4), 0);
result = vmlaq_lane_f32(result, mat.val[1], vget_low_f32(vec4), 1);
result = vmlaq_lane_f32(result, mat.val[2], vget_high_f32(vec4), 0);
result = vmlaq_lane_f32(result, mat.val[3], vget_high_f32(vec4), 1);

vst1q_f32(pDst, result);

关于“c++ - NEON 中与 _mm_hadd_ps 等效的指令是什么?”,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/59489424/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com