gpt4 book ai didi

c - 线程内读取错误的数组

转载 作者:行者123 更新时间:2023-11-30 18:35:11 25 4
gpt4 key购买 nike

我有一个多线程 C 程序,其中有 4 个线程使用一些全局数组进行一些算术计算。这是代码示例。

__m256 *array_1;
__m256 *array_2;
__m256 *array_3;
#define ALIGNMENT 32
#define SIMD_STEP 8

/*
 * Allocates the three global __m256 tables (array_1, array_2, array_3) and
 * runs one fill loop per stage. The fill bodies are elided in this snippet.
 *
 * Element counts requested: array_1 = 32, array_2 = 4, array_3 = 2 __m256
 * values, each allocation aligned to ALIGNMENT (32) bytes.
 */
void Init_arrays()
{
int i;
// NOTE(review): the posix_memalign() return values are ignored, so a failed
// allocation would not be detected here.
posix_memalign((void**) &array_1, ALIGNMENT, 32*sizeof(__m256));
posix_memalign((void**) &array_2, ALIGNMENT, 4 *sizeof(__m256));
posix_memalign((void**) &array_3, ALIGNMENT, 2 *sizeof(__m256));

// i advances by SIMD_STEP (8) over 0..255, i.e. 32 iterations.
for(i=0;i < 256; i+= SIMD_STEP)
{
// Filling array for the 1st stage
}
// i advances by SIMD_STEP (8) over 0..63, i.e. 8 iterations.
for(i=0;i < 64; i+= SIMD_STEP)
{
// Filling array for the 2nd stage
}
// i advances by SIMD_STEP (8) over 0..15, i.e. 2 iterations.
for(i=0;i < 16; i+= SIMD_STEP)
{
// Filling array for the 3rd stage
}
}

/*
 * Thread entry point. thread_info points to a struct thread_data from which
 * the half-open index range [start, stop) and the input/output float buffers
 * are read. The loop walks that range 8 floats at a time, loading three
 * vectors from the input at offsets n, 256 + n and 512 + n and storing three
 * vectors to the same offsets of the output.
 *
 * Does not return normally: the thread ends through pthread_exit(NULL).
 *
 * NOTE(review): vec_a/vec_b/vec_c and T_fac1/T_fac2/T_fac3 are not declared
 * in this snippet, and ind3 is declared but never used.
 */
void *routine(void *thread_info)
{
int n;
unsigned t_start,t_stop;
unsigned ind1, ind2, ind3;
float *arr_in , *arr_out;
struct thread_data *mydata;

// Unpack the per-thread work description.
mydata = (struct thread_data*) thread_info;
t_start = mydata->start;
t_stop = mydata->stop;
arr_in = mydata->input;
arr_out = mydata->output;

// n is a float offset and advances by 8 floats (one __m256) per iteration.
for (n = t_start; n < t_stop; n += 8)
{
ind1 = 256 + n;
ind2 = 512 + n;

// NOTE(review): _mm256_load_ps/_mm256_store_ps are the aligned variants;
// this presumably relies on the buffers being 32-byte aligned and on n being
// a multiple of 8 - confirm against the caller.
vec_a = _mm256_load_ps((float *) (&arr_in[n ]) );
vec_b = _mm256_load_ps((float *) (&arr_in[ind1]) );
vec_c = _mm256_load_ps((float *) (&arr_in[ind2]) );

// NOTE(review): the tables are arrays of __m256, so [n] selects the n-th
// 8-float vector, not the vector starting at float n. Init_arrays allocates
// only 32, 4 and 2 elements for array_1, array_2 and array_3, while n grows
// in steps of 8, so these reads go past the end of the allocations.
T_fac1 = array_1[n];
T_fac2 = array_2[n];
T_fac3 = array_3[n];
// print data 'printf()'

// further computations

_mm256_store_ps((float *) (&arr_out[n ]), (vec_a) );
_mm256_store_ps((float *) (&arr_out[ind1]), (vec_b) );
_mm256_store_ps((float *) (&arr_out[ind2]), (vec_c) );
}
pthread_exit(NULL);
}

/*
 * Splits the work into slices, starts one thread per slice running routine(),
 * then waits for the threads to finish.
 *
 * in  - input buffer handed to every thread
 * out - output buffer handed to every thread
 *
 * thread_data_array, threads, QUARTER and NUM_THREADS are defined outside
 * this snippet.
 */
void foo(float* in,float* out)
{
unsigned t,i=0;
// t takes the values 0, 64, 128, 192, so four threads are created.
for(t=0;t<256;t+=64)
{
// Each thread gets the range [t, t + QUARTER).
// NOTE(review): presumably QUARTER is 64 so the ranges tile 0..255 - confirm.
thread_data_array[i].start = t;
thread_data_array[i].stop = t+QUARTER;
thread_data_array[i].input = in;
thread_data_array[i].output = out;
// NOTE(review): the pthread_create() result is not checked.
pthread_create(&threads[i],NULL,routine,(void*)&thread_data_array[i]);
i++;
}
// Join NUM_THREADS threads; a join failure is reported on stderr and the
// loop continues with the next thread.
// NOTE(review): assumes NUM_THREADS matches the four threads created above.
for(i=0; i<NUM_THREADS; i++)
{
int rc = pthread_join(threads[i], NULL);
if (rc)
{
fprintf(stderr, "failed to join thread #%u - %s\n",i, strerror(rc));
}
}
}

/*
 * Program entry point: allocates two 32-byte-aligned buffers of 1024 floats,
 * loads the inputs, initialises the global tables and runs the threaded
 * computation through foo(data1, data2).
 *
 * Returns EXIT_SUCCESS.
 */
int main()
{
float *data1;
float *data2;

// NOTE(review): the posix_memalign() results are not checked, and the
// buffers are not freed before returning.
posix_memalign((void**)&data1, 32, 1024 * sizeof(float));
posix_memalign((void**)&data2, 32, 1024 * sizeof(float));

// NOTE(review): reals and imags are not declared in this snippet; how they
// relate to data1/data2 cannot be determined from here.
Load_inputs(reals,imags);//load data into the two arrays
Init_arrays();
// print data 'printf()'
foo(data1,data2);
return EXIT_SUCCESS;
}

由于某种原因,在线程内部从 array_1 读取到的数据与预期不符,我不知道其背后的原因。下面是 array_1 的输出对比:左侧是在 main 中打印的(正确的)值,右侧是在线程中打印的值。

     Display from the main                Display from the thread
RE = 1.000000 IM = -0.000000 RE = 1.000000 IM = -0.000000
RE = 0.999981 IM = -0.006136 RE = 0.399624 IM = 0.671559
RE = 0.999925 IM = -0.012272 RE = 0.416430 IM = 0.634393
RE = 0.999831 IM = -0.018407 RE = 0.433094 IM = 0.595699
RE = 0.999699 IM = -0.024541 RE = 0.449612 IM = 0.555570
RE = 0.999529 IM = -0.030675 RE = 0.465977 IM = 0.514103
RE = 0.999322 IM = -0.036807 RE = 0.482184 IM = 0.471397
RE = 0.999078 IM = -0.042938 RE = 0.498228 IM = 0.427555
RE = 0.998795 IM = -0.049068 // the same
RE = 0.998476 IM = -0.055195 // the same
RE = 0.998118 IM = -0.061321 // the same
RE = 0.997723 IM = -0.067444 // the same
RE = 0.997290 IM = -0.073565 // the same
RE = 0.996820 IM = -0.079682 // the same
RE = 0.996313 IM = -0.085797 // the same
RE = 0.995767 IM = -0.091909 // the same

有人知道这个错误结果背后的原因是什么吗?

最佳答案

给定

__m256 *array_1;
__m256 *array_2;
__m256 *array_3;
#define ALIGNMENT 32
#define SIMD_STEP 8

void Init_arrays()
{
int i;
posix_memalign((void**) &array_1, ALIGNMENT, 32*sizeof(__m256));
posix_memalign((void**) &array_2, ALIGNMENT, 4 *sizeof(__m256));
posix_memalign((void**) &array_3, ALIGNMENT, 2 *sizeof(__m256));
.
.
.

下面这个循环访问的数组元素远远超出了数组的范围:

for (n = t_start; n < t_stop; n += 8)
{
.
.
.
T_fac1 = array_1[n];
T_fac2 = array_2[n];
T_fac3 = array_3[n];

array_3 总共只有两个元素(2 * sizeof(__m256)),但索引 n 每次却增加 8?

关于c - 线程内读取错误的数组,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47555306/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com