gpt4 book ai didi

c - 如何在 OpenMP 并行区域内声明和 malloc 指针? (错误 : Segment violation ('core' generated))

转载 作者:太空宇宙 更新时间:2023-11-04 07:51:42 24 4
gpt4 key购买 nike

我是这样做的:

/*
 * Question's version: meant to turn per-cluster coordinate sums into centroids
 * (coordinate-wise means). This is the code that crashes with a segmentation
 * fault; the review notes below mark the problems.
 *
 * numCoords                 number of coordinates per point
 * numObjs                   number of points
 * numClusters               number of clusters
 * dataSetMatrix             points, flat: point i, coordinate j at [i*numCoords + j]
 * clusterAssignmentCurrent  cluster index of point i at [i]
 * clustersCentroID          flat, cluster c, coordinate j at [c*numCoords + j];
 *                           coordinates are added to its existing contents and
 *                           then divided in place
 */
void calculateClusterCentroIDs(int numCoords, int numObjs, int numClusters, float * dataSetMatrix, int * clusterAssignmentCurrent, float *clustersCentroID) {

// NOTE(review): allocated here and freed at the end, but never read or written in between
int * clusterMemberCount = (int *) calloc (numClusters,sizeof(int));

#pragma omp parallel
{
// NOTE(review): declared inside the parallel region, so every thread gets its
// own private copy of these two pointers
int ** localClusterMemberCount;
int * activeCluster;
// Only the one thread that executes this single block assigns the pointers;
// the copies held by all other threads stay uninitialized
#pragma omp single
{
localClusterMemberCount = (int **) malloc (omp_get_num_threads() * sizeof(int *));
//localClusterMemberCount[0] = (int *) calloc (omp_get_num_threads()*numClusters,sizeof(int));
for (int i = 0; i < omp_get_num_threads(); ++i) {
localClusterMemberCount[i] = calloc (numClusters,sizeof(int));
//localClusterMemberCount[i] = localClusterMemberCount[i-1] + numClusters;
}
activeCluster = (int *) calloc (omp_get_num_threads(),sizeof(int));
}

// sum all points
// for every point
// NOTE(review): no worksharing directive on this loop, so each thread appears
// to run all numObjs iterations; threads whose pointers were never assigned
// dereference garbage here (the reported segfault)
for (int i = 0; i < numObjs; ++i) {
// which cluster is it in?
activeCluster[omp_get_thread_num()] = clusterAssignmentCurrent[i];
// update count of members in that cluster
++localClusterMemberCount[omp_get_thread_num()][activeCluster[omp_get_thread_num()]];
// sum point coordinates for finding centroid
for (int j = 0; j < numCoords; ++j)
#pragma omp atomic
clustersCentroID[activeCluster[omp_get_thread_num()]*numCoords + j] += dataSetMatrix[i*numCoords + j];
}

// now divide each coordinate sum by number of members to find mean/centroid
// for each cluster
// NOTE(review): every thread divides the shared sums by its own local count,
// so a sum is divided repeatedly; successive divisions by partial counts do
// not give the mean (1/(4+6) is not (1/4)/6)
for (int i = 0; i < numClusters; ++i) {
if (localClusterMemberCount[omp_get_thread_num()][i] != 0)
// for each coordinate (dimension)
for (int j = 0; j < numCoords; ++j)
#pragma omp atomic
clustersCentroID[i*numCoords + j] /= localClusterMemberCount[omp_get_thread_num()][i]; // empty clusters are skipped by the check above
}

// free memory
// NOTE(review): there is no barrier before this block, so the freeing thread
// may run while other threads are still in the loops above; it may also be a
// thread whose private pointers were never assigned. Only row 0 is freed, the
// rows allocated for the other threads are leaked.
#pragma omp single
{
free (localClusterMemberCount[0]);
free (localClusterMemberCount);
free (activeCluster);
}
}
free(clusterMemberCount);

但我收到错误:Segment violation ('core' generated)。所以我一定是哪里做错了。我认为错误出在对指针的 malloc 上,因为我已经试过顺序版本的代码,它工作正常。我也试过不使用 malloc 的并行代码(使用全局变量加 atomic),同样工作正常。只有当我尝试创建私有(private)指针并对其进行 malloc 时,才会出现该错误。

知道如何解决吗?

最佳答案

段错误的两个原因:

  • localClusterMemberCount 应该是在并行区域外声明的共享变量,并在并行区域内由单个线程(single 区块)初始化。否则,每个线程都有自己的变量副本,而且除了执行 single 区块的那个线程之外,其他所有线程的副本都指向随机的内存位置。
  • 在释放指针的代码段之前需要一个隐式或显式的屏障(barrier)。在释放内存之前,必须确保所有线程都已完成工作,否则某个线程可能会释放仍在被其他线程使用的指针。

代码还有其他几个问题。请参阅下面代码中我用 *** 标记的注释:

#ifdef _OPENMP
#include <omp.h>
#else
/* Serial fallbacks: without OpenMP the pragmas below are ignored and the
 * routine runs on a single "thread". */
static int omp_get_num_threads(void) { return 1; }
static int omp_get_thread_num(void) { return 0; }
#endif

/*
 * Compute the centroid (coordinate-wise mean) of every cluster.
 *
 * numCoords                 number of coordinates per point
 * numObjs                   number of points
 * numClusters               number of clusters
 * dataSetMatrix             points, flat: point i, coordinate j at [i*numCoords + j]
 * clusterAssignmentCurrent  cluster index of point i at [i]; used to index
 *                           arrays of numClusters entries
 * clustersCentroID          in/out, flat: cluster c, coordinate j at
 *                           [c*numCoords + j]. Point coordinates are ADDED to
 *                           the existing contents (the function does not clear
 *                           them first), then the row of every non-empty
 *                           cluster is divided by its member count. Rows of
 *                           empty clusters are left as they are.
 *
 * Allocation results are not checked.
 */
void calculateClusterCentroIDs(int numCoords, int numObjs, int numClusters, float * dataSetMatrix, int * clusterAssignmentCurrent, float *clustersCentroID) {

    // Global member count per cluster, filled by the reduction step below
    int * clusterMemberCount = (int *) calloc (numClusters,sizeof(int));

    /* ***
    * This has to be a shared variable that each thread can access
    * If declared inside the parallel region, it will be a thread-local variable
    * which is left un-initialized for all but one thread. Further attempts to access
    * that variable will lead to segfaults
    */
    int ** localClusterMemberCount = NULL;
    #pragma omp parallel shared(localClusterMemberCount,clusterMemberCount)
    {

        // *** Make activeCluster a thread-local variable rather than a shared array (shared array will result in false sharing)
        int activeCluster;
        const int threadId = omp_get_thread_num();

        // One thread allocates a zeroed count row per thread; the implicit
        // barrier at the end of single makes the rows visible to everyone
        #pragma omp single
        {
            const int numThreads = omp_get_num_threads();
            localClusterMemberCount = (int **) malloc (numThreads * sizeof(int *));
            for (int t = 0; t < numThreads; ++t) {
                localClusterMemberCount[t] = calloc (numClusters,sizeof(int));
            }
        }

        // sum all points
        // *** The points are distributed among the threads; without the
        // *** worksharing directive every thread would process every point
        #pragma omp for
        for (int i = 0; i < numObjs; ++i) {
            // which cluster is it in?
            activeCluster = clusterAssignmentCurrent[i];
            // update this thread's count of members in that cluster
            ++localClusterMemberCount[threadId][activeCluster];
            // sum point coordinates for finding centroid

            // *** This may be slower in parallel because of the atomic operation
            for (int j = 0; j < numCoords; ++j) {
                #pragma omp atomic
                clustersCentroID[activeCluster*numCoords + j] += dataSetMatrix[i*numCoords + j];
            }
        }

        /* ***
        * Missing: one reduction step
        * The global cluster member count needs to be updated
        * one option is below :
        */
        #pragma omp critical
        for (int i = 0; i < numClusters; ++i) {
            clusterMemberCount[i] += localClusterMemberCount[threadId][i];
        }
        #pragma omp barrier // wait here until every thread has added its counts



        // *** The code below was wrong; to compute the average, coordinates should be divided by the global count
        // *** Successive divisions by local count will fail. Like, 1/(4+6) is not the same as (1/4)/6

        // now divide each coordinate sum by number of members to find mean/centroid
        // for each cluster
        #pragma omp for
        for (int i = 0; i < numClusters; ++i) {
            // empty clusters are skipped, which avoids dividing by zero
            if (clusterMemberCount[i] != 0) {
                // for each coordinate (dimension)
                #pragma omp simd //not sure this will help, the compiler may already vectorize that
                for (int j = 0; j < numCoords; ++j) {
                    // *** atomic is not needed:
                    // *** only one thread will access each value of clustersCentroID
                    clustersCentroID[i*numCoords + j] /= clusterMemberCount[i];
                }
            }
        }

        /* ***
        * A barrier is needed before freeing, otherwise the first thread arriving
        * there will start to free the memory while other threads may still be
        * accessing localClusterMemberCount, which would result in a segfault.
        *
        * The implicit barrier at the end of the distributed for loop above
        * provides it. With the initial code, an explicit barrier
        * would have been needed.
        */

        // free memory
        #pragma omp single
        {
            // *** Need to free all pointers and not only the first one
            const int numThreads = omp_get_num_threads();
            for (int t = 0; t < numThreads; ++t) {
                free (localClusterMemberCount[t]);
            }
            free (localClusterMemberCount);
        }
    }
    free(clusterMemberCount);
}

关于c - 如何在 OpenMP 并行区域内声明和 malloc 指针? (错误 : Segment violation ('core' generated)),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53412033/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com