gpt4 book ai didi

c - 使用 openmp 并行化 C 代码

转载 作者:行者123 更新时间:2023-11-30 15:44:32 24 4
gpt4 key购买 nike

我正在尝试使用 OpenMP 并行执行以下程序:

#include <stdio.h>
#include <time.h>

// Counts the primes among the 3000000 odd numbers 100000003, 100000005, ...,
// 106000001 and prints the first 20 of them, followed by the total count and
// the elapsed wall-clock time in whole seconds.
int main(void) {
    enum { START = 100000001, CANDIDATES = 3000000, MAX_SAVED = 20 };

    int number = START;
    int primes[MAX_SAVED];
    int i, j, is_prime, index = 0, nprimes = 0;
    time_t start_time, end_time;

    start_time = time(NULL);
    for (i = 0; i < CANDIDATES; i++) {
        // Advance to the next odd candidate.
        number += 2;
        is_prime = 1;
        // Trial division up to sqrt(number). The test "j <= number / j" is
        // "j * j <= number" written so that it cannot overflow int. A fixed
        // bound of 10000 is too small here: sqrt(106000001) is about 10295,
        // so e.g. 10007 * 10009 = 100160063 would be reported as prime.
        for (j = 2; j <= number / j; j++) {
            if ((number % j) == 0) {
                is_prime = 0;
                break;
            }
        }
        // Found a prime: count it and remember the first MAX_SAVED of them.
        if (is_prime) {
            nprimes++;
            if (index < MAX_SAVED) {
                primes[index] = number;
                index++;
            }
        }
    }

    // Print only the slots that were actually filled.
    for (i = 0; i < index; i++) {
        printf("%d is prime\n", primes[i]);
    }
    end_time = time(NULL);
    // difftime() returns a double, so the format no longer depends on how the
    // platform defines time_t.
    printf("number of primes = %d, elapsed time is %.0f seconds\n",
           nprimes, difftime(end_time, start_time));
    return 0;
}

我所做的是:

#include <stdio.h>
#include <time.h>
#include <omp.h>
#define CHUNKSIZE 750000
//#define CHUNKSIZE2 2500

// Attempted OpenMP version of the prime-counting program: it is meant to test
// 3000000 odd candidates above 100000001, count the primes and keep the first
// 20 of them.
// NOTE(review): the upper bound 16000001 quoted in the original comment is
// below the start value; 3000000 steps of 2 from 100000001 end at 106000001.
// NOTE(review): as pasted, this snippet contains one more '{' than '}', so it
// does not compile without adding a closing brace.
int main() {

int number = 100000001;
int primes[20];
int i, j, is_prime, index = 0, nprimes = 0;
time_t start_time, end_time;

start_time = time(NULL);
int chunk = CHUNKSIZE;
//int chunk2 = CHUNKSIZE2;
// Opens a parallel region; number, index, nprimes and chunk are shared by all
// threads, while i, j and is_prime are private to each thread.
#pragma omp parallel shared(number, index, nprimes, chunk) private(i, j, is_prime)
{
// NOTE(review): "parallel for" inside an existing parallel region starts a new
// (nested) parallel region for every thread of the outer team instead of
// dividing the iterations among that team; a work-sharing "#pragma omp for"
// is presumably what was intended -- confirm against the OpenMP specification.
#pragma omp parallel for schedule (dynamic, chunk)
for (i = 0; i < 3000000; i++) {
// Get the next number to check for primality.
// NOTE(review): number is shared and updated without synchronization, so the
// value tested below depends on thread timing (data race), and each iteration
// depends on the increment made by the previous one.
number += 2;
is_prime = 1;
//#pragma omp parallel for schedule (dynamic, chunk2)
// Trial division by every j from 2 to 10000; stops at the first divisor.
for (j = 2; j < 10001; j++) {
if ((number % j) == 0) {
is_prime = 0;
break;
}
}
// Found a prime number. Count it and save the first 20 primes.
// NOTE(review): nprimes, index and primes[] are shared and modified here
// without synchronization (data race).
if (is_prime) nprimes++;
if (is_prime && (index < 20)) {
primes[index] = number;
index++;
}
}

// Prints all 20 slots of primes[], whether or not each one was filled.
for (i = 0; i < 20; i++)
printf("%d is prime\n", primes[i]);
end_time = time(NULL);
// NOTE(review): end_time - start_time has type time_t but is printed with %d.
printf("number of primes = %d, elapsed time is %d seconds\n", nprimes, end_time - start_time);
//return 0;
}

我尝试了很多事情,但大多数都给了我更长或相同的时间!!!

最佳答案

number 变量由所有线程共享并在每次迭代中递增,这就形成了迭代之间的依赖;为了保证 number += 2 的结果一致,每次迭代都必须等待上一次迭代完成,因此无法真正并行计算。

您可以另外引入一个线程私有的变量(此处为 n)来规避这个问题,它的值直接由循环索引 (i) 计算得出,而不依赖上一次迭代的结果。

一个 pragma omp parallel for 就足够了:

#include <stdio.h>
#include <time.h>
#include <omp.h>
#define CHUNKSIZE 750000
//#define CHUNKSIZE2 2500

// Counts the primes among the 300000 odd numbers 100000001, 100000003, ...,
// 100599999 and prints the first 20 of them, followed by the total count and
// the elapsed wall-clock time in whole seconds. The counting loop is spread
// over the available OpenMP threads.

// Returns 1 when n is prime and 0 otherwise, using trial division.
static int is_prime_number(int n) {
    if (n < 2) {
        return 0;
    }
    // "j <= n / j" is "j * j <= n" written so that it cannot overflow int.
    // A fixed bound of 10000 is too small for this range: it would report
    // 10007 * 10009 = 100160063 as prime.
    for (int j = 2; j <= n / j; j++) {
        if (n % j == 0) {
            return 0;
        }
    }
    return 1;
}

int main(void) {
    enum { FIRST_CANDIDATE = 100000001, NUM_CANDIDATES = 300000, MAX_SAVED = 20 };

    int primes[MAX_SAVED];
    int saved = 0, nprimes = 0;
    time_t start_time, end_time;

    start_time = time(NULL);

    // Each candidate is derived from the loop index alone, so iterations are
    // independent. reduction(+:nprimes) gives every thread its own counter and
    // adds them up at the end; a plain shared "nprimes++" is a data race that
    // silently loses increments.
    #pragma omp parallel for reduction(+:nprimes)
    for (int i = 0; i < NUM_CANDIDATES; i++) {
        if (is_prime_number(FIRST_CANDIDATE + 2 * i)) {
            nprimes++;
        }
    }

    // The first MAX_SAVED primes are collected serially: filling a shared
    // array from the parallel loop would store whichever primes the threads
    // happen to reach first, in no particular order. This pass stops after a
    // few hundred candidates, so its cost is negligible.
    for (int i = 0; i < NUM_CANDIDATES && saved < MAX_SAVED; i++) {
        int n = FIRST_CANDIDATE + 2 * i;
        if (is_prime_number(n)) {
            primes[saved] = n;
            saved++;
        }
    }

    // Print only the slots that were actually filled.
    for (int i = 0; i < saved; i++) {
        printf("%d is prime\n", primes[i]);
    }
    end_time = time(NULL);
    // difftime() returns a double, so the format no longer depends on how the
    // platform defines time_t.
    printf("number of primes = %d, elapsed time is %.0f seconds\n",
           nprimes, difftime(end_time, start_time));
    return 0;
}

以下是用 gcc 编译后的运行结果(为了避免等待太久,减少了计算量):

$ gcc -fopenmp -o tt tt.c
$ time OMP_NUM_THREADS=1 ./tt
100000007 is prime
[...]
100000393 is prime
number of primes = 326390, elapsed time is 21 seconds

real 0m20.507s
user 0m20.492s
sys 0m0.001s
$ time OMP_NUM_THREADS=8 ./tt
101500027 is prime
[...]
105250049 is prime
number of primes = 325580, elapsed time is 3 seconds
real 0m3.041s
user 0m24.284s
sys 0m0.002s

关于c - 使用 openmp 并行化 C 代码,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/19435409/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com