gpt4 book ai didi

c - OpenAcc 归并排序程序中的浮点异常

转载 作者:行者123 更新时间:2023-11-30 16:47:03 25 4
gpt4 key购买 nike

#include<stdlib.h>
#include<stdio.h>
#include <time.h>
#include <omp.h>
#include <openacc.h>
#define THR 10


//Function to test if the output is in asending order or not
void test(int a[], int n) {
int i;
for (i=1;i<n;++i) {
if (a[i]<a[i-1]) {
break;
}
}
if (i<n) {
for (i=1;i<n;++i) {
if (a[i]>a[i-1]){
break;
}
}
if (i<n) {
printf("\nArray is not sorted\n");
}
}
else {
printf("\nArray is sorted\n");
}
}

/* Function to sort an array using insertion sort in serial*/
void isort (int *array, int low, int mid, int high) {

for (int i = mid; i <= high; i++) {
for (int j = i - 1; j >= 0; j--) {
if (array[i] < array [j]) {
int holder = array[j];
array[j] = array[i];
array[i] = holder;
i--;
}
}
}
}
/* Function to merge */
void merge(int arr[], int l, int m, int r);

// Utility function to find minimum of two integers
#pragma acc routine seq
int min(int x, int y) { return (x<y)? x :y; }

/* Iterative mergesort function to sort arr[0...n-1] */
void mergeSort(int arr[], int n)
{
int curr_size; // For current size of subarrays to be merged
// curr_size varies from 1 to n/2
int left_start; // For picking starting index of left subarray
// to be merged
#pragma acc kernels //pcopy(arr[0:n1])// pcopying (R[0:n2])
{
#pragma acc loop independent
for (curr_size=1; curr_size<=n-1; curr_size = 2*curr_size)
{
#pragma acc loop independent
// Pick starting point of different subarrays of current size
for (left_start=0; left_start<n-1; left_start += 2*curr_size)
{
// Find ending point of left subarray. mid+1 is starting
// point of right
int mid = left_start + curr_size - 1;

int right_end = min(left_start + 2*curr_size - 1, n-1);

// Merge Subarrays arr[left_start...mid] & arr[mid+1...right_end]
merge(arr, left_start, mid, right_end);
}
}
}}

/* Function to merge the two haves arr[l..m] and arr[m+1..r] of array arr[]
*/
#pragma acc routine vector
void merge(int arr[], int l, int m, int r)
{
int i, j, k;
int n1 = m - l + 1;
int n2 = r - m;

/* create temp arrays */
int *L, *R;
L = (int *)malloc(sizeof(int) * n1);
R = (int *)malloc(sizeof(int) * n2);
/* Copy data to temp arrays L[] and R[] */
#pragma acc loop independent
for (i = 0; i < n1; i++)
L[i] = arr[l + i];
#pragma acc loop independent
for (j = 0; j < n2; j++)
R[j] = arr[m + 1+ j];

/* Merge the temp arrays back into arr[l..r]*/
i = 0;
j = 0;
k = l;

#pragma acc loop independent private(k) reduction(+:j) private (L[0:n1]) private (R[0:n1]) private (arr[0:n1])reduction(+:i)
// private(k) reduction(+:j)
while (i < n1 && j < n2)
{
if (L[i] <= R[j])
{
arr[k] = L[i];
i++;
}
else
{
arr[k] = R[j];
j++;
}
k++;
}

/* Copy the remaining elements of L[], if there are any */

#pragma acc loop seq private (arr[0:n2])
while (i < n1)
{
arr[k] = L[i];
i++;
k++;
}

/* Copy the remaining elements of R[], if there are any */
#pragma acc loop seq private (arr[0:n2])
while (j < n2)
{
arr[k] = R[j];
j++;
k++;
}
free(L);
free(R);
}


/* Function to print an array */
void printArray(int A[], int size)
{
int i;
for (i=0; i < size; i++)
printf("%d ", A[i]);
printf("\n");
}

/* Driver program to test above functions */
int main()
{
int i, n=20, *a;
double startTime, endTime;

printf("How many elements in the array? ");

a = (int *)malloc(sizeof(int) * n);
srand(time(0));
for(i=0;i<n;i++)
{
a[i]=rand()%1000;
}
printf("List Before Sorting...\n");
printArray(a, n);
if (n<=THR)
{
startTime = omp_get_wtime();
isort(a,0,0,n);
endTime = omp_get_wtime();
printf("\nSorted array: ");
printArray(a,n);
printf("\n");
test(a,n);
printf("IN");
printf("\nTime: %g\n",endTime-startTime);
exit(0);
}

else
{
startTime = omp_get_wtime();
mergeSort(a,n);
endTime = omp_get_wtime();
printf("\nSorted array: ");
printArray(a,n);
printf("\n");
test(a,n);
printf("ACC");
printf("\nTime: %g\n",endTime-startTime);
printf("\nSize of the array is %d",n);

exit(0);
}
}

在阅读了 PGI 论坛中的大量文章以及 stackoverflow 中 @MAT 的帮助后,我设法解决了大部分错误,现在我使用迭代合并排序而不是递归,因为我发现 OpenAcc 不能很好地与递归配合使用功能。现在,我只剩下一个错误,这是合并函数中的错误:-

109、没有整数行程计数的循环将以顺序模式执行

我读到http://www.pgroup.com/userforum/viewtopic.php?p=17748&sid=0aa1537bdc68fc1f6fac74f66a788970也但无法确定在我的程序中使用它。由于这个错误,我的结果是:-

数组中有多少个元素?排序前列出...801 673 288 374 516 908 473 130 874 928 491 406 276 302 186 442 865 341 624 725浮点异常

即使我在程序中使用整数,也会出现浮点异常。

最佳答案

这里有一些问题。

看起来可能存在“mid”大于“right_end”的情况,这将导致 malloc 的大小为负值。虽然我不确定这是正确的修复,但我添加了一个 if 语句来在 mid>right_end 时跳过合并。

并行循环必须是可数的,即迭代次数在循环开始时已知。因此,您不能在 while 循环周围放置“loop”指令。

将“arr”放入私有(private)子句中并不是您想要的。您想要更新全局数组,而不是私有(private)副本。 Private 将为每个帮派、 vector 或 worker 创建变量的私有(private)副本(取决于带有 private 子句的循环的时间表)。私有(private)变量在内核执行后消失。

此外,您的“curr_size”for 循环具有依赖性,因此无法并行化。这里的问题是它跨步为“curr_size=2*curr_size”。每次迭代都需要前一次迭代的 curr_size 值,然后才能计算自己的 curr_size。

请注意,在计算区域内使用“malloc”是有问题的。首先,设备端 malloc 非常慢,因此会对性能产生不利影响。此外,设备上的堆非常小(默认为 8MB,但可以通过设置环境变量 PGI_ACC_HEAPSIZE 来增加),因此非常大的 n 值可能会溢出堆并给出非法地址错误。

这是固定代码:

% cat test.c
#include<stdlib.h>
#include<stdio.h>
#include <time.h>
#include <omp.h>
#include <openacc.h>
#define THR 10


//Function to test if the output is in asending order or not
void test(int a[], int n) {
int i;
for (i=1;i<n;++i) {
if (a[i]<a[i-1]) {
break;
}
}
if (i<n) {
for (i=1;i<n;++i) {
if (a[i]>a[i-1]){
break;
}
}
if (i<n) {
printf("\nArray is not sorted\n");
}
}
else {
printf("\nArray is sorted\n");
}
}

/* Function to sort an array using insertion sort in serial*/
void isort (int *array, int low, int mid, int high) {

for (int i = mid; i <= high; i++) {
for (int j = i - 1; j >= 0; j--) {
if (array[i] < array [j]) {
int holder = array[j];
array[j] = array[i];
array[i] = holder;
i--;
}
}
}
}
/* Function to merge */
void merge(int arr[], int l, int m, int r);

// Utility function to find minimum of two integers
#pragma acc routine seq
int min(int x, int y) { return (x<y)? x :y; }

/* Iterative mergesort function to sort arr[0...n-1] */
void mergeSort(int arr[], int n)
{
int curr_size; // For current size of subarrays to be merged
// curr_size varies from 1 to n/2
int left_start; // For picking starting index of left subarray
// to be merged
#pragma acc data copy(arr[0:n])// pcopying (R[0:n2])
{
for (curr_size=1; curr_size<=n-1; curr_size = 2*curr_size)
{
#pragma acc parallel loop
// Pick starting point of different subarrays of current size
for (left_start=0; left_start<n-1; left_start += 2*curr_size)
{
// Find ending point of left subarray. mid+1 is starting
// point of right
int mid = left_start + curr_size - 1;

int right_end = min(left_start + 2*curr_size - 1, n-1);

// Merge Subarrays arr[left_start...mid] & arr[mid+1...right_end]
if (mid < right_end) merge(arr, left_start, mid, right_end);
}
}
}}

/* Function to merge the two haves arr[l..m] and arr[m+1..r] of array arr[]
*/
#pragma acc routine(merge) vector
void merge(int arr[], int l, int m, int r)
{
int i, j, k;
int n1 = m - l + 1;
int n2 = r - m;

/* create temp arrays */
int *L, *R;
L = (int *)malloc(sizeof(int) * n1);
R = (int *)malloc(sizeof(int) * n2);
/* Copy data to temp arrays L[] and R[] */
#pragma acc loop independent
for (i = 0; i < n1; i++)
L[i] = arr[l + i];
#pragma acc loop independent
for (j = 0; j < n2; j++)
R[j] = arr[m + 1+ j];

/* Merge the temp arrays back into arr[l..r]*/
i = 0;
j = 0;
k = l;

while (i < n1 && j < n2)
{
if (L[i] <= R[j])
{
arr[k] = L[i];
i++;
}
else
{
arr[k] = R[j];
j++;
}
k++;
}

/* Copy the remaining elements of L[], if there are any */

while (i < n1)
{
arr[k] = L[i];
i++;
k++;
}

/* Copy the remaining elements of R[], if there are any */
while (j < n2)
{
arr[k] = R[j];
j++;
k++;
}
free(L);
free(R);
}


/* Function to print an array */
void printArray(int A[], int size)
{
int i;
for (i=0; i < size; i++)
printf("%d ", A[i]);
printf("\n");
}

/* Driver program to test above functions */
int main()
{
int i, n=100, *a;
double startTime, endTime;

printf("How many elements in the array? ");

a = (int *)malloc(sizeof(int) * n);
srand(time(0));
for(i=0;i<n;i++)
{
a[i]=rand()%1000;
}
printf("List Before Sorting...\n");
printArray(a, n);
if (n<=THR)
{
startTime = omp_get_wtime();
isort(a,0,0,n);
endTime = omp_get_wtime();
printf("\nSorted array: ");
printArray(a,n);
printf("\n");
test(a,n);
printf("IN");
printf("\nTime: %g\n",endTime-startTime);
exit(0);
}

else
{
startTime = omp_get_wtime();
mergeSort(a,n);
endTime = omp_get_wtime();
printf("\nSorted array: ");
printArray(a,n);
printf("\n");
test(a,n);
printf("ACC");
printf("\nTime: %g\n",endTime-startTime);
printf("\nSize of the array is %d\n",n);

exit(0);
}
}

% pgcc -Minfo=accel -acc test.c -fast -o gpu
min:
51, Generating acc routine seq
Generating Tesla code
mergeSort:
60, Generating copy(arr[:n])
64, Accelerator kernel generated
Generating Tesla code
66, #pragma acc loop gang /* blockIdx.x */
merge:
84, Generating Tesla code
95, #pragma acc loop vector /* threadIdx.x */
98, #pragma acc loop vector /* threadIdx.x */
106, #pragma acc loop seq
123, #pragma acc loop seq
131, #pragma acc loop seq
95, Loop is parallelizable
98, Loop is parallelizable
106, Loop carried scalar dependence for i at line 119,126,125
Loop carried scalar dependence for j at line 106,108
Loop carried scalar dependence for i at line 111,110
Loop carried scalar dependence for j at line 116
Scalar last value needed after loop for i at line 123
Loop carried scalar dependence for j at line 134,133
Loop carried scalar dependence for i at line 108
Loop carried scalar dependence for j at line 115
110, Accelerator restriction: induction variable live-out from loop: k
115, Accelerator restriction: induction variable live-out from loop: k
118, Accelerator restriction: induction variable live-out from loop: k
% ./gpu
How many elements in the array? List Before Sorting...
58 612 99 182 914 802 452 229 856 644 932 339 650 915 533 300 456 892 860 764 866 229 400 561 328 170 573 121 364 392 801 775 356 901 309 622 703 762 851 911 406 135 602 56 51 136 708 507 380 568 623 246 797 24 160 125 194 733 599 910 477 400 685 185 653 995 808 709 109 659 972 867 147 575 276 198 711 984 57 91 553 680 689 702 56 849 828 602 935 779 865 412 179 550 598 185 897 758 894 6

Sorted array: 6 24 51 56 56 57 58 91 99 109 121 125 135 136 147 160 170 179 182 185 185 194 198 229 229 246 276 300 309 328 339 356 364 380 392 400 400 406 412 452 456 477 507 533 550 553 561 568 573 575 598 599 602 602 612 622 623 644 650 653 659 680 685 689 702 703 708 709 711 733 758 762 764 775 779 797 801 802 808 828 849 851 856 860 865 866 867 892 894 897 901 910 911 914 915 932 935 972 984 995


Array is sorted
ACC
Time: 1.26239

Size of the array is 100

关于c - OpenAcc 归并排序程序中的浮点异常,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43504239/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com