gpt4 book ai didi

cuda - CUDA中偶数和奇数的分离

转载 作者:行者123 更新时间:2023-12-02 20:46:26 25 4
gpt4 key购买 nike

我有一个数字数组 {1,2,3,4,5,6,7,8,9,10},我想将偶数和奇数分隔为:

even = {2,4,6,8,10}

和:

odd = {1,3,5,7,9}

我了解 CUDA 中的原子操作,也知道输出结果不应受到竞争条件的影响。但我不想使用原子操作。在不使用原子操作的情况下,如何实现这一目标?

代码:

#include <stdio.h>
#include <cuda.h>

// Kernel that executes on the CUDA device.
//
// Splits `total` into its even and odd values without atomics. Each thread
// owns one input element and computes its output slot as the number of
// earlier elements (lower index) that have the same parity. The slot depends
// only on the read-only input, so every thread writes a distinct location:
// there is no race, no need for pre-zeroed output buffers, and the input
// order is preserved within each output array.
//
//   total : input values, N elements (only read). Values are converted to
//           int for the parity test.
//   even  : output; even values are packed at the front in input order.
//           Must have room for N elements in the worst case.
//   odd   : output; odd values are packed at the front in input order.
//           Must have room for N elements in the worst case.
//   N     : number of elements in `total`.
//
// Launch with a 1D configuration providing at least N threads in total;
// surplus threads return immediately. Each thread performs idx global reads,
// which is fine for small N; for large inputs prefer a prefix-sum based
// partition.
__global__ void square_array(float *total,float *even,float *odd, int N)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= N)
        return;  // guard threads past the end of the input

    float value = total[idx];
    bool valueIsEven = ((int)value % 2) == 0;

    // Rank of this element among the elements of the same parity.
    int rank = 0;
    for (int i = 0; i < idx; i++)
    {
        bool otherIsEven = ((int)total[i] % 2) == 0;
        if (otherIsEven == valueIsEven)
            rank++;
    }

    if (valueIsEven)
        even[rank] = value;
    else
        odd[rank] = value;
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is an error, false on cudaSuccess.
static bool cudaFailed(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return false;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return true;
}

// main routine that executes on the host
//
// Fills a host array with 1..N, copies it to the device, runs square_array
// with one thread per element and prints the input plus the two output
// arrays. Every allocation and CUDA call is checked; on the first failure the
// remaining steps are skipped, resources are released and 1 is returned.
int main(void)
{
    const int N = 10; // Number of elements in arrays
    const size_t size = N * sizeof(float);

    // Pointers to host & device arrays
    float *total_h = (float *)malloc(size);
    float *even_h = (float *)malloc(size);
    float *odd_h = (float *)malloc(size);
    float *total_d = NULL, *even_d = NULL, *odd_d = NULL;

    bool ok = (total_h != NULL) && (even_h != NULL) && (odd_h != NULL);
    if (!ok)
        fprintf(stderr, "host allocation failed\n");

    // Allocate device arrays; outputs are zeroed so unused slots print as 0.
    ok = ok
        && !cudaFailed(cudaMalloc((void **) &total_d, size), "cudaMalloc(total_d)")
        && !cudaFailed(cudaMalloc((void **) &even_d, size), "cudaMalloc(even_d)")
        && !cudaFailed(cudaMemset(even_d, 0, size), "cudaMemset(even_d)")
        && !cudaFailed(cudaMalloc((void **) &odd_d, size), "cudaMalloc(odd_d)")
        && !cudaFailed(cudaMemset(odd_d, 0, size), "cudaMemset(odd_d)");

    if (ok)
    {
        // Initialize host array and copy it to CUDA device
        for (int i = 0; i < N; i++) total_h[i] = (float)i + 1;
        ok = !cudaFailed(cudaMemcpy(total_d, total_h, size, cudaMemcpyHostToDevice),
                         "cudaMemcpy(total_d)");
    }

    if (ok)
    {
        // Do calculation on device: exactly one thread per element, in a
        // single block (N must not exceed the per-block thread limit).
        square_array <<< 1, N >>> (total_d, even_d, odd_d, N);
        // A launch does not return a status: query launch errors explicitly,
        // then synchronize to surface errors raised while the kernel ran.
        ok = !cudaFailed(cudaGetLastError(), "square_array launch")
            && !cudaFailed(cudaDeviceSynchronize(), "square_array execution");
    }

    if (ok)
    {
        // Retrieve result from device and store it in host array
        ok = !cudaFailed(cudaMemcpy(even_h, even_d, size, cudaMemcpyDeviceToHost),
                         "cudaMemcpy(even_h)")
            && !cudaFailed(cudaMemcpy(odd_h, odd_d, size, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(odd_h)");
    }

    if (ok)
    {
        // Print results
        printf("total Numbers\n");
        for (int i = 0; i < N; i++) printf("%f\n", total_h[i]);

        printf("EVEN Numbers\n");
        for (int i = 0; i < N; i++) printf("%f\n", even_h[i]);

        printf("ODD Numbers\n");
        for (int i = 0; i < N; i++) printf("%f\n", odd_h[i]);
    }

    // Cleanup (free and cudaFree both accept null pointers)
    free(total_h);
    free(even_h);
    free(odd_h);

    cudaFree(total_d);
    cudaFree(even_d);
    cudaFree(odd_d);

    return ok ? 0 : 1;
}

输出:(原帖此处为程序输出的截图,图片未能载入)

最佳答案

正如 Jared Hoberock 所建议的,使用 CUDA Thrust 中提供的高效分区算法比开始开发自己的分区例程要容易得多。下面是一个完整的示例。

#include <cstdio>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/host_vector.h>
#include <thrust/partition.h>

// Unary predicate: true when x is evenly divisible by 2 (including 0 and
// negative even values). Usable from both host and device code.
struct is_even {
    // const-qualified so the predicate can be invoked through const functor
    // objects; takes int by value, which still binds to every argument the
    // previous `const int &` parameter accepted.
    __host__ __device__ bool operator()(int x) const { return (x % 2) == 0; }
};

// Builds the sequence 0..N-1 on the host, partitions it on the device into
// even and odd values with thrust::partition_copy, and prints both groups.
// Returns 0 on completion.
int main() {

    const int N = 10;

    thrust::host_vector<int> h_data(N);
    for (int i = 0; i < N; i++) h_data[i] = i;

    thrust::device_vector<int> d_data(h_data);

    // Either output may receive up to N elements (e.g. all inputs even), so
    // size both for the worst case instead of assuming an exact N/2 split.
    thrust::device_vector<int> d_evens(N);
    thrust::device_vector<int> d_odds(N);

    // partition_copy returns the end of each output range; these tell us how
    // many elements actually landed in each output.
    typedef thrust::device_vector<int>::iterator DeviceIter;
    thrust::pair<DeviceIter, DeviceIter> ends =
        thrust::partition_copy(d_data.begin(), d_data.end(),
                               d_evens.begin(), d_odds.begin(), is_even());

    // Copy each result range to the host in one transfer rather than reading
    // device elements one at a time inside the print loops.
    thrust::host_vector<int> h_evens(d_evens.begin(), ends.first);
    thrust::host_vector<int> h_odds(d_odds.begin(), ends.second);

    printf("Even numbers\n");
    for (size_t i = 0; i < h_evens.size(); i++) {
        int val = h_evens[i];
        printf("evens[%i] = %i\n", (int)i, val);
    }

    printf("Odd numbers\n");
    for (size_t i = 0; i < h_odds.size(); i++) {
        int val = h_odds[i];
        printf("odds[%i] = %i\n", (int)i, val);
    }

    return 0;
}

关于cuda - CUDA中偶数和奇数的分离,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24669567/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com