gpt4 book ai didi

c - clEnqueueNDRangeKernel 何时或为何将 Null 作为事件返回?

转载 作者:太空宇宙 更新时间:2023-11-04 04:26:09 25 4
gpt4 key购买 nike

您好,我正在尝试在 7 周内执行 7 Concurreny Models 一书中的示例代码。作者使用的是 macbook,而我使用的是带有 windows 10 的 dell xps。

我的程序崩溃了,因为在我调用函数 clEnqueueNDRangeKernel() 之后 timing_event 仍然为 null。

cl_event timing_event;
size_t work_units = NUM_ELEMENTS;
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &work_units,
NULL, 0, NULL,&timing_event);

docs声明以下有关事件参数的信息

event

Returns an event object that identifies this particular kernel execution instance. Event objects are unique and can be used to identify a particular kernel execution instance later on. If event is NULL, no event will be created for this kernel execution instance and therefore it will not be possible for the application to query or queue a wait for this particular kernel execution instance.

谁能解释一下为什么这种情况发生在我的戴尔而不是作者的 macbook 上?

最佳答案

我找到了解决方案。问题并非来自 clEnqueueNDRangeKernel(),而是发生在 clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 中。我使用 clGetProgramBuildInfo() 检索了构建信息。问题是我的 multiply_arrays.cl 文件不是 utf 8 编码


对于所有刚接触 opencl 的人。每个 opencl 函数都会返回一个映射到特定错误代码的状态整数。如果函数确实返回但不返回状态代码,则可以将指针传递给该函数。请参阅我在下面链接的示例函数。这对调试您的程序非常有帮助。

Returns Status Code

Status Code by Reference

main.cpp

/***
* Excerpted from "Seven Concurrency Models in Seven Weeks",
* published by The Pragmatic Bookshelf.
* Copyrights apply to this code. It may not be used to create training material,
* courses, books, articles, and the like. Contact us if you are in doubt.
* We make no guarantees that this code is fit for any purpose.
* Visit http://www.pragmaticprogrammer.com/titles/pb7con for more book information.
***/
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <mach/mach_time.h>
#else
#include <CL/cl.h>
#include <Windows.h>

#endif

#include <stdio.h>
#include<iostream>
#include <inttypes.h>
#include <chrono>

#define NUM_ELEMENTS (100000)

char* read_source(const char* filename) {
FILE *h = fopen(filename, "r");
fseek(h, 0, SEEK_END);
size_t s = ftell(h);
rewind(h);
char* program = (char*)malloc(s + 1);
fread(program, sizeof(char), s, h);
program[s] = '\0';
fclose(h);
return program;
}

void random_fill(cl_float array[], size_t size) {
for (int i = 0; i < size; ++i)
array[i] = (cl_float)rand() / RAND_MAX;
}

int main() {
//Status for Errorhandling
cl_int status;

//Identify Platform
cl_platform_id platform;
clGetPlatformIDs(1, &platform, NULL);

//Get Id of GPU
cl_device_id device;
cl_uint num_devices = 0;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &num_devices);

// Create Context
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);


//Use context to create Command Queue
//Que enables us to send commands to the gpu device
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);

//Load Kernel
char* source = read_source("multiply_arrays.cl");
cl_program program = clCreateProgramWithSource(context, 1,
(const char**)&source, NULL, &status);
free(source);

// Build Program
status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

size_t len;
char *buffer;
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
buffer = (char *) malloc(len);
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, len, buffer, NULL);
printf("%s\n", buffer);

//Create Kernel
cl_kernel kernel = clCreateKernel(program, "multiply_arrays", &status);

// Create Arrays with random Numbers
cl_float a[NUM_ELEMENTS], b[NUM_ELEMENTS];
random_fill(a, NUM_ELEMENTS);
random_fill(b, NUM_ELEMENTS);

//uint64_t startGPU = mach_absolute_time();
auto start = std::chrono::high_resolution_clock::now();


//Create Readonly input Buffers with value from a and b
cl_mem inputA = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * NUM_ELEMENTS, a, NULL);
cl_mem inputB = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float) * NUM_ELEMENTS, b, NULL);

//Create Output buffer write Only
cl_mem output = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(cl_float) * NUM_ELEMENTS, NULL, NULL);

//set Kernel Arguments
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputA);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputB);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &output);

cl_event timing_event;
size_t work_units = NUM_ELEMENTS;
status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &work_units,
NULL, 0, NULL,&timing_event);

cl_float results[NUM_ELEMENTS];
//Calculate Results and copy from output buffer to results
clEnqueueReadBuffer(queue, output, CL_TRUE, 0, sizeof(cl_float) * NUM_ELEMENTS,
results, 0, NULL, NULL);

//uint64_t endGPU = mach_absolute_time();
auto finish = std::chrono::high_resolution_clock::now();
//printf("Total (GPU): %lu ns\n\n", (unsigned long)(endGPU - startGPU));
std::cout << "Total(GPU) :"<< std::chrono::duration_cast<std::chrono::nanoseconds>(finish - start).count() << "ns\n";

cl_ulong starttime;
clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_START,
sizeof(cl_ulong), &starttime, NULL);
cl_ulong endtime;
clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &endtime, NULL);
printf("Elapsed (GPU): %lu ns\n\n", (unsigned long)(endtime - starttime));
clReleaseEvent(timing_event);
clReleaseMemObject(inputA);
clReleaseMemObject(inputB);
clReleaseMemObject(output);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);

//uint64_t startCPU = mach_absolute_time();
start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < NUM_ELEMENTS; ++i)
results[i] = a[i] * b[i];

//uint64_t endCPU = mach_absolute_time();
finish = std::chrono::high_resolution_clock::now();
//printf("Elapsed (CPU): %lu ns\n\n", (unsigned long)(endCPU - startCPU));
std::cout << "Elapsed (CPU) :" << std::chrono::duration_cast<std::chrono::nanoseconds>(finish - start).count() << "ns\n";
return 0;
}

multiply_arrays.cl

__kernel void multiply_arrays(__global const float* inputA,
__global const float* inputB,
__global float* output) {

int i = get_global_id(0);
output[i] = inputA[i] * inputB[i];
}
//ö

关于c - clEnqueueNDRangeKernel 何时或为何将 Null 作为事件返回?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41211233/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com