c - 如何创建OpenCL命令队列？-6ren

c - 如何创建OpenCL命令队列？

转载作者：行者123 更新时间：2023-11-30 16:36:51

我正在尝试学习 OpenCL，但我甚至无法让一个简单的内核正常工作。

下面的代码来自《OpenCL 示例编程》一书，我修改、修改、修改......但我仍然不知道问题出在哪里。

每次我在自己的 PC(AMD Athlon 5350 APU，集成 Radeon R3)上执行该程序时，它打印的结果都是“0.0000”。如果我在另一台配备 NVIDIA 1080 TI 的计算机上(其硬盘是这台 PC 硬盘的克隆，因此软件环境完全相同)运行同一个可执行文件，程序输出的结果则是“3.000”。

我注意到编译器输出中出现警告，因此我将过时的 clCreateCommandQueue 调用更改为 clCreateCommandQueueWithProperties()。

现在......它只是段错误(通过 printf() 测试，我知道它在 clCreateCommandQueueWithProperties 期间/之后出现段错误)。

在配备 NVIDIA GPU 的系统上，它就可以正常工作。

我错过了什么？

#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <malloc.h>

#include <CL/cl.h>

#define VECTOR_NS 4096
#define VECTOR_SIZE (VECTOR_NS*sizeof(float))

/*
 * OpenCL C source for the device kernel, compiled at run time by the host.
 *
 * The kernel takes three global float buffers (A, B, C). As written, every
 * work-item stores the value 3 into C[0]; A and B are not read, and `index`
 * is computed but never used.
 */
static const char *saxpy_kernel =
    "__kernel void saxpy_kernel("
        "__global float *A, "
        "__global float *B, "
        "__global float *C)\n"
    "{\n"
    "int index = get_global_id(0);\n"
    "C[0] = 3;\n"
    "}\n";

int main(void)
{
    int i;
    float total;
    float* A;
    float* B;
    float* C;
    float* Cmapped;
    cl_mem Acl;
    cl_mem Bcl;
    cl_mem Ccl;
    cl_context context;
    cl_platform_id* platforms;
    cl_uint num_platforms;
    cl_uint num_devices;
    cl_command_queue queue;
    cl_kernel kernel;
    cl_int clStatus;

    // Get platform and device information
    platforms = NULL;
    //Set up the Platform
    clStatus = clGetPlatformIDs(0, NULL, &num_platforms);
    platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id)*num_platforms);
    clStatus = clGetPlatformIDs(num_platforms, platforms, NULL);
    //Get the devices list and choose the device you want to run on
    cl_device_id* device_list = NULL;

    clStatus = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
    device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices);

    clStatus = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL);
    // Create one OpenCL context for each device in the platform
    context = clCreateContext( NULL, num_devices, device_list, NULL, NULL, &clStatus);

    /* Create the command queue */
    //queue = clCreateCommandQueue(context, device_list[0], 0, &clStatus);
    queue = clCreateCommandQueueWithProperties(context, device_list[0], NULL, &clStatus);

    if(clStatus != CL_SUCCESS){
        fprintf(stderr, "ERROR: failed to execute the kernel: %d.\n", clStatus);
        exit(1);
    }

    /* */
    if((A = aligned_alloc(sysconf(_SC_PAGESIZE), VECTOR_SIZE)) == NULL){
        fprintf(stderr, "ERROR: failed to allocate memory.\n");
        exit(1);
    }
    if((B = aligned_alloc(sysconf(_SC_PAGESIZE), VECTOR_SIZE)) == NULL){
        fprintf(stderr, "ERROR: failed to allocate memory.\n");
        exit(1);
    }
    if((C = aligned_alloc(sysconf(_SC_PAGESIZE), VECTOR_SIZE)) == NULL){
        fprintf(stderr, "ERROR: failed to allocate memory.\n");
        exit(1);
    }

    /* Initialize it */
    i = 0;
    do {
        A[i] = 1;
        B[i] = 2;
        C[i] = 0;
    } while(++i != VECTOR_NS);

    Acl = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, VECTOR_SIZE, A, &clStatus); // CL_MEM_READ_ONLY
    Bcl = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, VECTOR_SIZE, B, &clStatus); // CL_MEM_READ_ONLY
    Ccl = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, VECTOR_SIZE, C, &clStatus); // CL_MEM_WRITE_ONLY

    // Create a program from the kernel source
    // Build the program
    cl_program program = clCreateProgramWithSource(context, 1, (const char**)&saxpy_kernel, NULL, &clStatus);

    if(clStatus != CL_SUCCESS){
        fprintf(stderr, "ERROR: failed to compile the OpenCL code.\n");
        exit(1);
    }

    clStatus = clBuildProgram(program, 1, device_list, NULL, NULL, NULL);

    if(clStatus != CL_SUCCESS){
        fprintf(stderr, "ERROR: failed to compile the OpenCL code.\n");
        exit(1);
    }

    // Create the OpenCL kernel
    kernel = clCreateKernel(program, "saxpy_kernel", &clStatus);
    // Set the arguments of the kernel
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&Acl);
    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&Bcl);
    clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&Ccl);

    // Execute the OpenCL kernel on the list
    size_t global_size = VECTOR_NS; // Process the entire lists
    size_t local_size = 1;

    // Process one item at a time
    clStatus = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);

    if(clStatus != CL_SUCCESS){
        fprintf(stderr, "ERROR: failed to execute the kernel: %d.\n", clStatus);
        exit(1);
    }

    //* Clean up and wait for all the comands to complete. */
    clFlush(queue);

    /* Display the result to the screen */
    Cmapped = (float*) clEnqueueMapBuffer(queue, Ccl, CL_TRUE, CL_MAP_READ, 0, VECTOR_SIZE, 0, NULL, NULL, &clStatus); // CL_MEM_USE_HOST_PTR

    if(clStatus != CL_SUCCESS){
        fprintf(stderr, "ERROR: failed to execute the kernel: %d.\n", clStatus);
        exit(1);
    }

    total = 0;
    for(i = 0; i < VECTOR_NS; i++)
        total += C[i];

    printf("TOTAL: %f\n", total);

    /* Clean up and wait for all the comands to complete. */
    clFlush(queue);
    clFinish(queue);

    /* Finally release all OpenCL allocated objects and host buffers. */
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseMemObject(Acl);
    clReleaseMemObject(Bcl);
    clReleaseMemObject(Ccl);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    free(platforms);
    free(device_list);

    return 0;
}

最佳答案

虽然您的代码在我的机器上可以原样运行，但我认为其中有一处可能导致了您遇到的问题。下面这个调用

Cmapped = (float*)clEnqueueMapBuffer(queue, Ccl, CL_TRUE, CL_MAP_READ, 0, VECTOR_SIZE, 0, NULL, NULL, &clStatus);

更改了Cmapped，但您尝试从原始C缓冲区中读取

total = 0;
for(i = 0; i < VECTOR_NS; i++)
    total += C[i];

这可能应该是这样的

total = 0;
for(i = 0; i < VECTOR_NS; i++)
    total += Cmapped[i];

但是，由于您是使用 CL_MEM_USE_HOST_PTR 标志创建缓冲区的，您也可以改用 clEnqueueReadBuffer，并把原始指针作为目标地址；在这种情况下，OpenCL 驱动程序应该能够优化掉这次复制操作:

clEnqueueReadBuffer(queue, Ccl, CL_TRUE, 0, VECTOR_SIZE, C, 0, NULL, NULL);

如果 OpenCL 实现尚未将数据缓存到设备内存，则这应该是无操作。

关于c - 如何创建OpenCL命令队列？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/48271444/

文章推荐： c - c中内存的重新分配/分配

文章推荐： c# - 对存储过程的结果进行排序

文章推荐： c# - 设计架构，分离项目。我是 "over thinking"吗？

文章推荐： c - 断点在带有 QEMU 模拟 cortex-a8 的 gdb 中不起作用

linux - ] 命令 & 命令 > [忽略输出]
关闭。这个问题是off-topic .它目前不接受答案。想改进这个问题吗？ Update the question所以它是on-topic用于堆栈溢出。关闭 9 年前。 Improve this
sql - 对多个表执行一个 SQL 命令(无需重写 SQL 命令)
我有一系列 SQL 命令，我想在大约 40 个不同的表上运行。必须有一种方法可以在不编写 40 条不同命令的情况下执行此操作... 我在 SQL Server 中运行它。所有表都有不同的名称，我要操作
c# - PHP 命令 mysql_insert_id() 的等效 C#/SQLite 命令？
我习惯在 PHP 中使用命令“mysql_insert_id()”来返回插入到我的数据库中的最后一行的 id。在 C# 中的 SQLite 中是否有等效的命令？谢谢! -阿德娜最佳答案选择 l
回填 Hive 表的 Bash 命令——运行多个具有更改日期变量的 Hive 命令
试图找出一种方法来回填 ds 分区 Hive 表的分区。我知道如何从 CLI 运行 Hive 命令，例如 $HIVE_HOME/bin/hive -e 'select a.col from tab1
windows - 如何在 ftp 命令 "bye"后返回执行 cmd 命令？
我有 .bat 文件。看起来像下一个 ....many commands1 ftp -i -s:copy.txt ...many commands2 copy.txt 包含下一个命令 open ...
linux - 如果存在 git 命令，则使 bash 命令回退到 git 命令
基本上我想输入 show 并检查是否有 show 命令或别名已定义并触发它，如果未定义则触发 git show 。例如 rm 应该执行 rm 但 checkout 应该执行 git checkout
mysql - 试图找到与 MySQL 等效的 iSeries SQL 命令。特别是 "LABEL ON"命令
我公司的主数据库是 iSeries 机器，我已经非常习惯使用 DB2 命令和结构。我现在正在尝试做一个小项目，更新一个包含超过 300 万条记录的表。我想出一种比较和“清理”数据的更快方法是使用 My
node.js - NodeJS - 运行 shell 命令，退出，转移到 shell 命令
我想在带有 Node 的终端中制作一个简单的按钮板，并“blessed”用于连接或运行不同的命令。 ----------------------------------------------- _
python - selenium IDE 命令 'openWindow' 是否有等效的 selenium python webdriver 命令？
我们有一个 selenium IDE 脚本，正在转换为 python webdriver。以下命令未转换: [openWindow | http://mywebsite.com/index.php |
node.js - Windows 上的 Git Bash 命令，找不到 yarn 命令
我正在学习这个关于从 GIT HUB 下载和安装 Web 文件的在线教程。我进入主题:启动我们的静态网站，系统提示我输入命令以下载和安装 Web 文件。但是，当我输入命令 yarn install 时
linux - 如何在 shell 脚本中编写 elif 命令，如 elseif 或在 fortran 中的 else 命令
我在 shell 脚本中使用 elif 命令时遇到问题，就像在 fortran 中一样。我有 100 家公司的员工名单。我想屏蔽那些员工少于 500 人的公司。我的脚本是 rm -f categor
python - 我有几个 Linux 命令，我想在 Windows 机器上运行。如何在 Windows 上从 Python 运行 Linux 命令
我有一些 Linux 命令可以生成 token 。我在 Linux 机器上使用操作系统库形式的 Python 自动化了这些命令。它工作正常。但是，当我在 Windows 中尝试相同的代码时，它没有返
分享几个你可能不知道的交互式Git 命令
本文分享自华为云社区《Git你有可能不知道交互式暂存》，作者：龙哥手记。本节中的几个交互式 Git 命令可以帮助你将文件的特定部分组合成提交。当你在修改了大量文件后，希望这些改动能拆分为若干提交而
MySQL "in"命令
我想知道如何使用 IN 比较语法来做到这一点。当前的 SQL 查询是: select * from employee where (employeeName = 'AJAY' and month(e
Hadoop 命令
我在这个位置安装了 Hadoop /usr/local/hadoop$ 现在我想列出 dfs 中的文件。我使用的命令是: hduser@ubuntu:/usr/local/hadoop$ bin/ha
清除所有内容的 Docker 命令
是否有一个单一的 docker 命令可用于清除所有内容？如果正在运行，请停止所有容器、删除所有图像、删除所有卷...等。最佳答案我认为没有一个命令可以做到这一点。您首先需要停止所有容器使用 $ d
用于评估缓冲区中的方案表达式并显示粘贴在缓冲区中的结果的 Emacs 命令
我基本上是在 clojure/nrepl 模式中寻找与 C-u C-x C-e 或 C-c C-p 等效的 Scheme。我想要一个 C-x C-e 将输出打印到缓冲区，而不是仅仅在 repl 中。
vim - 在真实终端中运行 :! 命令
我可以在 vim 中使用 pudb(一个 ncurses Python 调试器)，因为，例如，:!python %在实际的终端窗口中运行。我更喜欢使用 gvim，但 gvim 运行 :!python
用于自动视频分割和缩略图创建的 FFMPEG 命令
我正在尝试编写一个 FFMPEG 命令: 取为输入一个视频 input.mp4 和一个图像 pic.jpg 作为输出将 input.mp4 拆分为 20 秒的视频，按顺序重命名；对于每个分割视
ffmpeg vstats 命令
我想转储视频每帧的比特率。我正在尝试使用 -vstats 获取此信息命令。当我运行此命令时 - ffmpeg -i input.mp4 -vstats 它显示至少应该定义一个文件。如果有人能建议我任

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c - 如何创建OpenCL命令队列？