- android - RelativeLayout 背景可绘制重叠内容
- android - 如何链接 cpufeatures lib 以获取 native android 库?
- java - OnItemClickListener 不起作用,但 OnLongItemClickListener 在自定义 ListView 中起作用
- java - Android 文件转字符串
我是一名从事 OpenCL 工作的新人。当我尝试编译内核时遇到了一些奇怪的麻烦。
在 Nvidia 平台上,无论源代码里写的是什么,程序总是显示 CL_SUCCESS,而构建日志只有“\n”;在 Intel 平台上,无论源代码里写的是什么,clBuildProgram 都返回 CL_INVALID_BINARY,用 clGetProgramBuildInfo 查询 CL_PROGRAM_BUILD_STATUS 得到的是 CL_BUILD_ERROR,但日志里却看不到任何错误:
fcl build 1 succeeded.\n fcl build 2 succeeded.\n bcl build succeeded.\n.
由于这是我的第一段复杂的内核代码,我知道它有很多错误。但是,这看起来不像是代码错误。为什么编译器会显示一些相互矛盾的信息?
这是我的代码。代码很长,我只贴出可能相关的部分,“...”表示此处省略了一些内容;如果需要其余部分,请告诉我。绘制过程.cpp:
#include <stdlib.h>
#include "Console.h"
#include "Renderer.h"
#include "Object.h"
#include "TertiaryArithmeticAlgorithms.h"
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/cl.hpp>
#else
#include <CL/cl.h>
#endif
#include "Camera.h"
cl_command_queue CommandQueue;
cl_mem BufIdx[8];
cl_kernel Rasterization;
bool Initialization()
{
ConWrite("======== OpenCL Initializing ========\n");
//
cl_platform_id ThePlatformID=NULL;
cl_uint NumPlatforms;
cl_int status;
if(CL_INVALID_VALUE==clGetPlatformIDs(NULL,NULL,&NumPlatforms))
{
ConWrite("ERROR: Fail to Get the Number of Available Items in Platform List! The Number of Available Items in Platform List Equal to 0 and Platform List is NULL OR Both Platform List and the Exact Number of Items in Platform List are NULL.\n");
ConWrite("=== OpenCL Initialization Failed! ===\n");
return 1;
}
else
{
ConWrite("The Number of Items in Platform List is ");
ConWrite(&NumPlatforms);
ConWrite(".\n");
}
//
cl_platform_id *PlatformList;
if(NumPlatforms>0)
{
PlatformList=(cl_platform_id*)malloc(NumPlatforms*sizeof(cl_platform_id));
if(CL_INVALID_VALUE==clGetPlatformIDs(NumPlatforms,PlatformList,NULL))
{
ConWrite("ERROR: Fail to Get the Platform List! The Number of Available Items in Platform List Equal to 0 and Platform List is NULL OR Both Platform List and the Exact Number of Items in Platform List are NULL.\n");
ConWrite("=== OpenCL Initialization Failed! ===\n");
return 1;
}
else
{
ConWrite("Platform List Obtained.\n");
}
}
else
{
ConWrite("ERROR: The Number of Available Items in Platform List is not Greater than 0!\n");
ConWrite("=== OpenCL Initialization Failed! ===\n");
return 1;
}
...
cl_program VertexProgram=clCreateProgramWithSource(Context,1,Cartography,NULL,NULL);
status=clBuildProgram(VertexProgram,LengthOfDevices/sizeof(cl_device_id*),DeviceList,NULL,NULL,NULL);
if(CL_SUCCESS==status)
{
ConWrite("CODE: CL_SUCCESS. OpenCL Program Built.\n");
}
else
{
switch(status)
{
case CL_INVALID_PROGRAM:
ConWrite("CODE: CL_INVALID_PROGRAM. ERROR: The Program is an Invalid Program Object!\n");
break;
case CL_INVALID_VALUE:
ConWrite("CODE: CL_INVALID_VALUE. ERROR: Device List is Unavailable and the Number of Devices is Greater Than Zero, OR Device List is NOT NULL and the Number of Devices is Zero, OR the Pointer to Notify is NULL But User Data is NOT NULL!\n");
break;
case CL_INVALID_DEVICE:
ConWrite("CODE: CL_INVALID_DEVICE. ERROR: OpenCL Devices listed in the Device List are NOT in the List of Devices Associated with the Program!\n");
break;
case CL_INVALID_BINARY:
ConWrite("CODE: CL_INVALID_BINARY. ERROR: The Program was Created with Binary and Devices Listed in the Device List do NOT Have a Valid Binary Program!\n");
break;
case CL_INVALID_BUILD_OPTIONS:
ConWrite("CODE: CL_INVALID_BUILD_OPTIONS. ERROR: The Build Options Specified by Options are Invalid!\n");
break;
case CL_INVALID_OPERATION:
ConWrite("CODE: CL_INVALID_OPERATION. ERROR: The Build of the Program Executable for Any of the Devices Listed in the Device List by a Previous Call to the Function for the Program has NOT Completed!\n");
break;
//case CL_COMPILER_NOT_AVAILABLE: if program is created with clCreateProgramWithSource and a compiler is not available i.e. CL_DEVICE_COMPILER_AVAILABLE specified in the table of OpenCL Device Queries for clGetDeviceInfo is set to CL_FALSE.
//case CL_BUILD_PROGRAM_FAILURE: if there is a failure to build the program executable. This error will be returned if clBuildProgram does not return until the build has completed.
//case CL_INVALID_OPERATION: if there are kernel objects attached to program.
//case CL_OUT_OF_HOST_MEMORY: if there is a failure to allocate resources required by the OpenCL implementation on the host.
}
}
cl_build_status *BudStat;
size_t StatusSize;
clGetProgramBuildInfo(VertexProgram,DeviceList[0],CL_PROGRAM_BUILD_STATUS,0,NULL,&StatusSize);
BudStat=(cl_build_status*)malloc(StatusSize);
clGetProgramBuildInfo(VertexProgram,DeviceList[0],CL_PROGRAM_BUILD_STATUS,StatusSize,BudStat,NULL);
switch (*BudStat)
{
case CL_BUILD_NONE:
ConWrite("CODE: CL_BUILD_NONE.\n");
break;
case CL_BUILD_ERROR:
ConWrite("CODE: CL_BUILD_ERROR.\n");
break;
case CL_BUILD_SUCCESS:
ConWrite("CODE: CL_BUILD_SUCCESS.\n");
break;
case CL_BUILD_IN_PROGRESS:
ConWrite("CODE: CL_BUILD_IN_PROGRESS.\n");
default:
break;
}
char *Log;
size_t LogSize;
status=clGetProgramBuildInfo(VertexProgram,DeviceList[0],CL_PROGRAM_BUILD_LOG,0,NULL,&LogSize);
if(status==CL_SUCCESS)
{
ConWrite("CODE: CL_SUCCESS. OpenCL Program Build Infomation Obtained.\n");
}
else
{
switch(status)
{
case CL_INVALID_DEVICE:
ConWrite("CODE: CL_INVALID_DEVICE. ERROR: The Device is NOT in the List of Devices Associated with the Program.\n");
break;
case CL_INVALID_VALUE:
ConWrite("CODE: CL_INVALID_VALUE. ERROR: The Parameter Name is Invalid, OR the Size in Bytes Specified by Parameter's Value Size is Less Than Size of Return Type and Parameter Value is NOT NULL.\n");
break;
case CL_INVALID_PROGRAM:
ConWrite("CODE: CL_INVALID_PROGRAM. ERROR: The Program is an Invalid Program Object.\n");
break;
}
}
Log=(char*)malloc(LogSize+1);
Log[LogSize]='0';
clGetProgramBuildInfo(VertexProgram,DeviceList[0],CL_PROGRAM_BUILD_LOG,LogSize+1,Log,NULL);
ConWrite(Log);
Rasterization=clCreateKernel(VertexProgram,"VertexRenderer",NULL);
...
这是我的内核(Renderer.h):
#ifndef _1174_Renderer
#define _1174_Renderer
//------------------------------
// Cartography: array of C string literals that together hold OpenCL C kernel
// source text (kernels VertexRenderer, Polarization and Hierarchization).
// The trailing // comments on individual lines sit outside the string literals,
// so they are C comments and are not part of the kernel text.
// NOTE(review): most fragments do not end in "\n"; only the #define lines and
// the closing line of each parameter list carry an explicit newline.
const char *Cartography[]=
{
"#define COUNTER IdxVert\n",
"__kernel void VertexRenderer(",
"global float4 CamPos,", //X coordinate, Y coordinate, Z coordinate, SectorID
"global float4 CamAng,", //Horizontal Angle, Vertical Angle, Inclined Angle, Sight Angle
"global float4 CamNorV1,", //W represents horizontal resolution.
"global float4 CamNorV2,", //W represents vertical resolution.
"global float4 CamNorV3,", //W represents diagonal resolution.
"global float4 *Vertex,", //
"global uint IdxVert,",
"global uchar2 *ScrPos)\n", //
"{",
" half4 CpToV[COUNTER];", //CpToV.w is useless.
" int GID=(int)get_global_id(0);",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" CpToV[GID].xyz=Vertex[GID].xyz-CamPos.xyz;",
" half Distance[COUNTER];",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" Distance[GID]=tan(acos((CamNorV1.x*CpToV[GID].x+CamNorV1.y*CpToV[GID].y+CamNorV1.z*CpToV[GID].z)*rsqrt(CamNorV1.x*CamNorV1.x+CamNorV1.y*CamNorV1.y+CamNorV1.z*CamNorV1.z)*rsqrt(CpToV[GID].x*CpToV[GID].x+CpToV[GID].y*CpToV[GID].y+CpToV[GID].z*CpToV[GID].z)))/tan(CamAng.w)*CamNorV3.w;",
" half Scale[COUNTER];",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" Scale[GID]=(CamNorV1.x*CpToV[GID].x+CamNorV1.y*CpToV[GID].y+CamNorV1.z*CpToV[GID].z)/(CamNorV1.x*CamNorV1.x+CamNorV1.y*CamNorV1.y+CamNorV1.z*CamNorV1.z);",
" half4 MapVect[COUNTER];",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" MapVect[GID].xyz=CpToV[GID].xyz-Scale*CamNorV1.xyz;",
" half Theta1[COUNTER];",
" half Theta2[COUNTER];",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" Theta1[GID]=acos((CamNorV2.x*MapVect[GID].x+CamNorV2.y*MapVect[GID].y+CamNorV2.z*MapVect[GID].z)*rsqrt(CamNorV2.x*CamNorV2.x+CamNorV2.y*CamNorV2.y+CamNorV2.z*CamNorV2.z)*rsqrt(MapVect[GID].x*MapVect[GID].x+MapVect[GID].y*MapVect[GID].y+MapVect[GID].z*MapVect[GID].z));",
" Theta2[GID]=acos((CamNorV3.x*MapVect[GID].x+CamNorV3.y*MapVect[GID].y+CamNorV3.z*MapVect[GID].z)*rsqrt(CamNorV3.x*CamNorV3.x+CamNorV3.y*CamNorV3.y+CamNorV3.z*CamNorV3.z)*rsqrt(MapVect[GID].x*MapVect[GID].x+MapVect[GID].y*MapVect[GID].y+MapVect[GID].z*MapVect[GID].z));",
" half Theta[COUNTER];",
" constant half Pi=(half)3.1415926f;",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" (Theta1[GID]<=Pi/2)?(Theta[GID]=Theta2[GID]):(Theta[GID]=2*Pi-Theta2[GID]);",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" ScrPos[GID].x=(uchar)cos(Theta[GID])*Distance[GID]+CamNorV1.w;",
" ScrPos[GID].y=(uchar)sin(Theta[GID])*Distance[GID]+CamNorV2.w;",
"}" // NOTE(review): no comma after this literal, so it is merged with the next one into a single element "}#define COUNTER Dlt\n".
"#define COUNTER Dlt\n",
"__kernel void Polarization(",
"global float4 *NmVect,",
"global float4 *AllVert,",
"global ushort4 *DltIdx,", //W represents the index of planar vectors of primarch.
"global uint Dlt)\n",
"{",
" int GID=(int)get_global_id(0);",
" half4 SPToCam[COUNTER];",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" SPToCam[GID].xyz=CamPos.xyz-AllVert[DltIdx[GID].x].xyz;",
" half m[COUNTER];",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" m[GID]=SPToCam[GID].x*NmVect[DltIdx[GID].w].x+SPToCam[GID].y*NmVect[DltIdx[GID].w].y+SPToCam[GID].z*NmVect[DltIdx[GID].w].z;",
" bool Polar[COUNTER];",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" (m>0)?(Polar=true):(Polar=false);",
" mem_fence(CLK_GLOBAL_MEM_FENCE);",
" ",
"}",
"__kernel void Hierarchization(",
"global ",
")\n",
"{",
" for(uint i=0;i<NumOfObj;i++){",
" for(uint k=0;k<NumOfLvInObj[IdxOfObj[i]];k++){",
" for(uint j=0;j<NumOfVtxInLv[k+IdxOfLv[IdxOfObj[i]]]-1;j++){",
" uint m=0;",
" (k==0)?():()",
// NOTE(review): the Hierarchization kernel text stops here; its loops and body are never closed.
" "
};
//------------------------------
#endif
不需要太在意内核。都错了……
还有我的硬件:我的桌面:
我的笔记本电脑:
另一个问题:当我在桌面上运行程序时,只能检测到Nvidia平台。 OpenCL 也可以在 CPU 上运行,不是吗?为什么检测不到Intel平台?
最佳答案
我不太确定,不过 clCreateProgramWithSource 的第二个参数看起来很奇怪:
cl_program VertexProgram=clCreateProgramWithSource(Context,1,Cartography,NULL,NULL);
它应该是源代码数组中字符串(行)的数量,所以我建议尝试:
cl_program VertexProgram=clCreateProgramWithSource(Context,sizeof(Cartography)/sizeof(Cartography[0]),Cartography,NULL,NULL);
关于c++ - OpenCL 编译器异常情况,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32709799/
Intel、AMD 和 Khronos OpenCL 之间有什么区别。我对 OpenCL 完全陌生,想从它开始。我不知道在我的操作系统上安装哪个更好。 最佳答案 OpenCL 是 C 和 C++ 语言
我在这里的一篇文章中看到,我们可以从 OpenCL 内核调用函数。但是在我的情况下,我还需要并行化该复杂函数(由所有可用线程运行),所以我是否必须将该函数也设为内核并像从主内核中调用函数一样直接调
最近我看到一些开发板支持 OpenCL EP,例如 odroid XU。我知道的一件事是 OpenCL EP 适用于 ARM 处理器,但它与基于主要桌面的 OpenCL 在哪些特性上有所不同。 最佳答
我想知道在 OpenCL 中设置为内核函数的参数数量是否有任何限制。设置参数时出现 INVALID_ARG_INDEX 错误。我在内核函数中设置了 9 个参数。请在这方面帮助我。 最佳答案 您可以尝试
我对零拷贝的工作原理有点困惑。 1-要确认以下内容对应于opencl中的零拷贝。 ....................... . . . .
我是 OpenCL 的初学者,我很难理解某些东西。 我想改进主机和设备之间的图像传输。 我制定了一个计划以更好地了解我。 顶部:我现在拥有的 |底部:我想要的 HtD(主机到设备)和 DtH(设备到主
今天我又加了四个 __local变量到我的内核以转储中间结果。但是只需将另外四个变量添加到内核的签名并添加相应的内核参数就会将内核的所有输出呈现为“0”。没有一个 cl 函数返回错误代码。 我进一步尝
我知道工作项被分组到工作组中,并且您不能在工作组之外进行同步。 这是否意味着工作项是并行执行的? 如果是这样,使用 128 个工作项创建 1 个工作组是否可能/有效? 最佳答案 组内的工作项将一起安排
我相当确定 warp(线程束)仅在 CUDA 中定义。但也许我错了。就 OpenCL 而言,什么是 warp? 它与工作组不一样,是吗? 任何相关的反馈都受到高度赞赏。谢谢! 最佳答案 它没有在 OpenCL 标准中定义。
已结束。此问题正在寻求书籍、工具、软件库等的推荐。它不满足Stack Overflow guidelines 。目前不接受答案。 我们不允许提出寻求书籍、工具、软件库等推荐的问题。您可以编辑问题,以便
在OpenCL中,我的理解是可以使用barrier()函数来同步工作组中的线程。我(通常)确实了解它们的用途以及何时使用它们。我还知道工作组中的所有线程都必须遇到障碍,否则会出现问题。然而,到目前为止
我的主板上有 Nvidia 显卡 (GeForce GT 640)。我已经在我的盒子上安装了 OpenCL。当我使用“clGetPlatformInfo(参数)”查询平台时,我看到以下输出:-#可用平
我目前正在构建一个 ray marcher 来查看像 mandelbox 等东西。它工作得很好。但是,在我当前的程序中,它使用每个 worker 作为从眼睛转换的光线。这意味着每个 worker 有大
我编写了两个不同的 openCl 内核,使用 nvidia profiler 获取了有关它们的一些信息,发现两者每个工作项都使用 63 个寄存器。 我尝试了一切我能想到的方法来降低这个数字(用 ush
我的主板上有 Nvidia 显卡 (GeForce GT 640)。我已经在我的盒子上安装了 OpenCL。当我使用“clGetPlatformInfo(参数)”查询平台时,我看到以下输出:-#可用平
我目前正在构建一个 ray marcher 来查看像 mandelbox 等东西。它工作得很好。但是,在我当前的程序中,它使用每个 worker 作为从眼睛转换的光线。这意味着每个 worker 有大
我正在尝试使用 OpenCL 加速一些计算,算法的一部分包括矩阵求逆。是否有任何开源库或免费可用的代码来计算用 OpenCL 或 CUDA 编写的矩阵的 lu 分解(lapack dgetrf 和 d
我正在尝试在 OpenCL 内核中使用递归。编译成功,但运行时出现编译错误,所以我想知道,由于 CUDA 现在支持动态并行,OpenCL 是否支持动态并行? 最佳答案 OpenCL 不支持递归。请参阅
考虑以下代码,它从大小为 size 的 double 组创建缓冲区内存对象: coef_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM
OpenCL 中目标平台的示例是什么?例如,它是 Windows、Android、Mac 等操作系统,还是设备中的实际芯片? 最佳答案 OpenCL 平台本质上是一个 OpenCL 实现。它与操作系统
我是一名优秀的程序员,十分优秀!