c++ - 让 NVIDIA OpenCL 在 OpenCV 2.4.10 上运行-6ren

c++ - 让 NVIDIA OpenCL 在 OpenCV 2.4.10 上运行

转载作者：塔克拉玛干　更新时间：2023-11-03 07:18:29

我正在尝试让 OpenCV 目录中自带的 OpenCL 示例代码运行起来。示例代码是“squares.cpp”:

// The "Square Detector" program.
// It loads several images sequentially and tries to find squares in
// each image

#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/ocl/ocl.hpp"
#include <iostream>
#include <math.h>
#include <string.h>

using namespace cv;
using namespace std;

#define ACCURACY_CHECK

#ifdef ACCURACY_CHECK
// check if two vectors of vector of points are near or not
// prior assumption is that they are in correct order
static bool checkPoints(
    vector< vector<Point> > set1,
    vector< vector<Point> > set2,
    int maxDiff = 5)
{
    if(set1.size() != set2.size())
    {
        return false;
    }

    for(vector< vector<Point> >::iterator it1 = set1.begin(), it2 = set2.begin();
            it1 < set1.end() && it2 < set2.end(); it1 ++, it2 ++)
    {
        vector<Point> pts1 = *it1;
        vector<Point> pts2 = *it2;


        if(pts1.size() != pts2.size())
        {
            return false;
        }
        for(size_t i = 0; i < pts1.size(); i ++)
        {
            Point pt1 = pts1[i], pt2 = pts2[i];
            if(std::abs(pt1.x - pt2.x) > maxDiff ||
                    std::abs(pt1.y - pt2.y) > maxDiff)
            {
                return false;
            }
        }
    }
    return true;
}
#endif

// upper threshold handed to Canny by the detectors
int thresh = 50;
// number of levels tried per colour plane (level 0 uses Canny)
int N = 11;
// title of the window in which the result image is shown
const char* wndname = "OpenCL Square Detection Demo";


// helper function:
// returns the cosine of the angle at pt0 spanned by the vectors
// pt0->pt1 and pt0->pt2
static double angle( Point pt1, Point pt2, Point pt0 )
{
    // components of the two vectors that leave pt0
    const double ax = pt1.x - pt0.x;
    const double ay = pt1.y - pt0.y;
    const double bx = pt2.x - pt0.x;
    const double by = pt2.y - pt0.y;

    const double dot = ax*bx + ay*by;
    // product of the squared lengths; the tiny constant keeps the division
    // defined when one of the vectors has zero length
    const double lenSqProduct = (ax*ax + ay*ay)*(bx*bx + by*by) + 1e-10;
    return dot/sqrt(lenSqProduct);
}


// Detects square-like quadrilaterals in an image using the plain cv:: API.
// image:   input image; its channels 0, 1 and 2 are examined one by one
// squares: cleared first, then receives the four corners of every accepted
//          quadrilateral
static void findSquares( const Mat& image, vector<vector<Point> >& squares )
{
    squares.clear();
    Mat pyr, timg, gray0(image.size(), CV_8U), gray;

    // shrink the image and enlarge it again to filter out the noise
    pyrDown(image, pyr, Size(image.cols/2, image.rows/2));
    pyrUp(pyr, timg, image.size());
    vector<vector<Point> > contours;

    // every colour plane is searched separately
    for( int plane = 0; plane < 3; plane++ )
    {
        // copy channel `plane` of timg into the single-channel gray0
        int fromTo[] = {plane, 0};
        mixChannels(&timg, 1, &gray0, 1, fromTo, 1);

        // several binarisation levels are tried per plane
        for( int level = 0; level < N; level++ )
        {
            if( level != 0 )
            {
                // binarise gray0 at (level+1)*255/N with THRESH_BINARY
                cv::threshold(gray0, gray, (level+1)*255/N, 255, THRESH_BINARY);
            }
            else
            {
                // hack: level 0 uses Canny instead of a threshold, which
                // helps to catch squares with gradient shading.
                // The upper threshold comes from `thresh`, the lower one is 0
                // (which forces edges merging).
                Canny(gray0, gray, 0, thresh, 5);
                // dilate the Canny output to close potential holes between
                // edge segments
                dilate(gray, gray, Mat(), Point(-1,-1));
            }

            // collect all contours as a flat list
            findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);

            vector<Point> approx;

            for( size_t k = 0; k < contours.size(); k++ )
            {
                // approximate the contour with a tolerance of 2% of its
                // perimeter
                const double tolerance = arcLength(Mat(contours[k]), true)*0.02;
                approxPolyDP(Mat(contours[k]), approx, tolerance, true);

                // a candidate needs exactly 4 vertices ...
                if( approx.size() != 4 )
                    continue;
                // ... a relatively large area (filters out noisy contours;
                // the absolute value is used because the sign of the area
                // follows the contour orientation) ...
                if( !(fabs(contourArea(Mat(approx))) > 1000) )
                    continue;
                // ... and it has to be convex
                if( !isContourConvex(Mat(approx)) )
                    continue;

                // largest |cosine| found between joint edges
                double maxCosine = 0;
                for( int v = 2; v < 5; v++ )
                {
                    const double cosine = fabs(angle(approx[v%4], approx[v-2], approx[v-1]));
                    maxCosine = MAX(maxCosine, cosine);
                }

                // small cosines mean every examined angle is ~90 degrees:
                // keep the quadrangle
                if( maxCosine < 0.3 )
                    squares.push_back(approx);
            }
        }
    }
}


// returns sequence of squares detected on the image.
// the sequence is stored in the specified memory storage
static void findSquares_ocl( const Mat& image, vector<vector<Point> >& squares )
{
    squares.clear();

    Mat gray;
    cv::ocl::oclMat pyr_ocl, timg_ocl, gray0_ocl, gray_ocl;

    // down-scale and upscale the image to filter out the noise
    ocl::pyrDown(ocl::oclMat(image), pyr_ocl);
    ocl::pyrUp(pyr_ocl, timg_ocl);

    vector<vector<Point> > contours;
    vector<cv::ocl::oclMat> gray0s;
    ocl::split(timg_ocl, gray0s); // split 3 channels into a vector of oclMat
    // find squares in every color plane of the image
    for( int c = 0; c < 3; c++ )
    {
        gray0_ocl = gray0s[c];
        // try several threshold levels
        for( int l = 0; l < N; l++ )
        {
            // hack: use Canny instead of zero threshold level.
            // Canny helps to catch squares with gradient shading
            if( l == 0 )
            {
                // do canny on OpenCL device
                // apply Canny. Take the upper threshold from slider
                // and set the lower to 0 (which forces edges merging)
                cv::ocl::Canny(gray0_ocl, gray_ocl, 0, thresh, 5);
                // dilate canny output to remove potential
                // holes between edge segments
                ocl::dilate(gray_ocl, gray_ocl, Mat(), Point(-1,-1));
                gray = Mat(gray_ocl);
            }
            else
            {
                // apply threshold if l!=0:
                //     tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0
                cv::ocl::threshold(gray0_ocl, gray_ocl, (l+1)*255/N, 255, THRESH_BINARY);
                gray = gray_ocl;
            }

            // find contours and store them all as a list
            findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);

            vector<Point> approx;
            // test each contour
            for( size_t i = 0; i < contours.size(); i++ )
            {
                // approximate contour with accuracy proportional
                // to the contour perimeter
                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);

                // square contours should have 4 vertices after approximation
                // relatively large area (to filter out noisy contours)
                // and be convex.
                // Note: absolute value of an area is used because
                // area may be positive or negative - in accordance with the
                // contour orientation
                if( approx.size() == 4 &&
                        fabs(contourArea(Mat(approx))) > 1000 &&
                        isContourConvex(Mat(approx)) )
                {
                    double maxCosine = 0;
                    for( int j = 2; j < 5; j++ )
                    {
                        // find the maximum cosine of the angle between joint edges
                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
                        maxCosine = MAX(maxCosine, cosine);
                    }

                    // if cosines of all angles are small
                    // (all angles are ~90 degree) then write quandrange
                    // vertices to resultant sequence
                    if( maxCosine < 0.3 )
                        squares.push_back(approx);
                }
            }
        }
    }
}


// Draws every polygon in `squares` onto `image` as a closed outline
// (colour Scalar(0,255,0), thickness 3, anti-aliased).
// Polygons without any points are skipped: taking the address of element 0
// of an empty vector would be undefined behaviour, and there is nothing to
// draw for them anyway.
static void drawSquares( Mat& image, const vector<vector<Point> >& squares )
{
    for( size_t i = 0; i < squares.size(); i++ )
    {
        const vector<Point>& square = squares[i];
        if( square.empty() )
            continue;

        // polylines expects an array of point arrays plus an array of counts;
        // a single polygon is passed per call
        const Point* p = &square[0];
        int n = (int)square.size();
        polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, CV_AA);
    }
}


// Builds one image that shows the input twice, side by side: the left copy
// carries the pure-C++ squares, the right copy the ocl squares. Each half is
// labelled in its top-left corner.
static Mat drawSquaresBoth( const Mat& image,
                            const vector<vector<Point> >& sqsCPP,
                            const vector<vector<Point> >& sqsOCL
)
{
    // canvas twice as wide as the input; both halves are views into it
    Mat canvas(Size(image.cols * 2, image.rows), image.type());
    const Rect leftHalf(Point(0, 0), image.size());
    const Rect rightHalf(Point(image.cols, 0), image.size());
    Mat cppView = canvas(leftHalf);
    Mat oclView = canvas(rightHalf);

    image.copyTo(cppView);
    image.copyTo(oclView);
    drawSquares(cppView, sqsCPP);
    drawSquares(oclView, sqsOCL);

    float labelScale = 0.8f;
    const Point labelOrigin(10, 20);
    Scalar white = Scalar::all(255), black = Scalar::all(0);

    // each label is drawn twice: a thicker black pass first, then a thin
    // white pass on top of it
    putText(cppView, "C++", labelOrigin, FONT_HERSHEY_COMPLEX_SMALL, labelScale, black, 2);
    putText(cppView, "C++", labelOrigin, FONT_HERSHEY_COMPLEX_SMALL, labelScale, white, 1);
    putText(oclView, "OCL", labelOrigin, FONT_HERSHEY_COMPLEX_SMALL, labelScale, black, 2);
    putText(oclView, "OCL", labelOrigin, FONT_HERSHEY_COMPLEX_SMALL, labelScale, white, 1);

    return canvas;
}


// Entry point of the demo.
// Parses the command line (-i input image, -o output path, -h help), loads
// the image, runs both detectors once as a warm-up, optionally compares their
// results, times `iterations` runs of each detector, then shows and saves the
// side-by-side result.
// Returns EXIT_SUCCESS on success (or after printing the help) and
// EXIT_FAILURE when the image cannot be loaded or OpenCV throws while the
// detectors are running.
int main(int argc, char** argv)
{
    const char* keys =
        "{ i | input   |                    | specify input image }"
        "{ o | output  | squares_output.jpg | specify output save path}"
        "{ h | help    | false              | print help message }";
    CommandLineParser cmd(argc, argv, keys);
    string inputName = cmd.get<string>("i");
    string outfile = cmd.get<string>("o");

    if(cmd.get<bool>("help"))
    {
        cout << "Usage : squares [options]" << endl;
        cout << "Available options:" << endl;
        cmd.printParams();
        return EXIT_SUCCESS;
    }

    const int iterations = 10;
    vector<vector<Point> > squares_cpu, squares_ocl;

    Mat image = imread(inputName, 1);
    if( image.empty() )
    {
        cout << "Couldn't load " << inputName << endl;
        return EXIT_FAILURE;
    }

    // the window is only created once there is an image to show
    namedWindow( wndname, CV_WINDOW_AUTOSIZE );

    int j = iterations;
    int64 t_ocl = 0, t_cpp = 0;

    // The detectors can throw cv::Exception, e.g. when the ocl module cannot
    // select an OpenCL device. Catch it here so the program reports the
    // problem and exits with a failure code instead of terminating through an
    // uncaught exception.
    try
    {
        //warm-ups
        cout << "warming up ..." << endl;
        findSquares(image, squares_cpu);
        findSquares_ocl(image, squares_ocl);

#ifdef ACCURACY_CHECK
        cout << "Checking ocl accuracy ... " << endl;
        cout << (checkPoints(squares_cpu, squares_ocl) ? "Pass" : "Failed") << endl;
#endif
        do
        {
            int64 t_start = cv::getTickCount();
            findSquares(image, squares_cpu);
            t_cpp += cv::getTickCount() - t_start;

            t_start  = cv::getTickCount();
            findSquares_ocl(image, squares_ocl);
            t_ocl += cv::getTickCount() - t_start;
            cout << "run loop: " << j << endl;
        }
        while(--j);
    }
    catch (const cv::Exception& e)
    {
        cerr << "OpenCV error while running the square detectors: " << e.what() << endl;
        cerr << "If the error comes from the ocl module, check the OpenCL driver "
                "installation and the OPENCV_OPENCL_DEVICE setting." << endl;
        return EXIT_FAILURE;
    }

    // tick counts -> milliseconds, averaged over the timed runs
    cout << "cpp average time: " << 1000.0 * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl;
    cout << "ocl average time: " << 1000.0 * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl;

    Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl);
    imshow(wndname, result);
    imwrite(outfile, result);
    cvWaitKey(0);

    return EXIT_SUCCESS;
}

我已经安装了 CUDA 工具包；但是当我尝试在 Visual Studio 2013 中运行这段代码时，出现以下错误:

warming up ...
OpenCV Error: Gpu API call (CL_INVALID_VALUE) in cv::ocl::ContextImpl::ContextIm
pl, file C:\builds\2_4_PackSlave-win64-vc12-shared\opencv\modules\ocl\src\cl_con
text.cpp, line 578
ERROR: Can't select OpenCL device: GeForce GTX 650 Ti BOOST(NVIDIA CUDA)
ERROR: Required OpenCL device not found, check configuration:
    Platform: any
    Device types: GPU CPU
    Device name: any
OpenCV Error: Unknown error code -221 (Can't select OpenCL device) in cv::ocl::C
ontextImpl::getContext, file C:\builds\2_4_PackSlave-win64-vc12-shared\opencv\mo
dules\ocl\src\cl_context.cpp, line 684

更新:这是 CLinfo 的输出:

Number of platforms:    1
        CL_PLATFORM_PROFILE:    FULL_PROFILE
        CL_PLATFORM_VERSION:    OpenCL 1.2 CUDA 7.0.0
        CL_PLATFORM_VENDOR:     NVIDIA Corporation
        CL_PLATFORM_EXTENSIONS: cl_khr_byte_addressable_store cl_khr_icd cl_khr_
gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unro
ll cl_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_shari
ng cl_nv_copy_opts
        Number of devices:      1
                CL_DEVICE_TYPE: CL_DEVICE_TYPE_GPU
                CL_DEVICE_VENDOR_ID:    4318
                CL_DEVICE_MAX_COMPUTE_UNITS:    4
                CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:     3
        CL_DEVICE_MAX_WORK_ITEM_SIZES:  1024 1024 64
                CL_DEVICE_MAX_WORK_GROUP_SIZE:  1024
                CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:  1
                CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: 1
                CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:   1
                CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:  1
                CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: 1
                CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:        1
                CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:  0
                CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:     1
                CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:    1
                CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:      1
                CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG:     1
                CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT:    1
                CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:   1
                CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:     0
                CL_DEVICE_MAX_CLOCK_FREQUENCY:  1097
                CL_DEVICE_ADDRESS_BITS: 64
                CL_DEVICE_MAX_MEM_ALLOC_SIZE:   536870912
                CL_DEVICE_IMAGE_SUPPORT:        1
                CL_DEVICE_MAX_READ_IMAGE_ARGS:  256
                CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 16
                CL_DEVICE_IMAGE2D_MAX_WIDTH:    16384
                CL_DEVICE_IMAGE2D_MAX_WIDTH:    16384
                CL_DEVICE_IMAGE2D_MAX_HEIGHT:   16384
                CL_DEVICE_IMAGE3D_MAX_WIDTH:    4096
                CL_DEVICE_IMAGE3D_MAX_HEIGHT:   4096
                CL_DEVICE_IMAGE3D_MAX_DEPTH:    4096
                CL_DEVICE_MAX_SAMPLERS: 32
                CL_DEVICE_MAX_PARAMETER_SIZE:   4352
                CL_DEVICE_MEM_BASE_ADDR_ALIGN:  4096
                CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:     128
                CL_DEVICE_SINGLE_FP_CONFIG:     CL_FP_DENORM | CL_FP_INF_NAN | C
L_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA
                CL_DEVICE_SINGLE_FP_CONFIG:     CL_READ_ONLY_CACHE | CL_READ_WRI
TE_CACHE
                CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:        CL_READ_WRITE_CACHE
                CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE:    128
                CL_DEVICE_GLOBAL_MEM_CACHE_SIZE:        65536
                CL_DEVICE_GLOBAL_MEM_SIZE:      2147483648
                CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:     65536
                CL_DEVICE_MAX_CONSTANT_ARGS:    9
                CL_DEVICE_LOCAL_MEM_TYPE:
                CL_DEVICE_LOCAL_MEM_SIZE:       49151
                CL_DEVICE_ERROR_CORRECTION_SUPPORT:     0
                CL_DEVICE_HOST_UNIFIED_MEMORY:  0
                CL_DEVICE_PROFILING_TIMER_RESOLUTION:   1000
                CL_DEVICE_ENDIAN_LITTLE:        1
                CL_DEVICE_AVAILABLE:    1
                CL_DEVICE_COMPILER_AVAILABLE:   1
                CL_DEVICE_EXECUTION_CAPABILITIES:       CL_EXEC_KERNEL
                CL_DEVICE_QUEUE_PROPERTIES:     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_
ENABLE | CL_QUEUE_PROFILING_ENABLE
                CL_DEVICE_PLATFORM:     000000DA609B27F0
        CL_DEVICE_NAME: GeForce GTX 650 Ti BOOST
        CL_DEVICE_VENDOR:       NVIDIA Corporation
        CL_DRIVER_VERSION:      350.12
        CL_DEVICE_PROFILE:      FULL_PROFILE
        CL_DEVICE_VERSION:      OpenCL 1.2 CUDA
        CL_DEVICE_OPENCL_C_VERSION:     OpenCL C 1.2
        CL_DEVICE_EXTENSIONS:   cl_khr_byte_addressable_store cl_khr_icd cl_khr_
gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unro
ll cl_nv_d3d9_sharing cl_nv_d3d10_sharing cl_khr_d3d10_sharing cl_nv_d3d11_shari
ng cl_nv_copy_opts  cl_khr_global_int32_base_atomics cl_khr_global_int32_extende
d_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl
_khr_fp64

环境变量 OPENCV_OPENCL_DEVICE 应该设置成什么？我试过 :GPU:NVIDIA、:GPU:GeForce GTX 650 Ti BOOST 和 :GPU:GeForce GTX 650 Ti BOOST<NVIDIA CUDA>，但错误仍然存在!

更新 2: 将其设置为 :GPU:0 和 :GPU:1 时，我分别得到以下错误:

warming up ...
OpenCV Error: Gpu API call (CL_INVALID_VALUE) in cv::ocl::ContextImpl::ContextIm
pl, file C:\builds\2_4_PackSlave-win64-vc12-shared\opencv\modules\ocl\src\cl_con
text.cpp, line 578
ERROR: Can't select OpenCL device: GeForce GTX 650 Ti BOOST(NVIDIA CUDA)
ERROR: Required OpenCL device not found, check configuration: :GPU:0
    Platform: any
    Device types: GPU
    Device name: 0
OpenCV Error: Unknown error code -221 (Can't select OpenCL device) in cv::ocl::C
ontextImpl::getContext, file C:\builds\2_4_PackSlave-win64-vc12-shared\opencv\mo
dules\ocl\src\cl_context.cpp, line 684

设置为 :GPU:1 时(错误信息略有不同，长度上也不一样):

warming up ...
ERROR: Required OpenCL device not found, check configuration: :GPU:1
    Platform: any
    Device types: GPU
    Device name: 1
OpenCV Error: Unknown error code -221 (Can't select OpenCL device) in cv::ocl::C
ontextImpl::getContext, file C:\builds\2_4_PackSlave-win64-vc12-shared\opencv\mo
dules\ocl\src\cl_context.cpp, line 684

我还截取了环境设置的屏幕截图，以仔细检查我是否正确设置了这些值:

enter image description here

最佳答案

看起来设备无法打开，原因可能是找不到该设备名称，或者只是您填写的字符串与 OpenCV OpenCL 配置解析器的匹配方式不一致。请尝试更简单的设置。

Link to doc on setting OPENCV_OPENCL_DEVICE :

如果仍然失败，请仔细检查 clinfo 是否能检测到您的设备。

关于c++ - 让 NVIDIA OpenCL 在 OpenCV 2.4.10 上运行，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/30010859/

文章推荐： c++ - 临界区段错误 - 避免死锁

文章推荐： c++ - boost 日志到文件不起作用

文章推荐： c++ - 将页面加载到 QWebView 时覆盖

opencl - 英特尔 OpenCL 与 Khronos OpenCL 的对比
Intel、AMD 和 Khronos OpenCL 之间有什么区别。我对 OpenCL 完全陌生，想从它开始。我不知道在我的操作系统上安装哪个更好。最佳答案 OpenCL 是 C 和 C++ 语言
opencl - 从另一个 OpenCL 内核调用 OpenCL 内核
我在这里的一篇文章中看到，我们可以从 OpenCL 内核调用函数。但是在我的情况下，我还需要并行化该复杂函数(由所有可用线程运行)，所以我是否必须将该函数也设为内核并像从主内核中调用函数一样直接调
opencl - OpenCL 和 OpenCL Embedded 配置文件之间的主要区别
最近我看到一些开发板支持 OpenCL EP，例如 odroid XU。我知道的一件事是 OpenCL EP 适用于 ARM 处理器，但它与基于主要桌面的 OpenCL 在哪些特性上有所不同。最佳答
opencl - OpenCL 中内核参数数量的限制
我想知道在 OpenCL 中设置为内核函数的参数数量是否有任何限制。设置参数时出现 INVALID_ARG_INDEX 错误。我在内核函数中设置了 9 个参数。请在这方面帮助我。最佳答案您可以尝试
opencl - OpenCL 中零拷贝的访问路径
我对零拷贝的工作原理有点困惑。 1-要确认以下内容对应于opencl中的零拷贝。 ....................... . . . .
opencl - OpenCL 中的重叠传输和设备计算
我是 OpenCL 的初学者，我很难理解某些东西。我想改进主机和设备之间的图像传输。我制定了一个计划以更好地了解我。顶部:我现在拥有的 |底部:我想要的 HtD(主机到设备)和 DtH(设备到主
opencl - OpenCL 本地内存有限制吗？
今天我又加了四个 __local变量到我的内核以转储中间结果。但是只需将另外四个变量添加到内核的签名并添加相应的内核参数就会将内核的所有输出呈现为“0”。没有一个 cl 函数返回错误代码。我进一步尝
opencl - OpenCL 工作项是否并行执行？
我知道工作项被分组到工作组中，并且您不能在工作组之外进行同步。这是否意味着工作项是并行执行的？如果是这样，使用 128 个工作项创建 1 个工作组是否可能/有效？最佳答案组内的工作项将一起安排
opencl - 在 OpenCL 的语境下，warp 是什么？
我相当确定 warp 仅在 CUDA 中定义。但也许我错了。就 OpenCL 而言，什么是 warp？它与工作组不一样，是吗？非常感谢任何相关的反馈。谢谢! 最佳答案 它没有在 OpenCL 标准中定义。
opencl - OpenCL 调试器
已结束。此问题正在寻求书籍、工具、软件库等的推荐。它不满足Stack Overflow guidelines 。目前不接受答案。我们不允许提出寻求书籍、工具、软件库等推荐的问题。您可以编辑问题，以便
opencl - OpenCL 中的障碍
在OpenCL中，我的理解是可以使用barrier()函数来同步工作组中的线程。我(通常)确实了解它们的用途以及何时使用它们。我还知道工作组中的所有线程都必须遇到障碍，否则会出现问题。然而，到目前为止
opencl - OpenCL 中的平台
我的主板上有 Nvidia 显卡 (GeForce GT 640)。我已经在我的盒子上安装了 OpenCL。当我使用“clGetPlatformInfo(参数)”查询平台时，我看到以下输出:-#可用平
opencl - OpenCL 内核执行时间过长导致崩溃
我目前正在构建一个 ray marcher 来查看像 mandelbox 等东西。它工作得很好。但是，在我当前的程序中，它使用每个 worker 作为从眼睛转换的光线。这意味着每个 worker 有大
opencl - OpenCl 寄存器的神奇数字
我编写了两个不同的 openCl 内核，使用 nvidia profiler 获取了有关它们的一些信息，发现两者每个工作项都使用 63 个寄存器。我尝试了一切我能想到的方法来降低这个数字(用 ush
opencl - OpenCL 中的平台
我的主板上有 Nvidia 显卡 (GeForce GT 640)。我已经在我的盒子上安装了 OpenCL。当我使用“clGetPlatformInfo(参数)”查询平台时，我看到以下输出:-#可用平
opencl - OpenCL 内核执行时间过长导致崩溃
我目前正在构建一个 ray marcher 来查看像 mandelbox 等东西。它工作得很好。但是，在我当前的程序中，它使用每个 worker 作为从眼睛转换的光线。这意味着每个 worker 有大
opencl - OpenCL 中的矩阵求逆
我正在尝试使用 OpenCL 加速一些计算，算法的一部分包括矩阵求逆。是否有任何开源库或免费可用的代码来计算用 OpenCL 或 CUDA 编写的矩阵的 lu 分解(lapack dgetrf 和 d
opencl - OpenCL 支持动态并行性...？
我正在尝试在 OpenCL 内核中使用递归。编译成功，但运行时出现编译错误，所以我想知道，由于 CUDA 现在支持动态并行，OpenCL 是否支持动态并行？最佳答案 OpenCL 不支持递归。请参阅
opencl - OpenCL 中主机和设备之间的内存传输？
考虑以下代码，它从大小为 size 的 double 组创建缓冲区内存对象: coef_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM
opencl - OpenCL 中目标平台的示例是什么？
OpenCL 中目标平台的示例是什么？例如，它是 Windows、Android、Mac 等操作系统，还是设备中的实际芯片？最佳答案 OpenCL 平台本质上是一个 OpenCL 实现。它与操作系统

塔克拉玛干

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - 让 NVIDIA OpenCL 在 OpenCV 2.4.10 上运行