c++ - GLSL 计算着色器不适用于大输入-6ren

c++ - GLSL 计算着色器不适用于大输入

转载作者：塔克拉玛干更新时间：2023-11-03 07:02:23

着色器接收一个光子 SSBO，其中每个光子包含位置、方向、波长和强度。每个线程恰好负责跟踪一个光子穿过网格；光子每经过一个网格单元，就把它的强度按波长累加到该单元上，从而为每个网格单元生成光谱分布。

问题是着色器对 100,000 个光子完美运行，但不会返回 1,000,000 个光子的结果。

我查看了 SSBO 的大小，发现所有大小都在我的 GPU (NVIDIA Quadro P6000) 2GB 限制范围内:

SSBO 网格大小:1.5GB
SSBO 光子大小:0.02GB

如果我更改某些地方的逻辑，它就可以处理一百万个光子(请参阅着色器中 getRayRectangleExitEdge() 和 main() 里的大写注释，即着色器源码的第 87 行和第 114 行)。

我目前无法解释为什么着色器对 1,000,000 个光子失败，而对 100,000 个光子却有效。逻辑是相同的，缓冲区大小也在限制范围内。(更改逻辑后它能正常工作，这也证明缓冲区大小不是问题所在。)

下面是源代码。如果你想自己尝试，这里是 github 上的代码:https://github.com/TheJhonny007/TextureTracerDebug

计算着色器:

#version 430

#extension GL_EXT_compute_shader: enable
#extension GL_EXT_shader_storage_buffer_object: enable
#extension GL_ARB_compute_variable_group_size: enable

// Grid resolution. addToPixel() addresses the pixel buffer as idx.y * TEX_WIDTH + idx.x.
const uint TEX_WIDTH = 1024u;
const uint TEX_HEIGHT = TEX_WIDTH;

// Wavelength range in nm. MIN_WAVELENGTH maps to spectrum slot 0 and the number of slots is
// MAX_WAVELENGTH - MIN_WAVELENGTH, so MAX_WAVELENGTH itself has no slot.
const uint MIN_WAVELENGTH = 380u;
const uint MAX_WAVELENGTH = 740u;
const uint NUM_WAVELENGTHS = MAX_WAVELENGTH - MIN_WAVELENGTH;

// One photon to trace: two vec2, one uint and one float.
// Size: 24 bytes -> ~40,000,000 photons per available gigabyte of RAM
struct Photon {
    vec2 position;// m
    vec2 direction;// normalized
    uint wavelength;// nm, used by addToPixel() to select the spectrum slot
    float intensity;// 0..1 should start at 1
};

// Input photons (storage buffer binding 0). main() reads one element per invocation and
// uses photons.length() as the upper bound for the invocation index.
layout(std430, binding = 0) buffer Photons {
    Photon photons[];
};

// Spectrum accumulated for one grid cell: one uint counter per wavelength slot.
// Size: 1440 bytes -> ~700,000 pixels per available gigabyte of RAM
struct Pixel {
    uint intensityAtWavelengths[NUM_WAVELENGTHS];// [0..1000]
};

// Accumulation grid (storage buffer binding 1), written with atomicAdd by addToPixel().
layout(std430, binding = 1) buffer Pixels {
// Intended shape: Pixel pixels[TEX_WIDTH][TEX_HEIGHT];
// The array is left unsized because NVIDIA's linker takes ages to link if the sizes are
// specified; it is indexed manually as pixels[y * TEX_WIDTH + x] instead.
    Pixel[] pixels;
};

uniform float xAxisScalingFactor;

/// Returns the horizontal extent of grid column `i` as vec2(left edge, width).
/// The left edge of column i is pow(i, xAxisScalingFactor); the right edge is the left
/// edge of column i + 1.
vec2 getHorizontalRectangleAt(int i) {
    float left = pow(float(i), xAxisScalingFactor);
    float right = pow(float(i + 1), xAxisScalingFactor);
    // The second component ends up in Rectangle.w and is added to Rectangle.x to locate the
    // exit edge, so it has to be the column width, not the absolute x of the right edge.
    return vec2(left, right - left);
}

uniform float rectangleHeight;

// Axis-aligned grid cell. getRayRectangleExitEdge() evaluates the ray at x + w and compares
// the result against the vertical range [y, y + h].
struct Rectangle {
    float x;
    float y;
    float w;
    float h;
};

layout (local_size_variable) in;

/// Atomically adds `intensity` to the spectrum slot of `wavelength` in grid cell `idx`.
/// Requests outside the grid or outside [MIN_WAVELENGTH, MAX_WAVELENGTH) are ignored.
void addToPixel(uvec2 idx, uint wavelength, uint intensity) {
    // uvec2 components are unsigned, so only the upper bounds can be violated.
    if (idx.x >= TEX_WIDTH || idx.y >= TEX_HEIGHT) return;

    // Guard the subtraction below: a wavelength under MIN_WAVELENGTH would wrap around and
    // one of MAX_WAVELENGTH or more would index past the end of the spectrum array.
    if (wavelength < MIN_WAVELENGTH || wavelength >= MAX_WAVELENGTH) return;

    uint index = (idx.y * TEX_WIDTH) + idx.x;
    atomicAdd(pixels[index].intensityAtWavelengths[wavelength - MIN_WAVELENGTH], intensity);
}

/// Builds the grid cell addressed by `indices`. x and w come from
/// getHorizontalRectangleAt(indices.x); y is indices.y rows of rectangleHeight and h is
/// rectangleHeight.
Rectangle getRectangleAt(ivec2 indices) {
    vec2 horizontal = getHorizontalRectangleAt(indices.x);

    Rectangle cell;
    cell.x = horizontal.x;
    cell.y = rectangleHeight * float(indices.y);
    cell.w = horizontal.y;
    cell.h = rectangleHeight;
    return cell;
}

uniform float shadowLength;
uniform float shadowHeight;

/// Maps a location to grid indices. The column index is always 0; the row index is
/// location.y divided by rectangleHeight, truncated by the int conversion.
ivec2 getRectangleIdxAt(vec2 location) {
    return ivec2(0, int(location.y / rectangleHeight));
}

/// Evaluates the line through ray.position with direction ray.direction at the given x
/// and returns the y coordinate there.
float getRayIntersectAtX(Photon ray, float x) {
    float dx = x - ray.position.x;
    float slope = ray.direction.y / ray.direction.x;
    return slope * dx + ray.position.y;
}

/// Decides which neighbouring cell the ray enters next, based on the ray's height at
/// x = rect.x + rect.w. Returns the index step: (0, -1) if that height is below the cell,
/// (0, 1) if it is above, (1, 0) otherwise.
ivec2 getRayRectangleExitEdge(Photon ray, Rectangle rect) {
    float exitHeight = getRayIntersectAtX(ray, rect.x + rect.w);

    // Author's debugging note: if one of the two vertical checks below is removed, the
    // shader works with 1'000'000 photons; otherwise only with 100'000.
    if (exitHeight < rect.y) return ivec2(0, -1);
    if (exitHeight > rect.y + rect.h) return ivec2(0, 1);
    return ivec2(1, 0);
}

/// Traces one photon per invocation: starting at the photon's cell, adds its intensity to
/// every visited cell and steps to the next cell until the indices leave the grid.
void main() {
    uint photonIdx = gl_GlobalInvocationID.x;
    if (photonIdx >= photons.length()) return;

    Photon photon = photons[photonIdx];
    ivec2 cell = getRectangleIdxAt(photon.position);

    while (cell.x >= 0 && cell.y >= 0 &&
           cell.x < TEX_WIDTH && cell.y < TEX_HEIGHT) {
        // The atomic add works on uint values, hence the conversions.
        addToPixel(uvec2(cell), photon.wavelength, uint(photon.intensity * 100.0));

        cell += getRayRectangleExitEdge(photon, getRectangleAt(cell));

        // A ray leaving through the bottom is mirrored to simulate rays coming from the
        // other side of the planet; this relies on the rotational symmetry of the system.
        // Author's debugging note: with this block commented out the shader works with
        // 1'000'000 photons; otherwise only with 100'000.
        if (cell.y < 0) {
            cell.y = 0;
            photon.position.y *= -1.0;
            photon.direction.y *= -1.0;
        }
    }
}

TextureTracer.hpp

#ifndef TEXTURE_TRACER_HPP
#define TEXTURE_TRACER_HPP

#include <glm/glm.hpp>

#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

namespace gpu {

    /// CPU-side photon record that is uploaded to the photon storage buffer.
    /// 6 * 4 = 24 Bytes
    struct Photon {
        glm::vec2 position;  // m
        glm::vec2 direction; // normalized
        uint32_t waveLength; // nm
        float intensity;     // 0..1 should start at 1
    };

    /// Generates photons on the CPU and traces them through a grid with a compute shader.
    class TextureTracer {
    public:
        /// Seeds the random number generator, enables OpenGL debug output and builds the
        /// compute program.
        TextureTracer();

        /// Generates `numPhotons` photons, traces them and prints the resulting grid.
        /// The compute program is deleted at the end of the call.
        /// @return currently always 0.
        uint32_t createShadowMap(size_t numPhotons);

    private:
        /// Compiles and links the compute shader and looks up its uniform locations.
        void initTextureTracer();

        /// Dispatches the compute shader for the photons stored in buffer `ssboPhotons`.
        void traceThroughTexture(uint32_t ssboPhotons, size_t numPhotons);

        /// Creates one random photon.
        Photon emitPhoton();

        /// Creates `count` photons by calling emitPhoton() repeatedly.
        std::vector<Photon> generatePhotons(uint32_t count);

        // Uniform locations as returned by glGetUniformLocation. That function returns a
        // signed value and uses -1 for "no such active uniform", so the locations are kept
        // signed instead of being wrapped into an unsigned type.
        struct {
            int32_t uRectangleHeight;
            int32_t uShadowLength;
            int32_t uShadowHeight;
            int32_t uXAxisScalingFactor;
        } mTextureTracerUniforms;

        // OpenGL name of the linked compute program.
        uint32_t mTextureTracerProgram;

        // Random number generator and the distributions used by emitPhoton().
        std::mt19937_64 mRNG;
        std::uniform_real_distribution<> mDistributionSun;
        std::uniform_int_distribution<uint32_t> mDistributionWavelength;
        std::bernoulli_distribution mDistributionBoolean;
    };

} // namespace gpu

#endif // TEXTURE_TRACER_HPP

TextureTracer.cpp

#include "TextureTracer.hpp"
#include <GL/glew.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <vector>

/// OpenGL debug-output callback. Messages of type GL_DEBUG_TYPE_ERROR are written to stderr
/// with a "GL ERROR" prefix; all other messages are written to stdout with a "GL INFO"
/// prefix. `source`, `id`, `length` and `userParam` are not used.
void GLAPIENTRY MessageCallback(GLenum source, GLenum type, GLuint id,
                                GLenum severity, GLsizei length,
                                const GLchar *message, const void *userParam) {
  const bool isError = (type == GL_DEBUG_TYPE_ERROR);
  if (!isError) {
    fprintf(stdout, "GL INFO: type = 0x%x, severity = 0x%x, message = %s\n",
            type, severity, message);
    return;
  }

  fprintf(stderr, "GL ERROR: type = 0x%x, severity = 0x%x, message = %s\n",
          type, severity, message);
}

namespace gpu {
// Scale factors: RADIUS_FACTORED is RADIUS times TEX_HEIGHT_TO_RADIUS_FACTOR, and
// traceThroughTexture() multiplies the shadow length by TEX_SHADOW_LENGTH_FACTOR.
const double TEX_HEIGHT_TO_RADIUS_FACTOR = 4;
const double TEX_SHADOW_LENGTH_FACTOR = 8;

// Grid resolution; mirrors the constants of the same name in the compute shader.
const uint32_t TEX_WIDTH = 1024u;
const uint32_t TEX_HEIGHT = TEX_WIDTH;

// NOTE(review): presumably the planet (Earth) radius in metres — confirm.
const double RADIUS = 6'371'000.0;
const double RADIUS_FACTORED = RADIUS * TEX_HEIGHT_TO_RADIUS_FACTOR;

// NOTE(review): presumably the Sun's radius, the distance to the Sun and the atmosphere
// height, all in metres — confirm.
const double SUN_RADIUS = 695'510'000.0;
const double DIST_TO_SUN = 149'600'000'000.0;
const double ATMO_HEIGHT = 42'000.0;

/// Reads the text file `fileName` and returns its contents, with every line terminated
/// by '\n'.
///
/// If the file cannot be opened, an error is printed to std::cerr and the process is
/// terminated with exit(-1).
std::string loadShader(const std::string &fileName) {
  std::ifstream shaderFileStream(fileName, std::ios::in);
  if (!shaderFileStream.is_open()) {
    std::cerr << "Could not load the GLSL shader from '" << fileName << "'!"
              << std::endl;
    exit(-1);
  }

  std::string shaderCode;
  std::string line;
  // Loop on the result of getline itself rather than on eof(): an eof() loop appends one
  // spurious empty line after the last real one and never ends if the stream enters a
  // failed state without reaching the end of the file.
  while (std::getline(shaderFileStream, line)) {
    shaderCode += line;
    shaderCode += '\n';
  }

  return shaderCode;
}

void TextureTracer::initTextureTracer() {
  mTextureTracerProgram = glCreateProgram();
  uint32_t rayTracingComputeShader = glCreateShader(GL_COMPUTE_SHADER);

  std::string code = loadShader("../resources/TextureTracer.glsl");
  const char *shader = code.c_str();
  glShaderSource(rayTracingComputeShader, 1, &shader, nullptr);
  glCompileShader(rayTracingComputeShader);

  glAttachShader(mTextureTracerProgram, rayTracingComputeShader);
  glLinkProgram(mTextureTracerProgram);

  mTextureTracerUniforms.uRectangleHeight =
      glGetUniformLocation(mTextureTracerProgram, "rectangleHeight");
  mTextureTracerUniforms.uShadowHeight =
      glGetUniformLocation(mTextureTracerProgram, "shadowHeight");
  mTextureTracerUniforms.uShadowLength =
      glGetUniformLocation(mTextureTracerProgram, "shadowLength");
  mTextureTracerUniforms.uXAxisScalingFactor =
      glGetUniformLocation(mTextureTracerProgram, "xAxisScalingFactor");

  glDetachShader(mTextureTracerProgram, rayTracingComputeShader);
  glDeleteShader(rayTracingComputeShader);
}

/// Seeds the random number generator with 1, sets up the distributions used by
/// emitPhoton() (offsets in [-SUN_RADIUS, SUN_RADIUS), wavelengths in [380, 739] and a
/// fair coin), enables OpenGL debug output and builds the compute program.
TextureTracer::TextureTracer()
    : mRNG(1L), mDistributionSun(-SUN_RADIUS, SUN_RADIUS),
      mDistributionWavelength(380, 739), mDistributionBoolean(0.5) {
  glEnable(GL_DEBUG_OUTPUT);
  glDebugMessageCallback(MessageCallback, nullptr);

  initTextureTracer();
}

/// Distance from `origin` along `direction` to the first intersection with the circle
/// around `center` with the given `radius`.
///
/// Returns -1.0 if the origin lies outside the circle and the ray points away from it, or
/// if the discriminant is negative (the ray misses). Returns 0.0 when the nearest
/// intersection would lie behind the origin.
/// NOTE(review): the formula has no division by dot(direction, direction), so it presumably
/// expects a normalized direction — confirm.
double raySphereDistance(glm::dvec2 origin, glm::dvec2 direction,
                         glm::dvec2 center, double radius) {
  const glm::dvec2 toOrigin = origin - center;
  const double b = glm::dot(toOrigin, direction);
  const double c = glm::dot(toOrigin, toOrigin) - (radius * radius);

  const bool outsideAndPointingAway = (c > 0.0) && (b > 0.0);
  const double discr = b * b - c;
  if (outsideAndPointingAway || discr < 0.0) {
    return -1.0;
  }

  // Smallest intersection parameter, clamped so that it is never negative.
  return glm::max(0.0, -b - glm::sqrt(discr));
}

/// Creates one random photon. The ray starts at x = -DIST_TO_SUN with a random vertical
/// offset of at most SUN_RADIUS and aims at a random point on the y axis between RADIUS
/// and RADIUS + ATMO_HEIGHT. The start is then advanced by the distance that
/// raySphereDistance() reports for the circle of radius RADIUS + ATMO_HEIGHT around the
/// origin. The returned photon has x = 0, the advanced y, a random wavelength and
/// intensity 1.
Photon TextureTracer::emitPhoton() {
  std::uniform_real_distribution<> heightDistribution(0.0, ATMO_HEIGHT);
  const glm::dvec2 target(0.0, RADIUS + heightDistribution(mRNG));

  // Redraw the offset until it is no larger than SUN_RADIUS.
  double sunOffset;
  for (;;) {
    sunOffset =
        glm::length(glm::dvec2(mDistributionSun(mRNG), mDistributionSun(mRNG)));
    if (!(sunOffset > SUN_RADIUS)) {
      break;
    }
  }

  const double signedOffset = mDistributionBoolean(mRNG) ? sunOffset : -sunOffset;
  glm::dvec2 origin(-DIST_TO_SUN, signedOffset);
  const glm::dvec2 direction = glm::normalize(target - origin);

  const double travel =
      raySphereDistance(origin, direction, {0.0, 0.0}, RADIUS + ATMO_HEIGHT);
  origin += direction * travel;

  return {glm::vec2(0.0, origin.y), glm::vec2(direction),
          mDistributionWavelength(mRNG), 1.0f};
}

std::vector<Photon> TextureTracer::generatePhotons(uint32_t count) {
  std::vector<Photon> photons(count);
  std::generate(photons.begin(), photons.end(),
                [this]() { return emitPhoton(); });
  return photons;
}

/// Runs the compute shader over the photons in `ssboPhotons`, reads the accumulation grid
/// back and prints one character cell per pixel to stdout.
///
/// @param ssboPhotons  OpenGL name of the buffer holding the photons; bound to storage
///                     buffer binding 0.
/// @param numPhotons   Number of photons in that buffer; determines the dispatch size.
void TextureTracer::traceThroughTexture(uint32_t ssboPhotons,
                                        size_t numPhotons) {
  glUseProgram(mTextureTracerProgram);

  glUniform1f(mTextureTracerUniforms.uRectangleHeight,
              static_cast<float>(RADIUS_FACTORED / TEX_HEIGHT));

  const double shadowLength =
      TEX_SHADOW_LENGTH_FACTOR * (DIST_TO_SUN * RADIUS) / (SUN_RADIUS - RADIUS);

  glUniform1f(mTextureTracerUniforms.uShadowLength,
              static_cast<float>(shadowLength));
  glUniform1f(mTextureTracerUniforms.uShadowHeight,
              static_cast<float>(RADIUS_FACTORED));

  const double xAxisScalingFactor =
      glm::log(shadowLength) / glm::log(static_cast<double>(TEX_WIDTH));

  glUniform1f(mTextureTracerUniforms.uXAxisScalingFactor,
              static_cast<float>(xAxisScalingFactor));

  const uint32_t MIN_WAVELENGTH = 380u;
  const uint32_t MAX_WAVELENGTH = 740u;
  const uint32_t NUM_WAVELENGTHS = MAX_WAVELENGTH - MIN_WAVELENGTH;

  // Host-side mirror of the shader's Pixel struct.
  struct Pixel {
    uint32_t intensityAtWavelengths[NUM_WAVELENGTHS];
  };

  // The vector value-initialises its elements, so every counter starts at zero. It is
  // uploaded as the buffer's initial contents because the shader only ever adds to the
  // counters; a buffer created without data has undefined contents.
  std::vector<Pixel> pixels(static_cast<size_t>(TEX_WIDTH) * TEX_HEIGHT);
  const size_t pixelBufferSize = pixels.size() * sizeof(Pixel);

  uint32_t ssboPixels;
  glGenBuffers(1, &ssboPixels);
  glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPixels);
  glBufferData(GL_SHADER_STORAGE_BUFFER, pixelBufferSize, pixels.data(),
               GL_DYNAMIC_COPY);

  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssboPhotons);
  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, ssboPixels);

  const uint32_t numThreads = 32u;
  // Round up so that a trailing partial group is dispatched as well; the shader discards
  // invocations whose index is past the end of the photon buffer.
  const uint32_t numBlocks =
      static_cast<uint32_t>((numPhotons + numThreads - 1u) / numThreads);
  std::cout << "numBlocks: " << numBlocks << std::endl;

  glDispatchComputeGroupSizeARB(numBlocks, 1, 1, numThreads, 1, 1);
  // NOTE(review): the results are read back with glGetBufferSubData below; the OpenGL
  // reference lists GL_BUFFER_UPDATE_BARRIER_BIT for that kind of access — confirm whether
  // that bit should be added here.
  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

  glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPixels);
  glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, pixelBufferSize,
                     pixels.data());
  glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);

  for (uint32_t y = 0; y < TEX_HEIGHT; ++y) {
    printf("%4i | ", static_cast<int>(y));
    for (uint32_t x = 0; x < TEX_WIDTH; ++x) {
      const Pixel &p = pixels[static_cast<size_t>(y) * TEX_WIDTH + x];

      // 64-bit sum: 360 counters of 32 bits each can exceed the range of int.
      uint64_t counter = 0;
      for (uint32_t i : p.intensityAtWavelengths) {
        counter += i;
      }

      if (counter == 0) {
        printf("  ");
      } else if (counter > 100'000'000) {
        printf("%4s", "\u25A0");
      } else if (counter > 10'000'000) {
        printf("%4s", "\u25A3");
      } else if (counter > 1'000'000) {
        printf("%4s", "\u25A6");
      } else if (counter > 100'000) {
        printf("%4s", "\u25A4");
      } else {
        printf("%4s", "\u25A1");
      }
    }

    std::cout << std::endl;
  }

  glDeleteBuffers(1, &ssboPixels);

  glUseProgram(0);
}

uint32_t TextureTracer::createShadowMap(size_t numPhotons) {
  std::vector<Photon> photons = generatePhotons(numPhotons);

  uint32_t ssboPhotons;
  glGenBuffers(1, &ssboPhotons);
  glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPhotons);
  glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(Photon) * photons.size(),
               photons.data(), GL_DYNAMIC_COPY);

  traceThroughTexture(ssboPhotons, photons.size());

  glDeleteBuffers(1, &ssboPhotons);
  glDeleteProgram(mTextureTracerProgram);

  glDisable(GL_DEBUG_OUTPUT);
  glDebugMessageCallback(nullptr, nullptr);

  return 0;
}
}

main.cpp

#include <GL/glew.h>
#include <GL/glut.h>

#include "TextureTracer.hpp"

int main(int argc, char *argv[]) {
    glutInit(&argc, argv);
    glutCreateWindow("OpenGL needs a window o.O");
    glewInit();

    auto mapper = gpu::TextureTracer();

    // WITH 100'000 PHOTONS IT WORKS, WITH 1'000'000 PHOTONS NOT WHY?
    mapper.createShadowMap(100'000);

    return 0;
}

最佳答案

如果 GPU 程序执行时间过长，操作系统会取消它们的执行。在 Windows 上通常是两秒，在 Linux 上大多数时候是五秒，但可能会有所不同。

这是为了检测卡住的 GPU 程序并取消它们。有不同的方法来解决此超时问题，但它们都需要管理员/root 权限，这并不总是可用的。

如果可能，可以将执行拆分为多个调用，如以下代码段所示:

const uint32_t passSize   = 2048u;
const uint32_t numPasses  = (numPhotons / passSize) + 1;
const uint32_t numThreads = 64u;
const uint32_t numBlocks  = passSize / numThreads;

glUniform1ui(glGetUniformLocation(mTextureTracerProgram, "passSize"), passSize);
for (uint32_t pass = 0u; pass < numPasses; ++pass) {
  glUniform1ui(glGetUniformLocation(mTextureTracerProgram, "pass"), pass);

  glDispatchComputeGroupSizeARB(numBlocks, 1, 1, numThreads, 1, 1);
  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
  glFlush();
  glFinish();
}

glFlush() 和 glFinish() 调用很重要，否则执行将捆绑在一起，操作系统无论如何都会触发超时。

在着色器中，您只需要像这样访问输入数据的正确部分:

// other stuff

uniform uint pass;
uniform uint passSize;

void main() {
  // Index into the photon buffer: skip the photons handled by earlier passes.
  uint photonIndex = pass * passSize + gl_GlobalInvocationID.x;
  if (photonIndex >= photons.length()) return;

  Photon photon = photons[photonIndex];

  // rest of program
}

这就是全部。

如果您想禁用操作系统超时，这里有一篇与 Linux 相关的帖子:https://stackoverflow.com/a/30520538/5543884

这是一篇关于 Windows 的帖子:https://stackoverflow.com/a/29759823/5543884

关于c++ - GLSL 计算着色器不适用于大输入，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/57182545/

文章推荐： c++ - 带有别名的 SFINAE，重载问题

文章推荐： c++ - 如何匹配环境不同的两个群体？ C++

glsl - GLSL 中不变和精确的关键字
我试图理解这两个概念。我正在阅读的手册对它们非常简短，像多 channel 算法这样的东西对我来说是新的。我想要一些示例(不是代码)，说明我需要在哪里使用不变变量或精确变量，只是为了获得一个大致的想法
glsl - glsl 中的带边框圆角矩形
您好，我正在尝试获得一个快速的圆角矩形 glsl 着色器，但我只设法使用此函数( https://github.com/marklundin/glsl-sdf-primitives/blob/mast
glsl - [GLSL]如何比较世界坐标中所有顶点的z值？
这可能是一个简单的问题。作为 GLSL 的新手，我宁愿在这里问。现在，在顶点着色器中，我可以通过以下方式获取世界坐标系中的位置: gl_Position = ftransform();
glsl - GLSL 中的高效双三次过滤代码？
我想知道是否有人拥有完整、有效且高效的代码来在 glsl 中进行双三次纹理过滤。有这个: http://www.codeproject.com/Articles/236394/Bi-Cubic-and
glsl - GLSL 和 GLSL ES 2 之间的差异
真的有两个问题... GLSL ES 2 是完全独立的语言，还是 GLSL 的特殊版本？在“标准库”函数、语法和功能方面，它们之间有什么区别？我正在为一个针对 Windows、Mac 和 iPad
glsl - GLSL-长度函数
从GLSL文档(https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/length.xhtml)中，长度函数“计算 vector 的长度”
glsl - 如何在 GLSL 着色器中实现颜色矩阵过滤器
我想在 GLSL 着色器中实现颜色矩阵滤镜，但找不到与此相关的任何文档。我是着色器世界的新手(我自己从未编写过代码)所以如果我的解释/词汇没有意义，请原谅我。到目前为止我可以收集到的信息: 一个颜色
glsl - 如何在 GLSL 片段着色器中获得相对于法线的视角？
我刚刚开始使用 openframeworks 中的着色器，并且正在尝试编写一个片段着色器，它根据片段的观看角度来更改片段的颜色。例如，给定一个矩形，如果从正面看(相机与法线平行)它会是红色，但如果从侧
glsl - GLSL sign() 函数是否会导致分支以及它是如何工作的？
似乎某些按情况返回不同输出的函数可能使用 if 语句作为底层实现，从而导致分支。我不这么认为，但我想知道。对于 sign(x)，如果数字是正数、负数或零，则分别返回 1、-1 和 0。那么
glsl - 如何在 glsl 中执行位操作
如何在 glsl 中执行位操作？使用常规 C 风格的按位运算符 | , & , ^ , 或 !不起作用。最佳答案它们是在 GLSL 1.30 (OGL 3.0) 中引入的。根据您想要做什么，您
glsl - 使用 GLSL 着色器从图像生成高度图
最近我一直在玩 webGl，我偶然发现了一个很酷的小演示 here (来源 here )我想稍微改变一下以获得一些很酷的结果。我对改变地形的生成方式很感兴趣。而不是分层 10 个 Octave
glsl - WebGL 使用什么版本的 GLSL
这是每个设备的事情吗？还是基于浏览器？抱歉问了这样一个基本问题，但我似乎找不到直接的答案。最佳答案它基于 OpenGL ES 2.0，并根据 the spec , 它必须支持 GLSL ES 版本
glsl - 您可以在 GLSL 着色器中通过引用传递矩阵吗？
你如何在 GLSL 着色器中通过引用传递？最佳答案您可以将属性标记为 inout在函数签名中，这将使属性有效地“通过引用传递” 例如， void doSomething( vec3 trans,
glsl - Vulkan GLSL 的统一纹素缓冲区
我有一个浮点 RGBA 缓冲区，我想将其作为统一 Texel 缓冲区传递到我的计算着色器(用于只读访问，没有采样)。谁能告诉我如何在 GLSL 中执行此操作？我能找到的所有示例似乎都在跳过该主题，或
glsl - GLSL ES 中的统一与属性
我有一些参数从 CPU 传递到 GPU，这些参数对于所有片段都是恒定的，但在每一帧上都会发生变化(我使用的是 GLSL ES 1.1)。对于这些值，我应该使用制服还是属性？属性可能因顶点而异，所以我的
glsl - GLSL rand() 这一行代码的起源是什么？
我已经看到这个伪随机数生成器在着色器中使用，引用here and there around the web : float rand(vec2 co){ return fract(sin(dot(
glsl - 如何在 GLSL 结构体中初始化数组
我尝试在结构内初始化数组，如下所示: struct myStruct { vec3 data[20] = vec3[20] (vec3(1, 1, 1), vec3( 1, -1, 1), v
glsl - 如何在 GLSL 结构体中初始化数组
我尝试在结构内初始化数组，如下所示: struct myStruct { vec3 data[20] = vec3[20] (vec3(1, 1, 1), vec3( 1, -1, 1), v
glsl - 在 GLSL 中传递值的最快方法是什么？
在 GLSL 着色器中，出于各种原因，我经常需要几个函数来修改单个值(例如，片段着色器使用四个函数来应用照明、纹理、镜面反射和雾化)。我可以想到至少三种方法来传递这些值进行修改: 使用 inout每个
glsl - 如何在 GLSL 中复制数组？
我在 SL 引用中搜索了“copy”，但找不到任何相关内容。如果我有: float a[3] = float[3] (1.0,2.0,3.0); float b[3] = a; 是 b现在指向 a

塔克拉玛干

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - GLSL 计算着色器不适用于大输入