gpt4 book ai didi

c++ - GLSL 计算着色器不适用于大输入

转载 作者:塔克拉玛干 更新时间:2023-11-03 07:02:23 25 4
gpt4 key购买 nike

着色器采用具有位置、方向、波长和强度的光子 SSBO,每个线程负责准确地跟踪一个光子通过网格,在每个网格单元中,光子命中,强度为每个波长累积为每个网格单元创建光谱分布。

问题是着色器对 100,000 个光子完美运行,但不会返回 1,000,000 个光子的结果。

我查看了 SSBO 的大小,发现所有大小都在我的 GPU (NVIDIA Quadro P6000) 2GB 限制范围内:

  • SSBO 网格大小:1.5GB
  • SSBO 光子大小:0.02GB

如果我更改某些地方的逻辑,它可以处理一百万个光子(请参阅第 87 行和第 114 行的评论)。

我目前无法解释为什么着色器对 1,000,000 个光子失败,但对 100,000 个光子有效。逻辑相同,缓冲区大小在限制范围内。 (缓冲区大小不会成为问题也被它在更改逻辑时起作用所证实。)

下面是源代码。如果你想自己尝试,这里是 github 上的代码:https://github.com/TheJhonny007/TextureTracerDebug

计算着色器:

#version 430

#extension GL_EXT_compute_shader: enable
#extension GL_EXT_shader_storage_buffer_object: enable
#extension GL_ARB_compute_variable_group_size: enable

const uint TEX_WIDTH = 1024u;
const uint TEX_HEIGHT = TEX_WIDTH;

const uint MIN_WAVELENGTH = 380u;
const uint MAX_WAVELENGTH = 740u;
const uint NUM_WAVELENGTHS = MAX_WAVELENGTH - MIN_WAVELENGTH;

// Size: 24 bytes -> ~40,000,000 photons per available gigabyte of ram
struct Photon {
vec2 position;// m
vec2 direction;// normalized
uint wavelength;// nm
float intensity;// 0..1 should start at 1
};

layout(std430, binding = 0) buffer Photons {
Photon photons[];
};

// Size: 1440 bytes -> ~700,000 pixels per available gigabyte of ram
struct Pixel {
uint intensityAtWavelengths[NUM_WAVELENGTHS];// [0..1000]
};

layout(std430, binding = 1) buffer Pixels {
//Pixel pixels[TEX_WIDTH][TEX_HEIGHT];
// NVIDIAs linker takes ages to link if the sizes are specified :(
Pixel[] pixels;
};

uniform float xAxisScalingFactor;

vec2 getHorizontalRectangleAt(int i) {
float x = pow(float(i), xAxisScalingFactor);
float w = pow(float(i + 1), xAxisScalingFactor);
return vec2(x, w);
}

uniform float rectangleHeight;

struct Rectangle {
float x;
float y;
float w;
float h;
};

layout (local_size_variable) in;

void addToPixel(uvec2 idx, uint wavelength, uint intensity) {
if (idx.x >= 0u && idx.x < TEX_WIDTH && idx.y >= 0u && idx.y < TEX_HEIGHT) {
uint index = (idx.y * TEX_WIDTH) + idx.x;
atomicAdd(pixels[index].intensityAtWavelengths[wavelength - MIN_WAVELENGTH], intensity);
}
}

/// Returns the rectangle at the given indices.
Rectangle getRectangleAt(ivec2 indices) {
vec2 horRect = getHorizontalRectangleAt(indices.x);
return Rectangle(horRect.x, rectangleHeight * float(indices.y), horRect.y, rectangleHeight);
}

uniform float shadowLength;
uniform float shadowHeight;

/// Returns the indices of the rectangle at the given location
ivec2 getRectangleIdxAt(vec2 location) {
int x = 0;
int y = int(location.y / rectangleHeight);
return ivec2(x, y);
}

float getRayIntersectAtX(Photon ray, float x) {
float slope = ray.direction.y / ray.direction.x;
return slope * (x - ray.position.x) + ray.position.y;
}

ivec2 getRayRectangleExitEdge(Photon ray, Rectangle rect) {
float intersectHeight = getRayIntersectAtX(ray, rect.x + rect.w);

// IF ONE OF THE FIRST TWO CONDITIONS GETS REMOVED IT WORKS WITH 1'000'000 PHOTONS OTHERWISE ONLY 100'000 WHY?
if (intersectHeight < rect.y) {
return ivec2(0, -1);
} else if (intersectHeight > rect.y + rect.h) {
return ivec2(0, 1);
} else {
return ivec2(1, 0);
}
}

void main() {
uint gid = gl_GlobalInvocationID.x;
if (gid >= photons.length()) return;

Photon photon = photons[gid];

ivec2 photonTexIndices = getRectangleIdxAt(photon.position);
while (photonTexIndices.x < TEX_WIDTH && photonTexIndices.y < TEX_HEIGHT &&
photonTexIndices.x >= 0 && photonTexIndices.y >= 0) {
// need to convert to uint for atomic add operations...
addToPixel(uvec2(photonTexIndices), photon.wavelength, uint(photon.intensity * 100.0));

ivec2 dir = getRayRectangleExitEdge(photon, getRectangleAt(photonTexIndices));
photonTexIndices += dir;

// When the ray goes out of bounds on the bottom then mirror it to simulate rays coming from
// the other side of the planet. This works because of the rotational symmetry of the system.
// IF COMMENTET OUT IT WORKS WITH 1'000'000 PHOTONS OTHERWISE ONLY 100'000 WHY?
if (photonTexIndices.y < 0) {
photonTexIndices.y = 0;
photon.position.y *= -1.0;
photon.direction.y *= -1.0;
}
}
}

示踪剂.hpp

#ifndef TEXTURE_TRACER_HPP
#define TEXTURE_TRACER_HPP

#include <glm/glm.hpp>
#include <random>

namespace gpu {

// 6 * 4 = 24 Bytes
struct Photon {
glm::vec2 position; // m
glm::vec2 direction; // normalized
uint32_t waveLength; // nm
float intensity; // 0..1 should start at 1
};

class TextureTracer {
public:
TextureTracer();
uint32_t createShadowMap(size_t numPhotons);

private:
void initTextureTracer();
void traceThroughTexture(uint32_t ssboPhotons, size_t numPhotons);
Photon emitPhoton();
std::vector<Photon> generatePhotons(uint32_t count);

struct {
uint32_t uRectangleHeight;
uint32_t uShadowLength;
uint32_t uShadowHeight;
uint32_t uXAxisScalingFactor;
} mTextureTracerUniforms;

uint32_t mTextureTracerProgram;

std::mt19937_64 mRNG;
std::uniform_real_distribution<> mDistributionSun;
std::uniform_int_distribution<uint32_t> mDistributionWavelength;
std::bernoulli_distribution mDistributionBoolean;
};

} // namespace gpu

#endif // TEXTURE_TRACER_HPP

示踪剂.cpp

#include "TextureTracer.hpp"
#include <GL/glew.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <vector>

void GLAPIENTRY MessageCallback(GLenum source, GLenum type, GLuint id,
GLenum severity, GLsizei length,
const GLchar *message, const void *userParam) {
if (type == GL_DEBUG_TYPE_ERROR)
fprintf(stderr, "GL ERROR: type = 0x%x, severity = 0x%x, message = %s\n",
type, severity, message);
else
fprintf(stdout, "GL INFO: type = 0x%x, severity = 0x%x, message = %s\n",
type, severity, message);
}

namespace gpu {
const double TEX_HEIGHT_TO_RADIUS_FACTOR = 4;
const double TEX_SHADOW_LENGTH_FACTOR = 8;

const uint32_t TEX_WIDTH = 1024u;
const uint32_t TEX_HEIGHT = TEX_WIDTH;

const double RADIUS = 6'371'000.0;
const double RADIUS_FACTORED = RADIUS * TEX_HEIGHT_TO_RADIUS_FACTOR;

const double SUN_RADIUS = 695'510'000.0;
const double DIST_TO_SUN = 149'600'000'000.0;
const double ATMO_HEIGHT = 42'000.0;

std::string loadShader(const std::string &fileName) {
std::ifstream shaderFileStream(fileName, std::ios::in);
if (!shaderFileStream.is_open()) {
std::cerr << "Could not load the GLSL shader from '" << fileName << "'!"
<< std::endl;
exit(-1);
}

std::string shaderCode;
while (!shaderFileStream.eof()) {
std::string line;
std::getline(shaderFileStream, line);
shaderCode.append(line + "\n");
}

return shaderCode;
}

void TextureTracer::initTextureTracer() {
mTextureTracerProgram = glCreateProgram();
uint32_t rayTracingComputeShader = glCreateShader(GL_COMPUTE_SHADER);

std::string code = loadShader("../resources/TextureTracer.glsl");
const char *shader = code.c_str();
glShaderSource(rayTracingComputeShader, 1, &shader, nullptr);
glCompileShader(rayTracingComputeShader);

glAttachShader(mTextureTracerProgram, rayTracingComputeShader);
glLinkProgram(mTextureTracerProgram);

mTextureTracerUniforms.uRectangleHeight =
glGetUniformLocation(mTextureTracerProgram, "rectangleHeight");
mTextureTracerUniforms.uShadowHeight =
glGetUniformLocation(mTextureTracerProgram, "shadowHeight");
mTextureTracerUniforms.uShadowLength =
glGetUniformLocation(mTextureTracerProgram, "shadowLength");
mTextureTracerUniforms.uXAxisScalingFactor =
glGetUniformLocation(mTextureTracerProgram, "xAxisScalingFactor");

glDetachShader(mTextureTracerProgram, rayTracingComputeShader);
glDeleteShader(rayTracingComputeShader);
}

TextureTracer::TextureTracer()
: mRNG(1L), mDistributionSun(
std::uniform_real_distribution<>(-SUN_RADIUS, SUN_RADIUS)),
mDistributionWavelength(
std::uniform_int_distribution<uint32_t>(380, 739)),
mDistributionBoolean(std::bernoulli_distribution(0.5)) {
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(MessageCallback, nullptr);

initTextureTracer();
}

double raySphereDistance(glm::dvec2 origin, glm::dvec2 direction,
glm::dvec2 center, double radius) {
glm::dvec2 m = origin - center;
double b = glm::dot(m, direction);
double c = glm::dot(m, m) - (radius * radius);
if (c > 0.0 && b > 0.0)
return -1.0;

double discr = b * b - c;

// A negative discriminant corresponds to ray missing sphere
if (discr < 0.0)
return -1.0;

// Ray now found to intersect sphere, compute smallest t value of intersection
return glm::max(0.0, -b - glm::sqrt(discr));
}

Photon TextureTracer::emitPhoton() {
std::uniform_real_distribution<> distributionEarth(0.0, ATMO_HEIGHT);
glm::dvec2 target = {0.0, RADIUS + distributionEarth(mRNG)};

double d;
do {
d = glm::length(glm::dvec2(mDistributionSun(mRNG), mDistributionSun(mRNG)));
} while (d > SUN_RADIUS);

glm::dvec2 startPosition =
glm::dvec2(-DIST_TO_SUN, mDistributionBoolean(mRNG) ? d : -d);
glm::dvec2 direction = glm::normalize(target - startPosition);

startPosition +=
direction * raySphereDistance(startPosition, direction, {0.0, 0.0},
RADIUS + ATMO_HEIGHT);

return {glm::vec2(0.0, startPosition.y), glm::vec2(direction),
mDistributionWavelength(mRNG), 1.0f};
}

std::vector<Photon> TextureTracer::generatePhotons(uint32_t count) {
std::vector<Photon> photons(count);
std::generate(photons.begin(), photons.end(),
[this]() { return emitPhoton(); });
return photons;
}

void TextureTracer::traceThroughTexture(uint32_t ssboPhotons,
size_t numPhotons) {
glUseProgram(mTextureTracerProgram);

glUniform1f(mTextureTracerUniforms.uRectangleHeight,
RADIUS_FACTORED / TEX_HEIGHT);

const double shadowLength =
TEX_SHADOW_LENGTH_FACTOR * (DIST_TO_SUN * RADIUS) / (SUN_RADIUS - RADIUS);

glUniform1f(mTextureTracerUniforms.uShadowLength, shadowLength);
glUniform1f(mTextureTracerUniforms.uShadowHeight, RADIUS_FACTORED);

const double xAxisScalingFactor =
glm::log(shadowLength) / glm::log(static_cast<double>(TEX_WIDTH));

glUniform1f(mTextureTracerUniforms.uXAxisScalingFactor,
static_cast<float>(xAxisScalingFactor));

const uint32_t MIN_WAVELENGTH = 380u;
const uint32_t MAX_WAVELENGTH = 740u;
const uint32_t NUM_WAVELENGTHS = MAX_WAVELENGTH - MIN_WAVELENGTH;

size_t pixelBufferSize =
TEX_WIDTH * TEX_HEIGHT * NUM_WAVELENGTHS * sizeof(uint32_t);
uint32_t ssboPixels;
glGenBuffers(1, &ssboPixels);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPixels);
glBufferData(GL_SHADER_STORAGE_BUFFER, pixelBufferSize, nullptr,
GL_DYNAMIC_COPY);

glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssboPhotons);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, ssboPixels);

const uint32_t numThreads = 32u;
const uint32_t numBlocks = numPhotons / numThreads;
std::cout << "numBlocks: " << numBlocks << std::endl;

glDispatchComputeGroupSizeARB(numBlocks, 1, 1, numThreads, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

struct Pixel {
uint32_t intensityAtWavelengths[NUM_WAVELENGTHS];
};

std::vector<Pixel> pixels(TEX_WIDTH * TEX_HEIGHT);

glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPixels);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, pixelBufferSize,
pixels.data());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);

for (int y = 0; y < TEX_HEIGHT; ++y) {
printf("%4i | ", y);
for (int x = 0; x < TEX_WIDTH; ++x) {
Pixel p = pixels[y * TEX_WIDTH + x];
int counter = 0;
for (uint32_t i : p.intensityAtWavelengths) {
counter += i;
}

if (counter == 0) {
printf(" ");
} else if (counter > 100'000'000) {
printf("%4s", "\u25A0");
} else if (counter > 10'000'000) {
printf("%4s", "\u25A3");
} else if (counter > 1'000'000) {
printf("%4s", "\u25A6");
} else if (counter > 100'000) {
printf("%4s", "\u25A4");
} else {
printf("%4s", "\u25A1");
}
}

std::cout << std::endl;
}

glDeleteBuffers(1, &ssboPixels);

glUseProgram(0);
}

uint32_t TextureTracer::createShadowMap(size_t numPhotons) {
std::vector<Photon> photons = generatePhotons(numPhotons);

uint32_t ssboPhotons;
glGenBuffers(1, &ssboPhotons);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPhotons);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(Photon) * photons.size(),
photons.data(), GL_DYNAMIC_COPY);

traceThroughTexture(ssboPhotons, photons.size());

glDeleteBuffers(1, &ssboPhotons);
glDeleteProgram(mTextureTracerProgram);

glDisable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(nullptr, nullptr);

return 0;
}
}

主要.cpp

#include <GL/glew.h>
#include <GL/glut.h>

#include "TextureTracer.hpp"

int main(int argc, char *argv[]) {
glutInit(&argc, argv);
glutCreateWindow("OpenGL needs a window o.O");
glewInit();

auto mapper = gpu::TextureTracer();

// WITH 100'000 PHOTONS IT WORKS, WITH 1'000'000 PHOTONS NOT WHY?
mapper.createShadowMap(100'000);

return 0;
}

最佳答案

如果 GPU 程序执行时间过长,操作系统会取消它们的执行。在 Windows 上通常是两秒,在 Linux 上大多数时候是五秒,但可能会有所不同。

这是为了检测卡住的 GPU 程序并取消它们。有不同的方法来解决此超时问题,但它们都需要管理员/root 权限,这并不总是可用的。

如果可能,可以将执行拆分为多个调用,如以下代码段所示:

const uint32_t passSize   = 2048u;
const uint32_t numPasses = (numPhotons / passSize) + 1;
const uint32_t numThreads = 64u;
const uint32_t numBlocks = passSize / numThreads;

glUniform1ui(glGetUniformLocation(mTextureTracerProgram, "passSize"), passSize);
for (uint32_t pass = 0u; pass < numPasses; ++pass) {
glUniform1ui(glGetUniformLocation(mTextureTracerProgram, "pass"), pass);

glDispatchComputeGroupSizeARB(numBlocks, 1, 1, numThreads, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glFlush();
glFinish();
}

glFlush()glFinish() 调用很重要,否则执行将捆绑在一起,操作系统无论如何都会触发超时。

在着色器中,您只需要像这样访问输入数据的正确部分:

// other stuff

uniform uint pass;
uniform uint passSize;

void main() {
uint gid = gl_GlobalInvocationID.x;
uint passId = pass * passSize + gid;
if (passId >= photons.length()) return;

Photon photon = photons[passId];

// rest of program
}

这就是全部。

如果您想禁用操作系统超时,这里有一篇与 Linux 相关的帖子:https://stackoverflow.com/a/30520538/5543884

这是一篇关于 Windows 的帖子:https://stackoverflow.com/a/29759823/5543884

关于c++ - GLSL 计算着色器不适用于大输入,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57182545/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com