- iOS/Objective-C 元类和类别
- objective-c - -1001 错误,当 NSURLSession 通过 httpproxy 和/etc/hosts
- java - 使用网络类获取 url 地址
- ios - 推送通知中不播放声音
着色器接收一个光子 SSBO,其中每个光子包含位置、方向、波长和强度。每个线程只负责跟踪一个光子穿过网格;在光子经过的每个网格单元中,按波长累加其强度,从而为每个网格单元生成光谱分布。
问题是着色器对 100,000 个光子完美运行,但不会返回 1,000,000 个光子的结果。
我查看了 SSBO 的大小,发现所有大小都在我的 GPU (NVIDIA Quadro P6000) 2GB 限制范围内:
如果我修改某些地方的逻辑,它就可以处理一百万个光子(请参阅着色器代码中的两处大写注释,位于原文件第 87 行和第 114 行)。
我目前无法解释为什么着色器在 1,000,000 个光子时失败,而在 100,000 个光子时却能正常工作。两种情况下逻辑相同,缓冲区大小也都在限制范围内。(修改逻辑后程序即可正常运行,这也证实了缓冲区大小不是问题所在。)
下面是源代码。如果你想自己尝试,这里是 github 上的代码:https://github.com/TheJhonny007/TextureTracerDebug
计算着色器:
#version 430
#extension GL_EXT_compute_shader: enable
#extension GL_EXT_shader_storage_buffer_object: enable
#extension GL_ARB_compute_variable_group_size: enable
// Dimensions of the square accumulation grid, in cells.
const uint TEX_WIDTH = 1024u;
const uint TEX_HEIGHT = TEX_WIDTH;
// Wavelength range in nm. Bins are indexed by (wavelength - MIN_WAVELENGTH), so
// NUM_WAVELENGTHS = 360 bins cover the wavelengths 380..739.
const uint MIN_WAVELENGTH = 380u;
const uint MAX_WAVELENGTH = 740u;
const uint NUM_WAVELENGTHS = MAX_WAVELENGTH - MIN_WAVELENGTH;
// One photon traced by one shader invocation.
// Size: 24 bytes -> ~40,000,000 photons per available gigabyte of ram
// The field order matters: the buffer below is std430 and is filled from the host side.
struct Photon {
vec2 position;// m
vec2 direction;// normalized
uint wavelength;// nm
float intensity;// 0..1 should start at 1
};
// Input buffer (binding 0): unsized array of photons; main() reads photons[gl_GlobalInvocationID.x].
layout(std430, binding = 0) buffer Photons {
Photon photons[];
};
// One grid cell: an accumulated intensity counter per wavelength bin.
// Size: 1440 bytes -> ~700,000 pixels per available gigabyte of ram
struct Pixel {
uint intensityAtWavelengths[NUM_WAVELENGTHS];// [0..1000]
};
// Output buffer (binding 1): flat array of cells, addressed as (y * TEX_WIDTH + x) by addToPixel().
layout(std430, binding = 1) buffer Pixels {
//Pixel pixels[TEX_WIDTH][TEX_HEIGHT];
// NVIDIAs linker takes ages to link if the sizes are specified :(
Pixel[] pixels;
};
uniform float xAxisScalingFactor;

/// Returns vec2(i^xAxisScalingFactor, (i + 1)^xAxisScalingFactor) for grid column `i`.
// NOTE(review): getRectangleAt() stores the second component as the rectangle's width `w`,
// and getRayRectangleExitEdge() then evaluates `rect.x + rect.w`. The value computed here
// looks like the right edge rather than a width -- confirm whether
// `pow(i + 1) - pow(i)` was intended.
vec2 getHorizontalRectangleAt(int i) {
    float exponent = xAxisScalingFactor;
    return vec2(pow(float(i), exponent), pow(float(i + 1), exponent));
}
// Height of one grid row; also used to convert a y coordinate into a row index.
uniform float rectangleHeight;
// Axis-aligned rectangle describing one grid cell.
// Field order matters: getRectangleAt() builds it positionally as Rectangle(x, y, w, h).
struct Rectangle {
float x;
float y;
float w;
float h;
};
layout (local_size_variable) in;
/// Atomically adds `intensity` to the bin of `wavelength` in the grid cell `idx`.
///
/// Cells outside the TEX_WIDTH x TEX_HEIGHT grid and wavelengths outside
/// [MIN_WAVELENGTH, MAX_WAVELENGTH) are ignored, so the call never writes out of bounds.
void addToPixel(uvec2 idx, uint wavelength, uint intensity) {
    // idx is unsigned, so only the upper bounds need to be checked.
    if (idx.x >= TEX_WIDTH || idx.y >= TEX_HEIGHT) {
        return;
    }
    // Without this check the unsigned subtraction below could wrap around, or index past
    // the end of intensityAtWavelengths.
    if (wavelength < MIN_WAVELENGTH || wavelength >= MAX_WAVELENGTH) {
        return;
    }
    uint index = (idx.y * TEX_WIDTH) + idx.x;
    atomicAdd(pixels[index].intensityAtWavelengths[wavelength - MIN_WAVELENGTH], intensity);
}
/// Builds the Rectangle for the grid cell at `indices`: the horizontal values come from
/// getHorizontalRectangleAt(indices.x), the vertical ones from rectangleHeight.
Rectangle getRectangleAt(ivec2 indices) {
    vec2 horizontal = getHorizontalRectangleAt(indices.x);
    Rectangle cell;
    cell.x = horizontal.x;
    cell.y = rectangleHeight * float(indices.y);
    cell.w = horizontal.y;
    cell.h = rectangleHeight;
    return cell;
}
uniform float shadowLength;
uniform float shadowHeight;

/// Maps a location to grid indices. The column is always 0; the row is location.y divided
/// by rectangleHeight, truncated to int.
ivec2 getRectangleIdxAt(vec2 location) {
    return ivec2(0, int(location.y / rectangleHeight));
}
/// Returns the y coordinate at which the line through `ray` crosses the vertical line at `x`.
/// The slope is direction.y / direction.x, so a ray with direction.x == 0 yields inf/NaN.
float getRayIntersectAtX(Photon ray, float x) {
    float dx = x - ray.position.x;
    float slope = ray.direction.y / ray.direction.x;
    return slope * dx + ray.position.y;
}
/// Returns the index step towards the next cell, based on where the ray crosses the
/// vertical line x = rect.x + rect.w: (0, -1) below the rectangle, (0, 1) above it,
/// (1, 0) otherwise.
ivec2 getRayRectangleExitEdge(Photon ray, Rectangle rect) {
    float exitY = getRayIntersectAtX(ray, rect.x + rect.w);
    // IF ONE OF THE FIRST TWO CONDITIONS GETS REMOVED IT WORKS WITH 1'000'000 PHOTONS OTHERWISE ONLY 100'000 WHY?
    if (exitY < rect.y) {
        return ivec2(0, -1);
    }
    if (exitY > rect.y + rect.h) {
        return ivec2(0, 1);
    }
    return ivec2(1, 0);
}
/// Traces the photon selected by the invocation id cell by cell through the grid, adding
/// its intensity to every visited cell until it leaves the grid.
void main() {
    uint photonIdx = gl_GlobalInvocationID.x;
    if (photonIdx >= photons.length()) return;

    Photon photon = photons[photonIdx];
    // Wavelength and intensity never change while tracing, so the contribution is constant.
    // The conversion to uint is needed for the atomic add.
    uint contribution = uint(photon.intensity * 100.0);

    ivec2 cell = getRectangleIdxAt(photon.position);
    while (cell.x >= 0 && cell.y >= 0 && cell.x < TEX_WIDTH && cell.y < TEX_HEIGHT) {
        addToPixel(uvec2(cell), photon.wavelength, contribution);

        cell += getRayRectangleExitEdge(photon, getRectangleAt(cell));

        // When the ray goes out of bounds on the bottom then mirror it to simulate rays coming
        // from the other side of the planet. This works because of the rotational symmetry of
        // the system.
        // IF COMMENTED OUT IT WORKS WITH 1'000'000 PHOTONS OTHERWISE ONLY 100'000 WHY?
        if (cell.y < 0) {
            cell.y = 0;
            photon.position.y *= -1.0;
            photon.direction.y *= -1.0;
        }
    }
}
TextureTracer.hpp
#ifndef TEXTURE_TRACER_HPP
#define TEXTURE_TRACER_HPP
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

#include <glm/glm.hpp>
namespace gpu {
// 6 * 4 = 24 Bytes
struct Photon {
glm::vec2 position; // m
glm::vec2 direction; // normalized
uint32_t waveLength; // nm
float intensity; // 0..1 should start at 1
};
class TextureTracer {
public:
TextureTracer();
uint32_t createShadowMap(size_t numPhotons);
private:
void initTextureTracer();
void traceThroughTexture(uint32_t ssboPhotons, size_t numPhotons);
Photon emitPhoton();
std::vector<Photon> generatePhotons(uint32_t count);
struct {
uint32_t uRectangleHeight;
uint32_t uShadowLength;
uint32_t uShadowHeight;
uint32_t uXAxisScalingFactor;
} mTextureTracerUniforms;
uint32_t mTextureTracerProgram;
std::mt19937_64 mRNG;
std::uniform_real_distribution<> mDistributionSun;
std::uniform_int_distribution<uint32_t> mDistributionWavelength;
std::bernoulli_distribution mDistributionBoolean;
};
} // namespace gpu
#endif // TEXTURE_TRACER_HPP
TextureTracer.cpp
#include "TextureTracer.hpp"
#include <GL/glew.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <vector>
/// OpenGL debug callback: messages of type GL_DEBUG_TYPE_ERROR are written to stderr with a
/// "GL ERROR" prefix, all other messages to stdout with a "GL INFO" prefix.
/// `source`, `id`, `length` and `userParam` are unused.
void GLAPIENTRY MessageCallback(GLenum source, GLenum type, GLuint id,
                                GLenum severity, GLsizei length,
                                const GLchar *message, const void *userParam) {
  const bool isError = (type == GL_DEBUG_TYPE_ERROR);
  FILE *const stream = isError ? stderr : stdout;
  const char *const format =
      isError ? "GL ERROR: type = 0x%x, severity = 0x%x, message = %s\n"
              : "GL INFO: type = 0x%x, severity = 0x%x, message = %s\n";
  fprintf(stream, format, type, severity, message);
}
namespace gpu {
// Scale factors: the grid height covers RADIUS * TEX_HEIGHT_TO_RADIUS_FACTOR, and the shadow
// length computed in traceThroughTexture() is multiplied by TEX_SHADOW_LENGTH_FACTOR.
const double TEX_HEIGHT_TO_RADIUS_FACTOR = 4;
const double TEX_SHADOW_LENGTH_FACTOR = 8;
// Grid dimensions in cells; must match TEX_WIDTH / TEX_HEIGHT in the compute shader.
const uint32_t TEX_WIDTH = 1024u;
const uint32_t TEX_HEIGHT = TEX_WIDTH;
// Scene dimensions -- presumably metres, matching the photon position unit; TODO confirm.
const double RADIUS = 6'371'000.0;
const double RADIUS_FACTORED = RADIUS * TEX_HEIGHT_TO_RADIUS_FACTOR;
const double SUN_RADIUS = 695'510'000.0;
const double DIST_TO_SUN = 149'600'000'000.0;
const double ATMO_HEIGHT = 42'000.0;
/// Reads the text file `fileName` and returns its contents, with every line terminated
/// by '\n'.
///
/// If the file cannot be opened, an error is printed to stderr and the process is
/// terminated with exit(-1).
std::string loadShader(const std::string &fileName) {
  std::ifstream shaderFileStream(fileName, std::ios::in);
  if (!shaderFileStream.is_open()) {
    std::cerr << "Could not load the GLSL shader from '" << fileName << "'!"
              << std::endl;
    exit(-1);
  }

  std::string shaderCode;
  std::string line;
  // Loop on the result of getline() rather than on eof(): eof() only becomes true after a
  // read has failed, so testing it first appends one spurious empty line at the end.
  while (std::getline(shaderFileStream, line)) {
    shaderCode += line;
    shaderCode += '\n';
  }
  return shaderCode;
}
void TextureTracer::initTextureTracer() {
mTextureTracerProgram = glCreateProgram();
uint32_t rayTracingComputeShader = glCreateShader(GL_COMPUTE_SHADER);
std::string code = loadShader("../resources/TextureTracer.glsl");
const char *shader = code.c_str();
glShaderSource(rayTracingComputeShader, 1, &shader, nullptr);
glCompileShader(rayTracingComputeShader);
glAttachShader(mTextureTracerProgram, rayTracingComputeShader);
glLinkProgram(mTextureTracerProgram);
mTextureTracerUniforms.uRectangleHeight =
glGetUniformLocation(mTextureTracerProgram, "rectangleHeight");
mTextureTracerUniforms.uShadowHeight =
glGetUniformLocation(mTextureTracerProgram, "shadowHeight");
mTextureTracerUniforms.uShadowLength =
glGetUniformLocation(mTextureTracerProgram, "shadowLength");
mTextureTracerUniforms.uXAxisScalingFactor =
glGetUniformLocation(mTextureTracerProgram, "xAxisScalingFactor");
glDetachShader(mTextureTracerProgram, rayTracingComputeShader);
glDeleteShader(rayTracingComputeShader);
}
/// Seeds the random engine with 1, sets up the sampling distributions (sun offset in
/// [-SUN_RADIUS, SUN_RADIUS), wavelength in 380..739, fair coin), enables OpenGL debug
/// output and builds the compute program.
TextureTracer::TextureTracer()
    : mRNG(1L),
      mDistributionSun(-SUN_RADIUS, SUN_RADIUS),
      mDistributionWavelength(380, 739),
      mDistributionBoolean(0.5) {
  glEnable(GL_DEBUG_OUTPUT);
  glDebugMessageCallback(MessageCallback, nullptr);

  initTextureTracer();
}
/// Distance along the ray (origin, direction) to its first intersection with the circle
/// (center, radius). Returns -1.0 when the ray misses, and 0.0 when the nearest
/// intersection lies behind the origin. The formula has no division by dot(direction,
/// direction), so `direction` is presumably expected to be normalized.
double raySphereDistance(glm::dvec2 origin, glm::dvec2 direction,
                         glm::dvec2 center, double radius) {
  const glm::dvec2 toOrigin = origin - center;
  const double b = glm::dot(toOrigin, direction);
  const double c = glm::dot(toOrigin, toOrigin) - (radius * radius);

  // Origin outside the circle (c > 0) and ray pointing away from it (b > 0): no hit.
  const bool outsideAndPointingAway = (c > 0.0) && (b > 0.0);
  if (outsideAndPointingAway) {
    return -1.0;
  }

  // A negative discriminant corresponds to the ray missing the circle.
  const double discr = b * b - c;
  if (discr < 0.0) {
    return -1.0;
  }

  // The ray intersects; take the smallest t, clamped so it is never negative.
  const double nearest = -b - glm::sqrt(discr);
  return glm::max(0.0, nearest);
}
/// Creates one random photon. Every random draw advances mRNG, so the order of the
/// statements below determines the generated sequence.
Photon TextureTracer::emitPhoton() {
// Target point on the y axis, between RADIUS and RADIUS + ATMO_HEIGHT.
std::uniform_real_distribution<> distributionEarth(0.0, ATMO_HEIGHT);
glm::dvec2 target = {0.0, RADIUS + distributionEarth(mRNG)};
// Rejection sampling: draw a 2D offset and retry until its length is at most SUN_RADIUS.
double d;
do {
d = glm::length(glm::dvec2(mDistributionSun(mRNG), mDistributionSun(mRNG)));
} while (d > SUN_RADIUS);
// Start at x = -DIST_TO_SUN, at height +d or -d chosen by a fair coin flip.
glm::dvec2 startPosition =
glm::dvec2(-DIST_TO_SUN, mDistributionBoolean(mRNG) ? d : -d);
glm::dvec2 direction = glm::normalize(target - startPosition);
// Advance the start point along the ray to the circle of radius RADIUS + ATMO_HEIGHT
// around the origin.
// NOTE(review): raySphereDistance() returns -1.0 on a miss, which would move the start
// point backwards here -- presumably a miss cannot occur for these targets; confirm.
startPosition +=
direction * raySphereDistance(startPosition, direction, {0.0, 0.0},
RADIUS + ATMO_HEIGHT);
// Only the y coordinate of the start position is kept; x is set to 0. Position and
// direction are narrowed to float, and the intensity starts at 1.
return {glm::vec2(0.0, startPosition.y), glm::vec2(direction),
mDistributionWavelength(mRNG), 1.0f};
}
std::vector<Photon> TextureTracer::generatePhotons(uint32_t count) {
std::vector<Photon> photons(count);
std::generate(photons.begin(), photons.end(),
[this]() { return emitPhoton(); });
return photons;
}
void TextureTracer::traceThroughTexture(uint32_t ssboPhotons,
size_t numPhotons) {
glUseProgram(mTextureTracerProgram);
glUniform1f(mTextureTracerUniforms.uRectangleHeight,
RADIUS_FACTORED / TEX_HEIGHT);
const double shadowLength =
TEX_SHADOW_LENGTH_FACTOR * (DIST_TO_SUN * RADIUS) / (SUN_RADIUS - RADIUS);
glUniform1f(mTextureTracerUniforms.uShadowLength, shadowLength);
glUniform1f(mTextureTracerUniforms.uShadowHeight, RADIUS_FACTORED);
const double xAxisScalingFactor =
glm::log(shadowLength) / glm::log(static_cast<double>(TEX_WIDTH));
glUniform1f(mTextureTracerUniforms.uXAxisScalingFactor,
static_cast<float>(xAxisScalingFactor));
const uint32_t MIN_WAVELENGTH = 380u;
const uint32_t MAX_WAVELENGTH = 740u;
const uint32_t NUM_WAVELENGTHS = MAX_WAVELENGTH - MIN_WAVELENGTH;
size_t pixelBufferSize =
TEX_WIDTH * TEX_HEIGHT * NUM_WAVELENGTHS * sizeof(uint32_t);
uint32_t ssboPixels;
glGenBuffers(1, &ssboPixels);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPixels);
glBufferData(GL_SHADER_STORAGE_BUFFER, pixelBufferSize, nullptr,
GL_DYNAMIC_COPY);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssboPhotons);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, ssboPixels);
const uint32_t numThreads = 32u;
const uint32_t numBlocks = numPhotons / numThreads;
std::cout << "numBlocks: " << numBlocks << std::endl;
glDispatchComputeGroupSizeARB(numBlocks, 1, 1, numThreads, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
struct Pixel {
uint32_t intensityAtWavelengths[NUM_WAVELENGTHS];
};
std::vector<Pixel> pixels(TEX_WIDTH * TEX_HEIGHT);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPixels);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, pixelBufferSize,
pixels.data());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
for (int y = 0; y < TEX_HEIGHT; ++y) {
printf("%4i | ", y);
for (int x = 0; x < TEX_WIDTH; ++x) {
Pixel p = pixels[y * TEX_WIDTH + x];
int counter = 0;
for (uint32_t i : p.intensityAtWavelengths) {
counter += i;
}
if (counter == 0) {
printf(" ");
} else if (counter > 100'000'000) {
printf("%4s", "\u25A0");
} else if (counter > 10'000'000) {
printf("%4s", "\u25A3");
} else if (counter > 1'000'000) {
printf("%4s", "\u25A6");
} else if (counter > 100'000) {
printf("%4s", "\u25A4");
} else {
printf("%4s", "\u25A1");
}
}
std::cout << std::endl;
}
glDeleteBuffers(1, &ssboPixels);
glUseProgram(0);
}
uint32_t TextureTracer::createShadowMap(size_t numPhotons) {
std::vector<Photon> photons = generatePhotons(numPhotons);
uint32_t ssboPhotons;
glGenBuffers(1, &ssboPhotons);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssboPhotons);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(Photon) * photons.size(),
photons.data(), GL_DYNAMIC_COPY);
traceThroughTexture(ssboPhotons, photons.size());
glDeleteBuffers(1, &ssboPhotons);
glDeleteProgram(mTextureTracerProgram);
glDisable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(nullptr, nullptr);
return 0;
}
}
main.cpp
#include <GL/glew.h>
#include <GL/glut.h>
#include "TextureTracer.hpp"
int main(int argc, char *argv[]) {
glutInit(&argc, argv);
glutCreateWindow("OpenGL needs a window o.O");
glewInit();
auto mapper = gpu::TextureTracer();
// WITH 100'000 PHOTONS IT WORKS, WITH 1'000'000 PHOTONS NOT WHY?
mapper.createShadowMap(100'000);
return 0;
}
最佳答案
如果 GPU 程序执行时间过长,操作系统会取消它们的执行。在 Windows 上通常是两秒,在 Linux 上大多数时候是五秒,但可能会有所不同。
这是为了检测卡住的 GPU 程序并取消它们。有不同的方法来解决此超时问题,但它们都需要管理员/root 权限,这并不总是可用的。
如果可能,可以将执行拆分为多个调用,如以下代码段所示:
const uint32_t passSize = 2048u;
const uint32_t numPasses = (numPhotons / passSize) + 1;
const uint32_t numThreads = 64u;
const uint32_t numBlocks = passSize / numThreads;
glUniform1ui(glGetUniformLocation(mTextureTracerProgram, "passSize"), passSize);
for (uint32_t pass = 0u; pass < numPasses; ++pass) {
glUniform1ui(glGetUniformLocation(mTextureTracerProgram, "pass"), pass);
glDispatchComputeGroupSizeARB(numBlocks, 1, 1, numThreads, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glFlush();
glFinish();
}
glFlush()
和 glFinish()
调用很重要,否则执行将捆绑在一起,操作系统无论如何都会触发超时。
在着色器中,您只需要像这样访问输入数据的正确部分:
// other stuff
// Index of the current pass and number of photons per pass; both are set by the host
// before each dispatch.
uniform uint pass;
uniform uint passSize;
void main() {
uint gid = gl_GlobalInvocationID.x;
// Global photon index: offset of this pass plus the invocation id within the pass.
uint passId = pass * passSize + gid;
// The last pass may reach beyond the end of the photon array.
if (passId >= photons.length()) return;
Photon photon = photons[passId];
// rest of program
}
这就是全部。
如果您想禁用操作系统超时,这里有一篇与 Linux 相关的帖子:https://stackoverflow.com/a/30520538/5543884
这是一篇关于 Windows 的帖子:https://stackoverflow.com/a/29759823/5543884
关于c++ - GLSL 计算着色器不适用于大输入,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57182545/
我试图理解这两个概念。我正在阅读的手册对它们非常简短,像多 channel 算法这样的东西对我来说是新的。我想要一些示例(不是代码),说明我需要在哪里使用不变变量或精确变量,只是为了获得一个大致的想法
您好,我正在尝试获得一个快速的圆角矩形 glsl 着色器,但我只设法使用此函数( https://github.com/marklundin/glsl-sdf-primitives/blob/mast
这可能是一个简单的问题。作为 GLSL 的新手,我宁愿在这里问。 现在,在顶点着色器中,我可以通过以下方式获取世界坐标系中的位置: gl_Position = ftransform();
我想知道是否有人拥有完整、有效且高效的代码来在 glsl 中进行双三次纹理过滤。有这个: http://www.codeproject.com/Articles/236394/Bi-Cubic-and
真的有两个问题... GLSL ES 2 是完全独立的语言,还是 GLSL 的特殊版本? 在“标准库”函数、语法和功能方面,它们之间有什么区别? 我正在为一个针对 Windows、Mac 和 iPad
从GLSL文档(https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/length.xhtml)中,长度函数“计算 vector 的长度”
我想在 GLSL 着色器中实现颜色矩阵滤镜,但找不到与此相关的任何文档。我是着色器世界的新手(我自己从未编写过代码)所以如果我的解释/词汇没有意义,请原谅我。 到目前为止我可以收集到的信息: 一个颜色
我刚刚开始使用 openframeworks 中的着色器,并且正在尝试编写一个片段着色器,它根据片段的观看角度来更改片段的颜色。例如,给定一个矩形,如果从正面看(相机与法线平行)它会是红色,但如果从侧
似乎某些在 case 中具有输出的函数可能使用 if 语句作为底层实现,从而导致分支。我不认为它,但我想知道。 对于 sign(x),如果数字是正数、负数或零,则分别重新运行 1、-1 和 0。 那么
如何在 glsl 中执行位操作? 使用常规 C 风格的按位运算符 | , & , ^ , 或 !不起作用。 最佳答案 它们是在 GLSL 1.30 (OGL 3.0) 中引入的。 根据您想要做什么,您
最近我一直在玩 webGl,我偶然发现了一个很酷的小演示 here (来源 here )我想稍微改变一下以获得一些很酷的结果。 我对改变地形的生成方式很感兴趣。而不是分层 10 个 Octave
这是每个设备的事情吗?还是基于浏览器?抱歉问了这样一个基本问题,但我似乎找不到直接的答案。 最佳答案 它基于 OpenGL ES 2.0,并根据 the spec , 它必须支持 GLSL ES 版本
你如何在 GLSL 着色器中通过引用传递? 最佳答案 您可以将属性标记为 inout在函数签名中,这将使属性有效地“通过引用传递” 例如, void doSomething( vec3 trans,
我有一个浮点 RGBA 缓冲区,我想将其作为统一 Texel 缓冲区传递到我的计算着色器(用于只读访问,没有采样)。谁能告诉我如何在 GLSL 中执行此操作? 我能找到的所有示例似乎都在跳过该主题,或
我有一些参数从 CPU 传递到 GPU,这些参数对于所有片段都是恒定的,但在每一帧上都会发生变化(我使用的是 GLSL ES 1.1)。对于这些值,我应该使用制服还是属性?属性可能因顶点而异,所以我的
我已经看到这个伪随机数生成器在着色器中使用,引用here and there around the web : float rand(vec2 co){ return fract(sin(dot(
我尝试在结构内初始化数组,如下所示: struct myStruct { vec3 data[20] = vec3[20] (vec3(1, 1, 1), vec3( 1, -1, 1), v
我尝试在结构内初始化数组,如下所示: struct myStruct { vec3 data[20] = vec3[20] (vec3(1, 1, 1), vec3( 1, -1, 1), v
在 GLSL 着色器中,出于各种原因,我经常需要几个函数来修改单个值(例如,片段着色器使用四个函数来应用照明、纹理、镜面反射和雾化)。我可以想到至少三种方法来传递这些值进行修改: 使用 inout每个
我在 SL 引用中搜索了“copy”,但找不到任何相关内容。 如果我有: float a[3] = float[3] (1.0,2.0,3.0); float b[3] = a; 是 b现在指向 a
我是一名优秀的程序员,十分优秀!