- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我正在重写我的旧渲染管道。我按照自己的想法创建了一个非常精简的原型(prototype),令我震惊的是,原来那条相当复杂且优化不佳的管道,与这个超级简单的原型(prototype)性能完全相同。
任务是渲染 1024 个任意大小的网格(总共 1400 万个三角形),每个网格使用一组不同的 uniform 变量。
我现在做的是使用统一缓冲区 + glMultiDrawElementsIndirect
并使用 gl_DrawIDARB
索引到统一缓冲区。这是渲染循环:
"""
    renderloop(window, N, frame_times, program, commandbuff)

Render at most `N` frames, issuing one `glMultiDrawElementsIndirect` call per frame, and
append the duration of every frame (as returned by `toq()`) to `frame_times`.

The loop stops early when `isopen(window)` becomes false.

# Arguments
- `window`: window passed to `isopen` and `GLWindow.swapbuffers`.
- `N`: maximum number of frames to render and time.
- `frame_times`: collection that receives one sample per rendered frame (mutated).
- `program`: program handle passed to `glUseProgram`.
- `commandbuff`: indirect command buffer; bound once before the loop.

# Returns
`frame_times`, after the samples have been pushed onto it.
"""
function renderloop(window, N, frame_times, program, commandbuff)
    glUseProgram(program)
    glEnable(GL_DEPTH_TEST)
    glClearColor(1, 1, 1, 1)
    GLAbstraction.bind(commandbuff)
    n = 0
    # `n < N` (not `<=`): the counter starts at 0, so `<=` would render and record
    # N + 1 frames.
    while isopen(window) && n < N
        tic()
        # Drain pending GPU work so the sample is not dominated by queued commands.
        # NOTE(review): this call is inside the timed region, so a sample includes the
        # wait for work submitted before `tic()` — confirm this is the intended measure.
        glFinish()
        GLWindow.poll_glfw()
        # No vertex array is (re)bound per frame; the original author noted that binding
        # it here "doesn't change timing much".
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        # Arguments: mode, index type, offset into the bound indirect buffer, drawcount,
        # stride (0 = tightly packed).
        # Assumes `length(commandbuff)` is the number of commands, not a byte size —
        # TODO confirm.
        glMultiDrawElementsIndirect(
            GL_TRIANGLES,
            GL_UNSIGNED_INT,
            C_NULL, length(commandbuff), 0
        )
        GLWindow.swapbuffers(window)
        push!(frame_times, toq())
        n += 1
    end
    return frame_times
end
我的其他管道太复杂了,无法在这里写下来,但简而言之,它是未优化的 Julia 代码、使用 uniform 变量的 GLSL 3.0 绘制代码 + 光线拾取 + fxaa + 几个渲染目标等等。除了改用 uniform block 等现代化特性之外,着色器几乎相同。
新的(几乎)完整的代码可以在这里看到:
# Vertex shader source (GLSL 4.50). It enables GL_ARB_shader_draw_parameters and uses
# gl_DrawIDARB to pick the per-draw VertexArgument (color + model matrix) out of the
# `VertexArguments` uniform block; `Scene` supplies the view and projection matrices.
# NOTE(review): args[1024] of {vec4, mat4} is 1024 * 80 bytes = 80 KiB under std140,
# which is above the 16 KiB minimum guaranteed for GL_MAX_UNIFORM_BLOCK_SIZE — confirm
# the limit of the target driver.
# NOTE(review): `lightdir` is computed from a hard-coded vec3(-10) and the untransformed
# `position`; `scene.lightposition` is declared but not read, and `normal` is passed
# through without applying the model matrix — confirm this is intended.
vert = """
#version 450
#extension GL_ARB_shader_draw_parameters : enable
struct VertexArgument{
vec4 color;
mat4 model;
};
layout (location = 0) in vec3 position;
layout (location = 1) in vec3 normal;
layout (std140) uniform Scene{
vec4 lightposition;
mat4 proj;
mat4 view;
mat4 projview;
vec2 resolution;
} scene;
layout (std140) uniform VertexArguments{
VertexArgument[1024] args;
} vertex_arguments;
out VertexOut{
vec3 vertex;
vec3 normal;
vec3 lightdir;
vec4 color;
} vertex_out;
void main(){
VertexArgument arg = vertex_arguments.args[gl_DrawIDARB];
vec4 position_camspace = scene.view * arg.model * vec4(position, 1.0);
gl_Position = scene.proj * position_camspace;
vertex_out.lightdir = normalize(vec3(-10) - position.xyz);
vertex_out.vertex = -position_camspace.xyz;
vertex_out.normal = normal;
vertex_out.color = arg.color;
}
"""
# Fragment shader source (GLSL 4.50). `blinnphong` combines a constant ambient term, a
# diffuse term and a specular term (exponent 8.0); the specular term is zeroed when the
# surface faces away from the light. `main` evaluates it twice — once with the
# interpolated light direction and once with its negation — and sums the two results.
# Only the rgb part of the interpolated color is used; output alpha is fixed at 1.0.
# NOTE(review): `vertex_in.vertex` is passed as V without being normalized before it is
# used in `normalize(L+V)` — confirm this is intended.
frag = """
#version 450
vec3 blinnphong(vec3 V, vec3 N, vec3 L, vec3 color){
float diff_coeff = max(dot(L,N), 0.0);
// specular coefficient
vec3 H = normalize(L+V);
float spec_coeff = pow(max(dot(H,N), 0.0), 8.0);
if (diff_coeff <= 0.0)
spec_coeff = 0.0;
// final lighting model
return vec3(
vec3(0.1) * vec3(0.3) +
vec3(0.9) * color * diff_coeff +
vec3(0.3) * spec_coeff
);
}
in VertexOut{
vec3 vertex;
vec3 normal;
vec3 lightdir;
vec4 color;
} vertex_in;
layout (location = 0) out vec4 frag_color;
void main(){
vec3 L = normalize(vertex_in.lightdir);
vec3 N = normalize(vertex_in.normal);
vec3 light1 = blinnphong(vertex_in.vertex, N, L, vertex_in.color.rgb);
vec3 light2 = blinnphong(vertex_in.vertex, N, -L, vertex_in.color.rgb);
frag_color = vec4(light1 + light2, 1.0);
}
"""
# Create the window with an OpenGL 4.5 context (debugging off). The hints request no
# multisampling, a 32-bit depth buffer, 8 bits per RGBA channel, no stencil buffer and
# no auxiliary buffers.
window = create_glcontext(
major = 4, minor = 5, debugging = false,
windowhints = [
(GLFW.SAMPLES, 0),
(GLFW.DEPTH_BITS, 32),
(GLFW.ALPHA_BITS, 8),
(GLFW.RED_BITS, 8),
(GLFW.GREEN_BITS, 8),
(GLFW.BLUE_BITS, 8),
(GLFW.STENCIL_BITS, 0),
(GLFW.AUX_BUFFERS, 0)
]
)
# Event source for the window; the camera takes its `Area` from it.
events = WindowEvents(Window => window)
# Perspective camera placed at (6, 6, 8) and looking at the origin.
cam = PerspectiveCamera(
TranslationSpeed => 1f0,
LookAt => Vec3f0(0),
EyePosition => Vec3f0(6, 6, 8),
Rotation => Vec3f0(0),
Area => events[Area],
RotationSpeed => 0.1f0
)
# Compile both shader stages from the source strings and link them into one program.
vertshader = compile_shader(Vector{UInt8}(vert), GL_VERTEX_SHADER, :vertexshader)
fragshader = compile_shader(Vector{UInt8}(frag), GL_FRAGMENT_SHADER, :fragshader)
program = compile_program(vertshader, fragshader)
# Initial contents of the `Scene` uniform block, in the order the vertex shader declares
# its fields: lightposition, proj, view, projview, resolution.
scene = (
Vec4f0(10),
cam[Projection],
cam[View],
cam[ProjectionView],
Vec2f0(widths(cam[Area]))
)
scene_buff = UniformBuffer(scene) # create UniformBuffer GL_STATIC_DRAW
# Callback registered on the camera's ProjectionView: it rewrites the whole scene tuple
# into the first element of the buffer with the current camera matrices.
FieldTraits.on(cam, ProjectionView) do projview
# Write the new values to the scene buffer. The author observed that the timings stay
# the same when this update is skipped.
scene_buff[1] = (
Vec4f0(10),
cam[Projection],
cam[View],
projview,
Vec2f0(widths(cam[Area]))
)
end
# Prototype (color, model matrix) element; only its type is used, to create the
# per-draw uniform buffer that `loadmeshes` later fills with `push!`.
vals = (Vec4f0(1, 0, 0, 1), eye(Mat4f0))
uniform_array = UniformBuffer(typeof(vals))
"""
    loadmeshes(folder)

Read the first 1024 `.ifs` files found in `folder` and pack them into one shared
geometry buffer plus one indirect draw command per mesh.

For every mesh this also pushes a `(color, model matrix)` tuple onto the global
`uniform_array`: a random opaque color, and a matrix that moves the mesh's minimum
corner to the origin, scales it by the reciprocal of its largest extent and translates
it to its cell of a 32×32 grid.

Indexing with `[1:1024]` fails if `folder` holds fewer than 1024 `.ifs` files.

# Returns
`(vbo, ibuff)`: the vertex array built from all vertices/faces and the
`GL_DRAW_INDIRECT_BUFFER` holding the draw commands.
"""
function loadmeshes(folder)
    # load 1024 meshes
    ifsnames = filter(name -> endswith(name, ".ifs"), readdir(folder))
    meshpaths = ifsnames[1:1024]
    faces = GLTriangle[]
    vertices = Tuple{Point3f0, Normal{3, Float32}}[]
    drawcommands = Vector{Command}(length(meshpaths))
    # Running offsets of the current mesh inside the shared index / vertex storage.
    index_offset = 0
    vertex_offset = 0
    for (i, meshpath) in enumerate(meshpaths)
        mesh = read_ifs(joinpath(folder, meshpath))
        fs = mesh.indexes[1]
        vs = mesh.parent
        append!(faces, fs)
        append!(vertices, zip(vs, normals(vs, fs)))

        # Normalize the mesh size and place it on the 32×32 grid.
        lo, hi = extrema(vs)
        gx, gy = ind2sub((32, 32), i)
        placement = translationmatrix(Vec3f0(gx, gy, 0f0))
        extent = maximum(hi .- lo)
        scaling = scalematrix(Vec3f0(1f0 ./ extent))

        # add uniform attributes to buffer
        color = Vec4f0(rand(Vec3f0)..., 1f0)
        model = placement * scaling * translationmatrix(-Vec3f0(lo))
        push!(uniform_array, (color, model))

        # Three indices per triangle face.
        # NOTE(review): argument order presumably mirrors the GL indirect command
        # (count, instanceCount, firstIndex, baseVertex, baseInstance) — confirm
        # against the definition of `Command`.
        nindices = length(fs) * 3
        drawcommands[i] = Command(nindices, 1, index_offset, vertex_offset, 0)
        index_offset += nindices
        vertex_offset += length(vs)
    end
    vbo = VertexArray(view(vertices, faces)) # vertexarray
    ibuff = GLBuffer(drawcommands, buffertype = GL_DRAW_INDIRECT_BUFFER)
    return vbo, ibuff
end
# Load the geometry and the indirect command buffer from "<home>/3dstuff/models".
vbo, commandbuff = loadmeshes(homedir() * "/3dstuff/models")
# Look up both uniform blocks of the program by name.
sceneidx = glGetUniformBlockIndex(program, "Scene")
vertex_arts_idx = glGetUniformBlockIndex(program, "VertexArguments")
# Route `Scene` to binding point 0 and `VertexArguments` to binding point 1 ...
glUniformBlockBinding(program, sceneidx, 0)
glUniformBlockBinding(program, vertex_arts_idx, 1)
# ... and attach the matching buffers to those binding points.
glBindBufferBase(GL_UNIFORM_BUFFER, 0, scene_buff.buffer.id)
glBindBufferBase(GL_UNIFORM_BUFFER, 1, uniform_array.buffer.id)
"""
    renderloop(window, N, frame_times, commandbuff)

Render at most `N` frames, issuing one `glMultiDrawElementsIndirect` call per frame, and
append the duration of every frame (as returned by `toq()`) to `frame_times`.

The loop stops early when `isopen(window)` becomes false. The shader program is taken
from the global `program`, not from an argument.

# Arguments
- `window`: window passed to `isopen` and `GLWindow.swapbuffers`.
- `N`: maximum number of frames to render and time.
- `frame_times`: collection that receives one sample per rendered frame (mutated).
- `commandbuff`: indirect command buffer; bound once before the loop.

# Returns
`frame_times`, after the samples have been pushed onto it.
"""
function renderloop(window, N, frame_times, commandbuff)
    glUseProgram(program)
    glEnable(GL_DEPTH_TEST)
    glClearColor(1, 1, 1, 1)
    GLAbstraction.bind(commandbuff)
    n = 0
    # `n < N` (not `<=`): the counter starts at 0, so `<=` would render and record
    # N + 1 frames.
    while isopen(window) && n < N
        tic()
        # Drain pending GPU work so the sample is not dominated by queued commands.
        # NOTE(review): this call is inside the timed region, so a sample includes the
        # wait for work submitted before `tic()` — confirm this is the intended measure.
        glFinish()
        GLWindow.poll_glfw()
        # No vertex array is (re)bound per frame; the original author noted that binding
        # it here "doesn't change timing much".
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        # Arguments: mode, index type, offset into the bound indirect buffer, drawcount,
        # stride (0 = tightly packed).
        # Assumes `length(commandbuff)` is the number of commands, not a byte size —
        # TODO confirm.
        glMultiDrawElementsIndirect(
            GL_TRIANGLES,
            GL_UNSIGNED_INT,
            C_NULL, length(commandbuff), 0
        )
        GLWindow.swapbuffers(window)
        push!(frame_times, toq())
        n += 1
    end
    return frame_times
end
# Benchmark driver: collect per-frame timings with N = 2000 and report their mean,
# multiplied by 1000 to convert to milliseconds.
times = Float64[]
renderloop(window, 2000, times, commandbuff)
mean(times) * 1000 # ~ 14 ms
GPU 是 FirePro 9100。
旧管道的计时:每帧约 13 毫秒。新原型(prototype):每帧约 15 毫秒;去掉 glMultiDrawElementsIndirect 调用后约为 0.2 毫秒。
我还尝试打开和关闭 vsync,并稍微移动了代码,在时间上没有任何差异。新原型(prototype)手感也不太顺畅,看来这不仅仅是测量问题。
最佳答案
glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, C_NULL, length(commandbuff), 0)
这个参数应该是你要绘制多少个元素。将 1024 放在这里以查看它是否解决了性能问题。
关于opengl - glMultiDrawElementsIndirect 很慢,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43646127/
我正在重写我的旧渲染管道。我根据自己的喜好创建了一个非常精简的原型(prototype),令我震惊的是,我原来相当复杂且优化不佳的管道与 super 简单的原型(prototype)具有完全相同的性能
最初使用glDrawElementsInstancedBaseVertex 绘制场景网格。所有网格顶点属性都交错在单个缓冲区对象中。总共只有 30 个独特的网格。因此,我已经使用实例计数等调用了 30
我不明白这个命令结构是如何工作的。除了 firstIndex 之外,所有这些似乎都有意义(在文档中;我实际上还没有调用该函数)。实际上,在我看来文档中好像有错字。 这是我在查找相关文档时似乎在每个地方
我是一名优秀的程序员,十分优秀!