gpt4 book ai didi

c++ - 使用 OpenGL 3.3 实例化似乎很慢

转载 作者:塔克拉玛干 更新时间:2023-11-03 07:16:37 25 4
gpt4 key购买 nike

我用 C++ 编写了一个最小的代码示例,它渲染 10000 色屏幕上的四边形。我正在使用“实例化”,所以只更新每个四边形每帧的模型矩阵。 6个顶点的数据存储在一个单独的 VBO 中,并且将一直重复使用。投影矩阵(正交)在程序启动时注入(inject)一次通过制服。模型矩阵是在 CPU 上使用库 GLM 计算的。我测量了渲染时间,得到的平均 FPS 仅为 52。我认为这是很多或更少,但我无法在我的小示例程序中找到错误/瓶颈。

经过一些分析,似乎是用 GLM 完成的 3 个计算很慢。我在这里做错了什么吗?例如,如果我删除了旋转计算,我得到了 10 FPS 的 FPS 提升!也许你可以帮助我找出我可以在这里做得更好的地方以及如何做我可以优化我的样本吗?对我来说很重要,每个四边形在运行时都是单独配置的,所以我决定使用实例化。将矩阵计算转移到 GPU 似乎是另一种选择,但我真的很困惑,为什么 CPU 在计算 10000 时有这么多问题模型矩阵!好的,我的 CPU 非常糟糕(Athlon 2 Core-Duo M300,GPU 是 ATI Mobility Radeon 4100),但它应该在不可测量的时间内完成这项任务,或者?

这是最小的、完全工作的、可编译的例子(如果你有 GLFW 和 GLM)。也许有人有时间可以帮我解决这个问题 :)

#define GLEW_STATIC
#define GLM_FORCE_INLINE
#define GLM_FORCE_SSE2
#include "glew.h"
#include "glfw3.h"
#include "glm.hpp"
#include "glm/gtc/matrix_transform.hpp"
#include <conio.h>
#include <cstdlib>
#include <iostream>
#include <ctime>

GLuint buildShader()
{
std::string strVSCode =
"#version 330 core\n"
"in vec3 vertexPosition;\n"
"in mat4 modelMatrix;\n"
"uniform mat4 projectionMatrix;\n"
"out vec4 m_color;\n"
"void main() {\n"
" vec4 vecVertex = vec4(vertexPosition, 1);\n"
" gl_Position = projectionMatrix * modelMatrix * vecVertex;\n"
" m_color = gl_Position;\n"
"}\n";

std::string strFSCode = "#version 330 core\n"
"out vec4 frag_colour;\n"
"in vec4 m_color;\n"
"void main() {\n"
" frag_colour = vec4(m_color.x, m_color.y, m_color.z, 0.5f);\n"
"}\n";

GLuint gluiVertexShaderId = glCreateShader(GL_VERTEX_SHADER);
char const * VertexSourcePointer = strVSCode.c_str();
glShaderSource(gluiVertexShaderId, 1, &VertexSourcePointer, NULL);
glCompileShader(gluiVertexShaderId);
GLuint gluiFragmentShaderId = glCreateShader(GL_FRAGMENT_SHADER);
char const * FragmentSourcePointer = strFSCode.c_str();
glShaderSource(gluiFragmentShaderId, 1, &FragmentSourcePointer, NULL);
glCompileShader(gluiFragmentShaderId);
GLuint gluiProgramId = glCreateProgram();
glAttachShader(gluiProgramId, gluiVertexShaderId);
glAttachShader(gluiProgramId, gluiFragmentShaderId);
glLinkProgram(gluiProgramId);
glDeleteShader(gluiVertexShaderId);
glDeleteShader(gluiFragmentShaderId);
return gluiProgramId;
}

struct Sprite
{
glm::vec3 position, dimension;
float speed, rotation, rx, ry;
};

struct Vertex
{
float x, y, z;
Vertex(){};
Vertex(float x, float y, float z) : x(x), y(y), z(z) {}
};

int main(int arc, char **argv)
{
// GLFW init
int displayResWith = 1366; //modify this here
int displayResHeight = 768; //modify this here
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, 1);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RED_BITS, 8);
glfwWindowHint(GLFW_GREEN_BITS, 8);
glfwWindowHint(GLFW_BLUE_BITS, 8);
glfwWindowHint(GLFW_ALPHA_BITS, 8);
glfwWindowHint(GLFW_DEPTH_BITS, 32);
glfwWindowHint(GLFW_STENCIL_BITS, 32);
GLFWwindow* window = glfwCreateWindow(displayResWith, displayResHeight,"Instancing", glfwGetPrimaryMonitor(),NULL);
int width, height;
glfwMakeContextCurrent(window);
glfwSwapInterval(0);
glfwGetFramebufferSize(window, &width, &height);

//GLEW init
glewExperimental = GL_TRUE;
glewInit();
const GLubyte* renderer = glGetString(GL_RENDERER);
const GLubyte* version = glGetString(GL_VERSION);
std::cout << "Renderer: " << renderer << std::endl;
std::cout << "OpenGL supported version: " << version << std::endl;

//OpenGL init
glEnable(GL_CULL_FACE);
glCullFace(GL_BACK);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LESS);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glClearColor(255.0f, 255.0f, 255.0f, 255.0f);

//Shader
GLuint programID = buildShader();

//VBO vertexBuffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
Vertex VertexBufferData[6];
VertexBufferData[0] = Vertex(-0.5f, 0.5f, 0.0f); //Links oben
VertexBufferData[1] = Vertex(-0.5f, -0.5f, 0.0f); //Links unten
VertexBufferData[2] = Vertex(0.5f, -0.5f, 0.0f); //Rechts unten
VertexBufferData[3] = VertexBufferData[2]; //Rechts unten
VertexBufferData[4] = Vertex(0.5f, 0.5f, 0.0f); //Rechts oben
VertexBufferData[5] = VertexBufferData[0]; //Links oben
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex)*6, VertexBufferData, GL_STATIC_DRAW);

//VBO instanceBuffer
GLuint instanceBuffer;
glGenBuffers(1, &instanceBuffer);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
int iMaxInstanceCount = 30000;
glm::mat4 *ptrInstanceBufferData = new glm::mat4[iMaxInstanceCount];
glBufferData(GL_ARRAY_BUFFER, iMaxInstanceCount * sizeof(glm::mat4), NULL, GL_STREAM_DRAW);

//VAO - Start
GLuint vertexArrayObject;
glGenVertexArrays(1, &vertexArrayObject);
glBindVertexArray(vertexArrayObject);

//For VBO vertexbuffer
glEnableVertexAttribArray(glGetAttribLocation(programID, "vertexPosition"));
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glVertexAttribPointer(
glGetAttribLocation(programID, "vertexPosition"),
3,
GL_FLOAT,
GL_FALSE,
sizeof(Vertex),
(void*)0
);

glVertexAttribDivisor(0, 0);

//For VBO instanceBuffer
int pos = glGetAttribLocation(programID, "modelMatrix");
int pos1 = pos + 0;
int pos2 = pos + 1;
int pos3 = pos + 2;
int pos4 = pos + 3;
glEnableVertexAttribArray(pos1);
glEnableVertexAttribArray(pos2);
glEnableVertexAttribArray(pos3);
glEnableVertexAttribArray(pos4);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
glVertexAttribPointer(pos1, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(0));
glVertexAttribPointer(pos2, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 4));
glVertexAttribPointer(pos3, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 8));
glVertexAttribPointer(pos4, 4, GL_FLOAT, GL_FALSE, sizeof(GLfloat) * 4 * 4, (void*)(sizeof(float) * 12));
glVertexAttribDivisor(pos1, 1);
glVertexAttribDivisor(pos2, 1);
glVertexAttribDivisor(pos3, 1);
glVertexAttribDivisor(pos4, 1);

glBindVertexArray(0); //VAO - End

//Matrix vars
glm::mat4 Projection, Rotating, Scaling, Translation, Identity;
glm::vec3 ZRotateVec(0.0f, 0.0f, 1.0f);

//Calc projection-matrix and put shader (uniform)
Projection = glm::ortho(0.0f, (float)width, 0.0f, (float)height, 0.0f, 1.0f);
glUseProgram(programID);
glUniformMatrix4fv(glGetUniformLocation(programID, "projectionMatrix"), 1, GL_FALSE, &Projection[0][0]);

//Creating sprites
std::srand(static_cast<unsigned int>(std::time(0)));
int iActInstanceCount = 10000;
Sprite *ptrSprites = new Sprite[iActInstanceCount];
for (int i = 0; i < iActInstanceCount; ++i)
{
ptrSprites[i].dimension = glm::vec3(16, 16, 1.0f);
ptrSprites[i].position = glm::vec3(std::rand()%(width-32),std::rand()%(height-32),-1.0f *((std::rand()%256)/256.0f));
ptrSprites[i].rotation = rand() % 360 + 0.0f;
ptrSprites[i].rx = static_cast<float>(std::rand() % 2);
ptrSprites[i].ry = static_cast<float>(std::rand() % 2);
ptrSprites[i].speed = (std::rand() % 100) + 1.0f;
if (ptrSprites[i].speed < 1.0f) ptrSprites[i].speed = 1.0f;
}

//FPS init
double fFramesRendered = 0.0f;
double fFrameMeasurementStart = 0.0f;
double fFPS = 0.0f;
double fCurrentTime = 0.0f;
glfwSetTime(0);

//Main-loop (also renderloop)
while (!glfwWindowShouldClose(window))
{
//application-logic
if (glfwGetKey(window, GLFW_KEY_ESCAPE)== GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);

const double fNewTime = glfwGetTime();
double fDeltaTime = fNewTime - fCurrentTime;
fCurrentTime = fNewTime;

for (int i = 0; i < iActInstanceCount; ++i)
{
float fSpeed = ptrSprites[i].speed * static_cast<float>(fDeltaTime);
ptrSprites[i].rotation += fSpeed;
if (ptrSprites[i].rotation >= 360.0f) ptrSprites[i].rotation = 0.0f;
if (ptrSprites[i].rx == 1) ptrSprites[i].position.x = ptrSprites[i].position.x + fSpeed;
if (ptrSprites[i].rx == 0) ptrSprites[i].position.x = ptrSprites[i].position.x - fSpeed;
if (ptrSprites[i].ry == 1) ptrSprites[i].position.y = ptrSprites[i].position.y + fSpeed;
if (ptrSprites[i].ry == 0) ptrSprites[i].position.y = ptrSprites[i].position.y - fSpeed;
if (ptrSprites[i].position.x <= 0) ptrSprites[i].rx = 1;
if (ptrSprites[i].position.x + ptrSprites[i].dimension.x >= width) ptrSprites[i].rx = 0;
if (ptrSprites[i].position.y <= 0) ptrSprites[i].ry = 1;
if (ptrSprites[i].position.y + ptrSprites[i].dimension.y >= height) ptrSprites[i].ry = 0;

//matrix-calculations (saved in local buffer)
Translation = glm::translate(Identity, ptrSprites[i].position + glm::vec3(ptrSprites[i].dimension.x / 2.0f, ptrSprites[i].dimension.y / 2.0f, 0.0f));
Scaling = glm::scale(Translation, ptrSprites[i].dimension);
ptrInstanceBufferData[i] = glm::rotate(Scaling, ptrSprites[i].rotation, ZRotateVec);
}

//render-call
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glUseProgram(programID);
glBindVertexArray(vertexArrayObject);
glBindBuffer(GL_ARRAY_BUFFER, instanceBuffer);
glBufferData(GL_ARRAY_BUFFER, iMaxInstanceCount * sizeof(glm::mat4), NULL, GL_STREAM_DRAW); // Buffer orphaning
glBufferSubData(GL_ARRAY_BUFFER, 0, iActInstanceCount * sizeof(glm::mat4), ptrInstanceBufferData);
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, iActInstanceCount);
glBindVertexArray(0);
glfwSwapBuffers(window);
glfwPollEvents();


//FPS-stuff
++fFramesRendered;

if ((fCurrentTime*1000.0f) >= (fFrameMeasurementStart*1000.0f) + 1000.0f)
{
fFPS = ((fCurrentTime*1000.0f) - (fFrameMeasurementStart*1000.0f)) / 1000.0f * fFramesRendered;
fFrameMeasurementStart = fCurrentTime;
fFramesRendered = 0;
std::cout << "FPS: " << fFPS << std::endl;
}
}

//Termination and cleanup
glDeleteBuffers(1, &vertexBuffer);
glDeleteBuffers(1, &instanceBuffer);
glDeleteVertexArrays(1, &vertexArrayObject);
glDeleteProgram(programID);
glfwDestroyWindow(window);
glfwTerminate();
return _getch();
}

最佳答案

好吧,在我的机器上测试后,它肯定是 CPU 有限的,所以你用 OGL 做的任何事情都不会有太大的不同。我至少在 -O1 上使用 GCC 获得大约 ~300fps,但在 -O0 上只有 ~80。我的 CPU 非常快(i7 2600k,4.7ghz),但我的 GPU 相当慢(GT 520)。我也在使用 Ubuntu。

一些可能会加快速度的快速想法:

  • 将顶点位置放入顶点着色器的数组中,并使用 gl_VertexID 访问它们
  • 使用 GL_TRIANGLE_STRIP 而不是 GL_TRIANGLES
  • 角度使用弧度,否则 GLM 必须转换它们

这些都不可能产生太大的影响,真的。只需确保您的编译器设置正确,可能没有其他事情要做。

关于c++ - 使用 OpenGL 3.3 实例化似乎很慢,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32789308/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com