gpt4 book ai didi

xna - HLSL 像素着色器光照性能 (XNA)

转载 作者:行者123 更新时间:2023-12-04 05:07:58 29 4
gpt4 key购买 nike

我有一个足够简单的着色器,它支持多个点光源。
灯光存储在一个固定最大长度的灯光结构体数组中;当活动灯光发生变化时,我会传入活动灯光的数量。
问题出在 PixelShader 函数中:
这是很基础的做法:从纹理中获取基色,遍历灯光数组的第 0 到 numActiveLights 项(即下面代码中的 activeLights)并累加每盏灯的效果。它工作正常,但性能很糟糕!
但是,如果我把对全局变量 numActiveLights(代码中的 activeLights)的引用替换为相同数值的常量,性能就很好。
我只是无法理解为什么引用变量会产生 30+ fps 的差异。

谁能解释一下?

完整着色器代码:

// Capacity of the `lights` array declared below.
#define MAX_POINT_LIGHTS 16

// One point light as read by PS_PointLighting.
struct PointLight
{
    float3 Position; // the pixel's WorldPosition is subtracted from this to form the light vector
    float4 Color;    // scaled by N.L and attenuation for the diffuse contribution
    float  Radius;   // divides the light vector, so attenuation falls to 0 at this distance
};

// Transform matrices; VS_PointLighting applies them in the order World, View, Projection.
float4x4 World;
float4x4 View;
float4x4 Projection;
// VS_PointLighting subtracts the transformed vertex position from this to build ViewDir.
float3 CameraPosition;

// Material terms read by PS_PointLighting.
float4 SpecularColor;
float SpecularPower;
float SpecularIntensity; // declared but not referenced by the shader code shown here
float4 AmbientColor;
float AmbientIntensity;
float DiffuseIntensity; // declared but not referenced by the shader code shown here

// Only referenced by the commented-out loop in PS_PointLighting; the live loop uses a literal bound.
int activeLights;
PointLight lights[MAX_POINT_LIGHTS];

// Feature switches tested in PS_PointLighting.
bool IsLightingEnabled;
bool IsAmbientLightingEnabled;
bool IsDiffuseLightingEnabled;
bool IsSpecularLightingEnabled;


// Texture resource sampled through TextureSampler.
Texture Texture;

// Sampler over Texture: point filtering for mag/min/mip, wrap addressing on U and V.
sampler TextureSampler = sampler_state
{
    Texture = <Texture>;

    Magfilter = POINT;
    Minfilter = POINT;
    Mipfilter = POINT;

    AddressU = WRAP;
    AddressV = WRAP;
};

// Per-vertex attributes consumed by VS_PointLighting.
struct VS_INPUT
{
    float4 Position : POSITION0; // multiplied by World in the vertex shader
    float2 TexCoord : TEXCOORD0; // passed through unchanged
    float3 Normal   : NORMAL0;   // multiplied by World and normalized in the vertex shader
};

// Values written by VS_PointLighting and received by PS_PointLighting.
struct VS_OUTPUT
{
    float3 WorldPosition : TEXCOORD0; // vertex position after the World transform
    float4 Position      : POSITION0; // position after World, View and Projection
    float3 Normal        : TEXCOORD1; // normal after the World transform, normalized
    float2 TexCoord      : TEXCOORD2; // copied from VS_INPUT.TexCoord
    float3 ViewDir       : TEXCOORD3; // normalized (CameraPosition - transformed position)
};

// Vertex shader for the PerPixelPointLighting technique.
// Transforms the vertex by World, View and Projection and forwards the
// World-transformed position, the normal, the texture coordinate and the
// camera-to-vertex direction to the pixel shader.
VS_OUTPUT VS_PointLighting(VS_INPUT input)
{
    VS_OUTPUT output;

    float4 worldPosition = mul(input.Position, World);
    // Explicit swizzle: the float3 output previously relied on implicit float4 -> float3 truncation.
    output.WorldPosition = worldPosition.xyz;

    float4 viewPosition = mul(worldPosition, View);
    output.Position = mul(viewPosition, Projection);

    // NOTE(review): a float3 normal is multiplied by the float4x4 World matrix here, which
    // relies on the compiler's implicit size conversion. An explicit (float3x3)World cast is
    // the usual spelling; confirm it produces the same result before changing it.
    output.Normal = normalize(mul(input.Normal, World));
    output.TexCoord = input.TexCoord;
    // Explicit swizzle so both operands of the subtraction are float3.
    output.ViewDir = normalize(CameraPosition - worldPosition.xyz);

    return output;
}

// Pixel shader for the PerPixelPointLighting technique.
// Returns the sampled texture colour unmodified when IsLightingEnabled is false.
// Otherwise returns the texture colour multiplied by the accumulated lighting:
// the optional ambient term plus the per-light diffuse and specular terms.
float4 PS_PointLighting(VS_OUTPUT IN) : COLOR
{
    // Sampled once up front; both return paths use the same texel.
    float4 texel = tex2D(TextureSampler, IN.TexCoord);
    if (!IsLightingEnabled) return texel;

    float4 color = float4(0.0f, 0.0f, 0.0f, 0.0f);
    float3 n = normalize(IN.Normal);

    if (IsAmbientLightingEnabled) color += (AmbientColor * AmbientIntensity);

    if (IsDiffuseLightingEnabled || IsSpecularLightingEnabled)
    {
        //for (int i = 0; i < activeLights; ++i)//works but performance is terrible
        for (int i = 0; i < 7; ++i)//performance is fine but obviously isn't dynamic
        {
            // Light vector expressed in units of the light's radius, so a squared
            // length of 1 or more saturates the attenuation to 0.
            float3 l = (lights[i].Position - IN.WorldPosition) / lights[i].Radius;
            float atten = saturate(1.0f - dot(l, l));

            l = normalize(l);
            float nDotL = saturate(dot(n, l));

            if (IsDiffuseLightingEnabled) color += (lights[i].Color * nDotL * atten);
            // NOTE(review): this term depends only on attenuation; it uses neither the
            // normal, IN.ViewDir, nor SpecularIntensity. Confirm that this is intended.
            if (IsSpecularLightingEnabled) color += (SpecularColor * SpecularPower * atten);
        }
    }

    return color * texel;
}

// Single unnamed pass; both stages are compiled against the shader model 3.0 profiles.
technique PerPixelPointLighting
{
    pass
    {
        VertexShader = compile vs_3_0 VS_PointLighting();
        PixelShader  = compile ps_3_0 PS_PointLighting();
    }
}

最佳答案

我的猜测是:把循环上界改为编译时常量后,HLSL 编译器就可以展开循环。也就是说,原本的这段代码:

// Loop whose bound is a compile-time constant.
for (int i = 0; i < 7; i++)
{
    doLoopyStuff();
}

变成这样了:
// The same seven calls written out with no loop counter or loop branch.
doLoopyStuff();
doLoopyStuff();
doLoopyStuff();
doLoopyStuff();
doLoopyStuff();
doLoopyStuff();
doLoopyStuff();

循环和条件分支可能会严重影响着色器代码的性能,应尽可能避免。

编辑

这只是我临时想到的,不过也许你可以试试类似这样的做法:
// Evaluate every light slot up to the compile-time maximum and zero out the inactive ones.
for (int i = 0; i < MAX_LIGHTS; i++)
{
    // step(y, x) is 1 when x >= y, so comparing against i + 1 gives the mask (i < activeLights).
    // step(i, activeLights) would also enable light index activeLights, one light too many.
    color += step(i + 1, activeLights) * lightingFunction();
}

通过这种方式,您会计算所有可能的灯光,但对于非活动的灯光始终得到 0 值。当然,实际收益取决于光照函数的复杂程度;你需要做更多的性能分析(profiling)。

关于xna - HLSL 像素着色器光照性能 (XNA),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15277843/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com