gpt4 book ai didi

directx - 使用 DirectX11 像素着色器在 GPU 中从 DXGI_FORMAT_B8G8R8A8_UNORM 到 NV12 的颜色转换

转载 作者:行者123 更新时间:2023-12-04 04:23:43 24 4
gpt4 key购买 nike

我正在编写代码以使用桌面复制捕获桌面并使用英特尔硬件MFT 将其编码为 h264。编码器只接受 NV12 格式作为输入。我有一个工作正常的 DXGI_FORMAT_B8G8R8A8_UNORM 到 NV12 转换器(https://github.com/NVIDIA/video-sdk-samples/blob/master/nvEncDXGIOutputDuplicationSample/Preproc.cpp),它基于 DirectX VideoProcessor。

问题是某些英特尔图形硬件上的 VideoProcessor 仅支持从 DXGI_FORMAT_B8G8R8A8_UNORM 到 YUY2 但不支持 NV12 的转换,我通过 GetVideoProcessorOutputFormats 枚举支持的格式确认了这一点。虽然 VideoProcessor Blt 没有任何错误成功,而且我可以看到输出视频中的帧有点像素化,但如果我仔细观察的话,我会注意到它。

我猜,VideoProcessor 只是简单地故障转移到下一个支持的输出格式(YUY2),我在不知不觉中将它提供给认为输入在 NV12 中配置的编码器。由于 NV12 和 YUY2 之间的字节顺序和二次采样等差异很小,因此没有帧的故障或严重损坏。此外,我在支持 NV12 转换的硬件上没有像素化问题。

所以我决定使用基于此代码(https://github.com/bavulapati/DXGICaptureDXColorSpaceConversionIntelEncode/blob/master/DXGICaptureDXColorSpaceConversionIntelEncode/DuplicationManager.cpp)的像素着色器进行颜色转换。我能够使像素着色器工作,我还在这里上传了我的代码(https://codeshare.io/5PJjxP)以供引用(尽可能简化它)。

Now, I'm left with two channels, chroma, and luma respectively (ID3D11Texture2D textures). And I'm really confused about efficiently packing the two separate channels into one ID3D11Texture2D texture so that I may feed the same to the encoder. Is there a way to efficiently pack the Y and UV channels into a single ID3D11Texture2D in GPU? I'm really tired of CPU based approaches due to the fact that it's costly, and doesn't offer the best possible frame rates. In fact, I'm reluctant to even copy the textures to CPU. I'm thinking of a way to do it in GPU without any back and forth copies between CPU and GPU.



我一直在研究这个很长一段时间没有任何进展,任何帮助将不胜感激。
/**
* This method is incomplete. It's just a template of what I want to achieve.
*/

HRESULT CreateNV12TextureFromLumaAndChromaSurface(ID3D11Texture2D** pOutputTexture)
{
HRESULT hr = S_OK;

try
{
//Copying from GPU to CPU. Bad :(
m_pD3D11DeviceContext->CopyResource(m_CPUAccessibleLuminanceSurf, m_LuminanceSurf);

D3D11_MAPPED_SUBRESOURCE resource;
UINT subresource = D3D11CalcSubresource(0, 0, 0);

HRESULT hr = m_pD3D11DeviceContext->Map(m_CPUAccessibleLuminanceSurf, subresource, D3D11_MAP_READ, 0, &resource);

BYTE* sptr = reinterpret_cast<BYTE*>(resource.pData);
BYTE* dptrY = nullptr; // point to the address of Y channel in output surface

//Store Image Pitch
int m_ImagePitch = resource.RowPitch;

int height = GetImageHeight();
int width = GetImageWidth();

for (int i = 0; i < height; i++)
{
memcpy_s(dptrY, m_ImagePitch, sptr, m_ImagePitch);

sptr += m_ImagePitch;
dptrY += m_ImagePitch;
}

m_pD3D11DeviceContext->Unmap(m_CPUAccessibleLuminanceSurf, subresource);

//Copying from GPU to CPU. Bad :(
m_pD3D11DeviceContext->CopyResource(m_CPUAccessibleChrominanceSurf, m_ChrominanceSurf);
hr = m_pD3D11DeviceContext->Map(m_CPUAccessibleChrominanceSurf, subresource, D3D11_MAP_READ, 0, &resource);

sptr = reinterpret_cast<BYTE*>(resource.pData);
BYTE* dptrUV = nullptr; // point to the address of UV channel in output surface

m_ImagePitch = resource.RowPitch;
height /= 2;
width /= 2;

for (int i = 0; i < height; i++)
{
memcpy_s(dptrUV, m_ImagePitch, sptr, m_ImagePitch);

sptr += m_ImagePitch;
dptrUV += m_ImagePitch;
}

m_pD3D11DeviceContext->Unmap(m_CPUAccessibleChrominanceSurf, subresource);
}
catch(HRESULT){}

return hr;
}

绘制 NV12:
 //
// Draw frame for NV12 texture
//
HRESULT DrawNV12Frame(ID3D11Texture2D* inputTexture)
{
HRESULT hr;

// If window was resized, resize swapchain
if (!m_bIntialized)
{
HRESULT Ret = InitializeNV12Surfaces(inputTexture);
if (!SUCCEEDED(Ret))
{
return Ret;
}

m_bIntialized = true;
}

m_pD3D11DeviceContext->CopyResource(m_ShaderResourceSurf, inputTexture);

D3D11_TEXTURE2D_DESC FrameDesc;
m_ShaderResourceSurf->GetDesc(&FrameDesc);

D3D11_SHADER_RESOURCE_VIEW_DESC ShaderDesc;
ShaderDesc.Format = FrameDesc.Format;
ShaderDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
ShaderDesc.Texture2D.MostDetailedMip = FrameDesc.MipLevels - 1;
ShaderDesc.Texture2D.MipLevels = FrameDesc.MipLevels;

// Create new shader resource view
ID3D11ShaderResourceView* ShaderResource = nullptr;
hr = m_pD3D11Device->CreateShaderResourceView(m_ShaderResourceSurf, &ShaderDesc, &ShaderResource);

IF_FAILED_THROW(hr);

m_pD3D11DeviceContext->PSSetShaderResources(0, 1, &ShaderResource);

// Set resources
m_pD3D11DeviceContext->OMSetRenderTargets(1, &m_pLumaRT, nullptr);
m_pD3D11DeviceContext->PSSetShader(m_pPixelShaderLuma, nullptr, 0);
m_pD3D11DeviceContext->RSSetViewports(1, &m_VPLuminance);

// Draw textured quad onto render target
m_pD3D11DeviceContext->Draw(NUMVERTICES, 0);

m_pD3D11DeviceContext->OMSetRenderTargets(1, &m_pChromaRT, nullptr);
m_pD3D11DeviceContext->PSSetShader(m_pPixelShaderChroma, nullptr, 0);
m_pD3D11DeviceContext->RSSetViewports(1, &m_VPChrominance);

// Draw textured quad onto render target
m_pD3D11DeviceContext->Draw(NUMVERTICES, 0);

// Release shader resource
ShaderResource->Release();
ShaderResource = nullptr;

return S_OK;
}

初始化着色器:
void SetViewPort(D3D11_VIEWPORT* VP, UINT Width, UINT Height)
{
VP->Width = static_cast<FLOAT>(Width);
VP->Height = static_cast<FLOAT>(Height);
VP->MinDepth = 0.0f;
VP->MaxDepth = 1.0f;
VP->TopLeftX = 0;
VP->TopLeftY = 0;
}

HRESULT MakeRTV(ID3D11RenderTargetView** pRTV, ID3D11Texture2D* pSurf)
{
if (*pRTV)
{
(*pRTV)->Release();
*pRTV = nullptr;
}
// Create a render target view
HRESULT hr = m_pD3D11Device->CreateRenderTargetView(pSurf, nullptr, pRTV);

IF_FAILED_THROW(hr);

return S_OK;
}

HRESULT InitializeNV12Surfaces(ID3D11Texture2D* inputTexture)
{
ReleaseSurfaces();

D3D11_TEXTURE2D_DESC lOutputDuplDesc;
inputTexture->GetDesc(&lOutputDuplDesc);


// Create shared texture for all duplication threads to draw into
D3D11_TEXTURE2D_DESC DeskTexD;
RtlZeroMemory(&DeskTexD, sizeof(D3D11_TEXTURE2D_DESC));
DeskTexD.Width = lOutputDuplDesc.Width;
DeskTexD.Height = lOutputDuplDesc.Height;
DeskTexD.MipLevels = 1;
DeskTexD.ArraySize = 1;
DeskTexD.Format = lOutputDuplDesc.Format;
DeskTexD.SampleDesc.Count = 1;
DeskTexD.Usage = D3D11_USAGE_DEFAULT;
DeskTexD.BindFlags = D3D11_BIND_SHADER_RESOURCE;

HRESULT hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, nullptr, &m_ShaderResourceSurf);
IF_FAILED_THROW(hr);

DeskTexD.Format = DXGI_FORMAT_R8_UNORM;
DeskTexD.BindFlags = D3D11_BIND_RENDER_TARGET;

hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, nullptr, &m_LuminanceSurf);
IF_FAILED_THROW(hr);

DeskTexD.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
DeskTexD.Usage = D3D11_USAGE_STAGING;
DeskTexD.BindFlags = 0;

hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, NULL, &m_CPUAccessibleLuminanceSurf);
IF_FAILED_THROW(hr);

SetViewPort(&m_VPLuminance, DeskTexD.Width, DeskTexD.Height);

HRESULT Ret = MakeRTV(&m_pLumaRT, m_LuminanceSurf);
if (!SUCCEEDED(Ret))
return Ret;

DeskTexD.Width = lOutputDuplDesc.Width / 2;
DeskTexD.Height = lOutputDuplDesc.Height / 2;
DeskTexD.Format = DXGI_FORMAT_R8G8_UNORM;

DeskTexD.Usage = D3D11_USAGE_DEFAULT;
DeskTexD.CPUAccessFlags = 0;
DeskTexD.BindFlags = D3D11_BIND_RENDER_TARGET;

hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, nullptr, &m_ChrominanceSurf);
IF_FAILED_THROW(hr);

DeskTexD.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
DeskTexD.Usage = D3D11_USAGE_STAGING;
DeskTexD.BindFlags = 0;

hr = m_pD3D11Device->CreateTexture2D(&DeskTexD, NULL, &m_CPUAccessibleChrominanceSurf);
IF_FAILED_THROW(hr);

SetViewPort(&m_VPChrominance, DeskTexD.Width, DeskTexD.Height);
return MakeRTV(&m_pChromaRT, m_ChrominanceSurf);
}

HRESULT InitVertexShader(ID3D11VertexShader** ppID3D11VertexShader)
{
HRESULT hr = S_OK;
UINT Size = ARRAYSIZE(g_VS);

try
{
IF_FAILED_THROW(m_pD3D11Device->CreateVertexShader(g_VS, Size, NULL, ppID3D11VertexShader));;

m_pD3D11DeviceContext->VSSetShader(m_pVertexShader, nullptr, 0);

// Vertices for drawing whole texture
VERTEX Vertices[NUMVERTICES] =
{
{ XMFLOAT3(-1.0f, -1.0f, 0), XMFLOAT2(0.0f, 1.0f) },
{ XMFLOAT3(-1.0f, 1.0f, 0), XMFLOAT2(0.0f, 0.0f) },
{ XMFLOAT3(1.0f, -1.0f, 0), XMFLOAT2(1.0f, 1.0f) },
{ XMFLOAT3(1.0f, -1.0f, 0), XMFLOAT2(1.0f, 1.0f) },
{ XMFLOAT3(-1.0f, 1.0f, 0), XMFLOAT2(0.0f, 0.0f) },
{ XMFLOAT3(1.0f, 1.0f, 0), XMFLOAT2(1.0f, 0.0f) },
};

UINT Stride = sizeof(VERTEX);
UINT Offset = 0;

D3D11_BUFFER_DESC BufferDesc;
RtlZeroMemory(&BufferDesc, sizeof(BufferDesc));
BufferDesc.Usage = D3D11_USAGE_DEFAULT;
BufferDesc.ByteWidth = sizeof(VERTEX) * NUMVERTICES;
BufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
BufferDesc.CPUAccessFlags = 0;
D3D11_SUBRESOURCE_DATA InitData;
RtlZeroMemory(&InitData, sizeof(InitData));
InitData.pSysMem = Vertices;

// Create vertex buffer
IF_FAILED_THROW(m_pD3D11Device->CreateBuffer(&BufferDesc, &InitData, &m_VertexBuffer));

m_pD3D11DeviceContext->IASetVertexBuffers(0, 1, &m_VertexBuffer, &Stride, &Offset);
m_pD3D11DeviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

D3D11_INPUT_ELEMENT_DESC Layout[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0 }
};

UINT NumElements = ARRAYSIZE(Layout);
hr = m_pD3D11Device->CreateInputLayout(Layout, NumElements, g_VS, Size, &m_pVertexLayout);

m_pD3D11DeviceContext->IASetInputLayout(m_pVertexLayout);
}
catch (HRESULT) {}

return hr;
}

HRESULT InitPixelShaders()
{
HRESULT hr = S_OK;
// Refer https://codeshare.io/5PJjxP for g_PS_Y & g_PS_UV blobs
try
{
UINT Size = ARRAYSIZE(g_PS_Y);
hr = m_pD3D11Device->CreatePixelShader(g_PS_Y, Size, nullptr, &m_pPixelShaderChroma);

IF_FAILED_THROW(hr);

Size = ARRAYSIZE(g_PS_UV);
hr = m_pD3D11Device->CreatePixelShader(g_PS_UV, Size, nullptr, &m_pPixelShaderLuma);

IF_FAILED_THROW(hr);
}
catch (HRESULT) {}

return hr;
}

最佳答案

我正在使用 DirectX11 仅在 GPU 中试验这种 RGBA 到 NV12 的转换。

这是一个很好的挑战。我对 Directx11 不熟悉,所以这是我的第一次实验。

检查此项目的更新:D3D11ShaderNV12

在我当前的实现中(可能不是最后一个),这就是我所做的:

  • 第 1 步:使用 DXGI_FORMAT_B8G8R8A8_UNORM 作为输入纹理
  • 第 2 步:制作第 1 遍着色器以获得 3 个纹理(Y:Luma、U:ChromaCb 和 V:ChromaCr):参见 YCbCrPS2.hlsl
  • 第 3 步:Y 是 DXGI_FORMAT_R8_UNORM,准备好最终的 NV12 纹理
  • 第 4 步:UV 需要在第二遍着色器中进行下采样:参见 ScreenPS2.hlsl(使用线性过滤)
  • 第 5 步:第三遍着色器对 Y 纹理进行采样
  • 第 6 步:第四遍着色器使用移位纹理对 UV 纹理进行采样(我认为可以使用其他技术)

  • ShaderNV12

    我的最终纹理不是 DXGI_FORMAT_NV12,而是类似的 DXGI_FORMAT_R8_UNORM 纹理。我的电脑是Windows7,所以DXGI_FORMAT_NV12没有处理。稍后我将在另一台计算机上尝试。

    有图的过程:

    RenderTarget

    关于directx - 使用 DirectX11 像素着色器在 GPU 中从 DXGI_FORMAT_B8G8R8A8_UNORM 到 NV12 的颜色转换,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58980202/

    24 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com