dolphin/Source/Core/VideoBackends/D3D/TextureCache.cpp
Markus Wick 491c10ec96 VideoBackends: Use VideoCommon shader generators for efb2tex copies.
This will generate one shader per copy format. For now, it is the same
shader with the colmat hard coded. So it should already improve the GPU
performance a bit, but a rewrite of the shader generator is suggested.

Half of the patch is done by linkmauve1:
VideoCommon: Reorganise the shader writes.
2017-12-02 15:17:39 +01:00

297 lines
10 KiB
C++

// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D/TextureCache.h"
#include <algorithm>
#include <memory>
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DShader.h"
#include "VideoBackends/D3D/D3DState.h"
#include "VideoBackends/D3D/D3DTexture.h"
#include "VideoBackends/D3D/D3DUtil.h"
#include "VideoBackends/D3D/DXTexture.h"
#include "VideoBackends/D3D/FramebufferManager.h"
#include "VideoBackends/D3D/GeometryShaderCache.h"
#include "VideoBackends/D3D/PSTextureEncoder.h"
#include "VideoBackends/D3D/PixelShaderCache.h"
#include "VideoBackends/D3D/VertexShaderCache.h"
#include "VideoCommon/ImageWrite.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureConfig.h"
#include "VideoCommon/VideoConfig.h"
namespace DX11
{
static const size_t MAX_COPY_BUFFERS = 32;
static ID3D11Buffer* s_efbcopycbuf[MAX_COPY_BUFFERS] = {0};
static std::unique_ptr<PSTextureEncoder> g_encoder;
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const EFBRectangle& src_rect, bool scale_by_half)
{
g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
scale_by_half);
}
const char palette_shader[] =
R"HLSL(
sampler samp0 : register(s0);
Texture2DArray Tex0 : register(t0);
Buffer<uint> Tex1 : register(t1);
uniform float Multiply;
uint Convert3To8(uint v)
{
// Swizzle bits: 00000123 -> 12312312
return (v << 5) | (v << 2) | (v >> 1);
}
uint Convert4To8(uint v)
{
// Swizzle bits: 00001234 -> 12341234
return (v << 4) | v;
}
uint Convert5To8(uint v)
{
// Swizzle bits: 00012345 -> 12345123
return (v << 3) | (v >> 2);
}
uint Convert6To8(uint v)
{
// Swizzle bits: 00123456 -> 12345612
return (v << 2) | (v >> 4);
}
float4 DecodePixel_RGB5A3(uint val)
{
int r,g,b,a;
if ((val&0x8000))
{
r=Convert5To8((val>>10) & 0x1f);
g=Convert5To8((val>>5 ) & 0x1f);
b=Convert5To8((val ) & 0x1f);
a=0xFF;
}
else
{
a=Convert3To8((val>>12) & 0x7);
r=Convert4To8((val>>8 ) & 0xf);
g=Convert4To8((val>>4 ) & 0xf);
b=Convert4To8((val ) & 0xf);
}
return float4(r, g, b, a) / 255;
}
float4 DecodePixel_RGB565(uint val)
{
int r, g, b, a;
r = Convert5To8((val >> 11) & 0x1f);
g = Convert6To8((val >> 5) & 0x3f);
b = Convert5To8((val) & 0x1f);
a = 0xFF;
return float4(r, g, b, a) / 255;
}
float4 DecodePixel_IA8(uint val)
{
int i = val & 0xFF;
int a = val >> 8;
return float4(i, i, i, a) / 255;
}
void main(
out float4 ocol0 : SV_Target,
in float4 pos : SV_Position,
in float3 uv0 : TEXCOORD0)
{
uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r;
src = Tex1.Load(src);
src = ((src << 8) & 0xFF00) | (src >> 8);
ocol0 = DECODE(src);
}
)HLSL";
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
const void* palette, TLUTFormat format)
{
DXTexture* source_texture = static_cast<DXTexture*>(source->texture.get());
DXTexture* destination_texture = static_cast<DXTexture*>(destination->texture.get());
g_renderer->ResetAPIState();
// stretch picture with increased internal resolution
const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, static_cast<float>(source->GetWidth()),
static_cast<float>(source->GetHeight()));
D3D::context->RSSetViewports(1, &vp);
D3D11_BOX box{0, 0, 0, 512, 1, 1};
D3D::context->UpdateSubresource(palette_buf, 0, &box, palette, 0, 0);
D3D::stateman->SetTexture(1, palette_buf_srv);
// TODO: Add support for C14X2 format. (Different multiplier, more palette entries.)
float params[4] = {source->format == TextureFormat::I4 ? 15.f : 255.f};
D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0);
D3D::stateman->SetPixelConstants(palette_uniform);
const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, source->GetWidth(), source->GetHeight());
D3D::SetPointCopySampler();
// Make sure we don't draw with the texture set as both a source and target.
// (This can happen because we don't unbind textures when we free them.)
D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV());
D3D::stateman->Apply();
D3D::context->OMSetRenderTargets(1, &destination_texture->GetRawTexIdentifier()->GetRTV(),
nullptr);
// Create texture copy
D3D::drawShadedTexQuad(
source_texture->GetRawTexIdentifier()->GetSRV(), &sourcerect, source->GetWidth(),
source->GetHeight(), palette_pixel_shader[static_cast<int>(format)],
VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(),
GeometryShaderCache::GetCopyGeometryShader());
FramebufferManager::BindEFBRenderTarget();
g_renderer->RestoreAPIState();
}
ID3D11PixelShader* GetConvertShader(const char* Type)
{
std::string shader = "#define DECODE DecodePixel_";
shader.append(Type);
shader.append("\n");
shader.append(palette_shader);
return D3D::CompileAndCreatePixelShader(shader);
}
TextureCache::TextureCache()
{
// FIXME: Is it safe here?
g_encoder = std::make_unique<PSTextureEncoder>();
g_encoder->Init();
palette_buf = nullptr;
palette_buf_srv = nullptr;
palette_uniform = nullptr;
palette_pixel_shader[static_cast<int>(TLUTFormat::IA8)] = GetConvertShader("IA8");
palette_pixel_shader[static_cast<int>(TLUTFormat::RGB565)] = GetConvertShader("RGB565");
palette_pixel_shader[static_cast<int>(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3");
auto lutBd = CD3D11_BUFFER_DESC(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE);
HRESULT hr = D3D::device->CreateBuffer(&lutBd, nullptr, &palette_buf);
CHECK(SUCCEEDED(hr), "create palette decoder lut buffer");
D3D::SetDebugObjectName(palette_buf, "texture decoder lut buffer");
// TODO: C14X2 format.
auto outlutUavDesc =
CD3D11_SHADER_RESOURCE_VIEW_DESC(palette_buf, DXGI_FORMAT_R16_UINT, 0, 256, 0);
hr = D3D::device->CreateShaderResourceView(palette_buf, &outlutUavDesc, &palette_buf_srv);
CHECK(SUCCEEDED(hr), "create palette decoder lut srv");
D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv");
const D3D11_BUFFER_DESC cbdesc =
CD3D11_BUFFER_DESC(16, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &palette_uniform);
CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer");
D3D::SetDebugObjectName(palette_uniform,
"a constant buffer used in TextureCache::CopyRenderTargetToTexture");
}
TextureCache::~TextureCache()
{
for (unsigned int k = 0; k < MAX_COPY_BUFFERS; ++k)
SAFE_RELEASE(s_efbcopycbuf[k]);
g_encoder->Shutdown();
g_encoder.reset();
SAFE_RELEASE(palette_buf);
SAFE_RELEASE(palette_buf_srv);
SAFE_RELEASE(palette_uniform);
for (ID3D11PixelShader*& shader : palette_pixel_shader)
SAFE_RELEASE(shader);
}
void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity)
{
auto* destination_texture = static_cast<DXTexture*>(entry->texture.get());
// When copying at half size, in multisampled mode, resolve the color/depth buffer first.
// This is because multisampled texture reads go through Load, not Sample, and the linear
// filter is ignored.
bool multisampled = (g_ActiveConfig.iMultisamples > 1);
ID3D11ShaderResourceView* efbTexSRV = is_depth_copy ?
FramebufferManager::GetEFBDepthTexture()->GetSRV() :
FramebufferManager::GetEFBColorTexture()->GetSRV();
if (multisampled && scale_by_half)
{
multisampled = false;
efbTexSRV = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() :
FramebufferManager::GetResolvedEFBColorTexture()->GetSRV();
}
g_renderer->ResetAPIState();
// stretch picture with increased internal resolution
const D3D11_VIEWPORT vp =
CD3D11_VIEWPORT(0.f, 0.f, static_cast<float>(destination_texture->GetConfig().width),
static_cast<float>(destination_texture->GetConfig().height));
D3D::context->RSSetViewports(1, &vp);
// set transformation
if (nullptr == s_efbcopycbuf[cbuf_id])
{
const D3D11_BUFFER_DESC cbdesc =
CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
D3D11_SUBRESOURCE_DATA data;
data.pSysMem = colmat;
HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &s_efbcopycbuf[cbuf_id]);
CHECK(SUCCEEDED(hr), "Create efb copy constant buffer %d", cbuf_id);
D3D::SetDebugObjectName(s_efbcopycbuf[cbuf_id],
"a constant buffer used in TextureCache::CopyRenderTargetToTexture");
}
D3D::stateman->SetPixelConstants(s_efbcopycbuf[cbuf_id]);
const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(src_rect);
// TODO: try targetSource.asRECT();
const D3D11_RECT sourcerect =
CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom);
// Use linear filtering if (bScaleByHalf), use point filtering otherwise
if (scale_by_half)
D3D::SetLinearCopySampler();
else
D3D::SetPointCopySampler();
// Make sure we don't draw with the texture set as both a source and target.
// (This can happen because we don't unbind textures when we free them.)
D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV());
D3D::stateman->Apply();
D3D::context->OMSetRenderTargets(1, &destination_texture->GetRawTexIdentifier()->GetRTV(),
nullptr);
// Create texture copy
D3D::drawShadedTexQuad(
efbTexSRV, &sourcerect, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(),
is_depth_copy ? PixelShaderCache::GetDepthMatrixProgram(multisampled) :
PixelShaderCache::GetColorMatrixProgram(multisampled),
VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(),
GeometryShaderCache::GetCopyGeometryShader());
FramebufferManager::BindEFBRenderTarget();
g_renderer->RestoreAPIState();
}
}