mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-16 11:09:16 +01:00
f74dbc794c
Using 8-bit integer math here led to precision loss for depth copies, which broke various effects in games, e.g. lens flare in MK:DD. It's unlikely the console implements this as a floating-point multiply (fixed-point perhaps), but since we have the float round trip in our EFB2RAM shaders anyway, it's not going to make things any worse. If we do rewrite our shaders to use integer math completely, then it might be worth switching this conversion back to integers. However, the range of the values (format) should be known, or we should expand all values out to 24 bits first.
319 lines
11 KiB
C++
319 lines
11 KiB
C++
// Copyright 2010 Dolphin Emulator Project
|
|
// Licensed under GPLv2+
|
|
// Refer to the license.txt file included.
|
|
|
|
#include "VideoBackends/D3D/TextureCache.h"
|
|
|
|
#include <algorithm>
|
|
#include <memory>
|
|
|
|
#include "Common/CommonTypes.h"
|
|
#include "Common/Logging/Log.h"
|
|
|
|
#include "VideoBackends/D3D/D3DBase.h"
|
|
#include "VideoBackends/D3D/D3DShader.h"
|
|
#include "VideoBackends/D3D/D3DState.h"
|
|
#include "VideoBackends/D3D/D3DTexture.h"
|
|
#include "VideoBackends/D3D/D3DUtil.h"
|
|
#include "VideoBackends/D3D/DXTexture.h"
|
|
#include "VideoBackends/D3D/FramebufferManager.h"
|
|
#include "VideoBackends/D3D/GeometryShaderCache.h"
|
|
#include "VideoBackends/D3D/PSTextureEncoder.h"
|
|
#include "VideoBackends/D3D/PixelShaderCache.h"
|
|
#include "VideoBackends/D3D/VertexShaderCache.h"
|
|
|
|
#include "VideoCommon/ImageWrite.h"
|
|
#include "VideoCommon/RenderBase.h"
|
|
#include "VideoCommon/TextureConfig.h"
|
|
#include "VideoCommon/VideoConfig.h"
|
|
|
|
namespace DX11
|
|
{
|
|
// File-local PSTextureEncoder instance. It is created and Init()'d in the TextureCache
// constructor, used by TextureCache::CopyEFB(), and Shutdown()/reset in the TextureCache
// destructor.
static std::unique_ptr<PSTextureEncoder> g_encoder;
|
|
|
|
// Forwards an EFB copy request to the pixel-shader texture encoder.
//
// Every argument is passed through unchanged, and in the same order, to
// PSTextureEncoder::Encode() on the file-level g_encoder instance; no work is done here.
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
                           u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
                           const EFBRectangle& src_rect, bool scale_by_half, float y_scale,
                           float gamma, bool clamp_top, bool clamp_bottom,
                           const CopyFilterCoefficientArray& filter_coefficients)
{
  PSTextureEncoder& encoder = *g_encoder;
  encoder.Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
                 scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients);
}
|
|
|
|
// HLSL pixel shader source used to expand paletted textures (see TextureCache::ConvertTexture).
//
// The source is not compiled as-is: GetConvertShader() prepends a
// "#define DECODE DecodePixel_<Type>" line so that DECODE() in main() resolves to one of the
// DecodePixel_* functions below (IA8, RGB565 or RGB5A3).
//
// Resources expected by the shader:
//   t0 (Tex0)  - the index texture being converted.
//   t1 (Tex1)  - the palette, bound as a Buffer<uint>.
//   Multiply   - scale applied to the sampled index before rounding (ConvertTexture passes 15
//                for I4 sources and 255 otherwise).
//
// The text between the raw-string delimiters is handed verbatim to the HLSL compiler, so it must
// contain nothing but valid HLSL.
const char palette_shader[] =
    R"HLSL(
sampler samp0 : register(s0);
Texture2DArray Tex0 : register(t0);
Buffer<uint> Tex1 : register(t1);
uniform float Multiply;

uint Convert3To8(uint v)
{
  // Swizzle bits: 00000123 -> 12312312
  return (v << 5) | (v << 2) | (v >> 1);
}

uint Convert4To8(uint v)
{
  // Swizzle bits: 00001234 -> 12341234
  return (v << 4) | v;
}

uint Convert5To8(uint v)
{
  // Swizzle bits: 00012345 -> 12345123
  return (v << 3) | (v >> 2);
}

uint Convert6To8(uint v)
{
  // Swizzle bits: 00123456 -> 12345612
  return (v << 2) | (v >> 4);
}

float4 DecodePixel_RGB5A3(uint val)
{
  int r,g,b,a;
  if ((val&0x8000))
  {
    r=Convert5To8((val>>10) & 0x1f);
    g=Convert5To8((val>>5 ) & 0x1f);
    b=Convert5To8((val ) & 0x1f);
    a=0xFF;
  }
  else
  {
    a=Convert3To8((val>>12) & 0x7);
    r=Convert4To8((val>>8 ) & 0xf);
    g=Convert4To8((val>>4 ) & 0xf);
    b=Convert4To8((val ) & 0xf);
  }
  return float4(r, g, b, a) / 255;
}

float4 DecodePixel_RGB565(uint val)
{
  int r, g, b, a;
  r = Convert5To8((val >> 11) & 0x1f);
  g = Convert6To8((val >> 5) & 0x3f);
  b = Convert5To8((val) & 0x1f);
  a = 0xFF;
  return float4(r, g, b, a) / 255;
}

float4 DecodePixel_IA8(uint val)
{
  int i = val & 0xFF;
  int a = val >> 8;
  return float4(i, i, i, a) / 255;
}

void main(
  out float4 ocol0 : SV_Target,
  in float4 pos : SV_Position,
  in float3 uv0 : TEXCOORD0)
{
  uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r;
  src = Tex1.Load(src);
  src = ((src << 8) & 0xFF00) | (src >> 8);
  ocol0 = DECODE(src);
}
)HLSL";
|
|
|
|
// Expands a paletted texture into `destination` on the GPU.
//
// The palette is uploaded to palette_buf, the index-to-colour lookup is performed by the
// palette pixel shader selected by `format`, and the result is rendered into the destination
// texture's render target.
//
// destination: cache entry whose texture receives the decoded pixels.
// source:      cache entry holding the index texture; its width/height define the viewport and
//              the source rectangle of the draw.
// palette:     pointer to the palette data; 512 bytes are read from it.
// format:      TLUT format, used as an index into palette_pixel_shader.
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
                                  const void* palette, TLUTFormat format)
{
  DXTexture* source_texture = static_cast<DXTexture*>(source->texture.get());
  DXTexture* destination_texture = static_cast<DXTexture*>(destination->texture.get());
  g_renderer->ResetAPIState();

  // stretch picture with increased internal resolution
  const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, static_cast<float>(source->GetWidth()),
                                            static_cast<float>(source->GetHeight()));
  D3D::context->RSSetViewports(1, &vp);

  // Upload the palette. The box spans bytes [0, 512), i.e. the full sizeof(u16) * 256 buffer
  // created in the constructor.
  D3D11_BOX box{0, 0, 0, 512, 1, 1};
  D3D::context->UpdateSubresource(palette_buf, 0, &box, palette, 0, 0);

  // The shader reads the palette from texture slot 1 (register t1).
  D3D::stateman->SetTexture(1, palette_buf_srv);

  // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.)
  // Only the first element (the shader's `Multiply`) is set explicitly; the remaining elements
  // are zero-initialised. The array is 8 floats to match the size of uniform_buffer.
  float params[8] = {source->format == TextureFormat::I4 ? 15.f : 255.f};
  D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &params, 0, 0);
  D3D::stateman->SetPixelConstants(uniform_buffer);

  const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, source->GetWidth(), source->GetHeight());

  D3D::SetPointCopySampler();

  // Make sure we don't draw with the texture set as both a source and target.
  // (This can happen because we don't unbind textures when we free them.)
  D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV());
  D3D::stateman->Apply();

  D3D::context->OMSetRenderTargets(1, &destination_texture->GetRawTexIdentifier()->GetRTV(),
                                   nullptr);

  // Create texture copy
  D3D::drawShadedTexQuad(
      source_texture->GetRawTexIdentifier()->GetSRV(), &sourcerect, source->GetWidth(),
      source->GetHeight(), palette_pixel_shader[static_cast<int>(format)],
      VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(),
      GeometryShaderCache::GetCopyGeometryShader());

  g_renderer->RestoreAPIState();
}
|
|
|
|
// Builds and compiles the palette-conversion pixel shader for one decode routine.
//
// Type is the suffix of the DecodePixel_* function in palette_shader that DECODE should map
// to. The generated source is a single "#define DECODE DecodePixel_<Type>" line followed by
// the shared palette_shader text.
//
// Returns whatever D3D::CompileAndCreatePixelShader() returns for that source.
ID3D11PixelShader* GetConvertShader(const char* Type)
{
  const std::string source =
      std::string("#define DECODE DecodePixel_") + Type + "\n" + palette_shader;
  return D3D::CompileAndCreatePixelShader(source);
}
|
|
|
|
// Sets up the D3D11 resources owned by the texture cache:
//  - the file-level PSTextureEncoder (g_encoder),
//  - one palette-conversion pixel shader per supported TLUT format,
//  - the palette buffer and its shader resource view,
//  - the constant buffer shared by ConvertTexture() and CopyEFBToCacheEntry().
TextureCache::TextureCache()
{
  // FIXME: Is it safe here?
  g_encoder = std::make_unique<PSTextureEncoder>();
  g_encoder->Init();

  palette_buf = nullptr;
  palette_buf_srv = nullptr;
  uniform_buffer = nullptr;

  // Compile the palette decoders; each one is stored at the index of its TLUT format.
  struct PaletteDecoder
  {
    TLUTFormat format;
    const char* decode_suffix;
  };
  const PaletteDecoder decoders[] = {
      {TLUTFormat::IA8, "IA8"},
      {TLUTFormat::RGB565, "RGB565"},
      {TLUTFormat::RGB5A3, "RGB5A3"},
  };
  for (const PaletteDecoder& decoder : decoders)
  {
    palette_pixel_shader[static_cast<int>(decoder.format)] =
        GetConvertShader(decoder.decode_suffix);
  }

  // Palette storage: 256 entries of 16 bits each.
  const CD3D11_BUFFER_DESC palette_desc(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE);
  HRESULT hr = D3D::device->CreateBuffer(&palette_desc, nullptr, &palette_buf);
  CHECK(SUCCEEDED(hr), "create palette decoder lut buffer");
  D3D::SetDebugObjectName(palette_buf, "texture decoder lut buffer");

  // TODO: C14X2 format.
  // View over the palette buffer as 256 R16_UINT elements, for the shader's Buffer<uint>.
  const CD3D11_SHADER_RESOURCE_VIEW_DESC palette_srv_desc(palette_buf, DXGI_FORMAT_R16_UINT, 0,
                                                          256, 0);
  hr = D3D::device->CreateShaderResourceView(palette_buf, &palette_srv_desc, &palette_buf_srv);
  CHECK(SUCCEEDED(hr), "create palette decoder lut srv");
  D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv");

  // Constant buffer holding eight floats' worth of pixel shader constants.
  const CD3D11_BUFFER_DESC constants_desc(sizeof(float) * 8, D3D11_BIND_CONSTANT_BUFFER,
                                          D3D11_USAGE_DEFAULT);
  hr = D3D::device->CreateBuffer(&constants_desc, nullptr, &uniform_buffer);
  CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer");
  D3D::SetDebugObjectName(uniform_buffer,
                          "a constant buffer used in TextureCache::CopyRenderTargetToTexture");
}
|
|
|
|
// Tears down everything the constructor and GetEFBToTexPixelShader() created: the encoder,
// the palette buffer/SRV, the constant buffer, the palette decoder shaders and every cached
// EFB-to-texture pixel shader.
TextureCache::~TextureCache()
{
  // Shut the encoder down before destroying it.
  g_encoder->Shutdown();
  g_encoder.reset();

  SAFE_RELEASE(palette_buf);
  SAFE_RELEASE(palette_buf_srv);
  SAFE_RELEASE(uniform_buffer);

  // Palette decoders, one slot per TLUT format.
  for (auto*& decoder_shader : palette_pixel_shader)
  {
    SAFE_RELEASE(decoder_shader);
  }

  // Shaders accumulated in the EFB-to-texture cache (keyed by shader uid).
  for (auto& cached : m_efb_to_tex_pixel_shaders)
  {
    SAFE_RELEASE(cached.second);
  }
}
|
|
|
|
// Renders a region of the EFB into the texture of `entry` using a generated conversion shader.
//
// entry:               cache entry whose texture is the render target of the copy.
// is_depth_copy:       selects the EFB depth texture instead of the colour texture.
// src_rect:            source rectangle in EFB coordinates.
// scale_by_half:       selects the linear copy sampler instead of the point sampler.
// dst_format, is_intensity: forwarded to the shader uid.
// gamma:               its reciprocal is passed to the shader.
// clamp_top/bottom:    when set, the matching edge of src_rect (as a fraction of EFB_HEIGHT) is
//                      passed to the shader; otherwise 0.0 / 1.0 is used.
// filter_coefficients: copied into the shader constants; also decides, through
//                      NeedsCopyFilterInShader(), which shader variant is requested.
//
// Returns without drawing if no pixel shader is available for the requested uid.
void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                                       const EFBRectangle& src_rect, bool scale_by_half,
                                       EFBCopyFormat dst_format, bool is_intensity, float gamma,
                                       bool clamp_top, bool clamp_bottom,
                                       const CopyFilterCoefficientArray& filter_coefficients)
{
  // Layout of the constants uploaded to uniform_buffer. It is eight 4-byte members, which
  // matches the sizeof(float) * 8 constant buffer created in the constructor.
  struct PixelConstants
  {
    float filter_coefficients[3];
    float gamma_rcp;
    float clamp_top;
    float clamp_bottom;
    float pixel_height;
    u32 padding;
  };

  auto* const dst_texture = static_cast<DXTexture*>(entry->texture.get());

  // With multisampling enabled the resolved EFB textures are sampled, otherwise the EFB
  // textures are used directly.
  const bool use_resolved = g_ActiveConfig.iMultisamples > 1;
  ID3D11ShaderResourceView* efb_srv;
  if (is_depth_copy)
  {
    efb_srv = use_resolved ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() :
                             FramebufferManager::GetEFBDepthTexture()->GetSRV();
  }
  else
  {
    efb_srv = use_resolved ? FramebufferManager::GetResolvedEFBColorTexture()->GetSRV() :
                             FramebufferManager::GetEFBColorTexture()->GetSRV();
  }

  const auto shader_uid = TextureConversionShaderGen::GetShaderUid(
      dst_format, is_depth_copy, is_intensity, scale_by_half,
      NeedsCopyFilterInShader(filter_coefficients));
  ID3D11PixelShader* const conversion_shader = GetEFBToTexPixelShader(shader_uid);
  if (conversion_shader == nullptr)
    return;

  g_renderer->ResetAPIState();

  // The viewport covers the whole destination texture.
  const D3D11_VIEWPORT viewport =
      CD3D11_VIEWPORT(0.f, 0.f, static_cast<float>(dst_texture->GetConfig().width),
                      static_cast<float>(dst_texture->GetConfig().height));
  D3D::context->RSSetViewports(1, &viewport);

  const TargetRectangle scaled_src = g_renderer->ConvertEFBRectangle(src_rect);
  // TODO: try targetSource.asRECT();
  const D3D11_RECT src_box =
      CD3D11_RECT(scaled_src.left, scaled_src.top, scaled_src.right, scaled_src.bottom);

  // Linear filtering when scaling by half, point filtering otherwise.
  if (scale_by_half)
  {
    D3D::SetLinearCopySampler();
  }
  else
  {
    D3D::SetPointCopySampler();
  }

  PixelConstants constants;
  for (size_t coeff = 0; coeff < filter_coefficients.size(); coeff++)
    constants.filter_coefficients[coeff] = filter_coefficients[coeff];
  constants.gamma_rcp = 1.0f / gamma;
  constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f;
  constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f;
  constants.pixel_height =
      g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT;
  constants.padding = 0;
  D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &constants, 0, 0);
  D3D::stateman->SetPixelConstants(uniform_buffer);

  // Make sure we don't draw with the texture set as both a source and target.
  // (This can happen because we don't unbind textures when we free them.)
  D3D::stateman->UnsetTexture(dst_texture->GetRawTexIdentifier()->GetSRV());
  D3D::stateman->Apply();

  D3D::context->OMSetRenderTargets(1, &dst_texture->GetRawTexIdentifier()->GetRTV(), nullptr);

  // Draw the EFB region into the destination texture.
  D3D::drawShadedTexQuad(efb_srv, &src_box, g_renderer->GetTargetWidth(),
                         g_renderer->GetTargetHeight(), conversion_shader,
                         VertexShaderCache::GetSimpleVertexShader(),
                         VertexShaderCache::GetSimpleInputLayout(),
                         GeometryShaderCache::GetCopyGeometryShader());

  g_renderer->RestoreAPIState();
}
|
|
|
|
// Returns the EFB-to-texture pixel shader for `uid`, compiling it on first use.
//
// Results are memoised in m_efb_to_tex_pixel_shaders. Whatever
// D3D::CompileAndCreatePixelShader() returns is stored and returned as-is, so the same value
// is handed back on every later request for that uid.
ID3D11PixelShader*
TextureCache::GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid)
{
  const auto cached = m_efb_to_tex_pixel_shaders.find(uid);
  if (cached == m_efb_to_tex_pixel_shaders.end())
  {
    // Cache miss: generate the HLSL for this uid, compile it and remember the result.
    ShaderCode generated =
        TextureConversionShaderGen::GenerateShader(APIType::D3D, uid.GetUidData());
    ID3D11PixelShader* compiled = D3D::CompileAndCreatePixelShader(generated.GetBuffer());
    m_efb_to_tex_pixel_shaders.emplace(uid, compiled);
    return compiled;
  }

  return cached->second;
}
|
|
} // namespace DX11
|