diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 3a2a40a6d7..61904b8a70 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -139,6 +139,7 @@ const ConfigInfo GFX_HACK_SKIP_XFB_COPY_TO_RAM{{System::GFX, "Hacks", "XFB true}; const ConfigInfo GFX_HACK_DISABLE_COPY_TO_VRAM{{System::GFX, "Hacks", "DisableCopyToVRAM"}, false}; +const ConfigInfo GFX_HACK_DEFER_EFB_COPIES{{System::GFX, "Hacks", "DeferEFBCopies"}, true}; const ConfigInfo GFX_HACK_IMMEDIATE_XFB{{System::GFX, "Hacks", "ImmediateXFBEnable"}, false}; const ConfigInfo GFX_HACK_COPY_EFB_SCALED{{System::GFX, "Hacks", "EFBScaledCopy"}, true}; const ConfigInfo GFX_HACK_EFB_EMULATE_FORMAT_CHANGES{ diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 3e87298f85..73edbe8cf4 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -107,6 +107,7 @@ extern const ConfigInfo GFX_HACK_FORCE_PROGRESSIVE; extern const ConfigInfo GFX_HACK_SKIP_EFB_COPY_TO_RAM; extern const ConfigInfo GFX_HACK_SKIP_XFB_COPY_TO_RAM; extern const ConfigInfo GFX_HACK_DISABLE_COPY_TO_VRAM; +extern const ConfigInfo GFX_HACK_DEFER_EFB_COPIES; extern const ConfigInfo GFX_HACK_IMMEDIATE_XFB; extern const ConfigInfo GFX_HACK_COPY_EFB_SCALED; extern const ConfigInfo GFX_HACK_EFB_EMULATE_FORMAT_CHANGES; diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index 86098a9557..c3345bad24 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -114,6 +114,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location) Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM.location, Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM.location, Config::GFX_HACK_DISABLE_COPY_TO_VRAM.location, + 
Config::GFX_HACK_DEFER_EFB_COPIES.location, Config::GFX_HACK_IMMEDIATE_XFB.location, Config::GFX_HACK_COPY_EFB_SCALED.location, Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES.location, diff --git a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp index 6d6609c741..9342c3cc00 100644 --- a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp @@ -46,10 +46,13 @@ void HacksWidget::CreateWidgets() Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES, true); m_store_efb_copies = new GraphicsBool(tr("Store EFB Copies to Texture Only"), Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM); + m_defer_efb_copies = + new GraphicsBool(tr("Defer EFB Copies to RAM"), Config::GFX_HACK_DEFER_EFB_COPIES); efb_layout->addWidget(m_skip_efb_cpu, 0, 0); efb_layout->addWidget(m_ignore_format_changes, 0, 1); efb_layout->addWidget(m_store_efb_copies, 1, 0); + efb_layout->addWidget(m_defer_efb_copies, 1, 1); // Texture Cache auto* texture_cache_box = new QGroupBox(tr("Texture Cache")); @@ -109,6 +112,8 @@ void HacksWidget::CreateWidgets() main_layout->addStretch(); setLayout(main_layout); + + UpdateDeferEFBCopiesEnabled(); } void HacksWidget::OnBackendChanged(const QString& backend_name) @@ -129,6 +134,10 @@ void HacksWidget::OnBackendChanged(const QString& backend_name) void HacksWidget::ConnectWidgets() { connect(m_accuracy, &QSlider::valueChanged, [this](int) { SaveSettings(); }); + connect(m_store_efb_copies, &QCheckBox::stateChanged, + [this](int) { UpdateDeferEFBCopiesEnabled(); }); + connect(m_store_xfb_copies, &QCheckBox::stateChanged, + [this](int) { UpdateDeferEFBCopiesEnabled(); }); } void HacksWidget::LoadSettings() @@ -202,6 +211,11 @@ void HacksWidget::AddDescriptions() "in a small number of games.\n\nEnabled = EFB Copies to Texture\nDisabled = EFB Copies to " "RAM " "(and Texture)\n\nIf unsure, leave this checked."); + static const char TR_DEFER_EFB_COPIES_DESCRIPTION[] = 
QT_TR_NOOP( + "Waits until the game synchronizes with the emulated GPU before writing the contents of EFB " + "copies to RAM. Reduces the overhead of EFB RAM copies, provides a performance boost in many " + "games, at the risk of breaking those which do not safely synchronize with the emulated " + "GPU.\n\nIf unsure, leave this checked."); static const char TR_ACCUARCY_DESCRIPTION[] = QT_TR_NOOP( "The \"Safe\" setting eliminates the likelihood of the GPU missing texture updates " "from RAM.\nLower accuracies cause in-game text to appear garbled in certain " @@ -240,6 +254,7 @@ void HacksWidget::AddDescriptions() AddDescription(m_skip_efb_cpu, TR_SKIP_EFB_CPU_ACCESS_DESCRIPTION); AddDescription(m_ignore_format_changes, TR_IGNORE_FORMAT_CHANGE_DESCRIPTION); AddDescription(m_store_efb_copies, TR_STORE_EFB_TO_TEXTURE_DESCRIPTION); + AddDescription(m_defer_efb_copies, TR_DEFER_EFB_COPIES_DESCRIPTION); AddDescription(m_accuracy, TR_ACCUARCY_DESCRIPTION); AddDescription(m_store_xfb_copies, TR_STORE_XFB_TO_TEXTURE_DESCRIPTION); AddDescription(m_immediate_xfb, TR_IMMEDIATE_XFB_DESCRIPTION); @@ -248,3 +263,11 @@ void HacksWidget::AddDescriptions() AddDescription(m_disable_bounding_box, TR_DISABLE_BOUNDINGBOX_DESCRIPTION); AddDescription(m_vertex_rounding, TR_VERTEX_ROUNDING_DESCRIPTION); } + +void HacksWidget::UpdateDeferEFBCopiesEnabled() +{ + // We disable the checkbox for defer EFB copies when both EFB and XFB copies to texture are + // enabled. 
+ const bool no_ram_copies = m_store_efb_copies->isChecked() && m_store_xfb_copies->isChecked(); + m_defer_efb_copies->setEnabled(!no_ram_copies); +} diff --git a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h index 937c92d064..47de3c0287 100644 --- a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h @@ -42,8 +42,11 @@ private: QCheckBox* m_fast_depth_calculation; QCheckBox* m_disable_bounding_box; QCheckBox* m_vertex_rounding; + QCheckBox* m_defer_efb_copies; void CreateWidgets(); void ConnectWidgets(); void AddDescriptions(); + + void UpdateDeferEFBCopiesEnabled(); }; diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 7c6dbd4411..231f317488 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -46,13 +46,8 @@ PSTextureEncoder::~PSTextureEncoder() = default; void PSTextureEncoder::Init() { - // TODO: Move this to a constant somewhere in common. 
- TextureConfig encoding_texture_config(EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, - true); - m_encoding_render_texture = g_renderer->CreateTexture(encoding_texture_config); - m_encoding_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Readback, encoding_texture_config); - ASSERT(m_encoding_render_texture && m_encoding_readback_texture); + m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); + ASSERT(m_encoding_render_texture); // Create constant buffer for uploading data to shaders D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), D3D11_BIND_CONSTANT_BUFFER); @@ -71,9 +66,9 @@ void PSTextureEncoder::Shutdown() } void PSTextureEncoder::Encode( - u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, - u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, - bool clamp_top, bool clamp_bottom, + AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { // Resolve MSAA targets before copying. 
@@ -133,14 +128,7 @@ void PSTextureEncoder::Encode( // Copy to staging buffer MathUtil::Rectangle copy_rect(0, 0, words_per_row, num_blocks_y); - m_encoding_readback_texture->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, - copy_rect); - m_encoding_readback_texture->Flush(); - if (m_encoding_readback_texture->Map()) - { - m_encoding_readback_texture->ReadTexels(copy_rect, dst, memory_stride); - m_encoding_readback_texture->Unmap(); - } + dst->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); } g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h index 4054b07fbf..43f153c4cf 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h @@ -38,9 +38,9 @@ public: void Init(); void Shutdown(); - void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); private: @@ -48,7 +48,6 @@ private: ID3D11Buffer* m_encode_params = nullptr; std::unique_ptr m_encoding_render_texture; - std::unique_ptr m_encoding_readback_texture; std::map m_encoding_shaders; }; } diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index 41ad435817..f17d12f4c3 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -31,8 +31,8 @@ namespace DX11 { static std::unique_ptr g_encoder; -void 
TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, +void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h index 24dda22d60..1bfa34c045 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ b/Source/Core/VideoBackends/D3D/TextureCache.h @@ -22,18 +22,11 @@ public: ~TextureCache(); private: - u64 EncodeToRamFromTexture(u32 address, void* source_texture, u32 SourceW, u32 SourceH, - bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, - const EFBRectangle& source) - { - return 0; - }; - void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, TLUTFormat format) override; - void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) override; diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h index 25803344a4..648871f626 100644 --- a/Source/Core/VideoBackends/Null/TextureCache.h +++ b/Source/Core/VideoBackends/Null/TextureCache.h @@ -25,8 +25,8 @@ public: { } - void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const 
EFBRectangle& src_rect, + void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) override { diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index 10f191448e..4fbbb39935 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -159,8 +159,8 @@ void main() //#define TIME_TEXTURE_DECODING 1 -void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, +void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h index 54dc4afef8..89fc82ee9d 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.h +++ b/Source/Core/VideoBackends/OGL/TextureCache.h @@ -63,8 +63,8 @@ private: void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, TLUTFormat format) override; - void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& 
filter_coefficients) override; diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp index 699e988196..095365140f 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp @@ -46,7 +46,6 @@ struct EncodingProgram std::map s_encoding_programs; std::unique_ptr s_encoding_render_texture; -std::unique_ptr s_encoding_readback_texture; const int renderBufferWidth = EFB_WIDTH * 4; const int renderBufferHeight = 1024; @@ -93,16 +92,11 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params) void Init() { - TextureConfig config(renderBufferWidth, renderBufferHeight, 1, 1, 1, AbstractTextureFormat::BGRA8, - true); - s_encoding_render_texture = g_renderer->CreateTexture(config); - s_encoding_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Readback, config); + s_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); } void Shutdown() { - s_encoding_readback_texture.reset(); s_encoding_render_texture.reset(); for (auto& program : s_encoding_programs) @@ -112,8 +106,9 @@ void Shutdown() // dst_line_size, writeStride in bytes -static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line_size, - u32 dstHeight, u32 writeStride, bool linearFilter, float y_scale) +static void EncodeToRamUsingShader(GLuint srcTexture, AbstractStagingTexture* destAddr, + u32 dst_line_size, u32 dstHeight, u32 writeStride, + bool linearFilter, float y_scale) { FramebufferManager::SetFramebuffer( static_cast(s_encoding_render_texture.get())->GetFramebuffer()); @@ -137,15 +132,14 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); MathUtil::Rectangle copy_rect(0, 0, dst_line_size / 4, dstHeight); - s_encoding_readback_texture->CopyFromTexture(s_encoding_render_texture.get(), copy_rect, 0, 0, - 
copy_rect); - s_encoding_readback_texture->ReadTexels(copy_rect, destAddr, writeStride); + + destAddr->CopyFromTexture(s_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); } -void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, - float gamma, float clamp_top, float clamp_bottom, +void EncodeToRamFromTexture(AbstractStagingTexture* dest, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, + u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma, float clamp_top, float clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { g_renderer->ResetAPIState(); @@ -165,7 +159,7 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ FramebufferManager::ResolveAndGetDepthTarget(src_rect) : FramebufferManager::ResolveAndGetRenderTarget(src_rect); - EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride, + EncodeToRamUsingShader(read_texture, dest, bytes_per_row, num_blocks_y, memory_stride, scale_by_half && !params.depth, y_scale); g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.h b/Source/Core/VideoBackends/OGL/TextureConverter.h index 0d7450e4b6..575fbf5bd0 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.h +++ b/Source/Core/VideoBackends/OGL/TextureConverter.h @@ -10,6 +10,9 @@ #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" +struct EFBCopyParams; +class AbstractStagingTexture; + namespace OGL { // Converts textures between formats using shaders @@ -21,7 +24,7 @@ void Shutdown(); // returns size of the encoded data (in bytes) void EncodeToRamFromTexture( - u8* dest_ptr, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, + 
AbstractStagingTexture* dest, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, float clamp_top, float clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); diff --git a/Source/Core/VideoBackends/Software/SWTexture.h b/Source/Core/VideoBackends/Software/SWTexture.h index 5c403f54ba..f9d5c50485 100644 --- a/Source/Core/VideoBackends/Software/SWTexture.h +++ b/Source/Core/VideoBackends/Software/SWTexture.h @@ -57,6 +57,8 @@ public: void Unmap() override; void Flush() override; + void SetMapStride(size_t stride) { m_map_stride = stride; } + private: std::vector m_data; }; diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h index f3d9c91938..06bfbf73fa 100644 --- a/Source/Core/VideoBackends/Software/TextureCache.h +++ b/Source/Core/VideoBackends/Software/TextureCache.h @@ -16,8 +16,8 @@ public: TLUTFormat format) override { } - void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) override { diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.cpp b/Source/Core/VideoBackends/Software/TextureEncoder.cpp index a31888e544..07eba31530 100644 --- a/Source/Core/VideoBackends/Software/TextureEncoder.cpp +++ b/Source/Core/VideoBackends/Software/TextureEncoder.cpp @@ -5,12 +5,14 @@ #include "VideoBackends/Software/TextureEncoder.h" #include "Common/Align.h" +#include "Common/Assert.h" #include "Common/CommonFuncs.h" #include 
"Common/CommonTypes.h" #include "Common/MsgHandler.h" #include "Common/Swap.h" #include "VideoBackends/Software/EfbInterface.h" +#include "VideoBackends/Software/SWTexture.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/LookUpTables.h" @@ -1468,18 +1470,26 @@ void EncodeEfbCopy(u8* dst, const EFBCopyParams& params, u32 native_width, u32 b } } -void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma) +void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + bool scale_by_half, float y_scale, float gamma) { + // HACK: Override the memory stride for this staging texture with new copy stride. + // This is required because the texture encoder assumes that we're writing directly to memory, + // and each row is tightly packed with no padding, whereas our encoding abstract texture has + // a width of 2560. When we copy the texture back later on, it'll use the tightly packed stride. 
+ ASSERT(memory_stride <= (dst->GetConfig().width * dst->GetTexelSize())); + static_cast<SW::SWStagingTexture*>(dst)->SetMapStride(memory_stride); + if (params.copy_format == EFBCopyFormat::XFB) { - EfbInterface::EncodeXFB(dst, native_width, src_rect, y_scale, gamma); + EfbInterface::EncodeXFB(reinterpret_cast<u8*>(dst->GetMappedPointer()), native_width, src_rect, + y_scale, gamma); } else { - EncodeEfbCopy(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, - scale_by_half); + EncodeEfbCopy(reinterpret_cast<u8*>(dst->GetMappedPointer()), params, native_width, + bytes_per_row, num_blocks_y, memory_stride, src_rect, scale_by_half); } } } diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.h b/Source/Core/VideoBackends/Software/TextureEncoder.h index 20aa05605a..705fa2e81b 100644 --- a/Source/Core/VideoBackends/Software/TextureEncoder.h +++ b/Source/Core/VideoBackends/Software/TextureEncoder.h @@ -10,7 +10,7 @@ namespace TextureEncoder { -void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma); +void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + bool scale_by_half, float y_scale, float gamma); } diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp index 334b3f9b1a..dd61cce377 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp @@ -98,8 +98,8 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } -void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, +void TextureCache::CopyEFB(AbstractStagingTexture* 
dst, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.h b/Source/Core/VideoBackends/Vulkan/TextureCache.h index 846761d1d5..fd6b57981a 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.h +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.h @@ -36,8 +36,8 @@ public: void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, TLUTFormat format) override; - void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) override; diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp index da40b08667..c5acdc8e7f 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp +++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp @@ -207,10 +207,10 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, } void TextureConverter::EncodeTextureToMemory( - VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) + VkImageView src_texture, AbstractStagingTexture* dest, const 
EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, + bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { VkShaderModule shader = GetEncodingShader(params); if (shader == VK_NULL_HANDLE) @@ -273,9 +273,7 @@ void TextureConverter::EncodeTextureToMemory( draw.EndRenderPass(); MathUtil::Rectangle copy_rect(0, 0, render_width, render_height); - m_encoding_readback_texture->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, - copy_rect); - m_encoding_readback_texture->ReadTexels(copy_rect, dest_ptr, memory_stride); + dest->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); } bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format) @@ -610,14 +608,8 @@ VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyParams& params) bool TextureConverter::CreateEncodingTexture() { - TextureConfig config(ENCODING_TEXTURE_WIDTH, ENCODING_TEXTURE_HEIGHT, 1, 1, 1, - ENCODING_TEXTURE_FORMAT, true); - - m_encoding_render_texture = g_renderer->CreateTexture(config); - m_encoding_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Readback, config); - - return m_encoding_render_texture && m_encoding_readback_texture; + m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); + return m_encoding_render_texture != nullptr; } bool TextureConverter::CreateDecodingTexture() diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.h b/Source/Core/VideoBackends/Vulkan/TextureConverter.h index f85efc4d5c..9c035296b2 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.h +++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.h @@ -21,7 +21,6 @@ class AbstractStagingTexture; namespace Vulkan { -class StagingTexture2D; class Texture2D; class VKTexture; 
@@ -38,14 +37,12 @@ public: TextureCache::TCacheEntry* src_entry, const void* palette, TLUTFormat palette_format); - // Uses an encoding shader to copy src_texture to dest_ptr. - // NOTE: Executes the current command buffer. - void - EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, - float gamma, bool clamp_top, bool clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); + // Uses an encoding shader to copy src_texture to dest. + void EncodeTextureToMemory( + VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, + bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format); void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry, @@ -54,9 +51,6 @@ public: const u8* palette, TLUTFormat palette_format); private: - static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4; - static const u32 ENCODING_TEXTURE_HEIGHT = 1024; - static const AbstractTextureFormat ENCODING_TEXTURE_FORMAT = AbstractTextureFormat::BGRA8; static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3; // Maximum size of a texture based on BP registers. 
@@ -100,7 +94,6 @@ private: // Texture encoding - RGBA8->GX format in memory std::map m_encoding_shaders; std::unique_ptr m_encoding_render_texture; - std::unique_ptr m_encoding_readback_texture; // Texture decoding - GX format in memory->RGBA8 struct TextureDecodingPipeline diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp index b3c111d5a2..63b88195c0 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp @@ -466,6 +466,7 @@ void VKStagingTexture::CopyFromTexture(Texture2D* src, const MathUtil::Rectangle m_needs_flush = false; g_command_buffer_mgr->RemoveFencePointCallback( this); + m_staging_buffer->InvalidateCPUCache(); }); } diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index eb43f70af5..7712421f57 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -177,6 +177,7 @@ static void BPWritten(const BPCmd& bp) switch (bp.newvalue & 0xFF) { case 0x02: + g_texture_cache->FlushEFBCopies(); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetFinish(); // may generate interrupt DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF)); @@ -188,11 +189,13 @@ static void BPWritten(const BPCmd& bp) } return; case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID + g_texture_cache->FlushEFBCopies(); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetToken(static_cast(bp.newvalue & 0xFFFF), false); DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF)); return; case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID + g_texture_cache->FlushEFBCopies(); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetToken(static_cast(bp.newvalue & 0xFFFF), true); DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF)); diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 
70025ec720..448b195a62 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -724,6 +724,10 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const // state changes the specialized shader will not take over. g_vertex_manager->InvalidatePipelineObject(); + // Flush any outstanding EFB copies to RAM, in case the game is running at an uncapped frame + // rate and not waiting for vblank. Otherwise, we'd end up with a huge list of pending copies. + g_texture_cache->FlushEFBCopies(); + Core::Callback_VideoCopiedToXFB(true); } diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index f40fdfda4a..74d1d8a51e 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -28,6 +28,7 @@ #include "Core/FifoPlayer/FifoRecorder.h" #include "Core/HW/Memmap.h" +#include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/Debugger.h" #include "VideoCommon/FramebufferManagerBase.h" @@ -89,6 +90,7 @@ TextureCacheBase::TextureCacheBase() void TextureCacheBase::Invalidate() { + FlushEFBCopies(); InvalidateAllBindPoints(); for (size_t i = 0; i < bound_textures.size(); ++i) { @@ -1693,35 +1695,6 @@ void TextureCacheBase::CopyRenderTargetToTexture( const u32 bytes_per_row = num_blocks_x * bytes_per_block; const u32 covered_range = num_blocks_y * dstStride; - if (copy_to_ram) - { - CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); - PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format; - EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, - NeedsCopyFilterInShader(coefficients)); - CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf, - y_scale, gamma, clamp_top, clamp_bottom, coefficients); - } - else - { - if (is_xfb_copy) - { - UninitializeXFBMemory(dst, dstStride, 
bytes_per_row, num_blocks_y); - } - else - { - // Hack: Most games don't actually need the correct texture data in RAM - // and we can just keep a copy in VRAM. We zero the memory so we - // can check it hasn't changed before using our copy in VRAM. - u8* ptr = dst; - for (u32 i = 0; i < num_blocks_y; i++) - { - memset(ptr, 0, bytes_per_row); - ptr += dstStride; - } - } - } - if (g_bRecordFifoData) { // Mark the memory behind this efb copy as dynamicly generated for the Fifo log @@ -1775,7 +1748,9 @@ void TextureCacheBase::CopyRenderTargetToTexture( (!strided_efb_copy && entry->size_in_bytes == overlap_range) || (strided_efb_copy && entry->size_in_bytes == overlap_range && entry->addr == dstAddr)) { - iter.first = InvalidateTexture(iter.first); + // Pending EFB copies which are completely covered by this new copy can simply be tossed, + // instead of having to flush them later on, since this copy will write over everything. + iter.first = InvalidateTexture(iter.first, true); continue; } entry->may_have_overlapping_textures = true; @@ -1804,6 +1779,7 @@ void TextureCacheBase::CopyRenderTargetToTexture( ++iter.first; } + TCacheEntry* entry = nullptr; if (copy_to_vram) { // create the texture @@ -1813,8 +1789,7 @@ void TextureCacheBase::CopyRenderTargetToTexture( config.height = scaled_tex_h; config.layers = FramebufferManagerBase::GetEFBLayers(); - TCacheEntry* entry = AllocateCacheEntry(config); - + entry = AllocateCacheEntry(config); if (entry) { entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy); @@ -1836,9 +1811,6 @@ void TextureCacheBase::CopyRenderTargetToTexture( clamp_top, clamp_bottom, GetVRAMCopyFilterCoefficients(filter_coefficients)); - u64 hash = entry->CalculateHash(); - entry->SetHashes(hash, hash); - if (g_ActiveConfig.bDumpEFBTarget && !is_xfb_copy) { static int efb_count = 0; @@ -1860,6 +1832,134 @@ void TextureCacheBase::CopyRenderTargetToTexture( textures_by_address.emplace(dstAddr, entry); } } + + if (copy_to_ram) + { + 
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); + PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format; + EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, + NeedsCopyFilterInShader(coefficients)); + + std::unique_ptr staging_texture = GetEFBCopyStagingTexture(); + if (staging_texture) + { + CopyEFB(staging_texture.get(), format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, + scaleByHalf, y_scale, gamma, clamp_top, clamp_bottom, coefficients); + + // We can't defer if there is no VRAM copy (since we need to update the hash). + if (!copy_to_vram || !g_ActiveConfig.bDeferEFBCopies) + { + // Immediately flush it. + WriteEFBCopyToRAM(dst, bytes_per_row / sizeof(u32), num_blocks_y, dstStride, + std::move(staging_texture)); + } + else + { + // Defer the flush until later. + entry->pending_efb_copy = std::move(staging_texture); + entry->pending_efb_copy_width = bytes_per_row / sizeof(u32); + entry->pending_efb_copy_height = num_blocks_y; + entry->pending_efb_copy_invalidated = false; + m_pending_efb_copies.push_back(entry); + } + } + } + else + { + if (is_xfb_copy) + { + UninitializeXFBMemory(dst, dstStride, bytes_per_row, num_blocks_y); + } + else + { + // Hack: Most games don't actually need the correct texture data in RAM + // and we can just keep a copy in VRAM. We zero the memory so we + // can check it hasn't changed before using our copy in VRAM. + u8* ptr = dst; + for (u32 i = 0; i < num_blocks_y; i++) + { + std::memset(ptr, 0, bytes_per_row); + ptr += dstStride; + } + } + } + + // Even if the copy is deferred, still compute the hash. This way if the copy is used as a texture + // in a subsequent draw before it is flushed, it will have the same hash. 
+ if (entry) + { + const u64 hash = entry->CalculateHash(); + entry->SetHashes(hash, hash); + } +} + +void TextureCacheBase::FlushEFBCopies() +{ + if (m_pending_efb_copies.empty()) + return; + + for (TCacheEntry* entry : m_pending_efb_copies) + FlushEFBCopy(entry); + m_pending_efb_copies.clear(); +} + +TextureConfig TextureCacheBase::GetEncodingTextureConfig() +{ + return TextureConfig(EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, true); +} + +void TextureCacheBase::WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride, + std::unique_ptr staging_texture) +{ + MathUtil::Rectangle copy_rect(0, 0, static_cast(width), static_cast(height)); + staging_texture->ReadTexels(copy_rect, dst_ptr, stride); + ReleaseEFBCopyStagingTexture(std::move(staging_texture)); +} + +void TextureCacheBase::FlushEFBCopy(TCacheEntry* entry) +{ + // Copy from texture -> guest memory. + u8* const dst = Memory::GetPointer(entry->addr); + WriteEFBCopyToRAM(dst, entry->pending_efb_copy_width, entry->pending_efb_copy_height, + entry->memory_stride, std::move(entry->pending_efb_copy)); + + // If the EFB copy was invalidated (e.g. the bloom case mentioned in InvalidateTexture), + // now is the time to clean up the TCacheEntry. In which case, we don't need to compute + // the new hash of the RAM copy. + if (entry->pending_efb_copy_invalidated) + { + auto config = entry->texture->GetConfig(); + texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture))); + return; + } + + // Re-hash the texture now that the guest memory is populated. + // This should be safe because we'll catch any writes before the game can modify it. + const u64 hash = entry->CalculateHash(); + entry->SetHashes(hash, hash); +} + +std::unique_ptr TextureCacheBase::GetEFBCopyStagingTexture() +{ + // Pull off the back first to re-use the most frequently used textures. 
+ if (!m_efb_copy_staging_texture_pool.empty()) + { + auto ptr = std::move(m_efb_copy_staging_texture_pool.back()); + m_efb_copy_staging_texture_pool.pop_back(); + return ptr; + } + + std::unique_ptr tex = + g_renderer->CreateStagingTexture(StagingTextureType::Readback, GetEncodingTextureConfig()); + if (!tex) + WARN_LOG(VIDEO, "Failed to create EFB copy staging texture"); + + return tex; +} + +void TextureCacheBase::ReleaseEFBCopyStagingTexture(std::unique_ptr tex) +{ + m_efb_copy_staging_texture_pool.push_back(std::move(tex)); } void TextureCacheBase::UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, @@ -1989,7 +2089,7 @@ TextureCacheBase::FindOverlappingTextures(u32 addr, u32 size_in_bytes) } TextureCacheBase::TexAddrCache::iterator -TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter) +TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pending_efb_copy) { if (iter == textures_by_address.end()) return textures_by_address.end(); @@ -2014,6 +2114,33 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter) } } + // If this is a pending EFB copy, we don't want to flush it here. + // Why? Because let's say a game is rendering a bloom-type effect, using EFB copies to essentially + // downscale the framebuffer. Copy from EFB->Texture, draw texture to EFB, copy EFB->Texture, + // draw, repeat. The second copy will invalidate the first, forcing a flush. Which means we lose + // any benefit of EFB copy batching. So instead, let's just leave the EFB copy pending, but remove + // it from the texture cache. This way we don't use the old VRAM copy. When the EFB copies are + // eventually flushed, they will overwrite each other, and the end result should be the same. + if (entry->pending_efb_copy) + { + if (discard_pending_efb_copy) + { + // If the RAM copy is being completely overwritten by a new EFB copy, we can discard the + // existing pending copy, and not bother waiting for it in the future. 
This happens in + // Xenoblade's sunset scene, where 35 copies are done per frame, and 25 of them are + // copied to the same address, and can be skipped. + ReleaseEFBCopyStagingTexture(std::move(entry->pending_efb_copy)); + auto pending_it = std::find(m_pending_efb_copies.begin(), m_pending_efb_copies.end(), entry); + if (pending_it != m_pending_efb_copies.end()) + m_pending_efb_copies.erase(pending_it); + } + else + { + entry->pending_efb_copy_invalidated = true; + return textures_by_address.erase(iter); + } + } + auto config = entry->texture->GetConfig(); texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture))); diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 1c84662065..c797a9a4f0 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "Common/CommonTypes.h" #include "VideoCommon/AbstractTexture.h" @@ -22,6 +23,7 @@ #include "VideoCommon/VideoCommon.h" struct VideoConfig; +class AbstractStagingTexture; struct TextureAndTLUTFormat { @@ -149,6 +151,12 @@ public: // * partially updated textures which refer to this efb copy std::unordered_set references; + // Pending EFB copy + std::unique_ptr pending_efb_copy; + u32 pending_efb_copy_width = 0; + u32 pending_efb_copy_height = 0; + bool pending_efb_copy_invalidated = false; + explicit TCacheEntry(std::unique_ptr tex); ~TCacheEntry(); @@ -216,10 +224,10 @@ public: void Invalidate(); - virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, - bool clamp_bottom, + virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + const EFBRectangle& src_rect, bool 
scale_by_half, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) = 0; virtual bool CompileShaders() = 0; @@ -278,6 +286,12 @@ public: void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height); + // Flushes all pending EFB copies to emulated RAM. + void FlushEFBCopies(); + + // Returns a texture config suitable for drawing a RAM EFB copy into. + static TextureConfig GetEncodingTextureConfig(); + protected: TextureCacheBase(); @@ -329,7 +343,8 @@ private: const CopyFilterCoefficientArray& filter_coefficients) = 0; // Removes and unlinks texture from texture cache and returns it to the pool - TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter); + TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter, + bool discard_pending_efb_copy = false); void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y); @@ -339,6 +354,17 @@ private: CopyFilterCoefficientArray GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const; + // Flushes a pending EFB copy to RAM from the host to the guest RAM. + void WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride, + std::unique_ptr staging_texture); + void FlushEFBCopy(TCacheEntry* entry); + + // Returns a staging texture of the maximum EFB copy size. + std::unique_ptr GetEFBCopyStagingTexture(); + + // Returns an EFB copy staging texture to the pool, so it can be re-used. + void ReleaseEFBCopyStagingTexture(std::unique_ptr tex); + TexAddrCache textures_by_address; TexHashCache textures_by_hash; TexPool texture_pool; @@ -360,6 +386,13 @@ private: bool arbitrary_mipmap_detection; }; BackupConfig backup_config = {}; + + // Pool of readback textures used for deferred EFB copies. + std::vector> m_efb_copy_staging_texture_pool; + + // List of pending EFB copies. 
It is important that the order is preserved for these, + // so that overlapping textures are written to guest RAM in the order they are issued. + std::vector m_pending_efb_copies; }; extern std::unique_ptr g_texture_cache; diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 5fb5d843cc..b4da204fe4 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -141,6 +141,7 @@ void VideoConfig::Refresh() bSkipEFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM); bSkipXFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM); bDisableCopyToVRAM = Config::Get(Config::GFX_HACK_DISABLE_COPY_TO_VRAM); + bDeferEFBCopies = Config::Get(Config::GFX_HACK_DEFER_EFB_COPIES); bImmediateXFB = Config::Get(Config::GFX_HACK_IMMEDIATE_XFB); bCopyEFBScaled = Config::Get(Config::GFX_HACK_COPY_EFB_SCALED); bEFBEmulateFormatChanges = Config::Get(Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index ec5563de74..054fc9680a 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -120,6 +120,7 @@ struct VideoConfig final bool bSkipEFBCopyToRam; bool bSkipXFBCopyToRam; bool bDisableCopyToVRAM; + bool bDeferEFBCopies; bool bImmediateXFB; bool bCopyEFBScaled; int iSafeTextureCache_ColorSamples;