diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp
index ed72c57128..4e47306152 100644
--- a/Source/Core/Core/State.cpp
+++ b/Source/Core/Core/State.cpp
@@ -72,7 +72,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
 static std::thread g_save_thread;
 
 // Don't forget to increase this after doing changes on the savestate system
-static const u32 STATE_VERSION = 110;  // Last changed in PR 8036
+static const u32 STATE_VERSION = 111;  // Last changed in PR 6321
 
 // Maps savestate versions to Dolphin versions.
 // Versions after 42 don't need to be added to this list,
diff --git a/Source/Core/VideoCommon/AsyncRequests.cpp b/Source/Core/VideoCommon/AsyncRequests.cpp
index 67a3441af1..a824d0020a 100644
--- a/Source/Core/VideoCommon/AsyncRequests.cpp
+++ b/Source/Core/VideoCommon/AsyncRequests.cpp
@@ -11,6 +11,7 @@
 #include "VideoCommon/VertexManagerBase.h"
 #include "VideoCommon/VideoBackendBase.h"
 #include "VideoCommon/VideoCommon.h"
+#include "VideoCommon/VideoState.h"
 
 AsyncRequests AsyncRequests::s_singleton;
 
@@ -156,7 +157,7 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
     break;
 
   case Event::DO_SAVE_STATE:
-    g_video_backend->DoStateGPUThread(*e.do_save_state.p);
+    VideoCommon_DoState(*e.do_save_state.p);
     break;
   }
 }
diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
index 40b6f9d51e..48b8e4cfd3 100644
--- a/Source/Core/VideoCommon/RenderBase.cpp
+++ b/Source/Core/VideoCommon/RenderBase.cpp
@@ -25,6 +25,7 @@
 #include <imgui.h>
 
 #include "Common/Assert.h"
+#include "Common/ChunkFile.h"
 #include "Common/CommonTypes.h"
 #include "Common/Config/Config.h"
 #include "Common/Event.h"
@@ -1324,8 +1325,11 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
     }
 
     // Update our last xfb values
-    m_last_xfb_width = (fb_width < 1 || fb_width > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_width;
-    m_last_xfb_height = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fb_height;
+    m_last_xfb_addr = xfb_addr;
+    m_last_xfb_ticks = ticks;
+    m_last_xfb_width = fb_width;
+    m_last_xfb_stride = fb_stride;
+    m_last_xfb_height = fb_height;
   }
   else
   {
@@ -1681,6 +1685,29 @@ bool Renderer::UseVertexDepthRange() const
   return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f;
 }
 
+// Saves or restores the renderer fields listed below. After a load, Swap() is called again
+// with the restored XFB parameters so that the restored frame is presented.
+void Renderer::DoState(PointerWrap& p)
+{
+  p.Do(m_aspect_wide);
+  p.Do(m_frame_count);
+  p.Do(m_prev_efb_format);
+  p.Do(m_last_xfb_ticks);
+  p.Do(m_last_xfb_addr);
+  p.Do(m_last_xfb_width);
+  p.Do(m_last_xfb_stride);
+  p.Do(m_last_xfb_height);
+
+  if (p.GetMode() == PointerWrap::MODE_READ)
+  {
+    // Force the next xfb to be displayed.
+    m_last_xfb_id = std::numeric_limits<u64>::max();
+
+    // And actually display it.
+    Swap(m_last_xfb_addr, m_last_xfb_width, m_last_xfb_stride, m_last_xfb_height, m_last_xfb_ticks);
+  }
+}
+
 std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
 {
   return std::make_unique<VideoCommon::AsyncShaderCompiler>();
diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h
index 584f07bc41..116080a1a8 100644
--- a/Source/Core/VideoCommon/RenderBase.h
+++ b/Source/Core/VideoCommon/RenderBase.h
@@ -41,6 +41,7 @@ class AbstractTexture;
 class AbstractStagingTexture;
 class NativeVertexFormat;
 class NetPlayChatUI;
+class PointerWrap;
 struct TextureConfig;
 struct ComputePipelineConfig;
 struct AbstractPipelineConfig;
@@ -237,6 +238,7 @@ public:
   void ChangeSurface(void* new_surface_handle);
   void ResizeSurface();
   bool UseVertexDepthRange() const;
+  void DoState(PointerWrap& p);
 
   virtual std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler();
 
@@ -356,9 +358,10 @@ private:
 
   // Tracking of XFB textures so we don't render duplicate frames.
   u64 m_last_xfb_id = std::numeric_limits<u64>::max();
-
-  // Note: Only used for auto-ir
+  u64 m_last_xfb_ticks = 0;
+  u32 m_last_xfb_addr = 0;
   u32 m_last_xfb_width = 0;
+  u32 m_last_xfb_stride = 0;
   u32 m_last_xfb_height = 0;
 
   // NOTE: The methods below are called on the framedumping thread.
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp
index fb21603b6a..974b121c56 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -15,6 +15,7 @@
 
 #include "Common/Align.h"
 #include "Common/Assert.h"
+#include "Common/ChunkFile.h"
 #include "Common/CommonTypes.h"
 #include "Common/FileUtil.h"
 #include "Common/Hash.h"
@@ -404,6 +405,341 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e
       config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer)));
 }
 
+// Ensures m_readback_texture exists, is at least width x height, and has the given format,
+// (re)creating it when necessary. Returns false if the staging texture cannot be created.
+bool TextureCacheBase::CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format)
+{
+  if (m_readback_texture && m_readback_texture->GetConfig().width >= width &&
+      m_readback_texture->GetConfig().height >= height &&
+      m_readback_texture->GetConfig().format == format)
+  {
+    return true;
+  }
+
+  TextureConfig staging_config(std::max(width, 128u), std::max(height, 128u), 1, 1, 1, format, 0);
+  m_readback_texture.reset();
+  m_readback_texture =
+      g_renderer->CreateStagingTexture(StagingTextureType::Readback, staging_config);
+  return m_readback_texture != nullptr;
+}
+
+// Writes the texture config followed by the texel data of every layer and mip level to p.
+// In measure mode the buffer is sized but the GPU readback itself is skipped.
+void TextureCacheBase::SerializeTexture(AbstractTexture* tex, const TextureConfig& config,
+                                        PointerWrap& p)
+{
+  // If we're in measure mode, skip the actual readback to save some time.
+  const bool skip_readback = p.GetMode() == PointerWrap::MODE_MEASURE;
+  p.DoPOD(config);
+
+  std::vector<u8> texture_data;
+  if (skip_readback || CheckReadbackTexture(config.width, config.height, config.format))
+  {
+    // Save out each layer of the texture to the staging texture, and then
+    // append it onto the end of the vector. This gives us all the sub-images
+    // in one single buffer which can be written out to the save state.
+    for (u32 layer = 0; layer < config.layers; layer++)
+    {
+      for (u32 level = 0; level < config.levels; level++)
+      {
+        u32 level_width = std::max(config.width >> level, 1u);
+        u32 level_height = std::max(config.height >> level, 1u);
+        auto rect = tex->GetConfig().GetMipRect(level);
+        if (!skip_readback)
+          m_readback_texture->CopyFromTexture(tex, rect, layer, level, rect);
+
+        size_t stride = AbstractTexture::CalculateStrideForFormat(config.format, level_width);
+        size_t size = stride * level_height;
+        size_t start = texture_data.size();
+        texture_data.resize(texture_data.size() + size);
+        if (!skip_readback)
+          m_readback_texture->ReadTexels(rect, &texture_data[start], static_cast<u32>(stride));
+      }
+    }
+  }
+  else
+  {
+    PanicAlert("Failed to create staging texture for serialization");
+  }
+
+  p.Do(texture_data);
+}
+
+// Reads a texture written by SerializeTexture(). Always consumes the serialized data, but only
+// allocates and uploads a texture in read mode; returns std::nullopt otherwise, or when the
+// texture cannot be allocated.
+std::optional<TextureCacheBase::TexPoolEntry> TextureCacheBase::DeserializeTexture(PointerWrap& p)
+{
+  TextureConfig config;
+  p.Do(config);
+
+  std::vector<u8> texture_data;
+  p.Do(texture_data);
+
+  if (p.GetMode() != PointerWrap::MODE_READ)
+    return std::nullopt;
+
+  auto tex = AllocateTexture(config);
+  if (!tex)
+  {
+    PanicAlert("Failed to create texture for deserialization");
+    return std::nullopt;
+  }
+
+  size_t start = 0;
+  for (u32 layer = 0; layer < config.layers; layer++)
+  {
+    for (u32 level = 0; level < config.levels; level++)
+    {
+      u32 level_width = std::max(config.width >> level, 1u);
+      u32 level_height = std::max(config.height >> level, 1u);
+      size_t stride = AbstractTexture::CalculateStrideForFormat(config.format, level_width);
+      size_t size = stride * level_height;
+      if ((start + size) > texture_data.size())
+      {
+        ERROR_LOG(VIDEO, "Insufficient texture data for layer %u level %u", layer, level);
+        return tex;
+      }
+
+      tex->texture->Load(level, level_width, level_height, level_width, &texture_data[start], size);
+      start += size;
+    }
+  }
+
+  return tex;
+}
+
+// Saves or loads the texture cache, dispatching on the mode of p.
+void TextureCacheBase::DoState(PointerWrap& p)
+{
+  // Flush all pending XFB copies before either loading or saving.
+  FlushEFBCopies();
+
+  p.Do(last_entry_id);
+
+  if (p.GetMode() == PointerWrap::MODE_WRITE || p.GetMode() == PointerWrap::MODE_MEASURE)
+    DoSaveState(p);
+  else
+    DoLoadState(p);
+}
+
+// Serializes every copy entry together with the reference, address and hash tables that link
+// the entries to each other, using sequential IDs in place of pointers.
+void TextureCacheBase::DoSaveState(PointerWrap& p)
+{
+  std::map<const TCacheEntry*, u32> entry_map;
+  std::vector<TCacheEntry*> entries_to_save;
+  auto ShouldSaveEntry = [](const TCacheEntry* entry) {
+    // We skip non-copies as they can be decoded from RAM when the state is loaded.
+    // Storing them would duplicate data in the save state file, adding to decompression time.
+    return entry->IsCopy();
+  };
+  auto AddCacheEntryToMap = [&entry_map, &entries_to_save, &p](TCacheEntry* entry) -> u32 {
+    auto iter = entry_map.find(entry);
+    if (iter != entry_map.end())
+      return iter->second;
+
+    // Since we are sequentially allocating texture entries, we need to save the textures in the
+    // same order they were collected. This is because of iterating both the address and hash maps.
+    // Therefore, the map is used for fast lookup, and the vector for ordering.
+    u32 id = static_cast<u32>(entry_map.size());
+    entry_map.emplace(entry, id);
+    entries_to_save.push_back(entry);
+    return id;
+  };
+  auto GetCacheEntryId = [&entry_map](const TCacheEntry* entry) -> std::optional<u32> {
+    auto iter = entry_map.find(entry);
+    return iter != entry_map.end() ? std::make_optional(iter->second) : std::nullopt;
+  };
+
+  // Transform the textures_by_address and textures_by_hash maps to a mapping
+  // of address/hash to entry ID.
+  std::vector<std::pair<u32, u32>> textures_by_address_list;
+  std::vector<std::pair<u64, u32>> textures_by_hash_list;
+  for (const auto& it : textures_by_address)
+  {
+    if (ShouldSaveEntry(it.second))
+    {
+      u32 id = AddCacheEntryToMap(it.second);
+      textures_by_address_list.push_back(std::make_pair(it.first, id));
+    }
+  }
+  for (const auto& it : textures_by_hash)
+  {
+    if (ShouldSaveEntry(it.second))
+    {
+      u32 id = AddCacheEntryToMap(it.second);
+      textures_by_hash_list.push_back(std::make_pair(it.first, id));
+    }
+  }
+
+  // Save the texture cache entries out in the order they were referenced.
+  u32 size = static_cast<u32>(entries_to_save.size());
+  p.Do(size);
+  for (TCacheEntry* entry : entries_to_save)
+  {
+    g_texture_cache->SerializeTexture(entry->texture.get(), entry->texture->GetConfig(), p);
+    entry->DoState(p);
+  }
+  p.DoMarker("TextureCacheEntries");
+
+  // Save references for each cache entry.
+  // As references are circular, we need to have everything created before linking entries.
+  std::set<std::pair<u32, u32>> reference_pairs;
+  for (const auto& it : entry_map)
+  {
+    const TCacheEntry* entry = it.first;
+    auto id1 = GetCacheEntryId(entry);
+    if (!id1)
+      continue;
+
+    for (const TCacheEntry* referenced_entry : entry->references)
+    {
+      auto id2 = GetCacheEntryId(referenced_entry);
+      if (!id2)
+        continue;
+
+      auto refpair1 = std::make_pair(*id1, *id2);
+      auto refpair2 = std::make_pair(*id2, *id1);
+      if (reference_pairs.count(refpair1) == 0 && reference_pairs.count(refpair2) == 0)
+        reference_pairs.insert(refpair1);
+    }
+  }
+
+  size = static_cast<u32>(reference_pairs.size());
+  p.Do(size);
+  for (const auto& it : reference_pairs)
+  {
+    p.Do(it.first);
+    p.Do(it.second);
+  }
+
+  size = static_cast<u32>(textures_by_address_list.size());
+  p.Do(size);
+  for (const auto& it : textures_by_address_list)
+  {
+    p.Do(it.first);
+    p.Do(it.second);
+  }
+
+  size = static_cast<u32>(textures_by_hash_list.size());
+  p.Do(size);
+  for (const auto& it : textures_by_hash_list)
+  {
+    p.Do(it.first);
+    p.Do(it.second);
+  }
+
+  // Free the readback texture to potentially save host-mapped GPU memory, depending on where
+  // the driver mapped the staging buffer.
+  m_readback_texture.reset();
+}
+
+// Rebuilds the cache from the data written by DoSaveState(). The stream is always consumed in
+// full, but entries are only kept and linked into the cache when p is in read mode.
+void TextureCacheBase::DoLoadState(PointerWrap& p)
+{
+  // Helper for getting a cache entry from an ID.
+  std::map<u32, TCacheEntry*> id_map;
+  auto GetEntry = [&id_map](u32 id) {
+    auto iter = id_map.find(id);
+    return iter == id_map.end() ? nullptr : iter->second;
+  };
+
+  // Only clear out state when actually restoring/loading.
+  // Since we throw away entries when not in loading mode now, we don't need to check
+  // before inserting entries into the cache, as GetEntry will always return null.
+  const bool commit_state = p.GetMode() == PointerWrap::MODE_READ;
+  if (commit_state)
+    Invalidate();
+
+  // Preload all cache entries.
+  u32 size = 0;
+  p.Do(size);
+  for (u32 i = 0; i < size; i++)
+  {
+    // Even if the texture isn't valid, we still need to create the cache entry object
+    // to advance the read position in the save state. We'll just throw it away if it's invalid.
+    // DeserializeTexture() returns std::nullopt outside of read mode or when allocation fails,
+    // so fall back to an empty entry instead of dereferencing an empty optional.
+    auto tex = g_texture_cache->DeserializeTexture(p).value_or(TexPoolEntry(nullptr, nullptr));
+    TCacheEntry* entry = new TCacheEntry(std::move(tex.texture), std::move(tex.framebuffer));
+    entry->textures_by_hash_iter = g_texture_cache->textures_by_hash.end();
+    entry->DoState(p);
+    if (entry->texture && commit_state)
+      id_map.emplace(i, entry);
+    else
+      delete entry;
+  }
+  p.DoMarker("TextureCacheEntries");
+
+  // Link all cache entry references.
+  p.Do(size);
+  for (u32 i = 0; i < size; i++)
+  {
+    u32 id1 = 0, id2 = 0;
+    p.Do(id1);
+    p.Do(id2);
+    TCacheEntry* e1 = GetEntry(id1);
+    TCacheEntry* e2 = GetEntry(id2);
+    if (e1 && e2)
+      e1->CreateReference(e2);
+  }
+
+  // Fill in address map.
+  p.Do(size);
+  for (u32 i = 0; i < size; i++)
+  {
+    u32 addr = 0;
+    u32 id = 0;
+    p.Do(addr);
+    p.Do(id);
+
+    TCacheEntry* entry = GetEntry(id);
+    if (entry)
+      textures_by_address.emplace(addr, entry);
+  }
+
+  // Fill in hash map.
+  p.Do(size);
+  for (u32 i = 0; i < size; i++)
+  {
+    u64 hash = 0;
+    u32 id = 0;
+    p.Do(hash);
+    p.Do(id);
+
+    TCacheEntry* entry = GetEntry(id);
+    if (entry)
+      entry->textures_by_hash_iter = textures_by_hash.emplace(hash, entry);
+  }
+}
+
+// Saves or loads the plain-data fields of a cache entry (the texture is handled separately).
+void TextureCacheBase::TCacheEntry::DoState(PointerWrap& p)
+{
+  p.Do(addr);
+  p.Do(size_in_bytes);
+  p.Do(base_hash);
+  p.Do(hash);
+  p.Do(format);
+  p.Do(memory_stride);
+  p.Do(is_efb_copy);
+  p.Do(is_custom_tex);
+  p.Do(may_have_overlapping_textures);
+  p.Do(tmem_only);
+  p.Do(has_arbitrary_mips);
+  p.Do(should_force_safe_hashing);
+  p.Do(is_xfb_copy);
+  p.Do(is_xfb_container);
+  p.Do(id);
+  p.Do(reference_changed);
+  p.Do(native_width);
+  p.Do(native_height);
+  p.Do(native_levels);
+  p.Do(frameCount);
+}
+
 TextureCacheBase::TCacheEntry*
 TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
                                           TLUTFormat tlutfmt)
diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h
index 12b39039dd..5e5a28b34b 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@@ -24,6 +24,7 @@
 
 class AbstractFramebuffer;
 class AbstractStagingTexture;
+class PointerWrap;
 struct VideoConfig;
 
 struct TextureAndTLUTFormat
@@ -185,6 +186,17 @@ public:
     u32 GetNumLevels() const { return texture->GetConfig().levels; }
     u32 GetNumLayers() const { return texture->GetConfig().layers; }
     AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; }
+    void DoState(PointerWrap& p);
+  };
+
+  // Minimal version of TCacheEntry just for TexPool
+  struct TexPoolEntry
+  {
+    std::unique_ptr<AbstractTexture> texture;
+    std::unique_ptr<AbstractFramebuffer> framebuffer;
+    int frameCount = FRAMECOUNT_INVALID;
+
+    TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
   };
 
   TextureCacheBase();
@@ -224,6 +236,13 @@ public:
   // Flushes all pending EFB copies to emulated RAM.
   void FlushEFBCopies();
 
+  // Texture Serialization
+  void SerializeTexture(AbstractTexture* tex, const TextureConfig& config, PointerWrap& p);
+  std::optional<TexPoolEntry> DeserializeTexture(PointerWrap& p);
+
+  // Save States
+  void DoState(PointerWrap& p);
+
   // Returns false if the top/bottom row coefficients are zero.
   static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients);
 
@@ -256,15 +275,6 @@ protected:
   static std::bitset<8> valid_bind_points;
 
 private:
-  // Minimal version of TCacheEntry just for TexPool
-  struct TexPoolEntry
-  {
-    std::unique_ptr<AbstractTexture> texture;
-    std::unique_ptr<AbstractFramebuffer> framebuffer;
-    int frameCount = FRAMECOUNT_INVALID;
-
-    TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
-  };
   using TexAddrCache = std::multimap<u32, TCacheEntry*>;
   using TexHashCache = std::multimap<u64, TCacheEntry*>;
   using TexPool = std::unordered_multimap<TextureConfig, TexPoolEntry>;
@@ -319,6 +329,10 @@ private:
   // Returns an EFB copy staging texture to the pool, so it can be re-used.
   void ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex);
 
+  bool CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format);
+  void DoSaveState(PointerWrap& p);
+  void DoLoadState(PointerWrap& p);
+
   TexAddrCache textures_by_address;
   TexHashCache textures_by_hash;
   TexPool texture_pool;
@@ -354,6 +368,11 @@ private:
   // List of pending EFB copies. It is important that the order is preserved for these,
   // so that overlapping textures are written to guest RAM in the order they are issued.
   std::vector<TCacheEntry*> m_pending_efb_copies;
+
+  // Staging texture used for readbacks.
+  // We store this in the class so that the same staging texture can be used for multiple
+  // readbacks, saving the overhead of allocating a new buffer every time.
+  std::unique_ptr<AbstractStagingTexture> m_readback_texture;
 };
 
 extern std::unique_ptr<TextureCacheBase> g_texture_cache;
diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp
index 527a836d63..f1150dc340 100644
--- a/Source/Core/VideoCommon/VertexManagerBase.cpp
+++ b/Source/Core/VideoCommon/VertexManagerBase.cpp
@@ -461,6 +461,16 @@ void VertexManagerBase::Flush()
 
 void VertexManagerBase::DoState(PointerWrap& p)
 {
+  if (p.GetMode() == PointerWrap::MODE_READ)
+  {
+    // Flush old vertex data before loading state.
+    Flush();
+
+    // Clear all caches that touch RAM
+    // (? these don't appear to touch any emulation state that gets saved. moved to on load only.)
+    VertexLoaderManager::MarkAllDirty();
+  }
+
   p.Do(m_zslope);
 }
 
diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp
index ae628e5ab1..8ee7eb1202 100644
--- a/Source/Core/VideoCommon/VideoBackendBase.cpp
+++ b/Source/Core/VideoCommon/VideoBackendBase.cpp
@@ -241,7 +241,7 @@ void VideoBackendBase::DoState(PointerWrap& p)
 {
   if (!SConfig::GetInstance().bCPUThread)
   {
-    DoStateGPUThread(p);
+    VideoCommon_DoState(p);
     return;
   }
 
@@ -255,34 +255,6 @@ void VideoBackendBase::DoState(PointerWrap& p)
   Fifo::GpuMaySleep();
 }
 
-void VideoBackendBase::DoStateGPUThread(PointerWrap& p)
-{
-  bool software = false;
-  p.Do(software);
-
-  if (p.GetMode() == PointerWrap::MODE_READ && software == true)
-  {
-    // change mode to abort load of incompatible save state.
-    p.SetMode(PointerWrap::MODE_VERIFY);
-  }
-
-  VideoCommon_DoState(p);
-  p.DoMarker("VideoCommon");
-
-  // Refresh state.
-  if (p.GetMode() == PointerWrap::MODE_READ)
-  {
-    // Inform backend of new state from registers.
-    g_vertex_manager->Flush();
-    g_texture_cache->Invalidate();
-    BPReload();
-
-    // Clear all caches that touch RAM
-    // (? these don't appear to touch any emulation state that gets saved. moved to on load only.)
-    VertexLoaderManager::MarkAllDirty();
-  }
-}
-
 void VideoBackendBase::InitializeShared()
 {
   memset(&g_main_cp_state, 0, sizeof(g_main_cp_state));
diff --git a/Source/Core/VideoCommon/VideoBackendBase.h b/Source/Core/VideoCommon/VideoBackendBase.h
index 7e68d93147..0a248dbd70 100644
--- a/Source/Core/VideoCommon/VideoBackendBase.h
+++ b/Source/Core/VideoCommon/VideoBackendBase.h
@@ -66,9 +66,6 @@ public:
   // Wrapper function which pushes the event to the GPU thread.
   void DoState(PointerWrap& p);
 
-  // Function which handles the real state load/save logic.
-  void DoStateGPUThread(PointerWrap& p);
-
 protected:
   void InitializeShared();
   void ShutdownShared();
diff --git a/Source/Core/VideoCommon/VideoState.cpp b/Source/Core/VideoCommon/VideoState.cpp
index ab84e1b001..0fdeebcfa1 100644
--- a/Source/Core/VideoCommon/VideoState.cpp
+++ b/Source/Core/VideoCommon/VideoState.cpp
@@ -13,6 +13,8 @@
 #include "VideoCommon/GeometryShaderManager.h"
 #include "VideoCommon/PixelEngine.h"
 #include "VideoCommon/PixelShaderManager.h"
+#include "VideoCommon/RenderBase.h"
+#include "VideoCommon/TextureCacheBase.h"
 #include "VideoCommon/TextureDecoder.h"
 #include "VideoCommon/VertexManagerBase.h"
 #include "VideoCommon/VertexShaderManager.h"
@@ -21,6 +23,15 @@
 
 void VideoCommon_DoState(PointerWrap& p)
 {
+  bool software = false;
+  p.Do(software);
+
+  if (p.GetMode() == PointerWrap::MODE_READ && software == true)
+  {
+    // change mode to abort load of incompatible save state.
+    p.SetMode(PointerWrap::MODE_VERIFY);
+  }
+
   // BP Memory
   p.Do(bpmem);
   p.DoMarker("BP Memory");
@@ -63,5 +74,16 @@ void VideoCommon_DoState(PointerWrap& p)
   BoundingBox::DoState(p);
   p.DoMarker("BoundingBox");
 
-  // TODO: search for more data that should be saved and add it here
+  g_texture_cache->DoState(p);
+  p.DoMarker("TextureCache");
+
+  g_renderer->DoState(p);
+  p.DoMarker("Renderer");
+
+  // Refresh state.
+  if (p.GetMode() == PointerWrap::MODE_READ)
+  {
+    // Inform backend of new state from registers.
+    BPReload();
+  }
 }