diff --git a/Data/Sys/GameSettings/NAL.ini b/Data/Sys/GameSettings/NAL.ini index 459ee44e13..a03b2d8a22 100644 --- a/Data/Sys/GameSettings/NAL.ini +++ b/Data/Sys/GameSettings/NAL.ini @@ -14,3 +14,8 @@ [Video_Stereoscopy] StereoConvergence = 5000 + +[Video_Settings] +# This game creates a large number of EFB copies at different addresses, resulting +# in a large texture cache which takes considerable time to save. +SaveTextureCacheToState = False \ No newline at end of file diff --git a/Data/Sys/GameSettings/NAT.ini b/Data/Sys/GameSettings/NAT.ini new file mode 100644 index 0000000000..ede4c31766 --- /dev/null +++ b/Data/Sys/GameSettings/NAT.ini @@ -0,0 +1,18 @@ +# NATJ01, NATP01, NATE01 - Mario Tennis (Virtual Console) + +[Core] +# Values set here will override the main Dolphin settings. + +[OnLoad] +# Add memory patches to be loaded once on boot here. + +[OnFrame] +# Add memory patches to be applied every frame here. + +[ActionReplay] +# Add action replay cheats here. + +[Video_Settings] +# This game creates a large number of EFB copies at different addresses, resulting +# in a large texture cache which takes considerable time to save. 
+SaveTextureCacheToState = False \ No newline at end of file diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 30ed2d6959..83baf5ae05 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -91,6 +91,8 @@ const ConfigInfo GFX_SHADER_COMPILER_THREADS{ {System::GFX, "Settings", "ShaderCompilerThreads"}, 1}; const ConfigInfo GFX_SHADER_PRECOMPILER_THREADS{ {System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1}; +const ConfigInfo GFX_SAVE_TEXTURE_CACHE_TO_STATE{ + {System::GFX, "Settings", "SaveTextureCacheToState"}, true}; const ConfigInfo GFX_SW_ZCOMPLOC{{System::GFX, "Settings", "SWZComploc"}, true}; const ConfigInfo GFX_SW_ZFREEZE{{System::GFX, "Settings", "SWZFreeze"}, true}; diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 1946e704fc..dc8d501fab 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -67,6 +67,7 @@ extern const ConfigInfo GFX_WAIT_FOR_SHADERS_BEFORE_STARTING; extern const ConfigInfo GFX_SHADER_COMPILATION_MODE; extern const ConfigInfo GFX_SHADER_COMPILER_THREADS; extern const ConfigInfo GFX_SHADER_PRECOMPILER_THREADS; +extern const ConfigInfo GFX_SAVE_TEXTURE_CACHE_TO_STATE; extern const ConfigInfo GFX_SW_ZCOMPLOC; extern const ConfigInfo GFX_SW_ZFREEZE; diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index b60693000b..15711a65b8 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -90,6 +90,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location) Config::GFX_SHADER_COMPILATION_MODE.location, Config::GFX_SHADER_COMPILER_THREADS.location, Config::GFX_SHADER_PRECOMPILER_THREADS.location, + Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE.location, 
Config::GFX_SW_ZCOMPLOC.location, Config::GFX_SW_ZFREEZE.location, diff --git a/Source/Core/Core/Core.cpp b/Source/Core/Core/Core.cpp index bdd3fcaf10..f1e3161220 100644 --- a/Source/Core/Core/Core.cpp +++ b/Source/Core/Core/Core.cpp @@ -21,6 +21,7 @@ #include "Common/CPUDetect.h" #include "Common/CommonPaths.h" #include "Common/CommonTypes.h" +#include "Common/Event.h" #include "Common/FileUtil.h" #include "Common/Flag.h" #include "Common/Logging/LogManager.h" @@ -110,6 +111,7 @@ struct HostJob }; static std::mutex s_host_jobs_lock; static std::queue s_host_jobs_queue; +static Common::Event s_cpu_thread_job_finished; static thread_local bool tls_is_cpu_thread = false; @@ -433,6 +435,7 @@ static void EmuThread(std::unique_ptr boot, WindowSystemInfo wsi Common::ScopeGuard movie_guard{Movie::Shutdown}; HW::Init(); + Common::ScopeGuard hw_guard{[] { // We must set up this flag before executing HW::Shutdown() s_hardware_initialized = false; @@ -771,6 +774,45 @@ void RunAsCPUThread(std::function function) PauseAndLock(false, was_unpaused); } +void RunOnCPUThread(std::function function, bool wait_for_completion) +{ + // If the CPU thread is not running, assume there is no active CPU thread we can race against. + if (!IsRunning() || IsCPUThread()) + { + function(); + return; + } + + // Pause the CPU (set it to stepping mode). + const bool was_running = PauseAndLock(true, true); + + // Queue the job function. + if (wait_for_completion) + { + // Trigger the event after executing the function. + s_cpu_thread_job_finished.Reset(); + CPU::AddCPUThreadJob([&function]() { + function(); + s_cpu_thread_job_finished.Set(); + }); + } + else + { + CPU::AddCPUThreadJob(std::move(function)); + } + + // Release the CPU thread, and let it execute the callback. + PauseAndLock(false, was_running); + + // If we're waiting for completion, block until the event fires. + if (wait_for_completion) + { + // Periodically yield to the UI thread, so we don't deadlock. 
+ while (!s_cpu_thread_job_finished.WaitFor(std::chrono::milliseconds(10))) + Host_YieldToUI(); + } +} + // Display FPS info // This should only be called from VI void VideoThrottle() diff --git a/Source/Core/Core/Core.h b/Source/Core/Core/Core.h index fdd30a539a..26e30a2d8a 100644 --- a/Source/Core/Core/Core.h +++ b/Source/Core/Core/Core.h @@ -82,6 +82,10 @@ void UpdateTitle(); // This should only be called from the CPU thread or the host thread. void RunAsCPUThread(std::function function); +// Run a function on the CPU thread, optionally blocking until it has finished executing. +// This is only valid to call from the host thread, since it uses PauseAndLock() internally. +void RunOnCPUThread(std::function function, bool wait_for_completion); + // for calling back into UI code without introducing a dependency on it in core using StateChangedCallbackFunc = std::function; void SetOnStateChangedCallback(StateChangedCallbackFunc callback); diff --git a/Source/Core/Core/HW/CPU.cpp b/Source/Core/Core/HW/CPU.cpp index a1b21ea7a2..b8243a0452 100644 --- a/Source/Core/Core/HW/CPU.cpp +++ b/Source/Core/Core/HW/CPU.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "AudioCommon/AudioCommon.h" #include "Common/CommonTypes.h" @@ -44,6 +45,7 @@ static bool s_state_paused_and_locked = false; static bool s_state_system_request_stepping = false; static bool s_state_cpu_step_instruction = false; static Common::Event* s_state_cpu_step_instruction_sync = nullptr; +static std::queue> s_pending_jobs; void Init(PowerPC::CPUCore cpu_core) { @@ -60,6 +62,9 @@ void Shutdown() // Requires holding s_state_change_lock static void FlushStepSyncEventLocked() { + if (!s_state_cpu_step_instruction) + return; + if (s_state_cpu_step_instruction_sync) { s_state_cpu_step_instruction_sync->Set(); @@ -68,12 +73,25 @@ static void FlushStepSyncEventLocked() s_state_cpu_step_instruction = false; } +static void ExecutePendingJobs(std::unique_lock& state_lock) +{ + while (!s_pending_jobs.empty()) + { + auto callback = 
s_pending_jobs.front(); + s_pending_jobs.pop(); + state_lock.unlock(); + callback(); + state_lock.lock(); + } +} + void Run() { std::unique_lock state_lock(s_state_change_lock); while (s_state != State::PowerDown) { s_state_cpu_cvar.wait(state_lock, [] { return !s_state_paused_and_locked; }); + ExecutePendingJobs(state_lock); switch (s_state) { @@ -108,8 +126,10 @@ void Run() case State::Stepping: // Wait for step command. - s_state_cpu_cvar.wait(state_lock, - [] { return s_state_cpu_step_instruction || !IsStepping(); }); + s_state_cpu_cvar.wait(state_lock, [&state_lock] { + ExecutePendingJobs(state_lock); + return s_state_cpu_step_instruction || !IsStepping(); + }); if (!IsStepping()) { // Signal event if the mode changes. @@ -330,4 +350,11 @@ bool PauseAndLock(bool do_lock, bool unpause_on_unlock, bool control_adjacent) } return was_unpaused; } + +void AddCPUThreadJob(std::function function) +{ + std::unique_lock state_lock(s_state_change_lock); + s_pending_jobs.push(std::move(function)); +} + } // namespace CPU diff --git a/Source/Core/Core/HW/CPU.h b/Source/Core/Core/HW/CPU.h index 408b82ace2..26ffa6783c 100644 --- a/Source/Core/Core/HW/CPU.h +++ b/Source/Core/Core/HW/CPU.h @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #pragma once +#include namespace Common { @@ -74,4 +75,8 @@ const State* GetStatePtr(); // "control_adjacent" causes PauseAndLock to behave like EnableStepping by modifying the // state of the Audio and FIFO subsystems as well. bool PauseAndLock(bool do_lock, bool unpause_on_unlock = true, bool control_adjacent = false); + +// Adds a job to be executed on the CPU thread. This should be combined with PauseAndLock(), +// as while the CPU is in the run loop, it won't execute the function. 
+void AddCPUThreadJob(std::function function); } // namespace CPU diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index 42a2fa792b..cd4544b8f1 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -63,7 +63,7 @@ static AfterLoadCallbackFunc s_on_after_load_callback; // Temporary undo state buffer static std::vector g_undo_load_buffer; static std::vector g_current_buffer; -static int g_loadDepth = 0; +static bool s_load_or_save_in_progress; static std::mutex g_cs_undo_load_buffer; static std::mutex g_cs_current_buffer; @@ -72,7 +72,7 @@ static Common::Event g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 110; // Last changed in PR 8036 +static const u32 STATE_VERSION = 111; // Last changed in PR 6321 // Maps savestate versions to Dolphin versions. // Versions after 42 don't need to be added to this list, @@ -170,6 +170,11 @@ static void DoState(PointerWrap& p) return; } + // Movie must be done before the video backend, because the window is redrawn in the video backend + // state load, and the frame number must be up-to-date. 
+ Movie::DoState(p); + p.DoMarker("Movie"); + // Begin with video backend, so that it gets a chance to clear its caches and writeback modified // things to RAM g_video_backend->DoState(p); @@ -186,8 +191,6 @@ static void DoState(PointerWrap& p) if (SConfig::GetInstance().bWii) Wiimote::DoState(p); p.DoMarker("Wiimote"); - Movie::DoState(p); - p.DoMarker("Movie"); Gecko::DoState(p); p.DoMarker("Gecko"); @@ -204,27 +207,31 @@ void LoadFromBuffer(std::vector& buffer) return; } - Core::RunAsCPUThread([&] { - u8* ptr = &buffer[0]; - PointerWrap p(&ptr, PointerWrap::MODE_READ); - DoState(p); - }); + Core::RunOnCPUThread( + [&] { + u8* ptr = &buffer[0]; + PointerWrap p(&ptr, PointerWrap::MODE_READ); + DoState(p); + }, + true); } void SaveToBuffer(std::vector& buffer) { - Core::RunAsCPUThread([&] { - u8* ptr = nullptr; - PointerWrap p(&ptr, PointerWrap::MODE_MEASURE); + Core::RunOnCPUThread( + [&] { + u8* ptr = nullptr; + PointerWrap p(&ptr, PointerWrap::MODE_MEASURE); - DoState(p); - const size_t buffer_size = reinterpret_cast(ptr); - buffer.resize(buffer_size); + DoState(p); + const size_t buffer_size = reinterpret_cast(ptr); + buffer.resize(buffer_size); - ptr = &buffer[0]; - p.SetMode(PointerWrap::MODE_WRITE); - DoState(p); - }); + ptr = &buffer[0]; + p.SetMode(PointerWrap::MODE_WRITE); + DoState(p); + }, + true); } // return state number not in map @@ -381,42 +388,51 @@ static void CompressAndDumpState(CompressAndDumpState_args save_args) void SaveAs(const std::string& filename, bool wait) { - Core::RunAsCPUThread([&] { - // Measure the size of the buffer. - u8* ptr = nullptr; - PointerWrap p(&ptr, PointerWrap::MODE_MEASURE); - DoState(p); - const size_t buffer_size = reinterpret_cast(ptr); + if (s_load_or_save_in_progress) + return; - // Then actually do the write. 
- { - std::lock_guard lk(g_cs_current_buffer); - g_current_buffer.resize(buffer_size); - ptr = &g_current_buffer[0]; - p.SetMode(PointerWrap::MODE_WRITE); - DoState(p); - } + s_load_or_save_in_progress = true; - if (p.GetMode() == PointerWrap::MODE_WRITE) - { - Core::DisplayMessage("Saving State...", 1000); + Core::RunOnCPUThread( + [&] { + // Measure the size of the buffer. + u8* ptr = nullptr; + PointerWrap p(&ptr, PointerWrap::MODE_MEASURE); + DoState(p); + const size_t buffer_size = reinterpret_cast(ptr); - CompressAndDumpState_args save_args; - save_args.buffer_vector = &g_current_buffer; - save_args.buffer_mutex = &g_cs_current_buffer; - save_args.filename = filename; - save_args.wait = wait; + // Then actually do the write. + { + std::lock_guard lk(g_cs_current_buffer); + g_current_buffer.resize(buffer_size); + ptr = &g_current_buffer[0]; + p.SetMode(PointerWrap::MODE_WRITE); + DoState(p); + } - Flush(); - g_save_thread = std::thread(CompressAndDumpState, save_args); - g_compressAndDumpStateSyncEvent.Wait(); - } - else - { - // someone aborted the save by changing the mode? - Core::DisplayMessage("Unable to save: Internal DoState Error", 4000); - } - }); + if (p.GetMode() == PointerWrap::MODE_WRITE) + { + Core::DisplayMessage("Saving State...", 1000); + + CompressAndDumpState_args save_args; + save_args.buffer_vector = &g_current_buffer; + save_args.buffer_mutex = &g_cs_current_buffer; + save_args.filename = filename; + save_args.wait = wait; + + Flush(); + g_save_thread = std::thread(CompressAndDumpState, save_args); + g_compressAndDumpStateSyncEvent.Wait(); + } + else + { + // someone aborted the save by changing the mode? 
+ Core::DisplayMessage("Unable to save: Internal DoState Error", 4000); + } + }, + true); + + s_load_or_save_in_progress = false; } bool ReadHeader(const std::string& filename, StateHeader& header) @@ -515,7 +531,7 @@ static void LoadFileStateData(const std::string& filename, std::vector& ret_ void LoadAs(const std::string& filename) { - if (!Core::IsRunning()) + if (!Core::IsRunning() || s_load_or_save_in_progress) { return; } @@ -525,64 +541,65 @@ void LoadAs(const std::string& filename) return; } - Core::RunAsCPUThread([&] { - g_loadDepth++; + s_load_or_save_in_progress = true; - // Save temp buffer for undo load state - if (!Movie::IsJustStartingRecordingInputFromSaveState()) - { - std::lock_guard lk(g_cs_undo_load_buffer); - SaveToBuffer(g_undo_load_buffer); - if (Movie::IsMovieActive()) - Movie::SaveRecording(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"); - else if (File::Exists(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm")) - File::Delete(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"); - } + Core::RunOnCPUThread( + [&] { + // Save temp buffer for undo load state + if (!Movie::IsJustStartingRecordingInputFromSaveState()) + { + std::lock_guard lk(g_cs_undo_load_buffer); + SaveToBuffer(g_undo_load_buffer); + if (Movie::IsMovieActive()) + Movie::SaveRecording(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"); + else if (File::Exists(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm")) + File::Delete(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"); + } - bool loaded = false; - bool loadedSuccessfully = false; + bool loaded = false; + bool loadedSuccessfully = false; - // brackets here are so buffer gets freed ASAP - { - std::vector buffer; - LoadFileStateData(filename, buffer); + // brackets here are so buffer gets freed ASAP + { + std::vector buffer; + LoadFileStateData(filename, buffer); - if (!buffer.empty()) - { - u8* ptr = &buffer[0]; - PointerWrap p(&ptr, PointerWrap::MODE_READ); - DoState(p); - loaded = true; - loadedSuccessfully = (p.GetMode() 
== PointerWrap::MODE_READ); - } - } + if (!buffer.empty()) + { + u8* ptr = &buffer[0]; + PointerWrap p(&ptr, PointerWrap::MODE_READ); + DoState(p); + loaded = true; + loadedSuccessfully = (p.GetMode() == PointerWrap::MODE_READ); + } + } - if (loaded) - { - if (loadedSuccessfully) - { - Core::DisplayMessage(StringFromFormat("Loaded state from %s", filename.c_str()), 2000); - if (File::Exists(filename + ".dtm")) - Movie::LoadInput(filename + ".dtm"); - else if (!Movie::IsJustStartingRecordingInputFromSaveState() && - !Movie::IsJustStartingPlayingInputFromSaveState()) - Movie::EndPlayInput(false); - } - else - { - Core::DisplayMessage("The savestate could not be loaded", OSD::Duration::NORMAL); + if (loaded) + { + if (loadedSuccessfully) + { + Core::DisplayMessage(StringFromFormat("Loaded state from %s", filename.c_str()), 2000); + if (File::Exists(filename + ".dtm")) + Movie::LoadInput(filename + ".dtm"); + else if (!Movie::IsJustStartingRecordingInputFromSaveState() && + !Movie::IsJustStartingPlayingInputFromSaveState()) + Movie::EndPlayInput(false); + } + else + { + Core::DisplayMessage("The savestate could not be loaded", OSD::Duration::NORMAL); - // since we could be in an inconsistent state now (and might crash or whatever), undo. - if (g_loadDepth < 2) - UndoLoadState(); - } - } + // since we could be in an inconsistent state now (and might crash or whatever), undo. 
+ UndoLoadState(); + } + } - if (s_on_after_load_callback) - s_on_after_load_callback(); + if (s_on_after_load_callback) + s_on_after_load_callback(); + }, + true); - g_loadDepth--; - }); + s_load_or_save_in_progress = false; } void SetOnAfterLoadCallback(AfterLoadCallbackFunc callback) diff --git a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp index 319b7eb309..a615f60896 100644 --- a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp @@ -100,10 +100,13 @@ void HacksWidget::CreateWidgets() m_disable_bounding_box = new GraphicsBool(tr("Disable Bounding Box"), Config::GFX_HACK_BBOX_ENABLE, true); m_vertex_rounding = new GraphicsBool(tr("Vertex Rounding"), Config::GFX_HACK_VERTEX_ROUDING); + m_save_texture_cache_state = + new GraphicsBool(tr("Save Texture Cache to State"), Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE); other_layout->addWidget(m_fast_depth_calculation, 0, 0); other_layout->addWidget(m_disable_bounding_box, 0, 1); other_layout->addWidget(m_vertex_rounding, 1, 0); + other_layout->addWidget(m_save_texture_cache_state, 1, 1); main_layout->addWidget(efb_box); main_layout->addWidget(texture_cache_box); @@ -244,6 +247,10 @@ void HacksWidget::AddDescriptions() static const char TR_DISABLE_BOUNDINGBOX_DESCRIPTION[] = QT_TR_NOOP("Disables bounding box emulation.\n\nThis may improve GPU performance " "significantly, but some games will break.\n\nIf unsure, leave this checked."); + static const char TR_SAVE_TEXTURE_CACHE_TO_STATE_DESCRIPTION[] = QT_TR_NOOP( + "Includes the contents of the embedded frame buffer (EFB) and upscaled EFB copies " + "in save states. 
Fixes missing and/or non-upscaled textures/objects when loading " + "states at the cost of additional save/load time.\n\nIf unsure, leave this checked."); static const char TR_VERTEX_ROUNDING_DESCRIPTION[] = QT_TR_NOOP("Rounds 2D vertices to whole pixels.\n\nFixes graphical problems in some games at " "higher internal resolutions. This setting has no effect when native internal " @@ -259,6 +266,7 @@ void HacksWidget::AddDescriptions() AddDescription(m_gpu_texture_decoding, TR_GPU_DECODING_DESCRIPTION); AddDescription(m_fast_depth_calculation, TR_FAST_DEPTH_CALC_DESCRIPTION); AddDescription(m_disable_bounding_box, TR_DISABLE_BOUNDINGBOX_DESCRIPTION); + AddDescription(m_save_texture_cache_state, TR_SAVE_TEXTURE_CACHE_TO_STATE_DESCRIPTION); AddDescription(m_vertex_rounding, TR_VERTEX_ROUNDING_DESCRIPTION); } diff --git a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h index 47de3c0287..d46cb67932 100644 --- a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h @@ -42,6 +42,7 @@ private: QCheckBox* m_fast_depth_calculation; QCheckBox* m_disable_bounding_box; QCheckBox* m_vertex_rounding; + QCheckBox* m_save_texture_cache_state; QCheckBox* m_defer_efb_copies; void CreateWidgets(); diff --git a/Source/Core/VideoCommon/AsyncRequests.cpp b/Source/Core/VideoCommon/AsyncRequests.cpp index 11a37afdfa..a824d0020a 100644 --- a/Source/Core/VideoCommon/AsyncRequests.cpp +++ b/Source/Core/VideoCommon/AsyncRequests.cpp @@ -11,6 +11,7 @@ #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoCommon.h" +#include "VideoCommon/VideoState.h" AsyncRequests AsyncRequests::s_singleton; @@ -154,6 +155,10 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e) case Event::PERF_QUERY: g_perf_query->FlushResults(); break; + + case Event::DO_SAVE_STATE: + VideoCommon_DoState(*e.do_save_state.p); + break; } } diff 
--git a/Source/Core/VideoCommon/AsyncRequests.h b/Source/Core/VideoCommon/AsyncRequests.h index acd665b3b1..dc81667586 100644 --- a/Source/Core/VideoCommon/AsyncRequests.h +++ b/Source/Core/VideoCommon/AsyncRequests.h @@ -13,6 +13,7 @@ #include "Common/Flag.h" struct EfbPokeData; +class PointerWrap; class AsyncRequests { @@ -28,6 +29,7 @@ public: SWAP_EVENT, BBOX_READ, PERF_QUERY, + DO_SAVE_STATE, } type; u64 time; @@ -64,6 +66,11 @@ public: struct { } perf_query; + + struct + { + PointerWrap* p; + } do_save_state; }; }; diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 12fc46b209..fdfca161b9 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -68,9 +68,6 @@ static void BPWritten(const BPCmd& bp) ---------------------------------------------------------------------------------------------------------------- */ - // check for invalid state, else unneeded configuration are built - g_video_backend->CheckInvalidState(); - if (((s32*)&bpmem)[bp.address] == bp.newvalue) { if (!(bp.address == BPMEM_TRIGGER_EFB_COPY || bp.address == BPMEM_CLEARBBOX1 || diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 36a9ee7307..f63aaff805 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -299,14 +299,15 @@ void RunGpuLoop() [] { const SConfig& param = SConfig::GetInstance(); + // Run events from the CPU thread. + AsyncRequests::GetInstance()->PullEvents(); + // Do nothing while paused if (!s_emu_running_state.IsSet()) return; if (s_use_deterministic_gpu_thread) { - AsyncRequests::GetInstance()->PullEvents(); - // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder. 
u8* seen_ptr = s_video_buffer_seen_ptr; u8* write_ptr = s_video_buffer_write_ptr; @@ -321,9 +322,6 @@ void RunGpuLoop() else { CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo; - - AsyncRequests::GetInstance()->PullEvents(); - CommandProcessor::SetCPStatusFromGPU(); // check if we are able to run this buffer diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp index 6f4e132c06..f61ebcf4a9 100644 --- a/Source/Core/VideoCommon/FramebufferManager.cpp +++ b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -7,8 +7,10 @@ #include "VideoCommon/FramebufferShaderGen.h" #include "VideoCommon/VertexManagerBase.h" +#include "Common/ChunkFile.h" #include "Common/Logging/Log.h" #include "Common/MsgHandler.h" +#include "Core/Config/GraphicsSettings.h" #include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractPipeline.h" #include "VideoCommon/AbstractShader.h" @@ -464,6 +466,20 @@ bool FramebufferManager::CompileReadbackPipelines() return false; } + // EFB restore pipeline + auto restore_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, FramebufferShaderGen::GenerateEFBRestorePixelShader()); + if (!restore_shader) + return false; + + config.framebuffer_state = GetEFBFramebufferState(); + config.framebuffer_state.per_sample_shading = false; + config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader(); + config.pixel_shader = restore_shader.get(); + m_efb_restore_pipeline = g_renderer->CreatePipeline(config); + if (!m_efb_restore_pipeline) + return false; + return true; } @@ -842,3 +858,107 @@ void FramebufferManager::DestroyPokePipelines() m_color_poke_pipeline.reset(); m_poke_vertex_format.reset(); } + +void FramebufferManager::DoState(PointerWrap& p) +{ + FlushEFBPokes(); + + bool save_efb_state = Config::Get(Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE); + p.Do(save_efb_state); + if (!save_efb_state) + return; + + if (p.GetMode() == PointerWrap::MODE_WRITE || p.GetMode() 
== PointerWrap::MODE_MEASURE) + DoSaveState(p); + else + DoLoadState(p); +} + +void FramebufferManager::DoSaveState(PointerWrap& p) +{ + // For multisampling, we need to resolve first before we can save. + // This won't be bit-exact when loading, which could cause interesting rendering side-effects for + // a frame. But whatever, MSAA doesn't exactly behave that well anyway. + AbstractTexture* color_texture = ResolveEFBColorTexture(m_efb_color_texture->GetRect()); + AbstractTexture* depth_texture = ResolveEFBDepthTexture(m_efb_depth_texture->GetRect()); + + // We don't want to save these as rendertarget textures, just the data itself when deserializing. + const TextureConfig color_texture_config(color_texture->GetWidth(), color_texture->GetHeight(), + color_texture->GetLevels(), color_texture->GetLayers(), + 1, GetEFBColorFormat(), 0); + g_texture_cache->SerializeTexture(color_texture, color_texture_config, p); + + if (GetEFBDepthFormat() == AbstractTextureFormat::D32F) + { + const TextureConfig depth_texture_config( + depth_texture->GetWidth(), depth_texture->GetHeight(), depth_texture->GetLevels(), + depth_texture->GetLayers(), 1, + AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()), 0); + g_texture_cache->SerializeTexture(depth_texture, depth_texture_config, p); + } + else + { + // If the EFB is backed by a D24S8 texture, we first have to convert it to R32F. 
+ const TextureConfig temp_texture_config(depth_texture->GetWidth(), depth_texture->GetHeight(), + depth_texture->GetLevels(), depth_texture->GetLayers(), + 1, AbstractTextureFormat::R32F, + AbstractTextureFlag_RenderTarget); + std::unique_ptr temp_texture = g_renderer->CreateTexture(temp_texture_config); + std::unique_ptr temp_fb = + g_renderer->CreateFramebuffer(temp_texture.get(), nullptr); + if (temp_texture && temp_fb) + { + g_renderer->ScaleTexture(temp_fb.get(), temp_texture->GetRect(), depth_texture, + depth_texture->GetRect()); + + const TextureConfig depth_texture_config( + depth_texture->GetWidth(), depth_texture->GetHeight(), depth_texture->GetLevels(), + depth_texture->GetLayers(), 1, temp_texture->GetFormat(), 0); + g_texture_cache->SerializeTexture(depth_texture, depth_texture_config, p); + } + else + { + PanicAlert("Failed to create temp texture for depth saving"); + g_texture_cache->SerializeTexture(color_texture, color_texture_config, p); + } + } +} + +void FramebufferManager::DoLoadState(PointerWrap& p) +{ + // Invalidate any peek cache tiles. + InvalidatePeekCache(true); + + // Deserialize the color and depth textures. This could fail. + auto color_tex = g_texture_cache->DeserializeTexture(p); + auto depth_tex = g_texture_cache->DeserializeTexture(p); + + // If the stereo mode is different in the save state, throw it away. + if (!color_tex || !depth_tex || + color_tex->texture->GetLayers() != m_efb_color_texture->GetLayers()) + { + WARN_LOG(VIDEO, "Failed to deserialize EFB contents. Clearing instead."); + g_renderer->SetAndClearFramebuffer( + m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}}, + g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 1.0f : 0.0f); + return; + } + + // Size differences are okay here, since the linear filtering will downscale/upscale it. + // Depth buffer is always point sampled, since we don't want to interpolate depth values. 
+ const bool rescale = color_tex->texture->GetWidth() != m_efb_color_texture->GetWidth() || + color_tex->texture->GetHeight() != m_efb_color_texture->GetHeight(); + + // Draw the deserialized textures over the EFB. + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_efb_framebuffer.get()); + g_renderer->SetViewportAndScissor(m_efb_framebuffer->GetRect()); + g_renderer->SetPipeline(m_efb_restore_pipeline.get()); + g_renderer->SetTexture(0, color_tex->texture.get()); + g_renderer->SetTexture(1, depth_tex->texture.get()); + g_renderer->SetSamplerState(0, rescale ? RenderState::GetLinearSamplerState() : + RenderState::GetPointSamplerState()); + g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); +} diff --git a/Source/Core/VideoCommon/FramebufferManager.h b/Source/Core/VideoCommon/FramebufferManager.h index b97d45b31e..b4ae99361d 100644 --- a/Source/Core/VideoCommon/FramebufferManager.h +++ b/Source/Core/VideoCommon/FramebufferManager.h @@ -17,6 +17,7 @@ #include "VideoCommon/TextureConfig.h" class NativeVertexFormat; +class PointerWrap; enum class EFBReinterpretType { @@ -95,6 +96,9 @@ public: void PokeEFBDepth(u32 x, u32 y, float depth); void FlushEFBPokes(); + // Save state load/save. 
+ void DoState(PointerWrap& p); + protected: struct EFBPokeVertex { @@ -145,6 +149,9 @@ protected: void DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count, const AbstractPipeline* pipeline); + void DoLoadState(PointerWrap& p); + void DoSaveState(PointerWrap& p); + std::unique_ptr m_efb_color_texture; std::unique_ptr m_efb_convert_color_texture; std::unique_ptr m_efb_depth_texture; @@ -156,6 +163,9 @@ protected: std::unique_ptr m_efb_depth_resolve_framebuffer; std::unique_ptr m_efb_depth_resolve_pipeline; + // Pipeline for restoring the contents of the EFB from a save state + std::unique_ptr m_efb_restore_pipeline; + // Format conversion shaders std::array, 6> m_format_conversion_pipelines; diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp index 5789c0a0d2..00ff753d92 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -644,4 +644,24 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF return ss.str(); } +std::string GenerateEFBRestorePixelShader() +{ + std::stringstream ss; + EmitSamplerDeclarations(ss, 0, 2, false); + EmitPixelMainDeclaration(ss, 1, 0, "float4", + GetAPIType() == APIType::D3D ? "out float depth : SV_Depth, " : ""); + ss << "{\n"; + ss << " float3 coords = float3(v_tex0.x, " + << (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin ? "1.0 - " : "") + << "v_tex0.y, v_tex0.z);\n"; + ss << " ocol0 = "; + EmitSampleTexture(ss, 0, "coords"); + ss << ";\n"; + ss << " " << (GetAPIType() == APIType::D3D ? 
"depth" : "gl_FragDepth") << " = "; + EmitSampleTexture(ss, 1, "coords"); + ss << ".r;\n"; + ss << "}\n"; + return ss.str(); +} + } // namespace FramebufferShaderGen diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.h b/Source/Core/VideoCommon/FramebufferShaderGen.h index b0134b5897..2ec50b4d76 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.h +++ b/Source/Core/VideoCommon/FramebufferShaderGen.h @@ -30,5 +30,6 @@ std::string GenerateEFBPokeVertexShader(); std::string GenerateColorPixelShader(); std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples); std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format); +std::string GenerateEFBRestorePixelShader(); } // namespace FramebufferShaderGen diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 40b6f9d51e..48b8e4cfd3 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -25,6 +25,7 @@ #include #include "Common/Assert.h" +#include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/Config/Config.h" #include "Common/Event.h" @@ -1324,8 +1325,11 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6 } // Update our last xfb values - m_last_xfb_width = (fb_width < 1 || fb_width > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_width; - m_last_xfb_height = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? 
MAX_XFB_HEIGHT : fb_height; + m_last_xfb_addr = xfb_addr; + m_last_xfb_ticks = ticks; + m_last_xfb_width = fb_width; + m_last_xfb_stride = fb_stride; + m_last_xfb_height = fb_height; } else { @@ -1681,6 +1685,27 @@ bool Renderer::UseVertexDepthRange() const return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f; } +void Renderer::DoState(PointerWrap& p) +{ + p.Do(m_aspect_wide); + p.Do(m_frame_count); + p.Do(m_prev_efb_format); + p.Do(m_last_xfb_ticks); + p.Do(m_last_xfb_addr); + p.Do(m_last_xfb_width); + p.Do(m_last_xfb_stride); + p.Do(m_last_xfb_height); + + if (p.GetMode() == PointerWrap::MODE_READ) + { + // Force the next xfb to be displayed. + m_last_xfb_id = std::numeric_limits::max(); + + // And actually display it. + Swap(m_last_xfb_addr, m_last_xfb_width, m_last_xfb_stride, m_last_xfb_height, m_last_xfb_ticks); + } +} + std::unique_ptr Renderer::CreateAsyncShaderCompiler() { return std::make_unique(); diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 584f07bc41..116080a1a8 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -41,6 +41,7 @@ class AbstractTexture; class AbstractStagingTexture; class NativeVertexFormat; class NetPlayChatUI; +class PointerWrap; struct TextureConfig; struct ComputePipelineConfig; struct AbstractPipelineConfig; @@ -237,6 +238,7 @@ public: void ChangeSurface(void* new_surface_handle); void ResizeSurface(); bool UseVertexDepthRange() const; + void DoState(PointerWrap& p); virtual std::unique_ptr CreateAsyncShaderCompiler(); @@ -356,9 +358,10 @@ private: // Tracking of XFB textures so we don't render duplicate frames. u64 m_last_xfb_id = std::numeric_limits::max(); - - // Note: Only used for auto-ir + u64 m_last_xfb_ticks = 0; + u32 m_last_xfb_addr = 0; u32 m_last_xfb_width = 0; + u32 m_last_xfb_stride = 0; u32 m_last_xfb_height = 0; // NOTE: The methods below are called on the framedumping thread. 
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index fb21603b6a..2f330bb2bc 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -15,6 +15,7 @@ #include "Common/Align.h" #include "Common/Assert.h" +#include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/FileUtil.h" #include "Common/Hash.h" @@ -23,6 +24,7 @@ #include "Common/MemoryUtil.h" #include "Common/StringUtil.h" +#include "Core/Config/GraphicsSettings.h" #include "Core/ConfigManager.h" #include "Core/FifoPlayer/FifoPlayer.h" #include "Core/FifoPlayer/FifoRecorder.h" @@ -404,6 +406,329 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer))); } +bool TextureCacheBase::CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format) +{ + if (m_readback_texture && m_readback_texture->GetConfig().width >= width && + m_readback_texture->GetConfig().height >= height && + m_readback_texture->GetConfig().format == format) + { + return true; + } + + TextureConfig staging_config(std::max(width, 128u), std::max(height, 128u), 1, 1, 1, format, 0); + m_readback_texture.reset(); + m_readback_texture = + g_renderer->CreateStagingTexture(StagingTextureType::Readback, staging_config); + return m_readback_texture != nullptr; +} + +void TextureCacheBase::SerializeTexture(AbstractTexture* tex, const TextureConfig& config, + PointerWrap& p) +{ + // If we're in measure mode, skip the actual readback to save some time. + const bool skip_readback = p.GetMode() == PointerWrap::MODE_MEASURE; + p.DoPOD(config); + + std::vector texture_data; + if (skip_readback || CheckReadbackTexture(config.width, config.height, config.format)) + { + // Save out each layer of the texture to the staging texture, and then + // append it onto the end of the vector. 
This gives us all the sub-images + // in one single buffer which can be written out to the save state. + for (u32 layer = 0; layer < config.layers; layer++) + { + for (u32 level = 0; level < config.levels; level++) + { + u32 level_width = std::max(config.width >> level, 1u); + u32 level_height = std::max(config.height >> level, 1u); + auto rect = tex->GetConfig().GetMipRect(level); + if (!skip_readback) + m_readback_texture->CopyFromTexture(tex, rect, layer, level, rect); + + size_t stride = AbstractTexture::CalculateStrideForFormat(config.format, level_width); + size_t size = stride * level_height; + size_t start = texture_data.size(); + texture_data.resize(texture_data.size() + size); + if (!skip_readback) + m_readback_texture->ReadTexels(rect, &texture_data[start], static_cast(stride)); + } + } + } + else + { + PanicAlert("Failed to create staging texture for serialization"); + } + + p.Do(texture_data); +} + +std::optional TextureCacheBase::DeserializeTexture(PointerWrap& p) +{ + TextureConfig config; + p.Do(config); + + std::vector texture_data; + p.Do(texture_data); + + if (p.GetMode() != PointerWrap::MODE_READ || texture_data.empty()) + return std::nullopt; + + auto tex = AllocateTexture(config); + if (!tex) + { + PanicAlert("Failed to create texture for deserialization"); + return std::nullopt; + } + + size_t start = 0; + for (u32 layer = 0; layer < config.layers; layer++) + { + for (u32 level = 0; level < config.levels; level++) + { + u32 level_width = std::max(config.width >> level, 1u); + u32 level_height = std::max(config.height >> level, 1u); + size_t stride = AbstractTexture::CalculateStrideForFormat(config.format, level_width); + size_t size = stride * level_height; + if ((start + size) > texture_data.size()) + { + ERROR_LOG(VIDEO, "Insufficient texture data for layer %u level %u", layer, level); + return tex; + } + + tex->texture->Load(level, level_width, level_height, level_width, &texture_data[start], size); + start += size; + } + } + + return tex; 
+} + +void TextureCacheBase::DoState(PointerWrap& p) +{ + // Flush all pending XFB copies before either loading or saving. + FlushEFBCopies(); + + p.Do(last_entry_id); + + if (p.GetMode() == PointerWrap::MODE_WRITE || p.GetMode() == PointerWrap::MODE_MEASURE) + DoSaveState(p); + else + DoLoadState(p); +} + +void TextureCacheBase::DoSaveState(PointerWrap& p) +{ + std::map entry_map; + std::vector entries_to_save; + auto ShouldSaveEntry = [](const TCacheEntry* entry) { + // We skip non-copies as they can be decoded from RAM when the state is loaded. + // Storing them would duplicate data in the save state file, adding to decompression time. + return entry->IsCopy(); + }; + auto AddCacheEntryToMap = [&entry_map, &entries_to_save, &p](TCacheEntry* entry) -> u32 { + auto iter = entry_map.find(entry); + if (iter != entry_map.end()) + return iter->second; + + // Since we are sequentially allocating texture entries, we need to save the textures in the + // same order they were collected. This is because of iterating both the address and hash maps. + // Therefore, the map is used for fast lookup, and the vector for ordering. + u32 id = static_cast(entry_map.size()); + entry_map.emplace(entry, id); + entries_to_save.push_back(entry); + return id; + }; + auto GetCacheEntryId = [&entry_map](const TCacheEntry* entry) -> std::optional { + auto iter = entry_map.find(entry); + return iter != entry_map.end() ? std::make_optional(iter->second) : std::nullopt; + }; + + // Transform the textures_by_address and textures_by_hash maps to a mapping + // of address/hash to entry ID. 
+ std::vector> textures_by_address_list; + std::vector> textures_by_hash_list; + if (Config::Get(Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE)) + { + for (const auto& it : textures_by_address) + { + if (ShouldSaveEntry(it.second)) + { + u32 id = AddCacheEntryToMap(it.second); + textures_by_address_list.push_back(std::make_pair(it.first, id)); + } + } + for (const auto& it : textures_by_hash) + { + if (ShouldSaveEntry(it.second)) + { + u32 id = AddCacheEntryToMap(it.second); + textures_by_hash_list.push_back(std::make_pair(it.first, id)); + } + } + } + + // Save the texture cache entries out in the order they were referenced. + u32 size = static_cast(entries_to_save.size()); + p.Do(size); + for (TCacheEntry* entry : entries_to_save) + { + g_texture_cache->SerializeTexture(entry->texture.get(), entry->texture->GetConfig(), p); + entry->DoState(p); + } + p.DoMarker("TextureCacheEntries"); + + // Save references for each cache entry. + // As references are circular, we need to have everything created before linking entries. 
+ std::set> reference_pairs; + for (const auto& it : entry_map) + { + const TCacheEntry* entry = it.first; + auto id1 = GetCacheEntryId(entry); + if (!id1) + continue; + + for (const TCacheEntry* referenced_entry : entry->references) + { + auto id2 = GetCacheEntryId(referenced_entry); + if (!id2) + continue; + + auto refpair1 = std::make_pair(*id1, *id2); + auto refpair2 = std::make_pair(*id2, *id1); + if (reference_pairs.count(refpair1) == 0 && reference_pairs.count(refpair2) == 0) + reference_pairs.insert(refpair1); + } + } + + size = static_cast(reference_pairs.size()); + p.Do(size); + for (const auto& it : reference_pairs) + { + p.Do(it.first); + p.Do(it.second); + } + + size = static_cast(textures_by_address_list.size()); + p.Do(size); + for (const auto& it : textures_by_address_list) + { + p.Do(it.first); + p.Do(it.second); + } + + size = static_cast(textures_by_hash_list.size()); + p.Do(size); + for (const auto& it : textures_by_hash_list) + { + p.Do(it.first); + p.Do(it.second); + } + + // Free the readback texture to potentially save host-mapped GPU memory, depending on where + // the driver mapped the staging buffer. + m_readback_texture.reset(); +} + +void TextureCacheBase::DoLoadState(PointerWrap& p) +{ + // Helper for getting a cache entry from an ID. + std::map id_map; + auto GetEntry = [&id_map](u32 id) { + auto iter = id_map.find(id); + return iter == id_map.end() ? nullptr : iter->second; + }; + + // Only clear out state when actually restoring/loading. + // Since we throw away entries when not in loading mode now, we don't need to check + // before inserting entries into the cache, as GetEntry will always return null. + const bool commit_state = p.GetMode() == PointerWrap::MODE_READ; + if (commit_state) + Invalidate(); + + // Preload all cache entries. + u32 size = 0; + p.Do(size); + for (u32 i = 0; i < size; i++) + { + // Even if the texture isn't valid, we still need to create the cache entry object + // to update the point in the save state. 
We'll just throw it away if it's invalid. + auto tex = g_texture_cache->DeserializeTexture(p).value_or(TexPoolEntry(nullptr, nullptr)); + TCacheEntry* entry = new TCacheEntry(std::move(tex.texture), std::move(tex.framebuffer)); + entry->textures_by_hash_iter = g_texture_cache->textures_by_hash.end(); + entry->DoState(p); + if (entry->texture && commit_state) + id_map.emplace(i, entry); + else + delete entry; + } + p.DoMarker("TextureCacheEntries"); + + // Link all cache entry references. + p.Do(size); + for (u32 i = 0; i < size; i++) + { + u32 id1 = 0, id2 = 0; + p.Do(id1); + p.Do(id2); + TCacheEntry* e1 = GetEntry(id1); + TCacheEntry* e2 = GetEntry(id2); + if (e1 && e2) + e1->CreateReference(e2); + } + + // Fill in address map. + p.Do(size); + for (u32 i = 0; i < size; i++) + { + u32 addr = 0; + u32 id = 0; + p.Do(addr); + p.Do(id); + + TCacheEntry* entry = GetEntry(id); + if (entry) + textures_by_address.emplace(addr, entry); + } + + // Fill in hash map. + p.Do(size); + for (u32 i = 0; i < size; i++) + { + u64 hash = 0; + u32 id = 0; + p.Do(hash); + p.Do(id); + + TCacheEntry* entry = GetEntry(id); + if (entry) + entry->textures_by_hash_iter = textures_by_hash.emplace(hash, entry); + } +} + +void TextureCacheBase::TCacheEntry::DoState(PointerWrap& p) +{ + p.Do(addr); + p.Do(size_in_bytes); + p.Do(base_hash); + p.Do(hash); + p.Do(format); + p.Do(memory_stride); + p.Do(is_efb_copy); + p.Do(is_custom_tex); + p.Do(may_have_overlapping_textures); + p.Do(tmem_only); + p.Do(has_arbitrary_mips); + p.Do(should_force_safe_hashing); + p.Do(is_xfb_copy); + p.Do(is_xfb_container); + p.Do(id); + p.Do(reference_changed); + p.Do(native_width); + p.Do(native_height); + p.Do(native_levels); + p.Do(frameCount); +} + TextureCacheBase::TCacheEntry* TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, TLUTFormat tlutfmt) diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 12b39039dd..5e5a28b34b 100644 --- 
a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -24,6 +24,7 @@ class AbstractFramebuffer; class AbstractStagingTexture; +class PointerWrap; struct VideoConfig; struct TextureAndTLUTFormat @@ -185,6 +186,17 @@ public: u32 GetNumLevels() const { return texture->GetConfig().levels; } u32 GetNumLayers() const { return texture->GetConfig().layers; } AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; } + void DoState(PointerWrap& p); + }; + + // Minimal version of TCacheEntry just for TexPool + struct TexPoolEntry + { + std::unique_ptr texture; + std::unique_ptr framebuffer; + int frameCount = FRAMECOUNT_INVALID; + + TexPoolEntry(std::unique_ptr tex, std::unique_ptr fb); }; TextureCacheBase(); @@ -224,6 +236,13 @@ public: // Flushes all pending EFB copies to emulated RAM. void FlushEFBCopies(); + // Texture Serialization + void SerializeTexture(AbstractTexture* tex, const TextureConfig& config, PointerWrap& p); + std::optional DeserializeTexture(PointerWrap& p); + + // Save States + void DoState(PointerWrap& p); + // Returns false if the top/bottom row coefficients are zero. static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients); @@ -256,15 +275,6 @@ protected: static std::bitset<8> valid_bind_points; private: - // Minimal version of TCacheEntry just for TexPool - struct TexPoolEntry - { - std::unique_ptr texture; - std::unique_ptr framebuffer; - int frameCount = FRAMECOUNT_INVALID; - - TexPoolEntry(std::unique_ptr tex, std::unique_ptr fb); - }; using TexAddrCache = std::multimap; using TexHashCache = std::multimap; using TexPool = std::unordered_multimap; @@ -319,6 +329,10 @@ private: // Returns an EFB copy staging texture to the pool, so it can be re-used. 
void ReleaseEFBCopyStagingTexture(std::unique_ptr tex); + bool CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format); + void DoSaveState(PointerWrap& p); + void DoLoadState(PointerWrap& p); + TexAddrCache textures_by_address; TexHashCache textures_by_hash; TexPool texture_pool; @@ -354,6 +368,11 @@ private: // List of pending EFB copies. It is important that the order is preserved for these, // so that overlapping textures are written to guest RAM in the order they are issued. std::vector m_pending_efb_copies; + + // Staging texture used for readbacks. + // We store this in the class so that the same staging texture can be used for multiple + // readbacks, saving the overhead of allocating a new buffer every time. + std::unique_ptr m_readback_texture; }; extern std::unique_ptr g_texture_cache; diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index c9fa12f6bb..f1150dc340 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -338,9 +338,6 @@ void VertexManagerBase::Flush() m_is_flushed = true; - // loading a state will invalidate BP, so check for it - g_video_backend->CheckInvalidState(); - #if defined(_DEBUG) || defined(DEBUGFAST) PRIM_LOG("frame%d:\n texgen=%u, numchan=%u, dualtex=%u, ztex=%u, cole=%u, alpe=%u, ze=%u", g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans, @@ -464,6 +461,16 @@ void VertexManagerBase::Flush() void VertexManagerBase::DoState(PointerWrap& p) { + if (p.GetMode() == PointerWrap::MODE_READ) + { + // Flush old vertex data before loading state. + Flush(); + + // Clear all caches that touch RAM + // (? these don't appear to touch any emulation state that gets saved. moved to on load only.) 
+ VertexLoaderManager::MarkAllDirty(); + } + p.Do(m_zslope); } diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index aa4cd8f6dd..8ee7eb1202 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -40,6 +40,7 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -236,41 +237,22 @@ void VideoBackendBase::PopulateBackendInfo() g_Config.Refresh(); } -// Run from the CPU thread void VideoBackendBase::DoState(PointerWrap& p) { - bool software = false; - p.Do(software); - - if (p.GetMode() == PointerWrap::MODE_READ && software == true) + if (!SConfig::GetInstance().bCPUThread) { - // change mode to abort load of incompatible save state. - p.SetMode(PointerWrap::MODE_VERIFY); + VideoCommon_DoState(p); + return; } - VideoCommon_DoState(p); - p.DoMarker("VideoCommon"); + AsyncRequests::Event ev = {}; + ev.do_save_state.p = &p; + ev.type = AsyncRequests::Event::DO_SAVE_STATE; + AsyncRequests::GetInstance()->PushEvent(ev, true); - // Refresh state. - if (p.GetMode() == PointerWrap::MODE_READ) - { - m_invalid = true; - - // Clear all caches that touch RAM - // (? these don't appear to touch any emulation state that gets saved. moved to on load only.) - VertexLoaderManager::MarkAllDirty(); - } -} - -void VideoBackendBase::CheckInvalidState() -{ - if (m_invalid) - { - m_invalid = false; - - BPReload(); - g_texture_cache->Invalidate(); - } + // Let the GPU thread sleep after loading the state, so we're not spinning if paused after loading + // a state. The next GP burst will wake it up again. 
+ Fifo::GpuMaySleep(); } void VideoBackendBase::InitializeShared() @@ -282,8 +264,6 @@ void VideoBackendBase::InitializeShared() // do not initialize again for the config window m_initialized = true; - m_invalid = false; - CommandProcessor::Init(); Fifo::Init(); OpcodeDecoder::Init(); diff --git a/Source/Core/VideoCommon/VideoBackendBase.h b/Source/Core/VideoCommon/VideoBackendBase.h index d1dada2247..0a248dbd70 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.h +++ b/Source/Core/VideoCommon/VideoBackendBase.h @@ -63,18 +63,14 @@ public: // Called by the UI thread when the graphics config is opened. static void PopulateBackendInfo(); - // the implementation needs not do synchronization logic, because calls to it are surrounded by - // PauseAndLock now + // Wrapper function which pushes the event to the GPU thread. void DoState(PointerWrap& p); - void CheckInvalidState(); - protected: void InitializeShared(); void ShutdownShared(); bool m_initialized = false; - bool m_invalid = false; }; extern std::vector> g_available_video_backends; diff --git a/Source/Core/VideoCommon/VideoState.cpp b/Source/Core/VideoCommon/VideoState.cpp index ab84e1b001..9b6418f98f 100644 --- a/Source/Core/VideoCommon/VideoState.cpp +++ b/Source/Core/VideoCommon/VideoState.cpp @@ -10,9 +10,12 @@ #include "VideoCommon/CPMemory.h" #include "VideoCommon/CommandProcessor.h" #include "VideoCommon/Fifo.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureDecoder.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" @@ -21,6 +24,15 @@ void VideoCommon_DoState(PointerWrap& p) { + bool software = false; + p.Do(software); + + if (p.GetMode() == PointerWrap::MODE_READ && software == true) + { + // change mode to abort load of 
incompatible save state. + p.SetMode(PointerWrap::MODE_VERIFY); + } + // BP Memory p.Do(bpmem); p.DoMarker("BP Memory"); @@ -63,5 +75,19 @@ void VideoCommon_DoState(PointerWrap& p) BoundingBox::DoState(p); p.DoMarker("BoundingBox"); - // TODO: search for more data that should be saved and add it here + g_framebuffer_manager->DoState(p); + p.DoMarker("FramebufferManager"); + + g_texture_cache->DoState(p); + p.DoMarker("TextureCache"); + + g_renderer->DoState(p); + p.DoMarker("Renderer"); + + // Refresh state. + if (p.GetMode() == PointerWrap::MODE_READ) + { + // Inform backend of new state from registers. + BPReload(); + } }