diff --git a/app/src/main/cpp/skyline/common/android_settings.h b/app/src/main/cpp/skyline/common/android_settings.h index ec7318ec..b422fa51 100644 --- a/app/src/main/cpp/skyline/common/android_settings.h +++ b/app/src/main/cpp/skyline/common/android_settings.h @@ -45,6 +45,7 @@ namespace skyline { useDirectMemoryImport = ktSettings.GetBool("useDirectMemoryImport"); forceMaxGpuClocks = ktSettings.GetBool("forceMaxGpuClocks"); disableShaderCache = ktSettings.GetBool("disableShaderCache"); + freeGuestTextureMemory = ktSettings.GetBool("freeGuestTextureMemory"); enableFastGpuReadbackHack = ktSettings.GetBool("enableFastGpuReadbackHack"); enableFastReadbackWrites = ktSettings.GetBool("enableFastReadbackWrites"); disableSubgroupShuffle = ktSettings.GetBool("disableSubgroupShuffle"); diff --git a/app/src/main/cpp/skyline/common/settings.h b/app/src/main/cpp/skyline/common/settings.h index 65273b26..e14c5834 100644 --- a/app/src/main/cpp/skyline/common/settings.h +++ b/app/src/main/cpp/skyline/common/settings.h @@ -77,6 +77,7 @@ namespace skyline { Setting executorFlushThreshold; //!< Number of commands that need to accumulate before they're flushed to the GPU Setting useDirectMemoryImport; //!< If buffer emulation should be done by importing guest buffer mappings Setting forceMaxGpuClocks; //!< If the GPU should be forced to run at maximum clocks + Setting freeGuestTextureMemory; //!< If guest texture memory should be freed when the owning texture is GPU dirty // Hacks Setting enableFastGpuReadbackHack; //!< If the CPU texture readback skipping hack should be used diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index ba23cace..37b128c6 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -225,7 +225,7 @@ namespace skyline::gpu { return true; // If the texture is already CPU dirty or we can transition it to being CPU dirty then we don't need 
to do anything } - if (texture->accumulatedGuestWaitTime > SkipReadbackHackWaitTimeThreshold && *texture->gpu.state.settings->enableFastGpuReadbackHack) { + if (texture->accumulatedGuestWaitTime > SkipReadbackHackWaitTimeThreshold && *texture->gpu.state.settings->enableFastGpuReadbackHack && !texture->memoryFreed) { texture->dirtyState = DirtyState::Clean; return true; } @@ -494,6 +494,14 @@ namespace skyline::gpu { } } + void Texture::FreeGuest() { + // Avoid freeing memory if the backing format doesn't match, as otherwise texture data would be lost on the guest side, also avoid if fast readback is active + if (*gpu.state.settings->freeGuestTextureMemory && guest->format == format && !(accumulatedGuestWaitTime > SkipReadbackHackWaitTimeThreshold && *gpu.state.settings->enableFastGpuReadbackHack)) { + gpu.state.process->memory.FreeMemory(mirror); + memoryFreed = true; + } + } + Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ImageCreateFlags flags, vk::ImageUsageFlags usage, u32 levelCount, u32 layerCount, vk::SampleCountFlagBits sampleCount) : gpu(gpu), backing(std::move(backing)), @@ -721,6 +729,10 @@ namespace skyline::gpu { if (!guest) return; + // FIXME (TEXMAN): This should really be tracked on the texture usage side + if (!*gpu.state.settings->freeGuestTextureMemory && !everUsedAsRt) + gpuDirty = false; + TRACE_EVENT("gpu", "Texture::SynchronizeHost"); { std::scoped_lock lock{stateMutex}; @@ -728,7 +740,7 @@ namespace skyline::gpu { // If a texture is Clean then we can just transition it to being GPU dirty and retrap it dirtyState = DirtyState::GpuDirty; gpu.state.nce->TrapRegions(*trapHandle, false); - gpu.state.process->memory.FreeMemory(mirror); + FreeGuest(); return; } else if (dirtyState != DirtyState::CpuDirty) { return; // If the texture has not been modified on the CPU, there is no need to synchronize it @@ -755,8 +767,8 @@ namespace 
skyline::gpu { { std::scoped_lock lock{stateMutex}; - if (dirtyState != DirtyState::CpuDirty && gpuDirty) - gpu.state.process->memory.FreeMemory(mirror); // All data can be paged out from the guest as the guest mirror won't be used + if (dirtyState == DirtyState::GpuDirty) + FreeGuest(); } } @@ -765,13 +777,16 @@ namespace skyline::gpu { return; TRACE_EVENT("gpu", "Texture::SynchronizeHostInline"); + // FIXME (TEXMAN): This should really be tracked on the texture usage side + if (!*gpu.state.settings->freeGuestTextureMemory && !everUsedAsRt) + gpuDirty = false; { std::scoped_lock lock{stateMutex}; if (gpuDirty && dirtyState == DirtyState::Clean) { dirtyState = DirtyState::GpuDirty; gpu.state.nce->TrapRegions(*trapHandle, false); - gpu.state.process->memory.FreeMemory(mirror); + FreeGuest(); return; } else if (dirtyState != DirtyState::CpuDirty) { return; @@ -792,8 +807,8 @@ namespace skyline::gpu { { std::scoped_lock lock{stateMutex}; - if (dirtyState != DirtyState::CpuDirty && gpuDirty) - gpu.state.process->memory.FreeMemory(mirror); // All data can be paged out from the guest as the guest mirror won't be used + if (dirtyState == DirtyState::GpuDirty) + FreeGuest(); } } @@ -815,6 +830,7 @@ namespace skyline::gpu { } dirtyState = cpuDirty ? 
DirtyState::CpuDirty : DirtyState::Clean; + memoryFreed = false; } if (layout == vk::ImageLayout::eUndefined || format != guest->format) @@ -1008,6 +1024,7 @@ namespace skyline::gpu { lastRenderPassIndex = renderPassIndex; if (renderPassUsage == texture::RenderPassUsage::RenderTarget) { + everUsedAsRt = true; pendingStageMask = vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eTessellationControlShader | vk::PipelineStageFlagBits::eTessellationEvaluationShader | @@ -1030,6 +1047,9 @@ namespace skyline::gpu { } void Texture::PopulateReadBarrier(vk::PipelineStageFlagBits dstStage, vk::PipelineStageFlags &srcStageMask, vk::PipelineStageFlags &dstStageMask) { + if (!guest) + return; + readStageMask |= dstStage; if (!(pendingStageMask & dstStage)) diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index f04b70d2..98933e4e 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -389,6 +389,7 @@ namespace skyline::gpu { CpuDirty, //!< The CPU mappings have been modified but the GPU texture is not up to date GpuDirty, //!< The GPU texture has been modified but the CPU mappings have not been updated } dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU texture + bool memoryFreed{}; //!< If the guest backing memory has been freed std::recursive_mutex stateMutex; //!< Synchronizes access to the dirty state /** @@ -410,6 +411,7 @@ namespace skyline::gpu { u32 lastRenderPassIndex{}; //!< The index of the last render pass that used this texture texture::RenderPassUsage lastRenderPassUsage{texture::RenderPassUsage::None}; //!< The type of usage in the last render pass + bool everUsedAsRt{}; //!< If this texture has ever been used as a rendertarget vk::PipelineStageFlags pendingStageMask{}; //!< List of pipeline stages that are yet to be flushed for reads since the last time this texture was used an an 
RT vk::PipelineStageFlags readStageMask{}; //!< Set of pipeline stages that this texture has been read in since it was last used as an RT @@ -444,6 +446,12 @@ namespace skyline::gpu { */ void CopyToGuest(u8 *hostBuffer); + /** + * @brief Frees the guest side copy of the texture + * @note `stateMutex` must be locked when calling this function + */ + void FreeGuest(); + /** * @return A vector of all the buffer image copies that need to be done for every aspect of every level of every layer of the texture */ diff --git a/app/src/main/java/emu/skyline/settings/EmulationSettings.kt b/app/src/main/java/emu/skyline/settings/EmulationSettings.kt index 9c736607..3ba61123 100644 --- a/app/src/main/java/emu/skyline/settings/EmulationSettings.kt +++ b/app/src/main/java/emu/skyline/settings/EmulationSettings.kt @@ -48,6 +48,7 @@ class EmulationSettings private constructor(context : Context, prefName : String var executorFlushThreshold by sharedPreferences(context, 256, prefName = prefName) var useDirectMemoryImport by sharedPreferences(context, false, prefName = prefName) var forceMaxGpuClocks by sharedPreferences(context, false, prefName = prefName) + var freeGuestTextureMemory by sharedPreferences(context, true, prefName = prefName) var disableShaderCache by sharedPreferences(context, false, prefName = prefName) // Hacks diff --git a/app/src/main/java/emu/skyline/settings/NativeSettings.kt b/app/src/main/java/emu/skyline/settings/NativeSettings.kt index d28801e4..ad75b564 100644 --- a/app/src/main/java/emu/skyline/settings/NativeSettings.kt +++ b/app/src/main/java/emu/skyline/settings/NativeSettings.kt @@ -35,6 +35,7 @@ data class NativeSettings( var executorFlushThreshold : Int, var useDirectMemoryImport : Boolean, var forceMaxGpuClocks : Boolean, + var freeGuestTextureMemory : Boolean, var disableShaderCache : Boolean, // Hacks @@ -60,6 +61,7 @@ data class NativeSettings( pref.executorFlushThreshold, pref.useDirectMemoryImport, pref.forceMaxGpuClocks, + 
pref.freeGuestTextureMemory, pref.disableShaderCache, pref.enableFastGpuReadbackHack, pref.enableFastReadbackWrites, diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml index 61e12817..c9cad705 100644 --- a/app/src/main/res/values/strings.xml +++ b/app/src/main/res/values/strings.xml @@ -110,6 +110,8 @@ Force Maximum GPU Clocks Forces the GPU to run at its maximum possible clock speed (May cause excessive heating and power usage) Your device does not support forcing maximum GPU clocks + Free Guest Texture Memory + Allows guest texture data to be freed from memory when unneeded (Can rarely cause crashes) Disable Shader Cache Cached shaders won\'t be loaded, will cause stutters Cached shaders will be loaded, can heavily reduce stuttering diff --git a/app/src/main/res/xml/emulation_preferences.xml b/app/src/main/res/xml/emulation_preferences.xml index ee014b85..6aa94224 100644 --- a/app/src/main/res/xml/emulation_preferences.xml +++ b/app/src/main/res/xml/emulation_preferences.xml @@ -123,6 +123,11 @@ android:summary="@string/force_max_gpu_clocks_desc" app:key="force_max_gpu_clocks" app:title="@string/force_max_gpu_clocks" /> +