diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp index ae773511..070f08ca 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp @@ -18,12 +18,12 @@ namespace skyline::soc::gm20b::engine { MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {} - void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, u32 *argumentPtr, bool lastCall) { + bool MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, GpfifoArgument argument, bool lastCall, const std::function &flushCallback) { // Starting a new macro at index 'macroMethodOffset / 2' if (!(macroMethodOffset & 1)) { // Flush the current macro as we are switching to another one if (macroInvocation.Valid()) { - macroState.Execute(macroInvocation.index, macroInvocation.arguments, this); + macroState.Execute(macroInvocation.index, macroInvocation.arguments, this, flushCallback); macroInvocation.Reset(); } @@ -31,12 +31,15 @@ namespace skyline::soc::gm20b::engine { macroInvocation.index = (macroMethodOffset / 2) % macroState.macroPositions.size(); } - macroInvocation.arguments.emplace_back(argument, argumentPtr); + macroInvocation.arguments.emplace_back(argument); // Flush macro after all of the data in the method call has been sent if (lastCall && macroInvocation.Valid()) { - macroState.Execute(macroInvocation.index, macroInvocation.arguments, this); + macroState.Execute(macroInvocation.index, macroInvocation.arguments, this, flushCallback); macroInvocation.Reset(); + return false; } + + return true; }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h index 3a03dbeb..775740e7 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h @@ -80,7 +80,7 @@ namespace skyline::soc::gm20b::engine { struct { u32 
index{std::numeric_limits::max()}; - std::vector arguments; + std::vector arguments; bool Valid() { return index != std::numeric_limits::max(); @@ -121,7 +121,8 @@ namespace skyline::soc::gm20b::engine { /** * @brief Handles a call to a method in the MME space * @param macroMethodOffset The target offset from EngineMethodsEnd + * @return If flushes should be skipped for subsequent GPFIFO argument fetches */ - void HandleMacroCall(u32 macroMethodOffset, u32 argument, u32 *argumentPtr, bool lastCall); + bool HandleMacroCall(u32 macroMethodOffset, GpfifoArgument argument, bool lastCall, const std::function &flushCallback); }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp index 6fc22c0f..5ac10f43 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp @@ -9,6 +9,7 @@ #include #include #include "channel.h" +#include "macro/macro_state.h" namespace skyline::soc::gm20b { /** @@ -88,21 +89,27 @@ namespace skyline::soc::gm20b { gpEntries(numEntries), thread(std::thread(&ChannelGpfifo::Run, this)) {} - void ChannelGpfifo::SendFull(u32 method, u32 argument, u32 *argumentPtr, SubchannelId subChannel, bool lastCall) { + void ChannelGpfifo::SendFull(u32 method, GpfifoArgument argument, SubchannelId subChannel, bool lastCall) { if (method < engine::GPFIFO::RegisterCount) { - gpfifoEngine.CallMethod(method, argumentPtr ? *argumentPtr : argument); + gpfifoEngine.CallMethod(method, *argument); } else if (method < engine::EngineMethodsEnd) { [[likely]] - SendPure(method, argumentPtr ? 
*argumentPtr : argument, subChannel); + SendPure(method, *argument, subChannel); } else { switch (subChannel) { case SubchannelId::ThreeD: - channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, argumentPtr, lastCall); + skipDirtyFlushes = channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall, + [&executor = channelCtx.executor] { + executor.Submit({}, true); + }); break; case SubchannelId::TwoD: - channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, argumentPtr, lastCall); + skipDirtyFlushes = channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall, + [&executor = channelCtx.executor] { + executor.Submit({}, true); + }); break; default: - Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argumentPtr ? *argumentPtr : argument); + Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, *argument); break; } } @@ -168,9 +175,6 @@ namespace skyline::soc::gm20b { } auto pushBufferMappedRanges{channelCtx.asCtx->gmmu.TranslateRange(gpEntry.Address(), gpEntry.size * sizeof(u32))}; - for (auto range : pushBufferMappedRanges) { - if (channelCtx.executor.usageTracker.dirtyIntervals.Intersect(range)) - channelCtx.executor.Submit({}, true); bool pushBufferCopied{}; //!< Set by the below lambda in order to track if the pushbuffer is a copy of guest memory or not auto pushBuffer{[&]() -> span { @@ -185,21 +189,36 @@ namespace skyline::soc::gm20b { } }()}; + bool pushbufferDirty{false}; + + for (auto range : pushBufferMappedRanges) { + if (channelCtx.executor.usageTracker.dirtyIntervals.Intersect(range)) { + if (skipDirtyFlushes) + pushbufferDirty = true; + else + channelCtx.executor.Submit({}, true); + } + } + // There will be at least one entry here auto entry{pushBuffer.begin()}; + auto getArgument{[&](){ + return GpfifoArgument{pushBufferCopied ? 
*entry : 0, pushBufferCopied ? nullptr : entry.base(), pushbufferDirty}; + }}; + // Executes the current split method, returning once execution is finished or the current GpEntry has reached its end auto resumeSplitMethod{[&](){ switch (resumeState.state) { case MethodResumeState::State::Inc: while (entry != pushBuffer.end() && resumeState.remaining) { - SendFull(resumeState.address++, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0); + SendFull(resumeState.address++, getArgument(), resumeState.subChannel, --resumeState.remaining == 0); entry++; } break; case MethodResumeState::State::OneInc: - SendFull(resumeState.address++, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0); + SendFull(resumeState.address++, getArgument(), resumeState.subChannel, --resumeState.remaining == 0); entry++; // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries @@ -207,7 +226,7 @@ namespace skyline::soc::gm20b { [[fallthrough]]; case MethodResumeState::State::NonInc: while (entry != pushBuffer.end() && resumeState.remaining) { - SendFull(resumeState.address, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0); + SendFull(resumeState.address, getArgument(), resumeState.subChannel, --resumeState.remaining == 0); entry++; } @@ -296,7 +315,7 @@ namespace skyline::soc::gm20b { // Slow path for methods that touch GPFIFO or macros for (u32 i{}; i < methodHeader.methodCount; i++) { entry++; - SendFull(methodHeader.methodAddress + methodOffset(i), pushBufferCopied ? *entry : 0, pushBufferCopied ? 
nullptr : entry.base(), methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); + SendFull(methodHeader.methodAddress + methodOffset(i), getArgument(), methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); } } } else { @@ -320,7 +339,7 @@ namespace skyline::soc::gm20b { if (methodHeader.Pure()) SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel); else - SendFull(methodHeader.methodAddress, methodHeader.immdData, nullptr, methodHeader.methodSubChannel, true); + SendFull(methodHeader.methodAddress, GpfifoArgument{methodHeader.immdData}, methodHeader.methodSubChannel, true); return false; } else if (methodHeader.secOp == PushBufferMethodHeader::SecOp::NonIncMethod) [[unlikely]] { diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h index d8c10c8c..22c151bf 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "engines/gpfifo.h" namespace skyline::soc::gm20b { @@ -107,6 +108,7 @@ namespace skyline::soc::gm20b { engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls CircularQueue gpEntries; std::vector pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations + bool skipDirtyFlushes{}; //!< If GPU flushing should be skipped when fetching pushbuffer contents /** * @brief Holds the required state in order to resume a method started from one call to `Process` in another @@ -132,7 +134,7 @@ namespace skyline::soc::gm20b { /** * @brief Sends a method call to the appropriate subchannel and handles macro and GPFIFO methods */ - void SendFull(u32 method, u32 argument, u32 *argumentPtr, SubchannelId subchannel, bool lastCall); + void SendFull(u32 method, GpfifoArgument argument, SubchannelId subchannel, bool lastCall); /** * @brief Sends a method call to the appropriate subchannel, macro and GPFIFO 
methods are not handled diff --git a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp index caea1775..259578b0 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp @@ -2,11 +2,16 @@ // Copyright © 2022 yuzu Emulator Project (https://yuzu-emu.org/) // Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) +#include #include #include #include "macro_state.h" namespace skyline::soc::gm20b { + static bool AnyArgsDirty(span args) { + return ranges::any_of(args, [](const GpfifoArgument &arg) { return arg.dirty; }); + } + static bool TopologyRequiresConversion(engine::maxwell3d::type::DrawTopology topology) { switch (topology) { case engine::maxwell3d::type::DrawTopology::Quads: @@ -19,24 +24,35 @@ namespace skyline::soc::gm20b { } namespace macro_hle { - bool DrawInstanced(size_t offset, span args, engine::MacroEngineBase *targetEngine) { + bool DrawInstanced(size_t offset, span args, engine::MacroEngineBase *targetEngine, const std::function &flushCallback) { + if (AnyArgsDirty(args)) + flushCallback(); + u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & *args[2]}; targetEngine->DrawInstanced(true, *args[0], *args[1], instanceCount, *args[3], *args[4]); return true; } - bool DrawIndexedInstanced(size_t offset, span args, engine::MacroEngineBase *targetEngine) { + bool DrawIndexedInstanced(size_t offset, span args, engine::MacroEngineBase *targetEngine, const std::function &flushCallback) { + if (AnyArgsDirty(args)) + flushCallback(); + u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & *args[2]}; targetEngine->DrawIndexedInstanced(true, *args[0], *args[1], instanceCount, *args[3], *args[4], *args[5]); return true; } - bool DrawInstancedIndexedIndirectWithConstantBuffer(size_t offset, span args, engine::MacroEngineBase *targetEngine) { + bool 
DrawInstancedIndexedIndirectWithConstantBuffer(size_t offset, span args, engine::MacroEngineBase *targetEngine, const std::function &flushCallback) { u32 topology{*args[0]}; - if (TopologyRequiresConversion(static_cast(topology)) || !args[1].argumentPtr) { - // If the passed parameters aren't dirty or the indirect topology isn't supported fallback to a non indirect draw (may wait) + bool topologyConversion{TopologyRequiresConversion(static_cast(topology))}; + + // If the indirect topology isn't supported flush and fallback to a non indirect draw + if (topologyConversion && args[1].dirty) + flushCallback(); + + if (topologyConversion || !args[1].dirty) { u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & *args[2]}; targetEngine->DrawIndexedInstanced(false, topology, *args[1], instanceCount, *args[4], *args[3], *args[5]); } else { @@ -76,7 +92,7 @@ namespace skyline::soc::gm20b { invalidatePending = true; } - void MacroState::Execute(u32 position, span args, engine::MacroEngineBase *targetEngine) { + void MacroState::Execute(u32 position, span args, engine::MacroEngineBase *targetEngine, const std::function &flushCallback) { size_t offset{macroPositions[position]}; if (invalidatePending) { @@ -91,11 +107,14 @@ namespace skyline::soc::gm20b { hleEntry.valid = true; } - if (macroHleFunctions[position].function && macroHleFunctions[position].function(offset, args, targetEngine)) + if (macroHleFunctions[position].function && macroHleFunctions[position].function(offset, args, targetEngine, flushCallback)) return; + if (AnyArgsDirty(args)) + flushCallback(); + argumentStorage.resize(args.size()); - std::transform(args.begin(), args.end(), argumentStorage.begin(), [](MacroArgument arg) { return *arg; }); + std::transform(args.begin(), args.end(), argumentStorage.begin(), [](GpfifoArgument arg) { return *arg; }); macroInterpreter.Execute(offset, argumentStorage, targetEngine); } } diff --git a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h 
b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h index acd05eac..33457db0 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h +++ b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h @@ -7,11 +7,17 @@ #include "macro_interpreter.h" namespace skyline::soc::gm20b { - struct MacroArgument { + /** + * @brief A GPFIFO argument that can be either a value or a pointer to a value + */ + struct GpfifoArgument { u32 argument; - u32 *argumentPtr; + u32 *argumentPtr{}; + bool dirty{}; - MacroArgument(u32 argument, u32 *argumentPtr) : argument{argument}, argumentPtr{argumentPtr} {} + GpfifoArgument(u32 argument, u32 *argumentPtr, bool dirty) : argument{argument}, argumentPtr{argumentPtr}, dirty{dirty} {} + + explicit GpfifoArgument(u32 argument) : argument{argument} {} u32 operator*() const { return argumentPtr ? *argumentPtr : argument; @@ -19,7 +25,7 @@ namespace skyline::soc::gm20b { }; namespace macro_hle { - using Function = bool (*)(size_t offset, span args, engine::MacroEngineBase *targetEngine); + using Function = bool (*)(size_t offset, span args, engine::MacroEngineBase *targetEngine, const std::function &flushCallback); } /** @@ -41,8 +47,14 @@ namespace skyline::soc::gm20b { MacroState() : macroInterpreter{macroCode} {} + /** + * @brief Invalidates the HLE function cache + */ void Invalidate(); - void Execute(u32 position, span args, engine::MacroEngineBase *targetEngine); + /** + * @brief Executes a macro at a given position, this can either be a HLE function or the interpreter + */ + void Execute(u32 position, span args, engine::MacroEngineBase *targetEngine, const std::function &flushCallback); }; }