diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt
index 303d004a..b2de07c7 100644
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@@ -203,6 +203,7 @@ add_library(skyline SHARED
         ${source_DIR}/skyline/soc/gm20b/channel.cpp
         ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
         ${source_DIR}/skyline/soc/gm20b/gmmu.cpp
+        ${source_DIR}/skyline/soc/gm20b/macro/macro_state.cpp
         ${source_DIR}/skyline/soc/gm20b/macro/macro_interpreter.cpp
         ${source_DIR}/skyline/soc/gm20b/engines/engine.cpp
         ${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
index 86ca2dc0..5b864f3d 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
@@ -20,7 +20,7 @@ namespace skyline::soc::gm20b::engine {
         if (!(macroMethodOffset & 1)) {
             // Flush the current macro as we are switching to another one
             if (macroInvocation.Valid()) {
-                macroState.macroInterpreter.Execute(macroState.macroPositions[macroInvocation.index], macroInvocation.arguments, this);
+                macroState.Execute(macroInvocation.index, macroInvocation.arguments, this);
                 macroInvocation.Reset();
             }
 
@@ -32,7 +32,7 @@ namespace skyline::soc::gm20b::engine {
 
         // Flush macro after all of the data in the method call has been sent
         if (lastCall && macroInvocation.Valid()) {
-            macroState.macroInterpreter.Execute(macroState.macroPositions[macroInvocation.index], macroInvocation.arguments, this);
+            macroState.Execute(macroInvocation.index, macroInvocation.arguments, this);
             macroInvocation.Reset();
         }
     };
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
index 2b753ebe..0e4e497b 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
@@ -61,15 +61,15 @@ namespace skyline::soc::gm20b::engine {
         MacroState &macroState;
 
         struct {
-            size_t index{std::numeric_limits<size_t>::max()};
+            u32 index{std::numeric_limits<u32>::max()};
             std::vector<u32> arguments;
 
             bool Valid() {
-                return index != std::numeric_limits<size_t>::max();
+                return index != std::numeric_limits<u32>::max();
             }
 
             void Reset() {
-                index = std::numeric_limits<size_t>::max();
+                index = std::numeric_limits<u32>::max();
                 arguments.clear();
             }
         } macroInvocation{}; //!< Data for a macro that is pending execution
@@ -88,6 +88,24 @@ namespace skyline::soc::gm20b::engine {
          */
         virtual u32 ReadMethodFromMacro(u32 method) = 0;
 
+        /**
+         * @brief Performs an instanced draw on behalf of an HLE macro
+         * @param setRegs If the draw parameters should also be written to the engine's registers
+         * @note The default implementation throws, engines that support drawing must override this
+         */
+        virtual void DrawInstanced(bool setRegs, u32 drawTopology, u32 vertexArrayCount, u32 instanceCount, u32 vertexArrayStart, u32 globalBaseInstanceIndex) {
+            throw exception("DrawInstanced is not implemented for this engine");
+        }
+
+        /**
+         * @brief Performs an instanced indexed draw on behalf of an HLE macro
+         * @param setRegs If the draw parameters should also be written to the engine's registers
+         * @note The default implementation throws, engines that support drawing must override this
+         */
+        virtual void DrawIndexedInstanced(bool setRegs, u32 drawTopology, u32 indexBufferCount, u32 instanceCount, u32 globalBaseVertexIndex, u32 indexBufferFirst, u32 globalBaseInstanceIndex) {
+            throw exception("DrawIndexedInstanced is not implemented for this engine");
+        }
+
         /**
          * @brief Handles a call to a method in the MME space
          * @param macroMethodOffset The target offset from EngineMethodsEnd
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
index 17b1d590..6bffa633 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
@@ -323,4 +323,31 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
     u32 Maxwell3D::ReadMethodFromMacro(u32 method) {
         return registers.raw[method];
     }
+
+    void Maxwell3D::DrawInstanced(bool setRegs, u32 drawTopology, u32 vertexArrayCount, u32 instanceCount, u32 vertexArrayStart, u32 globalBaseInstanceIndex) {
+        auto topology{static_cast<type::DrawTopology>(drawTopology)};
+        // Optionally mirror the draw parameters into the engine registers before issuing the draw
+        if (setRegs) {
+            registers.begin->op = topology;
+            registers.drawVertexArray->count = vertexArrayCount;
+            registers.vertexArrayStart = vertexArrayStart;
+            registers.globalBaseInstanceIndex = globalBaseInstanceIndex;
+        }
+
+        interconnect.Draw(topology, false, vertexArrayCount, vertexArrayStart, instanceCount, 0, globalBaseInstanceIndex);
+    }
+
+    void Maxwell3D::DrawIndexedInstanced(bool setRegs, u32 drawTopology, u32 indexBufferCount, u32 instanceCount, u32 globalBaseVertexIndex, u32 indexBufferFirst, u32 globalBaseInstanceIndex) {
+        auto topology{static_cast<type::DrawTopology>(drawTopology)};
+        // Optionally mirror the draw parameters into the engine registers before issuing the draw
+        if (setRegs) {
+            registers.begin->op = topology;
+            registers.drawIndexBuffer->count = indexBufferCount;
+            registers.indexBuffer->first = indexBufferFirst;
+            registers.globalBaseVertexIndex = globalBaseVertexIndex;
+            registers.globalBaseInstanceIndex = globalBaseInstanceIndex;
+        }
+
+        interconnect.Draw(topology, true, indexBufferCount, indexBufferFirst, instanceCount, globalBaseVertexIndex, globalBaseInstanceIndex);
+    }
 }
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
index ab4a6dde..3053a99a 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
@@ -392,5 +392,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
         void CallMethodFromMacro(u32 method, u32 argument) override;
 
         u32 ReadMethodFromMacro(u32 method) override;
+
+        void DrawInstanced(bool setRegs, u32 drawTopology, u32 vertexArrayCount, u32 instanceCount, u32 vertexArrayStart, u32 globalBaseInstanceIndex) override;
+
+        void DrawIndexedInstanced(bool setRegs, u32 drawTopology, u32 indexBufferCount, u32 instanceCount, u32 globalBaseVertexIndex, u32 indexBufferFirst, u32 globalBaseInstanceIndex) override;
     };
 }
diff --git a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp
new file mode 100644
index 00000000..f27339b1
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.cpp
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 yuzu Emulator Project (https://yuzu-emu.org/)
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include <xxhash.h>
+#include "macro_state.h"
+
+namespace skyline::soc::gm20b {
+    namespace macro_hle {
+        /**
+         * @brief HLE implementation of the instanced draw macro
+         * @note The instance count is args[2] masked by the value read back from method 0xD1B
+         */
+        void DrawInstanced(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine) {
+            u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & args[2]};
+
+            targetEngine->DrawInstanced(true, args[0], args[1], instanceCount, args[3], args[4]);
+        }
+
+        /**
+         * @brief HLE implementation of the instanced indexed draw macro
+         * @note The instance count is args[2] masked by the value read back from method 0xD1B
+         */
+        void DrawIndexedInstanced(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine) {
+            u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & args[2]};
+
+            targetEngine->DrawIndexedInstanced(true, args[0], args[1], instanceCount, args[3], args[4], args[5]);
+        }
+
+        /**
+         * @brief HLE implementation of the instanced indexed draw macro that also writes args[4] and args[5] through methods 0x8e4/0x8e5 around the draw
+         * @note Unlike DrawIndexedInstanced, args[3] is the first index and args[4] is the base vertex here
+         */
+        void DrawInstancedIndexedWithConstantBuffer(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine) {
+            // Writes globalBaseVertexIndex and globalBaseInstanceIndex to the bound constant buffer before performing a standard instanced indexed draw
+            u32 instanceCount{targetEngine->ReadMethodFromMacro(0xD1B) & args[2]};
+            targetEngine->CallMethodFromMacro(0x8e3, 0x640);
+            targetEngine->CallMethodFromMacro(0x8e4, args[4]);
+            targetEngine->CallMethodFromMacro(0x8e5, args[5]);
+            targetEngine->DrawIndexedInstanced(false, args[0], args[1], instanceCount, args[4], args[3], args[5]);
+            // Zero out the same two words again once the draw has been issued
+            targetEngine->CallMethodFromMacro(0x8e3, 0x640);
+            targetEngine->CallMethodFromMacro(0x8e4, 0x0);
+            targetEngine->CallMethodFromMacro(0x8e5, 0x0);
+        }
+
+        /**
+         * @brief Associates an HLE function with the macro code it replaces
+         */
+        struct HleFunctionInfo {
+            Function function; //!< The HLE implementation to run in place of the interpreter
+            u64 size; //!< The length of the macro in code words (span elements), only this many words are hashed
+            u32 hash; //!< The XXH32 (seed 0) of the first `size` words of the macro
+        };
+
+        constexpr std::array<HleFunctionInfo, 3> functions{{
+            {DrawInstanced, 0x12, 0x6F0DD310},
+            {DrawIndexedInstanced, 0x17, 0x2764C4F},
+            {DrawInstancedIndexedWithConstantBuffer, 0x1F, 0xF2F16988},
+        }};
+
+        /**
+         * @brief Searches the HLE table for a function matching the macro at the start of the supplied code
+         * @param code The macro code memory starting from the first word of the macro, it may extend past the end of the macro
+         * @return The matching HLE function or nullptr if the macro has no HLE implementation
+         */
+        static Function LookupFunction(span<u32> code) {
+            for (const auto &function : functions) {
+                if (function.size > code.size())
+                    continue;
+
+                // Only the macro's own words are hashed, `code` runs on to the end of code memory so hashing all of it would include unrelated trailing words
+                auto macro{code.subspan(0, function.size)};
+
+                if (XXH32(macro.data(), macro.size_bytes(), 0) == function.hash)
+                    return function.function;
+            }
+
+            return {};
+        }
+    }
+
+    void MacroState::Invalidate() {
+        invalidatePending = true;
+    }
+
+    void MacroState::Execute(u32 position, span<u32> args, engine::MacroEngineBase *targetEngine) {
+        size_t offset{macroPositions[position]};
+
+        if (invalidatePending) {
+            macroHleFunctions.fill({});
+            // Clear the flag so the cache is flushed once per invalidation rather than before every execution
+            invalidatePending = false;
+        }
+
+        auto &hleEntry{macroHleFunctions[position]};
+
+        // Lazily resolve the HLE function the first time a macro is run after an invalidation
+        if (!hleEntry.valid) {
+            hleEntry.function = macro_hle::LookupFunction(span(macroCode).subspan(offset));
+            hleEntry.valid = true;
+        }
+
+        if (hleEntry.function)
+            hleEntry.function(offset, args, targetEngine);
+        else
+            macroInterpreter.Execute(offset, args, targetEngine);
+    }
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h
index c18d8c5d..4497ba8a 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h
@@ -7,14 +7,36 @@
 #include "macro_interpreter.h"
 
 namespace skyline::soc::gm20b {
+    namespace macro_hle {
+        using Function = void (*)(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine); //!< Signature shared by all HLE macro implementations
+    }
+
     /**
      * @brief Holds per-channel macro state
      */
     struct MacroState {
+        struct MacroHleEntry {
+            macro_hle::Function function; //!< The HLE function for the macro, null when the interpreter should be used
+            bool valid; //!< If `function` has been looked up since the last invalidation
+        };
+
         engine::MacroInterpreter macroInterpreter; //!< The macro interpreter for handling 3D/2D macros
         std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
         std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro code memory, there can be a maximum of 0x80 macros at any one time
+        std::array<MacroHleEntry, 0x80> macroHleFunctions{}; //!< The HLE functions for each macro position, used to optionally override the interpreter
+        bool invalidatePending{}; //!< If the HLE function cache should be cleared before the next macro is executed
 
         MacroState() : macroInterpreter(macroCode) {}
+
+        /**
+         * @brief Marks all cached HLE lookups as stale, they are cleared on the next call to Execute
+         */
+        void Invalidate();
+
+        /**
+         * @brief Executes the macro at the supplied index using its HLE function if one exists, falling back to the interpreter
+         * @param position The index of the macro in macroPositions
+         */
+        void Execute(u32 position, span<u32> args, engine::MacroEngineBase *targetEngine);
     };
 }