Avoid dereferencing macro argument pointers in memory where possible

Indirect draws are implemented by having the macro arguments overflow into a separate GP Entry that points directly to the indirect argument buffer. To HLE indirect draws, a buffer needs to be created from this pointer, and it cannot be dereferenced on the CPU at any point, to avoid hitting traps.
This commit is contained in:
Billy Laws 2023-02-04 22:38:50 +00:00
parent 2b93604da0
commit b313dcbdca
6 changed files with 51 additions and 24 deletions

View File

@ -18,7 +18,7 @@ namespace skyline::soc::gm20b::engine {
MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {} MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}
void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) { void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, u32 *argumentPtr, bool lastCall) {
// Starting a new macro at index 'macroMethodOffset / 2' // Starting a new macro at index 'macroMethodOffset / 2'
if (!(macroMethodOffset & 1)) { if (!(macroMethodOffset & 1)) {
// Flush the current macro as we are switching to another one // Flush the current macro as we are switching to another one
@ -31,7 +31,7 @@ namespace skyline::soc::gm20b::engine {
macroInvocation.index = (macroMethodOffset / 2) % macroState.macroPositions.size(); macroInvocation.index = (macroMethodOffset / 2) % macroState.macroPositions.size();
} }
macroInvocation.arguments.emplace_back(argument); macroInvocation.arguments.emplace_back(argument, argumentPtr);
// Flush macro after all of the data in the method call has been sent // Flush macro after all of the data in the method call has been sent
if (lastCall && macroInvocation.Valid()) { if (lastCall && macroInvocation.Valid()) {

View File

@ -80,7 +80,7 @@ namespace skyline::soc::gm20b::engine {
struct { struct {
u32 index{std::numeric_limits<u32>::max()}; u32 index{std::numeric_limits<u32>::max()};
std::vector<u32> arguments; std::vector<MacroArgument> arguments;
bool Valid() { bool Valid() {
return index != std::numeric_limits<u32>::max(); return index != std::numeric_limits<u32>::max();
@ -114,10 +114,14 @@ namespace skyline::soc::gm20b::engine {
throw exception("DrawIndexedInstanced is not implemented for this engine"); throw exception("DrawIndexedInstanced is not implemented for this engine");
} }
virtual void DrawIndexedIndirect(u32 drawTopology, span<u8> indirectBuffer, u32 count, u32 stride) {
throw exception("DrawIndexedIndirect is not implemented for this engine");
}
/** /**
* @brief Handles a call to a method in the MME space * @brief Handles a call to a method in the MME space
* @param macroMethodOffset The target offset from EngineMethodsEnd * @param macroMethodOffset The target offset from EngineMethodsEnd
*/ */
void HandleMacroCall(u32 macroMethodOffset, u32 value, bool lastCall); void HandleMacroCall(u32 macroMethodOffset, u32 argument, u32 *argumentPtr, bool lastCall);
}; };
} }

View File

@ -88,21 +88,21 @@ namespace skyline::soc::gm20b {
gpEntries(numEntries), gpEntries(numEntries),
thread(std::thread(&ChannelGpfifo::Run, this)) {} thread(std::thread(&ChannelGpfifo::Run, this)) {}
void ChannelGpfifo::SendFull(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) { void ChannelGpfifo::SendFull(u32 method, u32 argument, u32 *argumentPtr, SubchannelId subChannel, bool lastCall) {
if (method < engine::GPFIFO::RegisterCount) { if (method < engine::GPFIFO::RegisterCount) {
gpfifoEngine.CallMethod(method, argument); gpfifoEngine.CallMethod(method, argumentPtr ? *argumentPtr : argument);
} else if (method < engine::EngineMethodsEnd) { [[likely]] } else if (method < engine::EngineMethodsEnd) { [[likely]]
SendPure(method, argument, subChannel); SendPure(method, argumentPtr ? *argumentPtr : argument, subChannel);
} else { } else {
switch (subChannel) { switch (subChannel) {
case SubchannelId::ThreeD: case SubchannelId::ThreeD:
channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall); channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, argumentPtr, lastCall);
break; break;
case SubchannelId::TwoD: case SubchannelId::TwoD:
channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall); channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, argumentPtr, lastCall);
break; break;
default: default:
Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argument); Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argumentPtr ? *argumentPtr : argument);
break; break;
} }
} }
@ -172,6 +172,7 @@ namespace skyline::soc::gm20b {
if (channelCtx.executor.usageTracker.dirtyIntervals.Intersect(range)) if (channelCtx.executor.usageTracker.dirtyIntervals.Intersect(range))
channelCtx.executor.Submit({}, true); channelCtx.executor.Submit({}, true);
bool pushBufferCopied{}; //!< Set by the below lambda in order to track if the pushbuffer is a copy of guest memory or not
auto pushBuffer{[&]() -> span<u32> { auto pushBuffer{[&]() -> span<u32> {
if (pushBufferMappedRanges.size() == 1) { if (pushBufferMappedRanges.size() == 1) {
return pushBufferMappedRanges.front().cast<u32>(); return pushBufferMappedRanges.front().cast<u32>();
@ -179,6 +180,7 @@ namespace skyline::soc::gm20b {
// Create an intermediate copy of pushbuffer data if it's split across multiple mappings // Create an intermediate copy of pushbuffer data if it's split across multiple mappings
pushBufferData.resize(gpEntry.size); pushBufferData.resize(gpEntry.size);
channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address()); channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
pushBufferCopied = true;
return span(pushBufferData); return span(pushBufferData);
} }
}()}; }()};
@ -190,19 +192,24 @@ namespace skyline::soc::gm20b {
auto resumeSplitMethod{[&](){ auto resumeSplitMethod{[&](){
switch (resumeState.state) { switch (resumeState.state) {
case MethodResumeState::State::Inc: case MethodResumeState::State::Inc:
while (entry != pushBuffer.end() && resumeState.remaining) while (entry != pushBuffer.end() && resumeState.remaining) {
SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); SendFull(resumeState.address++, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
entry++;
}
break; break;
case MethodResumeState::State::OneInc: case MethodResumeState::State::OneInc:
SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); SendFull(resumeState.address++, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
entry++;
// After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries
resumeState.state = MethodResumeState::State::NonInc; resumeState.state = MethodResumeState::State::NonInc;
[[fallthrough]]; [[fallthrough]];
case MethodResumeState::State::NonInc: case MethodResumeState::State::NonInc:
while (entry != pushBuffer.end() && resumeState.remaining) while (entry != pushBuffer.end() && resumeState.remaining) {
SendFull(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0); SendFull(resumeState.address, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
entry++;
}
break; break;
} }
@ -275,7 +282,7 @@ namespace skyline::soc::gm20b {
// For pure oneinc methods we can send the initial method then send the rest as a span in one go // For pure oneinc methods we can send the initial method then send the rest as a span in one go
if (methodHeader.methodCount > (BatchCutoff + 1)) [[unlikely]] { if (methodHeader.methodCount > (BatchCutoff + 1)) [[unlikely]] {
SendPure(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel); SendPure(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel);
SendPureBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry) ,methodHeader.methodCount - 1), methodHeader.methodSubChannel); SendPureBatchNonInc(methodHeader.methodAddress + 1, span((++entry).base(), methodHeader.methodCount - 1), methodHeader.methodSubChannel);
entry += methodHeader.methodCount - 2; entry += methodHeader.methodCount - 2;
return false; return false;
@ -287,8 +294,10 @@ namespace skyline::soc::gm20b {
SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel); SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel);
} else { } else {
// Slow path for methods that touch GPFIFO or macros // Slow path for methods that touch GPFIFO or macros
for (u32 i{}; i < methodHeader.methodCount; i++) for (u32 i{}; i < methodHeader.methodCount; i++) {
SendFull(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); entry++;
SendFull(methodHeader.methodAddress + methodOffset(i), pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
}
} }
} else { } else {
startSplitMethod(State); startSplitMethod(State);
@ -311,7 +320,7 @@ namespace skyline::soc::gm20b {
if (methodHeader.Pure()) if (methodHeader.Pure())
SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel); SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel);
else else
SendFull(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true); SendFull(methodHeader.methodAddress, methodHeader.immdData, nullptr, methodHeader.methodSubChannel, true);
return false; return false;
} else if (methodHeader.secOp == PushBufferMethodHeader::SecOp::NonIncMethod) [[unlikely]] { } else if (methodHeader.secOp == PushBufferMethodHeader::SecOp::NonIncMethod) [[unlikely]] {

View File

@ -132,7 +132,7 @@ namespace skyline::soc::gm20b {
/** /**
* @brief Sends a method call to the appropriate subchannel and handles macro and GPFIFO methods * @brief Sends a method call to the appropriate subchannel and handles macro and GPFIFO methods
*/ */
void SendFull(u32 method, u32 argument, SubchannelId subchannel, bool lastCall); void SendFull(u32 method, u32 argument, u32 *argumentPtr, SubchannelId subchannel, bool lastCall);
/** /**
* @brief Sends a method call to the appropriate subchannel, macro and GPFIFO methods are not handled * @brief Sends a method call to the appropriate subchannel, macro and GPFIFO methods are not handled

View File

@ -1,11 +1,12 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "macro_state.h"
#include "soc/gm20b/engines/engine.h" #include "soc/gm20b/engines/engine.h"
#include "macro_interpreter.h" #include "macro_interpreter.h"
namespace skyline::soc::gm20b::engine { namespace skyline::soc::gm20b::engine {
MacroInterpreter::MacroInterpreter(span<u32> macroCode) : macroCode(macroCode) {} MacroInterpreter::MacroInterpreter(span<u32> macroCode) : macroCode{macroCode} {}
void MacroInterpreter::Execute(size_t offset, span<u32> args, MacroEngineBase *targetEngine) { void MacroInterpreter::Execute(size_t offset, span<u32> args, MacroEngineBase *targetEngine) {
// Reset the interpreter state // Reset the interpreter state

View File

@ -7,8 +7,19 @@
#include "macro_interpreter.h" #include "macro_interpreter.h"
namespace skyline::soc::gm20b { namespace skyline::soc::gm20b {
struct MacroArgument {
u32 argument;
u32 *argumentPtr;
MacroArgument(u32 argument, u32 *argumentPtr) : argument{argument}, argumentPtr{argumentPtr} {}
u32 operator*() const {
return argumentPtr ? *argumentPtr : argument;
}
};
namespace macro_hle { namespace macro_hle {
using Function = void (*)(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine); using Function = bool (*)(size_t offset, span<MacroArgument> args, engine::MacroEngineBase *targetEngine);
} }
/** /**
@ -24,12 +35,14 @@ namespace skyline::soc::gm20b {
std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro code memory, there can be a maximum of 0x80 macros at any one time std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro code memory, there can be a maximum of 0x80 macros at any one time
std::array<MacroHleEntry, 0x80> macroHleFunctions{}; //!< The HLE functions for each macro position, used to optionally override the interpreter std::array<MacroHleEntry, 0x80> macroHleFunctions{}; //!< The HLE functions for each macro position, used to optionally override the interpreter
std::vector<u32> argumentStorage; //!< Storage for the macro arguments during execution using the interpreter
bool invalidatePending{}; bool invalidatePending{};
MacroState() : macroInterpreter(macroCode) {} MacroState() : macroInterpreter{macroCode} {}
void Invalidate(); void Invalidate();
void Execute(u32 position, span<u32> args, engine::MacroEngineBase *targetEngine); void Execute(u32 position, span<MacroArgument> args, engine::MacroEngineBase *targetEngine);
}; };
} }