Avoid dereferencing macro argument pointers in memory where possible

Indirect draws are implemented by having the macro arguments overflow into a separate GP Entry that points directly to the indirect argument buffer. To HLE indirect draws, a buffer needs to be created from this pointer, and it cannot be dereferenced on the CPU at any point to avoid hitting traps.
This commit is contained in:
Billy Laws 2023-02-04 22:38:50 +00:00
parent 2b93604da0
commit b313dcbdca
6 changed files with 51 additions and 24 deletions

View File

@ -18,7 +18,7 @@ namespace skyline::soc::gm20b::engine {
MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}
void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) {
void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, u32 *argumentPtr, bool lastCall) {
// Starting a new macro at index 'macroMethodOffset / 2'
if (!(macroMethodOffset & 1)) {
// Flush the current macro as we are switching to another one
@ -31,7 +31,7 @@ namespace skyline::soc::gm20b::engine {
macroInvocation.index = (macroMethodOffset / 2) % macroState.macroPositions.size();
}
macroInvocation.arguments.emplace_back(argument);
macroInvocation.arguments.emplace_back(argument, argumentPtr);
// Flush macro after all of the data in the method call has been sent
if (lastCall && macroInvocation.Valid()) {

View File

@ -80,7 +80,7 @@ namespace skyline::soc::gm20b::engine {
struct {
u32 index{std::numeric_limits<u32>::max()};
std::vector<u32> arguments;
std::vector<MacroArgument> arguments;
bool Valid() {
return index != std::numeric_limits<u32>::max();
@ -114,10 +114,14 @@ namespace skyline::soc::gm20b::engine {
throw exception("DrawIndexedInstanced is not implemented for this engine");
}
/**
 * @brief Fallback handler for an indexed indirect draw, this base version unconditionally throws
 * @note NOTE(review): being virtual, this is presumably overridden by engines that can perform indirect draws - confirm against the derived engines
 * @note NOTE(review): indirectBuffer looks like the raw indirect argument data with count draws spaced stride bytes apart - confirm against callers
 */
virtual void DrawIndexedIndirect(u32 drawTopology, span<u8> indirectBuffer, u32 count, u32 stride) {
throw exception("DrawIndexedIndirect is not implemented for this engine");
}
/**
* @brief Handles a call to a method in the MME space
* @param macroMethodOffset The target offset from EngineMethodsEnd
*/
void HandleMacroCall(u32 macroMethodOffset, u32 value, bool lastCall);
void HandleMacroCall(u32 macroMethodOffset, u32 argument, u32 *argumentPtr, bool lastCall);
};
}

View File

@ -88,21 +88,21 @@ namespace skyline::soc::gm20b {
gpEntries(numEntries),
thread(std::thread(&ChannelGpfifo::Run, this)) {}
void ChannelGpfifo::SendFull(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) {
void ChannelGpfifo::SendFull(u32 method, u32 argument, u32 *argumentPtr, SubchannelId subChannel, bool lastCall) {
if (method < engine::GPFIFO::RegisterCount) {
gpfifoEngine.CallMethod(method, argument);
gpfifoEngine.CallMethod(method, argumentPtr ? *argumentPtr : argument);
} else if (method < engine::EngineMethodsEnd) { [[likely]]
SendPure(method, argument, subChannel);
SendPure(method, argumentPtr ? *argumentPtr : argument, subChannel);
} else {
switch (subChannel) {
case SubchannelId::ThreeD:
channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, argumentPtr, lastCall);
break;
case SubchannelId::TwoD:
channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, argumentPtr, lastCall);
break;
default:
Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argumentPtr ? *argumentPtr : argument);
break;
}
}
@ -172,6 +172,7 @@ namespace skyline::soc::gm20b {
if (channelCtx.executor.usageTracker.dirtyIntervals.Intersect(range))
channelCtx.executor.Submit({}, true);
bool pushBufferCopied{}; //!< Set by the below lambda in order to track if the pushbuffer is a copy of guest memory or not
auto pushBuffer{[&]() -> span<u32> {
if (pushBufferMappedRanges.size() == 1) {
return pushBufferMappedRanges.front().cast<u32>();
@ -179,6 +180,7 @@ namespace skyline::soc::gm20b {
// Create an intermediate copy of pushbuffer data if it's split across multiple mappings
pushBufferData.resize(gpEntry.size);
channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
pushBufferCopied = true;
return span(pushBufferData);
}
}()};
@ -190,19 +192,24 @@ namespace skyline::soc::gm20b {
auto resumeSplitMethod{[&](){
switch (resumeState.state) {
case MethodResumeState::State::Inc:
while (entry != pushBuffer.end() && resumeState.remaining)
SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
while (entry != pushBuffer.end() && resumeState.remaining) {
SendFull(resumeState.address++, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
entry++;
}
break;
case MethodResumeState::State::OneInc:
SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
SendFull(resumeState.address++, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
entry++;
// After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries
resumeState.state = MethodResumeState::State::NonInc;
[[fallthrough]];
case MethodResumeState::State::NonInc:
while (entry != pushBuffer.end() && resumeState.remaining)
SendFull(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
while (entry != pushBuffer.end() && resumeState.remaining) {
SendFull(resumeState.address, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
entry++;
}
break;
}
@ -275,7 +282,7 @@ namespace skyline::soc::gm20b {
// For pure oneinc methods we can send the initial method then send the rest as a span in one go
if (methodHeader.methodCount > (BatchCutoff + 1)) [[unlikely]] {
SendPure(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel);
SendPureBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry) ,methodHeader.methodCount - 1), methodHeader.methodSubChannel);
SendPureBatchNonInc(methodHeader.methodAddress + 1, span((++entry).base(), methodHeader.methodCount - 1), methodHeader.methodSubChannel);
entry += methodHeader.methodCount - 2;
return false;
@ -287,8 +294,10 @@ namespace skyline::soc::gm20b {
SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel);
} else {
// Slow path for methods that touch GPFIFO or macros
for (u32 i{}; i < methodHeader.methodCount; i++)
SendFull(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
for (u32 i{}; i < methodHeader.methodCount; i++) {
entry++;
SendFull(methodHeader.methodAddress + methodOffset(i), pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
}
}
} else {
startSplitMethod(State);
@ -311,7 +320,7 @@ namespace skyline::soc::gm20b {
if (methodHeader.Pure())
SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel);
else
SendFull(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
SendFull(methodHeader.methodAddress, methodHeader.immdData, nullptr, methodHeader.methodSubChannel, true);
return false;
} else if (methodHeader.secOp == PushBufferMethodHeader::SecOp::NonIncMethod) [[unlikely]] {

View File

@ -132,7 +132,7 @@ namespace skyline::soc::gm20b {
/**
* @brief Sends a method call to the appropriate subchannel and handles macro and GPFIFO methods
*/
void SendFull(u32 method, u32 argument, SubchannelId subchannel, bool lastCall);
void SendFull(u32 method, u32 argument, u32 *argumentPtr, SubchannelId subchannel, bool lastCall);
/**
* @brief Sends a method call to the appropriate subchannel, macro and GPFIFO methods are not handled

View File

@ -1,11 +1,12 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "macro_state.h"
#include "soc/gm20b/engines/engine.h"
#include "macro_interpreter.h"
namespace skyline::soc::gm20b::engine {
MacroInterpreter::MacroInterpreter(span<u32> macroCode) : macroCode(macroCode) {}
MacroInterpreter::MacroInterpreter(span<u32> macroCode) : macroCode{macroCode} {}
void MacroInterpreter::Execute(size_t offset, span<u32> args, MacroEngineBase *targetEngine) {
// Reset the interpreter state

View File

@ -7,8 +7,19 @@
#include "macro_interpreter.h"
namespace skyline::soc::gm20b {
/**
 * @brief A single macro argument, carried as an immediate value together with an optional pointer to the value
 * @note Dereferencing with operator* reads through argumentPtr when it is set and falls back to the immediate otherwise
 */
struct MacroArgument {
    u32 argument; //!< Immediate value, returned by operator* only when argumentPtr is null
    u32 *argumentPtr; //!< Optional location of the value, takes priority over the immediate when non-null

    MacroArgument(u32 argument, u32 *argumentPtr) : argument{argument}, argumentPtr{argumentPtr} {}

    /**
     * @return The value read through argumentPtr if it is non-null, the stored immediate otherwise
     */
    u32 operator*() const {
        if (argumentPtr != nullptr)
            return *argumentPtr;

        return argument;
    }
};
namespace macro_hle {
using Function = void (*)(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine);
using Function = bool (*)(size_t offset, span<MacroArgument> args, engine::MacroEngineBase *targetEngine);
}
/**
@ -24,12 +35,14 @@ namespace skyline::soc::gm20b {
std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro code memory, there can be a maximum of 0x80 macros at any one time
std::array<MacroHleEntry, 0x80> macroHleFunctions{}; //!< The HLE functions for each macro position, used to optionally override the interpreter
std::vector<u32> argumentStorage; //!< Storage for the macro arguments during execution using the interpreter
bool invalidatePending{};
MacroState() : macroInterpreter(macroCode) {}
MacroState() : macroInterpreter{macroCode} {}
void Invalidate();
void Execute(u32 position, span<u32> args, engine::MacroEngineBase *targetEngine);
void Execute(u32 position, span<MacroArgument> args, engine::MacroEngineBase *targetEngine);
};
}