Drop 3D engine method calling fast path in GPFIFO

This ended up actually turning out to be a slow path when Maxwell 3D method handling code was inlined.
This commit is contained in:
Billy Laws 2022-08-31 15:20:56 +01:00
parent ded02e3eac
commit d810619203

View File

@ -6,8 +6,7 @@
#include <kernel/types/KProcess.h>
#include <soc.h>
#include <os.h>
#include "engines/maxwell_3d.h"
#include "engines/fermi_2d.h"
#include "channel.h"
namespace skyline::soc::gm20b {
/**
@ -95,7 +94,7 @@ namespace skyline::soc::gm20b {
} else {
switch (subChannel) {
case SubchannelId::ThreeD:
channelCtx.maxwell3D->HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
break;
case SubchannelId::TwoD:
channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
@ -108,9 +107,14 @@ namespace skyline::soc::gm20b {
}
void ChannelGpfifo::SendPure(u32 method, u32 argument, SubchannelId subChannel) {
if (subChannel == SubchannelId::ThreeD) [[likely]] {
channelCtx.maxwell3D.CallMethod(method, argument);
return;
}
switch (subChannel) {
case SubchannelId::ThreeD:
channelCtx.maxwell3D->CallMethod(method, argument);
channelCtx.maxwell3D.CallMethod(method, argument);
break;
case SubchannelId::Compute:
channelCtx.keplerCompute.CallMethod(method, argument);
@ -132,7 +136,7 @@ namespace skyline::soc::gm20b {
void ChannelGpfifo::SendPureBatchNonInc(u32 method, span<u32> arguments, SubchannelId subChannel) {
switch (subChannel) {
case SubchannelId::ThreeD:
channelCtx.maxwell3D->CallMethodBatchNonInc(method, arguments);
channelCtx.maxwell3D.CallMethodBatchNonInc(method, arguments);
break;
case SubchannelId::Compute:
channelCtx.keplerCompute.CallMethodBatchNonInc(method, arguments);
@ -237,62 +241,46 @@ namespace skyline::soc::gm20b {
/**
* @brief Handles execution of a specific method type as specified by the State template parameter
* @tparam ThreeDOnly Whether to skip subchannel method handling and send all method calls to the 3D engine
*/
auto dispatchCalls{[&]<bool ThreeDOnly, MethodResumeState::State State> () {
auto dispatchCalls{[&]<MethodResumeState::State State> () {
/**
* @brief Gets the offset to apply to the method address for a given dispatch loop index
*/
auto methodOffset{[] (u32 i) -> u32 {
switch (State) {
case MethodResumeState::State::Inc:
return i;
case MethodResumeState::State::OneInc:
return i ? 1 : 0;
case MethodResumeState::State::NonInc:
return 0;
}
if constexpr(State == MethodResumeState::State::Inc)
return i;
else if constexpr (State == MethodResumeState::State::OneInc)
return i ? 1 : 0;
else
return 0;
}};
constexpr u32 BatchCutoff{4}; //!< Cutoff needed to send method calls in a batch which is espcially important for UBO updates. This helps to avoid the extra overhead batching for small packets.
// TODO: Only batch for specific target methods like UBO updates, since normal dispatch is generally cheaper
if (remainingEntries >= methodHeader.methodCount) {
if (remainingEntries >= methodHeader.methodCount) { [[likely]]
if (methodHeader.Pure()) [[likely]] {
if constexpr (State == MethodResumeState::State::NonInc) {
// For pure noninc methods we can send all method calls as a span in one go
if (methodHeader.methodCount > BatchCutoff) {
if constexpr (ThreeDOnly)
channelCtx.maxwell3D->CallMethodBatchNonInc(methodHeader.methodAddress, span<u32>(&(*++entry), methodHeader.methodCount));
else
SendPureBatchNonInc(methodHeader.methodAddress, span(&(*++entry), methodHeader.methodCount), methodHeader.methodSubChannel);
if (methodHeader.methodCount > BatchCutoff) [[unlikely]] {
SendPureBatchNonInc(methodHeader.methodAddress, span(&(*++entry), methodHeader.methodCount), methodHeader.methodSubChannel);
entry += methodHeader.methodCount - 1;
return false;
}
} else if constexpr (State == MethodResumeState::State::OneInc) {
// For pure oneinc methods we can send the initial method then send the rest as a span in one go
if (methodHeader.methodCount > (BatchCutoff + 1)) {
if constexpr (ThreeDOnly) {
channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress, *++entry);
channelCtx.maxwell3D->CallMethodBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry), methodHeader.methodCount - 1));
} else {
SendPure(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel);
SendPureBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry) ,methodHeader.methodCount - 1), methodHeader.methodSubChannel);
}
if (methodHeader.methodCount > (BatchCutoff + 1)) [[unlikely]] {
SendPure(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel);
SendPureBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry) ,methodHeader.methodCount - 1), methodHeader.methodSubChannel);
entry += methodHeader.methodCount - 2;
return false;
}
}
for (u32 i{}; i < methodHeader.methodCount; i++) {
if constexpr (ThreeDOnly) {
channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress + methodOffset(i), *++entry);
} else {
SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel);
}
}
for (u32 i{}; i < methodHeader.methodCount; i++)
SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel);
} else {
// Slow path for methods that touch GPFIFO or macros
for (u32 i{}; i < methodHeader.methodCount; i++)
@ -308,26 +296,22 @@ namespace skyline::soc::gm20b {
/**
* @brief Handles execution of a single method
* @tparam ThreeDOnly Whether to skip subchannel method handling and send all method calls to the 3D engine
* @return If the this was the final method in the current GpEntry
*/
auto processMethod{[&] <bool ThreeDOnly> () -> bool {
auto processMethod{[&] () -> bool {
switch (methodHeader.secOp) {
case PushBufferMethodHeader::SecOp::IncMethod:
return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::Inc>();
return dispatchCalls.operator()<MethodResumeState::State::Inc>();
case PushBufferMethodHeader::SecOp::NonIncMethod:
return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::NonInc>();
return dispatchCalls.operator()<MethodResumeState::State::NonInc>();
case PushBufferMethodHeader::SecOp::OneInc:
return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::OneInc>();
return dispatchCalls.operator()<MethodResumeState::State::OneInc>();
case PushBufferMethodHeader::SecOp::ImmdDataMethod:
if (methodHeader.Pure()) {
if constexpr (ThreeDOnly)
channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress, methodHeader.immdData);
else
SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel);
} else {
if (methodHeader.Pure())
SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel);
else
SendFull(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
}
return false;
case PushBufferMethodHeader::SecOp::EndPbSegment:
return true;
@ -337,12 +321,9 @@ namespace skyline::soc::gm20b {
}};
bool hitEnd{[&]() {
if (methodHeader.methodSubChannel == SubchannelId::ThreeD) { [[likely]]
return processMethod.operator()<true>();
} else {
channelCtx.maxwell3D->FlushEngineState(); // Flush the 3D engine state when doing any calls to other engines
return processMethod.operator()<false>();
}
if (methodHeader.methodSubChannel != SubchannelId::ThreeD) [[unlikely]]
channelCtx.maxwell3D.FlushEngineState(); // Flush the 3D engine state when doing any calls to other engines
return processMethod();
}()};
if (hitEnd)