Drop 3D engine method calling fast path in GPFIFO

This actually turned out to be a slow path once the Maxwell 3D method handling code was inlined.
This commit is contained in:
Billy Laws 2022-08-31 15:20:56 +01:00
parent ded02e3eac
commit d810619203

View File

@ -6,8 +6,7 @@
#include <kernel/types/KProcess.h> #include <kernel/types/KProcess.h>
#include <soc.h> #include <soc.h>
#include <os.h> #include <os.h>
#include "engines/maxwell_3d.h" #include "channel.h"
#include "engines/fermi_2d.h"
namespace skyline::soc::gm20b { namespace skyline::soc::gm20b {
/** /**
@ -95,7 +94,7 @@ namespace skyline::soc::gm20b {
} else { } else {
switch (subChannel) { switch (subChannel) {
case SubchannelId::ThreeD: case SubchannelId::ThreeD:
channelCtx.maxwell3D->HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall); channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
break; break;
case SubchannelId::TwoD: case SubchannelId::TwoD:
channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall); channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
@ -108,9 +107,14 @@ namespace skyline::soc::gm20b {
} }
void ChannelGpfifo::SendPure(u32 method, u32 argument, SubchannelId subChannel) { void ChannelGpfifo::SendPure(u32 method, u32 argument, SubchannelId subChannel) {
if (subChannel == SubchannelId::ThreeD) [[likely]] {
channelCtx.maxwell3D.CallMethod(method, argument);
return;
}
switch (subChannel) { switch (subChannel) {
case SubchannelId::ThreeD: case SubchannelId::ThreeD:
channelCtx.maxwell3D->CallMethod(method, argument); channelCtx.maxwell3D.CallMethod(method, argument);
break; break;
case SubchannelId::Compute: case SubchannelId::Compute:
channelCtx.keplerCompute.CallMethod(method, argument); channelCtx.keplerCompute.CallMethod(method, argument);
@ -132,7 +136,7 @@ namespace skyline::soc::gm20b {
void ChannelGpfifo::SendPureBatchNonInc(u32 method, span<u32> arguments, SubchannelId subChannel) { void ChannelGpfifo::SendPureBatchNonInc(u32 method, span<u32> arguments, SubchannelId subChannel) {
switch (subChannel) { switch (subChannel) {
case SubchannelId::ThreeD: case SubchannelId::ThreeD:
channelCtx.maxwell3D->CallMethodBatchNonInc(method, arguments); channelCtx.maxwell3D.CallMethodBatchNonInc(method, arguments);
break; break;
case SubchannelId::Compute: case SubchannelId::Compute:
channelCtx.keplerCompute.CallMethodBatchNonInc(method, arguments); channelCtx.keplerCompute.CallMethodBatchNonInc(method, arguments);
@ -237,34 +241,28 @@ namespace skyline::soc::gm20b {
/** /**
* @brief Handles execution of a specific method type as specified by the State template parameter * @brief Handles execution of a specific method type as specified by the State template parameter
* @tparam ThreeDOnly Whether to skip subchannel method handling and send all method calls to the 3D engine
*/ */
auto dispatchCalls{[&]<bool ThreeDOnly, MethodResumeState::State State> () { auto dispatchCalls{[&]<MethodResumeState::State State> () {
/** /**
* @brief Gets the offset to apply to the method address for a given dispatch loop index * @brief Gets the offset to apply to the method address for a given dispatch loop index
*/ */
auto methodOffset{[] (u32 i) -> u32 { auto methodOffset{[] (u32 i) -> u32 {
switch (State) { if constexpr(State == MethodResumeState::State::Inc)
case MethodResumeState::State::Inc:
return i; return i;
case MethodResumeState::State::OneInc: else if constexpr (State == MethodResumeState::State::OneInc)
return i ? 1 : 0; return i ? 1 : 0;
case MethodResumeState::State::NonInc: else
return 0; return 0;
}
}}; }};
constexpr u32 BatchCutoff{4}; //!< Cutoff needed to send method calls in a batch which is especially important for UBO updates. This helps to avoid the extra overhead of batching for small packets. constexpr u32 BatchCutoff{4}; //!< Cutoff needed to send method calls in a batch which is especially important for UBO updates. This helps to avoid the extra overhead of batching for small packets.
// TODO: Only batch for specific target methods like UBO updates, since normal dispatch is generally cheaper // TODO: Only batch for specific target methods like UBO updates, since normal dispatch is generally cheaper
if (remainingEntries >= methodHeader.methodCount) { if (remainingEntries >= methodHeader.methodCount) { [[likely]]
if (methodHeader.Pure()) [[likely]] { if (methodHeader.Pure()) [[likely]] {
if constexpr (State == MethodResumeState::State::NonInc) { if constexpr (State == MethodResumeState::State::NonInc) {
// For pure noninc methods we can send all method calls as a span in one go // For pure noninc methods we can send all method calls as a span in one go
if (methodHeader.methodCount > BatchCutoff) { if (methodHeader.methodCount > BatchCutoff) [[unlikely]] {
if constexpr (ThreeDOnly)
channelCtx.maxwell3D->CallMethodBatchNonInc(methodHeader.methodAddress, span<u32>(&(*++entry), methodHeader.methodCount));
else
SendPureBatchNonInc(methodHeader.methodAddress, span(&(*++entry), methodHeader.methodCount), methodHeader.methodSubChannel); SendPureBatchNonInc(methodHeader.methodAddress, span(&(*++entry), methodHeader.methodCount), methodHeader.methodSubChannel);
entry += methodHeader.methodCount - 1; entry += methodHeader.methodCount - 1;
@ -272,27 +270,17 @@ namespace skyline::soc::gm20b {
} }
} else if constexpr (State == MethodResumeState::State::OneInc) { } else if constexpr (State == MethodResumeState::State::OneInc) {
// For pure oneinc methods we can send the initial method then send the rest as a span in one go // For pure oneinc methods we can send the initial method then send the rest as a span in one go
if (methodHeader.methodCount > (BatchCutoff + 1)) { if (methodHeader.methodCount > (BatchCutoff + 1)) [[unlikely]] {
if constexpr (ThreeDOnly) {
channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress, *++entry);
channelCtx.maxwell3D->CallMethodBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry), methodHeader.methodCount - 1));
} else {
SendPure(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel); SendPure(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel);
SendPureBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry) ,methodHeader.methodCount - 1), methodHeader.methodSubChannel); SendPureBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry) ,methodHeader.methodCount - 1), methodHeader.methodSubChannel);
}
entry += methodHeader.methodCount - 2; entry += methodHeader.methodCount - 2;
return false; return false;
} }
} }
for (u32 i{}; i < methodHeader.methodCount; i++) { for (u32 i{}; i < methodHeader.methodCount; i++)
if constexpr (ThreeDOnly) {
channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress + methodOffset(i), *++entry);
} else {
SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel); SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel);
}
}
} else { } else {
// Slow path for methods that touch GPFIFO or macros // Slow path for methods that touch GPFIFO or macros
for (u32 i{}; i < methodHeader.methodCount; i++) for (u32 i{}; i < methodHeader.methodCount; i++)
@ -308,26 +296,22 @@ namespace skyline::soc::gm20b {
/** /**
* @brief Handles execution of a single method * @brief Handles execution of a single method
* @tparam ThreeDOnly Whether to skip subchannel method handling and send all method calls to the 3D engine
* @return If this was the final method in the current GpEntry * @return If this was the final method in the current GpEntry
*/ */
auto processMethod{[&] <bool ThreeDOnly> () -> bool { auto processMethod{[&] () -> bool {
switch (methodHeader.secOp) { switch (methodHeader.secOp) {
case PushBufferMethodHeader::SecOp::IncMethod: case PushBufferMethodHeader::SecOp::IncMethod:
return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::Inc>(); return dispatchCalls.operator()<MethodResumeState::State::Inc>();
case PushBufferMethodHeader::SecOp::NonIncMethod: case PushBufferMethodHeader::SecOp::NonIncMethod:
return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::NonInc>(); return dispatchCalls.operator()<MethodResumeState::State::NonInc>();
case PushBufferMethodHeader::SecOp::OneInc: case PushBufferMethodHeader::SecOp::OneInc:
return dispatchCalls.operator()<ThreeDOnly, MethodResumeState::State::OneInc>(); return dispatchCalls.operator()<MethodResumeState::State::OneInc>();
case PushBufferMethodHeader::SecOp::ImmdDataMethod: case PushBufferMethodHeader::SecOp::ImmdDataMethod:
if (methodHeader.Pure()) { if (methodHeader.Pure())
if constexpr (ThreeDOnly)
channelCtx.maxwell3D->CallMethod(methodHeader.methodAddress, methodHeader.immdData);
else
SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel); SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel);
} else { else
SendFull(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true); SendFull(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
}
return false; return false;
case PushBufferMethodHeader::SecOp::EndPbSegment: case PushBufferMethodHeader::SecOp::EndPbSegment:
return true; return true;
@ -337,12 +321,9 @@ namespace skyline::soc::gm20b {
}}; }};
bool hitEnd{[&]() { bool hitEnd{[&]() {
if (methodHeader.methodSubChannel == SubchannelId::ThreeD) { [[likely]] if (methodHeader.methodSubChannel != SubchannelId::ThreeD) [[unlikely]]
return processMethod.operator()<true>(); channelCtx.maxwell3D.FlushEngineState(); // Flush the 3D engine state when doing any calls to other engines
} else { return processMethod();
channelCtx.maxwell3D->FlushEngineState(); // Flush the 3D engine state when doing any calls to other engines
return processMethod.operator()<false>();
}
}()}; }()};
if (hitEnd) if (hitEnd)