diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index cc1dc638..6c6a7387 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -184,6 +184,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp ${source_DIR}/skyline/soc/gm20b/engines/inline2memory.cpp + ${source_DIR}/skyline/soc/gm20b/engines/kepler_compute.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp ${source_DIR}/skyline/input/npad.cpp ${source_DIR}/skyline/input/npad_device.cpp diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp index 47fefa26..29ab19c5 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp @@ -9,6 +9,7 @@ namespace skyline::soc::gm20b { : asCtx(std::move(pAsCtx)), executor(state), maxwell3D(std::make_unique(state, *this, macroState, executor)), + keplerCompute(state, *this), inline2Memory(asCtx), gpfifo(state, *this, numEntries) {} } diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.h b/app/src/main/cpp/skyline/soc/gm20b/channel.h index 28c1ff12..f0819d06 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/channel.h +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.h @@ -6,6 +6,7 @@ #include #include "macro/macro_state.h" #include "engines/engine.h" +#include "engines/kepler_compute.h" #include "engines/inline2memory.h" #include "gpfifo.h" @@ -25,6 +26,7 @@ namespace skyline::soc::gm20b { gpu::interconnect::CommandExecutor executor; MacroState macroState; std::unique_ptr maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file + engine::KeplerCompute keplerCompute; engine::Inline2Memory inline2Memory; ChannelGpfifo gpfifo; diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute.cpp new file mode 100644 index 00000000..1626cf79 --- /dev/null +++ 
b/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute.cpp
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include <soc.h> // NOTE(review): both system include targets were missing from the reviewed text and are restored from what the code below uses (state.soc, ChannelContext) - confirm
+#include <soc/gm20b/channel.h>
+#include "kepler_compute/qmd.h"
+#include "kepler_compute.h"
+
+namespace skyline::soc::gm20b::engine {
+    KeplerCompute::KeplerCompute(const DeviceState &state, ChannelContext &channelCtx)
+        : syncpoints(state.soc->host1x.syncpoints), i2m(channelCtx.asCtx) {}
+
+    /**
+     * @brief Logs the call and then forwards it to HandleMethod
+     */
+    __attribute__((always_inline)) void KeplerCompute::CallMethod(u32 method, u32 argument) {
+        Logger::Verbose("Called method in Kepler compute: 0x{:X} args: 0x{:X}", method, argument);
+
+        HandleMethod(method, argument);
+    }
+
+// Both macros yield a register's position in u32 units for use as a case label, they are fully parenthesised so that each expansion stays a single operand
+#define KEPLER_COMPUTE_OFFSET(field) ((sizeof(typeof(Registers::field)) - sizeof(std::remove_reference_t<decltype(*Registers::field)>)) / sizeof(u32))
+#define KEPLER_COMPUTE_STRUCT_OFFSET(field, member) (KEPLER_COMPUTE_OFFSET(field) + U32_OFFSET(std::remove_reference_t<decltype(*Registers::field)>, member))
+
+    /**
+     * @brief Stores the argument into registers.raw at the method's index and then runs the side effect attached to that method, if there is one
+     * @note NOTE(review): method indexes registers.raw without a range check here, presumably the caller only passes methods below EngineMethodsEnd - confirm against the GPFIFO dispatch
+     */
+    void KeplerCompute::HandleMethod(u32 method, u32 argument) {
+        registers.raw[method] = argument;
+
+        switch (method) {
+            case KEPLER_COMPUTE_STRUCT_OFFSET(i2m, launchDma):
+                i2m.LaunchDma(*registers.i2m);
+                return;
+            case KEPLER_COMPUTE_STRUCT_OFFSET(i2m, loadInlineData):
+                i2m.LoadInlineData(*registers.i2m, argument);
+                return;
+            case KEPLER_COMPUTE_OFFSET(sendSignalingPcasB):
+                Logger::Warn("Attempted to execute compute kernel!");
+                return;
+            case KEPLER_COMPUTE_STRUCT_OFFSET(reportSemaphore, action):
+                throw exception("Compute semaphores are unimplemented!");
+            default:
+                return;
+        }
+    }
+
+    /**
+     * @brief Handles several arguments aimed at one method: inline data is handed to the I2M backend as a whole span without writing registers.raw, any other method is replayed through HandleMethod one argument at a time
+     */
+    void KeplerCompute::CallMethodBatchNonInc(u32 method, span<u32> arguments) {
+        switch (method) {
+            case KEPLER_COMPUTE_STRUCT_OFFSET(i2m, loadInlineData):
+                i2m.LoadInlineData(*registers.i2m, arguments);
+                return;
+            default:
+                break;
+        }
+
+        for (u32 argument : arguments)
+            HandleMethod(method, argument);
+    }
+
+#undef KEPLER_COMPUTE_STRUCT_OFFSET
+#undef KEPLER_COMPUTE_OFFSET
+}
diff --git 
a/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute.h b/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute.h
new file mode 100644
index 00000000..42cb7582
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute.h
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
+
+#pragma once
+
+#include "engine.h"
+#include "inline2memory.h"
+
+namespace skyline::soc::gm20b {
+    struct ChannelContext;
+}
+
+namespace skyline::soc::gm20b::engine {
+    /**
+     * @brief The Kepler Compute Engine is used to execute compute jobs on the GPU
+     */
+    class KeplerCompute {
+      private:
+        host1x::SyncpointSet &syncpoints;
+        Inline2MemoryBackend i2m; //!< Receives the launchDma and loadInlineData writes made through the i2m registers
+
+        void HandleMethod(u32 method, u32 argument); //!< Writes the argument into the register file and runs the method's side effect, if it has one
+
+      public:
+        /**
+         * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_compute.def
+         */
+        #pragma pack(push, 1)
+        union Registers {
+            std::array<u32, EngineMethodsEnd> raw; //!< One u32 slot per method, sized to agree with the static_assert that follows the union
+
+            template<size_t Offset, typename Type>
+            using Register = util::OffsetMember<Offset, Type, u32>;
+
+            Register<0x44, u32> waitForIdle;
+            Register<0x60, Inline2MemoryBackend::RegisterState> i2m;
+            Register<0x85, u32> setShaderSharedMemoryWindow;
+
+            struct InvalidateShaderCaches {
+                bool instruction : 1;
+                bool locks : 1;
+                bool flushData : 1;
+                u8 _pad0_ : 1;
+                bool data : 1;
+                u8 _pad1_ : 7;
+                bool constant : 1;
+                u32 _pad2_ : 19;
+            };
+            static_assert(sizeof(InvalidateShaderCaches) == 0x4);
+
+            Register<0x87, InvalidateShaderCaches> invalidateShaderCaches;
+
+            struct SendPcas {
+                u32 qmdAddressShifted8; //!< NOTE(review): presumably the QMD address shifted right by 8 bits - confirm
+                u32 from : 24;
+                u8 delta;
+            };
+            static_assert(sizeof(SendPcas) == 0x8);
+
+            Register<0xAD, SendPcas> sendPcas;
+
+            struct SendSignalingPcasB {
+                bool invalidate : 1;
+                bool schedule : 1;
+                u32 _pad_ : 30;
+            };
+            static_assert(sizeof(SendSignalingPcasB) == 0x4);
+
+            Register<0xAF, SendSignalingPcasB> sendSignalingPcasB; //!< Writing this register is where a kernel launch would happen, it is only logged as a warning for now
+
+            struct ShaderLocalMemory {
+                u8 sizeUpper;
+                u32 _pad0_ : 24;
+                u32 sizeLower;
+                u16 maxSmCount : 9;
+                u32 _pad1_ : 23;
+            };
+            static_assert(sizeof(ShaderLocalMemory) == 0xC);
+
+            Register<0xB9, ShaderLocalMemory> shaderLocalMemoryNonThrottled;
+            Register<0xBC, ShaderLocalMemory> shaderLocalMemoryThrottled;
+
+            struct SpaVersion {
+                u8 minor;
+                u8 major;
+                u16 _pad_;
+            };
+            static_assert(sizeof(SpaVersion) == 0x4);
+
+            Register<0xC4, SpaVersion> spaVersion;
+
+            Register<0x1DF, u32> shaderLocalMemoryWindow;
+            Register<0x1E4, Address> shaderLocalMemory;
+
+            Register<0x54A, u32> shaderExceptions;
+
+            Register<0x557, Address> texSamplerPool;
+            Register<0x559, u32> texSamplerPoolMaximumIndex;
+            Register<0x55D, Address> texHeaderPool;
+            Register<0x55F, u32> texHeaderPoolMaximumIndex;
+
+            Register<0x582, Address> programRegion;
+
+            struct ReportSemaphore {
+                enum class Op : u8 {
+                    Release = 0,
+                    Trap = 3
+                };
+
+                enum class ReductionOp : u8 {
+                    Add = 0,
+                    Min = 1,
+                    Max = 2,
+                    Inc = 3,
+                    Dec = 4,
+                    And = 5,
+                    Or = 6,
+                    Xor = 7
+                };
+
+                enum class Format : u8 {
+                    Unsigned32 = 0,
+                    Signed32 = 1
+                };
+
+                enum class StructureSize : u8 {
+                    FourWords = 0,
+                    OneWord = 1
+                };
+
+                Address offset;
+                u32 payload;
+                struct {
+                    Op op : 2;
+                    bool flushDisable : 1;
+                    bool reductionEnable : 1;
+                    u8 _pad0_ : 5;
+                    ReductionOp reductionOp : 3;
+                    u8 _pad1_ : 5;
+                    Format format : 2;
+                    u8 _pad2_ : 1;
+                    bool awakenEnable : 1;
+                    u8 _pad3_ : 7;
+                    StructureSize structureSize : 1;
+                    u8 _pad4_ : 3;
+                } action; //!< Writing this word currently throws as compute semaphores are unimplemented
+            };
+            static_assert(sizeof(ReportSemaphore) == 0x10);
+
+            Register<0x6C0, ReportSemaphore> reportSemaphore;
+
+            struct BindlessTexture {
+                u8 constantBufferSlotSelect : 3;
+                u32 _pad_ : 29;
+            };
+            static_assert(sizeof(BindlessTexture) == 0x4);
+
+            Register<0x982, BindlessTexture> bindlessTexture;
+        } registers{};
+        static_assert(sizeof(Registers) == (EngineMethodsEnd * 0x4)); //!< The union must span exactly one 4-byte slot per engine method
+        #pragma pack(pop)
+
+        KeplerCompute(const DeviceState &state, ChannelContext &channelCtx);
+
+        void CallMethod(u32 method, u32 argument); //!< Logs and handles a single method write
+
+        void CallMethodBatchNonInc(u32 method, span<u32> arguments); //!< Handles several arguments that all target the same method
+    };
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute/qmd.h b/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute/qmd.h
new file mode 100644
index 00000000..efb79135
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/kepler_compute/qmd.h
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
+
+#pragma once
+
+#include <common.h> // NOTE(review): the include target was missing from the reviewed text, restored on the assumption that common.h supplies u32 - confirm
+
+namespace skyline::soc::gm20b::engine::kepler_compute {
+    #pragma pack(push, 1)
+
+    /**
+     * @brief Holds the 'Compute Queue Metadata' structure which encapsulates the state needed to execute a compute task
+     * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/compute_qmd.h
+     */
+    struct QMD {
+        enum class DependentQmdType : u32 {
+            Queue = 0,
+            Grid = 1
+        };
+
+        enum class ReleaseMemBarType : u32 {
+            FeNone = 0,
+            FeSysmem = 1
+        };
+
+        enum class CwdMemBarType : u32 {
+            L1None = 0,
+            L1SysmemBar = 1,
+            L1MemBar = 2
+        };
+
+        enum class Fp32NanBehaviour : u32 {
+            Legacy = 0,
+            Fp64Compatible = 1
+        };
+
+        enum class Fp32F2iNanBehavior : u32 {
+            PassZero = 0,
+            PassIndefinite = 1
+        };
+
+        enum class ApiVisibleCallLimit : u32 {
+            ThirtyTwo = 0,
+            NoCheck = 1
+        };
+
+        enum class SharedMemoryBankMapping : u32 {
+            FourBytesPerBank = 0,
+            EightBytesPerBank = 1
+        };
+
+        enum class SamplerIndex : u32 {
+            Independently = 0,
+            ViaHeaderIndex = 1
+        };
+
+        enum class Fp32NarrowInstruction : u32 {
+            KeepDenorms = 0,
+            FlushDenorms = 1
+        };
+
+        enum class L1Configuration : u32 {
+            DirectlyAddressableMemorySize16Kb = 0,
+            DirectlyAddressableMemorySize32Kb = 1,
+            DirectlyAddressableMemorySize48Kb = 2
+        };
+
+        enum class ReductionOp : u32 {
+            RedAdd = 0,
+            RedMin = 1,
+            RedMax = 2,
+            RedInc = 3,
+            RedDec = 4,
+            RedAnd = 5,
+            RedOr = 6,
+            RedXor = 7
+        };
+
+        enum class ReductionFormat : u32 {
+            Unsigned32 = 0,
+            Signed32 = 1
+        };
+
+        enum class StructureSize : u32 {
+            FourWords = 0,
+            OneWord = 1
+        };
+
+        u32 outerPut : 31;
+        u32 outerOverflow : 1;
+        u32 outerGet : 31;
+        u32 outerStickyOverflow : 1;
+
+        u32 innerGet : 31;
+        u32 innerOverflow : 1;
+        u32 innerPut : 31;
+        u32 innerStickyOverflow : 1;
+
+        u32 qmdReservedAA;
+
+        u32 dependentQmdPointer;
+
+        u32 qmdGroupId : 6;
+
+        u32 smGlobalCachingEnable : 1;
+
+        u32 runCtaInOneSmPartition : 1;
+
+        u32 isQueue : 1;
+
+        u32 addToHeadOfQmdGroupLinkedList : 1;
+
+        u32 semaphoreReleaseEnable0 : 1;
+        u32 semaphoreReleaseEnable1 : 1;
+
+        u32 requireSchedulingPcas : 1;
+        u32 dependentQmdScheduleEnable : 1;
+        DependentQmdType dependentQmdType : 1;
+        u32 dependentQmdFieldCopy : 1;
+
+        u32 qmdReservedB : 16;
+
+        u32 circularQueueSize : 25;
+
+        u32 qmdReservedC : 1;
+
+        u32 invalidateTextureHeaderCache : 1;
+        u32 invalidateTextureSamplerCache : 1;
+        u32 invalidateTextureDataCache : 1;
+        u32 invalidateShaderDataCache : 1;
+        u32 invalidateInstructionCache : 1;
+        u32 invalidateShaderConstantCache : 1;
+
+        u32 programOffset;
+
+        u32 circularQueueAddrLower;
+        u32 circularQueueAddrUpper : 8;
+
+        u32 qmdReservedD : 8;
+
+        u32 circularQueueEntrySize : 16;
+
+        u32 cwdReferenceCountId : 6;
+        u32 cwdReferenceCountDeltaMinusOne : 8;
+
+        ReleaseMemBarType releaseMembarType : 1;
+
+        u32 cwdReferenceCountIncrEnable : 1;
+        CwdMemBarType cwdMembarType : 2;
+
+        u32 sequentiallyRunCtas : 1;
+
+        u32 cwdReferenceCountDecrEnable : 1;
+
+        u32 throttled : 1;
+
+        u32 _pad0_ : 3;
+
+        Fp32NanBehaviour fp32NanBehavior : 1;
+
+        Fp32F2iNanBehavior fp32F2iNanBehavior : 1;
+
+        ApiVisibleCallLimit apiVisibleCallLimit : 1;
+
+        SharedMemoryBankMapping sharedMemoryBankMapping : 1;
+
+        u32 _pad1_ : 2;
+
+        SamplerIndex samplerIndex : 1;
+
+        Fp32NarrowInstruction fp32NarrowInstruction : 1;
+
+        u32 ctaRasterWidth;
+        u32 ctaRasterHeight : 16;
+        u32 ctaRasterDepth : 16;
+
+        u32 ctaRasterWidthResume;
+        u32 ctaRasterHeightResume : 16;
+        u32 ctaRasterDepthResume : 16;
+
+        u32 queueEntriesPerCtaMinusOne : 7;
+
+        u32 _pad2_ : 3;
+
+        u32 coalesceWaitingPeriod : 8;
+
+        u32 _pad3_ : 14;
+
+        u32 sharedMemorySize : 18;
+
+        u32 qmdReservedG : 14;
+
+        u32 qmdVersion : 4;
+        u32 qmdMajorVersion : 4;
+
+        u32 qmdReservedH : 8;
+
+        u32 ctaThreadDimension0 : 16;
+        u32 ctaThreadDimension1 : 16;
+        u32 ctaThreadDimension2 : 16;
+
+        u32 constantBufferValid : 8;
+
+        u32 qmdReservedI : 21;
+
+        L1Configuration l1Configuration : 3;
+
+        u32 smDisableMaskLower;
+        u32 smDisableMaskUpper;
+
+        struct {
+            u32 addressLower;
+            u32 addressUpper : 8;
+            u32 qmdReservedJL : 8;
+            u32 _pad4_ : 4;
+            ReductionOp reductionOp : 3;
+            u32 qmdReservedKM : 1;
+            ReductionFormat reductionFormat : 2;
+            u32 reductionEnable : 1;
+            u32 _pad5_ : 4;
+            StructureSize structureSize : 1;
+            u32 payload;
+        } release[2]; //!< NOTE(review): presumably one entry per semaphoreReleaseEnable0/1 flag - confirm
+
+        struct {
+            u32 addrLower;
+            u32 addrUpper : 8;
+            u32 reservedAddr : 6;
+            u32 invalidate : 1;
+            u32 size : 17;
+        } constantBuffer[8]; //!< NOTE(review): presumably one entry per bit of constantBufferValid - confirm
+
+        u32 shaderLocalMemoryLowSize : 24;
+
+        u32 qmdReservedN : 3;
+
+        u32 barrierCount : 5;
+        u32 shaderLocalMemoryHighSize : 24;
+        u32 registerCount : 8;
+        u32 shaderLocalMemoryCrsSize : 24;
+
+        u32 sassVersion : 8;
+
+        u32 hwOnlyInnerGet : 31;
+        u32 hwOnlyRequireSchedulingPcas : 1;
+        u32 hwOnlyInnerPut : 31;
+        u32 hwOnlyScgType : 1;
+        u32 hwOnlySpanListHeadIndex : 30;
+
+        u32 qmdReservedQ : 1;
+
+        u32 hwOnlySpanListHeadIndexValid : 1;
+        u32 hwOnlySkedNextQmdPointer;
+
+        u32 qmdSpareEFGHIJKLMN[10];
+
+        u32 debugIdLower;
+        u32 debugIdUpper;
+    };
+    static_assert(sizeof(QMD) == 0x100); //!< Guards the packed layout above against accidental size changes
+    #pragma pack(pop)
+
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
index bf84cff8..009688f5 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
@@ -115,6 +115,9 @@ namespace skyline::soc::gm20b {
             case SubchannelId::Inline2Mem:
                 channelCtx.inline2Memory.CallMethod(method, argument);
                 break;
+            case SubchannelId::Compute:
+ channelCtx.keplerCompute.CallMethod(method, argument); + break; default: Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument); break; @@ -129,6 +132,9 @@ namespace skyline::soc::gm20b { case SubchannelId::Inline2Mem: channelCtx.inline2Memory.CallMethodBatchNonInc(method, arguments); break; + case SubchannelId::Compute: + channelCtx.keplerCompute.CallMethodBatchNonInc(method, arguments); + break; default: Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X} with batch args", method, subChannel); break;