diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml
index d4813593..5878864a 100644
--- a/.idea/codeStyles/Project.xml
+++ b/.idea/codeStyles/Project.xml
@@ -7,22 +7,6 @@
-
-
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
index 847d835e..82d04e98 100644
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -171,7 +171,7 @@
-
+
diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt
index 7749c3e7..7039feee 100644
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@@ -67,12 +67,12 @@ add_library(skyline SHARED
${source_DIR}/skyline/audio/resampler.cpp
${source_DIR}/skyline/audio/adpcm_decoder.cpp
${source_DIR}/skyline/gpu/presentation_engine.cpp
- ${source_DIR}/skyline/gpu/macro_interpreter.cpp
- ${source_DIR}/skyline/gpu/memory_manager.cpp
- ${source_DIR}/skyline/gpu/gpfifo.cpp
- ${source_DIR}/skyline/gpu/syncpoint.cpp
${source_DIR}/skyline/gpu/texture.cpp
- ${source_DIR}/skyline/gpu/engines/maxwell_3d.cpp
+ ${source_DIR}/skyline/soc/gmmu.cpp
+ ${source_DIR}/skyline/soc/host1x/syncpoint.cpp
+ ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
+ ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
+ ${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
${source_DIR}/skyline/input/npad.cpp
${source_DIR}/skyline/input/npad_device.cpp
${source_DIR}/skyline/input/touch.cpp
diff --git a/app/src/main/cpp/skyline/common.cpp b/app/src/main/cpp/skyline/common.cpp
index d5b219e6..30f96414 100644
--- a/app/src/main/cpp/skyline/common.cpp
+++ b/app/src/main/cpp/skyline/common.cpp
@@ -4,6 +4,7 @@
#include
#include "common.h"
#include "nce.h"
+#include "soc.h"
#include "gpu.h"
#include "audio.h"
#include "input.h"
@@ -55,6 +56,7 @@ namespace skyline {
DeviceState::DeviceState(kernel::OS *os, std::shared_ptr jvmManager, std::shared_ptr settings, std::shared_ptr logger)
: os(os), jvm(std::move(jvmManager)), settings(std::move(settings)), logger(std::move(logger)) {
// We assign these later as they use the state in their constructor and we don't want null pointers
+ soc = std::make_shared(*this);
gpu = std::make_shared(*this);
audio = std::make_shared(*this);
nce = std::make_shared(*this);
diff --git a/app/src/main/cpp/skyline/common.h b/app/src/main/cpp/skyline/common.h
index c439ac5a..2ac4c151 100644
--- a/app/src/main/cpp/skyline/common.h
+++ b/app/src/main/cpp/skyline/common.h
@@ -27,7 +27,7 @@
#include
#include
-#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage)
+#define FORCE_INLINE __attribute__((always_inline)) // NOLINT(cppcoreguidelines-macro-usage)
namespace fmt {
/**
@@ -605,6 +605,9 @@ namespace skyline {
struct ThreadContext;
}
class JvmManager;
+ namespace soc {
+ class SOC;
+ }
namespace gpu {
class GPU;
}
@@ -637,6 +640,7 @@ namespace skyline {
std::shared_ptr settings;
std::shared_ptr logger;
std::shared_ptr loader;
+ std::shared_ptr soc;
std::shared_ptr gpu;
std::shared_ptr audio;
std::shared_ptr nce;
diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h
index 23ad3cb2..030ea9c8 100644
--- a/app/src/main/cpp/skyline/gpu.h
+++ b/app/src/main/cpp/skyline/gpu.h
@@ -3,30 +3,16 @@
#pragma once
-#include "gpu/gpfifo.h"
-#include "gpu/syncpoint.h"
-#include "gpu/engines/maxwell_3d.h"
#include "gpu/presentation_engine.h"
namespace skyline::gpu {
/**
- * @brief A common interfaces to the GPU where all objects relevant to it are present
+ * @brief An interface to host GPU structures, anything concerning host GPU/Presentation APIs is encapsulated by this
*/
class GPU {
- private:
- const DeviceState &state;
-
public:
PresentationEngine presentation;
- vmm::MemoryManager memoryManager;
- std::shared_ptr fermi2D;
- std::shared_ptr maxwell3D;
- std::shared_ptr maxwellCompute;
- std::shared_ptr maxwellDma;
- std::shared_ptr keplerMemory;
- std::array syncpoints{};
- gpfifo::GPFIFO gpfifo;
- GPU(const DeviceState &state) : state(state), presentation(state), memoryManager(state), gpfifo(state), fermi2D(std::make_shared(state)), keplerMemory(std::make_shared(state)), maxwell3D(std::make_shared(state)), maxwellCompute(std::make_shared(state)), maxwellDma(std::make_shared(state)) {}
+ GPU(const DeviceState &state) : presentation(state) {}
};
}
diff --git a/app/src/main/cpp/skyline/gpu/engines/gpfifo.h b/app/src/main/cpp/skyline/gpu/engines/gpfifo.h
deleted file mode 100644
index c2526a62..00000000
--- a/app/src/main/cpp/skyline/gpu/engines/gpfifo.h
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-
-#pragma once
-
-#include "engine.h"
-
-namespace skyline {
- namespace constant {
- constexpr u32 GpfifoRegisterCount{0x40}; //!< The number of GPFIFO registers
- }
-
- namespace gpu::engine {
- /**
- * @brief The GPFIFO engine handles managing macros and semaphores
- * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
- */
- class GPFIFO : public Engine {
- private:
- /**
- * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65
- */
-#pragma pack(push, 1)
- union Registers {
- std::array raw;
-
- enum class SemaphoreOperation : u8 {
- Acquire = 1,
- Release = 2,
- AcqGeq = 4,
- AcqAnd = 8,
- Reduction = 16,
- };
-
- enum class SemaphoreAcquireSwitch : u8 {
- Disabled = 0,
- Enabled = 1,
- };
-
- enum class SemaphoreReleaseWfi : u8 {
- En = 0,
- Dis = 1,
- };
-
- enum class SemaphoreReleaseSize : u8 {
- SixteenBytes = 0,
- FourBytes = 1,
- };
-
- enum class SemaphoreReduction : u8 {
- Min = 0,
- Max = 1,
- Xor = 2,
- And = 3,
- Or = 4,
- Add = 5,
- Inc = 6,
- Dec = 7,
- };
-
- enum class SemaphoreFormat : u8 {
- Signed = 0,
- Unsigned = 1,
- };
-
- enum class MemOpTlbInvalidatePdb : u8 {
- One = 0,
- All = 1,
- };
-
- enum class SyncpointOperation : u8 {
- Wait = 0,
- Incr = 1,
- };
-
- enum class SyncpointWaitSwitch : u8 {
- Dis = 0,
- En = 1,
- };
-
- enum class WfiScope : u8 {
- CurrentScgType = 0,
- All = 1,
- };
-
- enum class YieldOp : u8 {
- Nop = 0,
- PbdmaTimeslice = 1,
- RunlistTimeslice = 2,
- Tsg = 3,
- };
-
- struct {
- struct {
- u16 nvClass : 16;
- u8 engine : 5;
- u16 _pad_ : 11;
- } setObject;
-
- u32 illegal;
- u32 nop;
- u32 _pad0_;
-
- struct {
- struct {
- u32 offsetUpper : 8;
- u32 _pad0_ : 24;
- };
-
- struct {
- u8 _pad1_ : 2;
- u32 offsetLower : 30;
- };
-
- u32 payload;
-
- struct {
- SemaphoreOperation operation : 5;
- u8 _pad2_ : 7;
- SemaphoreAcquireSwitch acquireSwitch : 1;
- u8 _pad3_ : 7;
- SemaphoreReleaseWfi releaseWfi : 1;
- u8 _pad4_ : 3;
- SemaphoreReleaseSize releaseSize : 1;
- u8 _pad5_ : 2;
- SemaphoreReduction reduction : 4;
- SemaphoreFormat format : 1;
- };
- } semaphore;
-
- u32 nonStallInterrupt;
- u32 fbFlush;
- u32 _pad1_[2];
- u32 memOpC;
- u32 memOpD;
- u32 _pad2_[6];
- u32 setReference;
- u32 _pad3_[7];
-
- struct {
- u32 payload;
-
- struct {
- SyncpointOperation operation : 1;
- u8 _pad0_ : 3;
- SyncpointWaitSwitch waitSwitch : 1;
- u8 _pad1_ : 3;
- u16 index : 12;
- u16 _pad2_ : 12;
- };
- } syncpoint;
-
- struct {
- WfiScope scope : 1;
- u32 _pad_ : 31;
- } wfi;
-
- u32 crcCheck;
-
- struct {
- YieldOp op : 2;
- u32 _pad_ : 30;
- } yield;
- };
- } registers{};
- static_assert(sizeof(Registers) == (constant::GpfifoRegisterCount * sizeof(u32)));
-#pragma pack(pop)
-
- public:
- GPFIFO(const DeviceState &state) : Engine(state) {}
-
- void CallMethod(MethodParams params) override {
- state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument);
-
- registers.raw[params.method] = params.argument;
- };
- };
- }
-}
diff --git a/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.h b/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.h
deleted file mode 100644
index 266996fb..00000000
--- a/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.h
+++ /dev/null
@@ -1,575 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-
-#pragma once
-
-#include
-#include "engine.h"
-
-#define MAXWELL3D_OFFSET(field) U32_OFFSET(skyline::gpu::engine::Maxwell3D::Registers, field)
-
-namespace skyline {
- namespace constant {
- constexpr u32 Maxwell3DRegisterCounter{0xE00}; //!< The number of Maxwell 3D registers
- }
-
- namespace gpu::engine {
- /**
- * @brief The Maxwell 3D engine handles processing 3D graphics
- */
- class Maxwell3D : public Engine {
- private:
- std::array macroPositions{}; //!< The positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
-
- struct {
- u32 index;
- std::vector arguments;
- } macroInvocation{}; //!< Data for a macro that is pending execution
-
- MacroInterpreter macroInterpreter;
-
- void HandleSemaphoreCounterOperation();
-
- void WriteSemaphoreResult(u64 result);
-
- public:
- /**
- * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
- */
-#pragma pack(push, 1)
- union Registers {
- std::array raw;
-
- struct Address {
- u32 high;
- u32 low;
-
- u64 Pack() {
- return (static_cast(high) << 32) | low;
- }
- };
- static_assert(sizeof(Address) == sizeof(u64));
-
- enum class MmeShadowRamControl : u32 {
- MethodTrack = 0,
- MethodTrackWithFilter = 1,
- MethodPassthrough = 2,
- MethodReplay = 3,
- };
-
- struct ViewportTransform {
- enum class Swizzle : u8 {
- PositiveX = 0,
- NegativeX = 1,
- PositiveY = 2,
- NegativeY = 3,
- PositiveZ = 4,
- NegativeZ = 5,
- PositiveW = 6,
- NegativeW = 7,
- };
-
- float scaleX;
- float scaleY;
- float scaleZ;
- float translateX;
- float translateY;
- float translateZ;
-
- struct {
- Swizzle x : 3;
- u8 _pad0_ : 1;
- Swizzle y : 3;
- u8 _pad1_ : 1;
- Swizzle z : 3;
- u8 _pad2_ : 1;
- Swizzle w : 3;
- u32 _pad3_ : 17;
- } swizzles;
-
- struct {
- u8 x : 5;
- u8 _pad0_ : 3;
- u8 y : 5;
- u32 _pad1_ : 19;
- } subpixelPrecisionBias;
- };
- static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
-
- struct Viewport {
- struct {
- u16 x;
- u16 width;
- };
-
- struct {
- u16 y;
- u16 height;
- };
-
- float depthRangeNear;
- float depthRangeFar;
- };
- static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
-
- enum class PolygonMode : u32 {
- Point = 0x1B00,
- Line = 0x1B01,
- Fill = 0x1B02,
- };
-
- union VertexAttribute {
- u32 raw;
-
- enum class Size : u8 {
- Size_1x32 = 0x12,
- Size_2x32 = 0x04,
- Size_3x32 = 0x02,
- Size_4x32 = 0x01,
- Size_1x16 = 0x1B,
- Size_2x16 = 0x0F,
- Size_3x16 = 0x05,
- Size_4x16 = 0x03,
- Size_1x8 = 0x1D,
- Size_2x8 = 0x18,
- Size_3x8 = 0x13,
- Size_4x8 = 0x0A,
- Size_10_10_10_2 = 0x30,
- Size_11_11_10 = 0x31,
- };
-
- enum class Type : u8 {
- None = 0,
- SNorm = 1,
- UNorm = 2,
- SInt = 3,
- UInt = 4,
- UScaled = 5,
- SScaled = 6,
- Float = 7,
- };
-
- struct {
- u8 bufferId : 5;
- u8 _pad0_ : 1;
- bool fixed : 1;
- u16 offset : 14;
- Size size : 6;
- Type type : 3;
- u8 _pad1_ : 1;
- bool bgra : 1;
- };
- };
- static_assert(sizeof(VertexAttribute) == sizeof(u32));
-
- enum class CompareOp : u32 {
- Never = 1,
- Less = 2,
- Equal = 3,
- LessOrEqual = 4,
- Greater = 5,
- NotEqual = 6,
- GreaterOrEqual = 7,
- Always = 8,
-
- NeverGL = 0x200,
- LessGL = 0x201,
- EqualGL = 0x202,
- LessOrEqualGL = 0x203,
- GreaterGL = 0x204,
- NotEqualGL = 0x205,
- GreaterOrEqualGL = 0x206,
- AlwaysGL = 0x207,
- };
-
- struct Blend {
- enum class Op : u32 {
- Add = 1,
- Subtract = 2,
- ReverseSubtract = 3,
- Minimum = 4,
- Maximum = 5,
-
- AddGL = 0x8006,
- SubtractGL = 0x8007,
- ReverseSubtractGL = 0x8008,
- MinimumGL = 0x800A,
- MaximumGL = 0x800B,
- };
-
- enum class Factor : u32 {
- Zero = 0x1,
- One = 0x2,
- SourceColor = 0x3,
- OneMinusSourceColor = 0x4,
- SourceAlpha = 0x5,
- OneMinusSourceAlpha = 0x6,
- DestAlpha = 0x7,
- OneMinusDestAlpha = 0x8,
- DestColor = 0x9,
- OneMinusDestColor = 0xA,
- SourceAlphaSaturate = 0xB,
- Source1Color = 0x10,
- OneMinusSource1Color = 0x11,
- Source1Alpha = 0x12,
- OneMinusSource1Alpha = 0x13,
- ConstantColor = 0x61,
- OneMinusConstantColor = 0x62,
- ConstantAlpha = 0x63,
- OneMinusConstantAlpha = 0x64,
-
- ZeroGL = 0x4000,
- OneGL = 0x4001,
- SourceColorGL = 0x4300,
- OneMinusSourceColorGL = 0x4301,
- SourceAlphaGL = 0x4302,
- OneMinusSourceAlphaGL = 0x4303,
- DestAlphaGL = 0x4304,
- OneMinusDestAlphaGL = 0x4305,
- DestColorGL = 0x4306,
- OneMinusDestColorGL = 0x4307,
- SourceAlphaSaturateGL = 0x4308,
- ConstantColorGL = 0xC001,
- OneMinusConstantColorGL = 0xC002,
- ConstantAlphaGL = 0xC003,
- OneMinusConstantAlphaGL = 0xC004,
- Source1ColorGL = 0xC900,
- OneMinusSource1ColorGL = 0xC901,
- Source1AlphaGL = 0xC902,
- OneMinusSource1AlphaGL = 0xC903,
- };
-
- struct {
- u32 seperateAlpha;
- Op colorOp;
- Factor colorSrcFactor;
- Factor colorDestFactor;
- Op alphaOp;
- Factor alphaSrcFactor;
- Factor alphaDestFactor;
- u32 _pad_;
- };
- };
- static_assert(sizeof(Blend) == (sizeof(u32) * 8));
-
- enum class StencilOp : u32 {
- Keep = 1,
- Zero = 2,
- Replace = 3,
- IncrementAndClamp = 4,
- DecrementAndClamp = 5,
- Invert = 6,
- IncrementAndWrap = 7,
- DecrementAndWrap = 8,
- };
-
- enum class FrontFace : u32 {
- Clockwise = 0x900,
- CounterClockwise = 0x901,
- };
-
- enum class CullFace : u32 {
- Front = 0x404,
- Back = 0x405,
- FrontAndBack = 0x408,
- };
-
- union ColorWriteMask {
- u32 raw;
-
- struct {
- u8 r : 4;
- u8 g : 4;
- u8 b : 4;
- u8 a : 4;
- };
- };
- static_assert(sizeof(ColorWriteMask) == sizeof(u32));
-
- struct SemaphoreInfo {
- enum class Op : u8 {
- Release = 0,
- Acquire = 1,
- Counter = 2,
- Trap = 3,
- };
-
- enum class ReductionOp : u8 {
- Add = 0,
- Min = 1,
- Max = 2,
- Inc = 3,
- Dec = 4,
- And = 5,
- Or = 6,
- Xor = 7,
- };
-
- enum class Unit : u8 {
- VFetch = 1,
- VP = 2,
- Rast = 4,
- StrmOut = 5,
- GP = 6,
- ZCull = 7,
- Prop = 10,
- Crop = 15,
- };
-
- enum class SyncCondition : u8 {
- NotEqual = 0,
- GreaterThan = 1,
- };
-
- enum class Format : u8 {
- U32 = 0,
- I32 = 1,
- };
-
- enum class CounterType : u8 {
- Zero = 0x0,
- InputVertices = 0x1,
- InputPrimitives = 0x3,
- VertexShaderInvocations = 0x5,
- GeometryShaderInvocations = 0x7,
- GeometryShaderPrimitives = 0x9,
- ZcullStats0 = 0xA,
- TransformFeedbackPrimitivesWritten = 0xB,
- ZcullStats1 = 0xC,
- ZcullStats2 = 0xE,
- ClipperInputPrimitives = 0xF,
- ZcullStats3 = 0x10,
- ClipperOutputPrimitives = 0x11,
- PrimitivesGenerated = 0x12,
- FragmentShaderInvocations = 0x13,
- SamplesPassed = 0x15,
- TransformFeedbackOffset = 0x1A,
- TessControlShaderInvocations = 0x1B,
- TessEvaluationShaderInvocations = 0x1D,
- TessEvaluationShaderPrimitives = 0x1F,
- };
-
- enum class StructureSize : u8 {
- FourWords = 0,
- OneWord = 1,
- };
-
- Op op : 2;
- bool flushDisable : 1;
- bool reductionEnable : 1;
- bool fenceEnable : 1;
- u8 _pad0_ : 4;
- ReductionOp reductionOp : 3;
- Unit unit : 4;
- SyncCondition syncCondition : 1;
- Format format : 2;
- u8 _pad1_ : 1;
- bool awakenEnable : 1;
- u8 _pad2_ : 2;
- CounterType counterType : 5;
- StructureSize structureSize : 1;
- };
- static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
-
- enum class CoordOrigin : u8 {
- LowerLeft = 0,
- UpperLeft = 1,
- };
-
- struct {
- u32 _pad0_[0x40]; // 0x0
- u32 noOperation; // 0x40
- u32 _pad1_[0x3]; // 0x41
- u32 waitForIdle; // 0x44
-
- struct {
- u32 instructionRamPointer; // 0x45
- u32 instructionRamLoad; // 0x46
- u32 startAddressRamPointer; // 0x47
- u32 startAddressRamLoad; // 0x48
- MmeShadowRamControl shadowRamControl; // 0x49
- } mme;
-
- u32 _pad2_[0x68]; // 0x4A
-
- struct {
- u16 id : 12;
- u8 _pad0_ : 4;
- bool flushCache : 1;
- u8 _pad1_ : 3;
- bool increment : 1;
- u16 _pad2_ : 11;
- } syncpointAction; // 0xB2
-
- u32 _pad3_[0x2C]; // 0xB3
- u32 rasterizerEnable; // 0xDF
- u32 _pad4_[0x1A0]; // 0xE0
- std::array viewportTransform; // 0x280
- std::array viewport; // 0x300
- u32 _pad5_[0x2B]; // 0x340
-
- struct {
- PolygonMode front; // 0x36B
- PolygonMode back; // 0x36C
- } polygonMode;
-
- u32 _pad6_[0x68]; // 0x36D
-
- struct {
- u32 compareRef; // 0x3D5
- u32 writeMask; // 0x3D6
- u32 compareMask; // 0x3D7
- } stencilBackExtra;
-
- u32 _pad7_[0x13]; // 0x3D8
- u32 rtSeparateFragData; // 0x3EB
- u32 _pad8_[0x6C]; // 0x3EC
- std::array vertexAttributeState; // 0x458
- u32 _pad9_[0x4B]; // 0x478
- CompareOp depthTestFunc; // 0x4C3
- float alphaTestRef; // 0x4C4
- CompareOp alphaTestFunc; // 0x4C5
- u32 drawTFBStride; // 0x4C6
-
- struct {
- float r; // 0x4C7
- float g; // 0x4C8
- float b; // 0x4C9
- float a; // 0x4CA
- } blendConstant;
-
- u32 _pad10_[0x4]; // 0x4CB
-
- struct {
- u32 seperateAlpha; // 0x4CF
- Blend::Op colorOp; // 0x4D0
- Blend::Factor colorSrcFactor; // 0x4D1
- Blend::Factor colorDestFactor; // 0x4D2
- Blend::Op alphaOp; // 0x4D3
- Blend::Factor alphaSrcFactor; // 0x4D4
- u32 _pad_; // 0x4D5
- Blend::Factor alphaDestFactor; // 0x4D6
-
- u32 enableCommon; // 0x4D7
- std::array enable; // 0x4D8 For each render target
- } blend;
-
- u32 stencilEnable; // 0x4E0
-
- struct {
- StencilOp failOp; // 0x4E1
- StencilOp zFailOp; // 0x4E2
- StencilOp zPassOp; // 0x4E3
-
- struct {
- CompareOp op; // 0x4E4
- i32 ref; // 0x4E5
- u32 mask; // 0x4E6
- } compare;
-
- u32 writeMask; // 0x4E7
- } stencilFront;
-
- u32 _pad11_[0x4]; // 0x4E8
- float lineWidthSmooth; // 0x4EC
- float lineWidthAliased; // 0x4ED
- u32 _pad12_[0x1F]; // 0x4EE
- u32 drawBaseVertex; // 0x50D
- u32 drawBaseInstance; // 0x50E
- u32 _pad13_[0x35]; // 0x50F
- u32 clipDistanceEnable; // 0x544
- u32 sampleCounterEnable; // 0x545
- float pointSpriteSize; // 0x546
- u32 zCullStatCountersEnable; // 0x547
- u32 pointSpriteEnable; // 0x548
- u32 _pad14_; // 0x549
- u32 shaderExceptions; // 0x54A
- u32 _pad15_[0x2]; // 0x54B
- u32 multisampleEnable; // 0x54D
- u32 depthTargetEnable; // 0x54E
-
- struct {
- bool alphaToCoverage : 1;
- u8 _pad0_ : 3;
- bool alphaToOne : 1;
- u32 _pad1_ : 27;
- } multisampleControl; // 0x54F
-
- u32 _pad16_[0x7]; // 0x550
-
- struct {
- Address address; // 0x557
- u32 maximumIndex; // 0x559
- } texSamplerPool;
-
- u32 _pad17_; // 0x55A
- u32 polygonOffsetFactor; // 0x55B
- u32 lineSmoothEnable; // 0x55C
-
- struct {
- Address address; // 0x55D
- u32 maximumIndex; // 0x55F
- } texHeaderPool;
-
- u32 _pad18_[0x5]; // 0x560
-
- u32 stencilTwoSideEnable; // 0x565
-
- struct {
- StencilOp failOp; // 0x566
- StencilOp zFailOp; // 0x567
- StencilOp zPassOp; // 0x568
- CompareOp compareOp; // 0x569
- } stencilBack;
-
- u32 _pad19_[0x17]; // 0x56A
-
- struct {
- u8 _unk_ : 2;
- CoordOrigin origin : 1;
- u16 enable : 10;
- u32 _pad_ : 19;
- } pointCoordReplace; // 0x581
-
- u32 _pad20_[0xC4]; // 0x582
- u32 cullFaceEnable; // 0x646
- FrontFace frontFace; // 0x647
- CullFace cullFace; // 0x648
- u32 pixelCentreImage; // 0x649
- u32 _pad21_; // 0x64A
- u32 viewportTransformEnable; // 0x64B
- u32 _pad22_[0x34]; // 0x64C
- std::array colorMask; // 0x680 For each render target
- u32 _pad23_[0x38]; // 0x688
-
- struct {
- Address address; // 0x6C0
- u32 payload; // 0x6C2
- SemaphoreInfo info; // 0x6C3
- } semaphore;
-
- u32 _pad24_[0xBC]; // 0x6C4
- std::array independentBlend; // 0x780 For each render target
- u32 _pad25_[0x100]; // 0x7C0
- u32 firmwareCall[0x20]; // 0x8C0
- };
- };
- static_assert(sizeof(Registers) == (constant::Maxwell3DRegisterCounter * sizeof(u32)));
-#pragma pack(pop)
-
- Registers registers{};
- Registers shadowRegisters{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
-
- std::array macroCode{}; //!< This stores GPU macros, the 256kb size is from Ryujinx
-
- Maxwell3D(const DeviceState &state);
-
- /**
- * @brief Resets the Maxwell 3D registers to their default values
- */
- void ResetRegs();
-
- void CallMethod(MethodParams params) override;
- };
- }
-}
diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.h b/app/src/main/cpp/skyline/gpu/gpfifo.h
deleted file mode 100644
index 2c69c07f..00000000
--- a/app/src/main/cpp/skyline/gpu/gpfifo.h
+++ /dev/null
@@ -1,170 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-
-#pragma once
-
-#include
-#include "engines/gpfifo.h"
-#include "memory_manager.h"
-
-namespace skyline::gpu {
- namespace gpfifo {
- /**
- * @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
- * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
- * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
- */
- struct GpEntry {
- enum class Fetch : u8 {
- Unconditional = 0,
- Conditional = 1,
- };
-
- union {
- u32 entry0;
-
- struct {
- Fetch fetch : 1;
- u8 _pad_ : 1;
- u32 get : 30;
- };
- };
-
- enum class Opcode : u8 {
- Nop = 0,
- Illegal = 1,
- Crc = 2,
- PbCrc = 3,
- };
-
- enum class Priv : u8 {
- User = 0,
- Kernel = 1,
- };
-
- enum class Level : u8 {
- Main = 0,
- Subroutine = 1,
- };
-
- enum class Sync : u8 {
- Proceed = 0,
- Wait = 1,
- };
-
- union {
- u32 entry1;
-
- struct {
- union {
- u8 getHi;
- Opcode opcode;
- };
-
- Priv priv : 1;
- Level level : 1;
- u32 size : 21;
- Sync sync : 1;
- };
- };
-
- constexpr u64 Address() const {
- return (static_cast(getHi) << 32) | (static_cast(get) << 2);
- }
- };
- static_assert(sizeof(GpEntry) == sizeof(u64));
-
- /**
- * @brief A single pushbuffer method header that describes a compressed method sequence
- * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
- * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
- */
- union PushBufferMethodHeader {
- u32 raw;
-
- enum class TertOp : u8 {
- Grp0IncMethod = 0,
- Grp0SetSubDevMask = 1,
- Grp0StoreSubDevMask = 2,
- Grp0UseSubDevMask = 3,
- Grp2NonIncMethod = 0,
- };
-
- enum class SecOp : u8 {
- Grp0UseTert = 0,
- IncMethod = 1,
- Grp2UseTert = 2,
- NonIncMethod = 3,
- ImmdDataMethod = 4,
- OneInc = 5,
- Reserved6 = 6,
- EndPbSegment = 7,
- };
-
- u16 methodAddress : 12;
- struct {
- u8 _pad0_ : 4;
- u16 subDeviceMask : 12;
- };
-
- struct {
- u16 _pad1_ : 13;
- u8 methodSubChannel : 3;
- union {
- TertOp tertOp : 3;
- u16 methodCount : 13;
- u16 immdData : 13;
- };
- };
-
- struct {
- u32 _pad2_ : 29;
- SecOp secOp : 3;
- };
- };
- static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
-
- /**
- * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
- * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
- */
- class GPFIFO {
- const DeviceState &state;
- engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
- std::array, 8> subchannels;
- std::optional> pushBuffers;
- std::thread thread; //!< The thread that manages processing of pushbuffers
- std::vector pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
-
- /**
- * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
- */
- void Process(GpEntry gpEntry);
-
- /**
- * @brief Sends a method call to the GPU hardware
- */
- void Send(MethodParams params);
-
- public:
- GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}
-
- ~GPFIFO();
-
- /**
- * @param numBuffers The amount of push-buffers to allocate in the circular buffer
- */
- void Initialize(size_t numBuffers);
-
- /**
- * @brief Executes all pending entries in the FIFO
- */
- void Run();
-
- /**
- * @brief Pushes a list of entries to the FIFO, presumably these are executed by 'Run' (this class declares no 'Step' method) - TODO confirm
- */
- void Push(span entries);
- };
- }
-}
diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.h b/app/src/main/cpp/skyline/gpu/memory_manager.h
deleted file mode 100644
index b01cd39c..00000000
--- a/app/src/main/cpp/skyline/gpu/memory_manager.h
+++ /dev/null
@@ -1,145 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-
-#pragma once
-
-#include
-
-namespace skyline {
- namespace constant {
- constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
- }
-
- namespace gpu::vmm {
- enum class ChunkState {
- Unmapped, //!< The chunk is unmapped
- Reserved, //!< The chunk is reserved
- Mapped //!< The chunk is mapped and a CPU side address is present
- };
-
- struct ChunkDescriptor {
- u64 virtAddr; //!< The address of the chunk in the virtual address space
- u64 size; //!< The size of the chunk in bytes
- u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
- ChunkState state;
-
- ChunkDescriptor(u64 virtAddr, u64 size, u8 *cpuPtr, ChunkState state) : virtAddr(virtAddr), size(size), cpuPtr(cpuPtr), state(state) {}
-
- /**
- * @return If the given chunk can be contained wholly within this chunk
- */
- inline bool CanContain(const ChunkDescriptor &chunk) {
- return (chunk.virtAddr >= virtAddr) && ((size + virtAddr) >= (chunk.size + chunk.virtAddr));
- }
- };
-
- /**
- * @brief The MemoryManager class handles mapping between a virtual address space and an application's address space
- */
- class MemoryManager {
- private:
- const DeviceState &state;
- std::vector chunks;
- std::shared_mutex vmmMutex;
-
- /**
- * @brief Finds a chunk in the virtual address space that meets the given state, size and alignment requirements
- * @note vmmMutex MUST be locked when calling this
- * @param desiredState The state of the chunk to find
- * @param size The minimum size of the chunk to find
- * @param alignment The minimum alignment of the chunk to find
- * @return The first applicable chunk
- */
- std::optional FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
-
- /**
- * @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
- * @note vmmMutex MUST be locked when calling this
- * @param newChunk The chunk to insert
- * @return The base virtual address of the inserted chunk
- */
- u64 InsertChunk(const ChunkDescriptor &newChunk);
-
- public:
- MemoryManager(const DeviceState &state);
-
- /**
- * @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
- * @param size The size of the region to reserve
- * @param alignment The alignment of the region to reserve
- * @return The base virtual address of the reserved region
- */
- u64 ReserveSpace(u64 size, u64 alignment);
-
- /**
- * @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
- * @param virtAddr The virtual base address of the region to allocate
- * @param size The size of the region to allocate
- * @return The base virtual address of the reserved region
- */
- u64 ReserveFixed(u64 virtAddr, u64 size);
-
- /**
- * @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
- * @param cpuPtr A pointer to the region to be mapped into the virtual address space
- * @param size The size of the region to map
- * @return The base virtual address of the mapped region
- */
- u64 MapAllocate(u8 *cpuPtr, u64 size);
-
- /**
- * @brief Maps a CPU memory region to a fixed region in the virtual address space
- * @param virtAddr The target virtual address of the region
- * @param cpuPtr A pointer to the region to be mapped into the virtual address space
- * @param size The size of the region to map
- * @return The base virtual address of the mapped region
- */
- u64 MapFixed(u64 virtAddr, u8 *cpuPtr, u64 size);
-
- /**
- * @brief Unmaps all chunks in the given region from the virtual address space
- * @return Whether the operation succeeded
- */
- bool Unmap(u64 virtAddr, u64 size);
-
- void Read(u8 *destination, u64 virtAddr, u64 size);
-
- /**
- * @brief Reads in a span from a region of the virtual address space
- */
- template
- void Read(span destination, u64 virtAddr) {
- Read(reinterpret_cast(destination.data()), virtAddr, destination.size_bytes());
- }
-
- /**
- * @brief Reads in an object from a region of the virtual address space
- * @tparam T The type of object to return
- */
- template
- T Read(u64 virtAddr) {
- T obj;
- Read(reinterpret_cast(&obj), virtAddr, sizeof(T));
- return obj;
- }
-
- void Write(u8 *source, u64 virtAddr, u64 size);
-
- /**
- * @brief Writes out a span to a region of the virtual address space
- */
- template
- void Write(span source, u64 virtAddr) {
- Write(reinterpret_cast(source.data()), virtAddr, source.size_bytes());
- }
-
- /**
- * @brief Writes out an object to a region of the virtual address space
- */
- template
- void Write(T source, u64 virtAddr) {
- Write(reinterpret_cast(&source), virtAddr, sizeof(T));
- }
- };
- }
-}
diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp
index 1f75249c..ac3b9164 100644
--- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp
+++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp
@@ -9,7 +9,7 @@ extern skyline::u16 Fps;
extern skyline::u32 FrameTime;
namespace skyline::gpu {
- PresentationEngine::PresentationEngine(const DeviceState &state) : state(state), vsyncEvent(std::make_shared(state, true)), bufferEvent(std::make_shared(state, true)), presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
+ PresentationEngine::PresentationEngine(const DeviceState &state) : state(state), vsyncEvent(std::make_shared(state, true)), bufferEvent(std::make_shared(state, true)), presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
auto desc{presentationTrack.Serialize()};
desc.set_name("Presentation");
perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc);
diff --git a/app/src/main/cpp/skyline/gpu/syncpoint.h b/app/src/main/cpp/skyline/gpu/syncpoint.h
deleted file mode 100644
index ae5dc8a3..00000000
--- a/app/src/main/cpp/skyline/gpu/syncpoint.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-
-#pragma once
-
-#include
-
-namespace skyline {
- namespace constant {
- constexpr size_t MaxHwSyncpointCount{192}; //!< The maximum number of host1x syncpoints on T210
- }
-
- namespace gpu {
- /**
- * @brief The Syncpoint class represents a single syncpoint in the GPU which is used for GPU -> CPU synchronisation
- */
- class Syncpoint {
- private:
- struct Waiter {
- u32 threshold; //!< The syncpoint value to wait on to be reached
- std::function callback; //!< The callback to do after the wait has ended
- };
-
- std::mutex waiterLock; //!< Synchronizes insertions and deletions of waiters
- std::map waiterMap;
- u64 nextWaiterId{1};
-
- public:
- std::atomic value{};
-
- /**
- * @brief Registers a new waiter with a callback that will be called when the syncpoint reaches the target threshold
- * @note The callback will be called immediately if the syncpoint has already reached the given threshold
- * @return A persistent identifier that can be used to refer to the waiter, or 0 if the threshold has already been reached
- */
- u64 RegisterWaiter(u32 threshold, const std::function &callback);
-
- /**
- * @brief Removes a waiter given by 'id' from the pending waiter map
- */
- void DeregisterWaiter(u64 id);
-
- /**
- * @brief Increments the syncpoint by 1
- * @return The new value of the syncpoint
- */
- u32 Increment();
-
- /**
- * @brief Waits for the syncpoint to reach given threshold
- * @return false if the timeout was reached, otherwise true
- */
- bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout);
- };
- }
-}
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_as_gpu.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_as_gpu.cpp
index 54172dd2..354e1467 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_as_gpu.cpp
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-#include
+#include
#include
#include "nvmap.h"
#include "nvhost_as_gpu.h"
@@ -36,9 +36,9 @@ namespace skyline::service::nvdrv::device {
u64 size{static_cast(region.pages) * static_cast(region.pageSize)};
if (region.flags.fixed)
- region.offset = state.gpu->memoryManager.ReserveFixed(region.offset, size);
+ region.offset = state.soc->gmmu.ReserveFixed(region.offset, size);
else
- region.offset = state.gpu->memoryManager.ReserveSpace(size, region.align);
+ region.offset = state.soc->gmmu.ReserveSpace(size, region.align);
if (region.offset == 0) {
state.logger->Warn("Failed to allocate GPU address space region!");
@@ -56,7 +56,7 @@ namespace skyline::service::nvdrv::device {
// Non-fixed regions are unmapped so that they can be used by future non-fixed mappings
if (!region.fixed)
- if (!state.gpu->memoryManager.Unmap(offset, region.size))
+ if (!state.soc->gmmu.Unmap(offset, region.size))
state.logger->Warn("Failed to unmap region at 0x{:X}", offset);
regionMap.erase(offset);
@@ -94,7 +94,7 @@ namespace skyline::service::nvdrv::device {
u64 gpuAddress{data.offset + data.bufferOffset};
u8 *cpuPtr{region->second.ptr + data.bufferOffset};
- if (!state.gpu->memoryManager.MapFixed(gpuAddress, cpuPtr, data.mappingSize)) {
+ if (!state.soc->gmmu.MapFixed(gpuAddress, cpuPtr, data.mappingSize)) {
state.logger->Warn("Failed to remap GPU address space region: 0x{:X}", gpuAddress);
return NvStatus::BadParameter;
}
@@ -110,9 +110,9 @@ namespace skyline::service::nvdrv::device {
u64 size{data.mappingSize ? data.mappingSize : mapping->size};
if (data.flags.fixed)
- data.offset = state.gpu->memoryManager.MapFixed(data.offset, cpuPtr, size);
+ data.offset = state.soc->gmmu.MapFixed(data.offset, cpuPtr, size);
else
- data.offset = state.gpu->memoryManager.MapAllocate(cpuPtr, size);
+ data.offset = state.soc->gmmu.MapAllocate(cpuPtr, size);
if (data.offset == 0) {
state.logger->Warn("Failed to map GPU address space region!");
@@ -184,7 +184,7 @@ namespace skyline::service::nvdrv::device {
u8 *cpuPtr{mapping->ptr + (static_cast(entry.mapOffset) << MinAlignmentShift)};
u64 size{static_cast(entry.pages) << MinAlignmentShift};
- state.gpu->memoryManager.MapFixed(virtAddr, cpuPtr, size);
+ state.soc->gmmu.MapFixed(virtAddr, cpuPtr, size);
} catch (const std::out_of_range &) {
state.logger->Warn("Invalid NvMap handle: 0x{:X}", entry.nvmapHandle);
return NvStatus::BadParameter;
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp
index 6c602b73..c60dbd64 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+#include
#include
-#include
#include
#include "nvhost_channel.h"
@@ -25,7 +25,7 @@ namespace skyline::service::nvdrv::device {
NvStatus NvHostChannel::SubmitGpfifo(IoctlType type, span buffer, span inlineBuffer) {
struct Data {
- gpu::gpfifo::GpEntry *entries; // In
+ soc::gm20b::GpEntry *entries; // In
u32 numEntries; // In
union {
struct __attribute__((__packed__)) {
@@ -53,9 +53,9 @@ namespace skyline::service::nvdrv::device {
throw exception("Waiting on a fence through SubmitGpfifo is unimplemented");
}
- state.gpu->gpfifo.Push([&]() {
+ state.soc->gm20b.gpfifo.Push([&]() {
if (type == IoctlType::Ioctl2)
- return inlineBuffer.cast();
+ return inlineBuffer.cast();
else
return span(data.entries, data.numEntries);
}());
@@ -110,7 +110,7 @@ namespace skyline::service::nvdrv::device {
u32 _res_[3]; // In
} &data = buffer.as();
- state.gpu->gpfifo.Initialize(data.numEntries);
+ state.soc->gm20b.gpfifo.Initialize(data.numEntries);
auto driver{nvdrv::driver.lock()};
channelFence.UpdateValue(driver->hostSyncpoint);
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_ctrl.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_ctrl.cpp
index 9ad1a5ba..d7780417 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_ctrl.cpp
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_ctrl.cpp
@@ -2,7 +2,7 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2019-2020 Ryujinx Team and Contributors
-#include
+#include
#include
#include
#include "nvhost_ctrl.h"
@@ -46,20 +46,20 @@ namespace skyline::service::nvdrv::device {
state = State::Signalled;
}
- void SyncpointEvent::Cancel(const std::shared_ptr &gpuState) {
+ void SyncpointEvent::Cancel(soc::host1x::Host1X &host1x) {
std::lock_guard lock(mutex);
- gpuState->syncpoints.at(fence.id).DeregisterWaiter(waiterId);
+ host1x.syncpoints.at(fence.id).DeregisterWaiter(waiterId);
Signal();
event->ResetSignal();
}
- void SyncpointEvent::Wait(const std::shared_ptr &gpuState, const Fence &pFence) {
+ void SyncpointEvent::Wait(soc::host1x::Host1X &host1x, const Fence &pFence) {
std::lock_guard lock(mutex);
fence = pFence;
state = State::Waiting;
- waiterId = gpuState->syncpoints.at(fence.id).RegisterWaiter(fence.value, [this] { Signal(); });
+ waiterId = host1x.syncpoints.at(fence.id).RegisterWaiter(fence.value, [this] { Signal(); });
}
NvHostCtrl::NvHostCtrl(const DeviceState &state) : NvDevice(state) {}
@@ -105,7 +105,7 @@ namespace skyline::service::nvdrv::device {
SyncpointEventValue value; // InOut
} &data = buffer.as();
- if (data.fence.id >= constant::MaxHwSyncpointCount)
+ if (data.fence.id >= soc::host1x::SyncpointCount)
return NvStatus::BadValue;
if (data.timeout == 0)
@@ -149,7 +149,7 @@ namespace skyline::service::nvdrv::device {
if (event->state == SyncpointEvent::State::Cancelled || event->state == SyncpointEvent::State::Available || event->state == SyncpointEvent::State::Signalled) {
state.logger->Debug("Waiting on syncpoint event: {} with fence: ({}, {})", eventSlot, data.fence.id, data.fence.value);
- event->Wait(state.gpu, data.fence);
+ event->Wait(state.soc->host1x, data.fence);
data.value.val = 0;
@@ -189,7 +189,7 @@ namespace skyline::service::nvdrv::device {
if (event->state == SyncpointEvent::State::Waiting) {
event->state = SyncpointEvent::State::Cancelling;
state.logger->Debug("Cancelling waiting syncpoint event: {}", eventSlot);
- event->Cancel(state.gpu);
+ event->Cancel(state.soc->host1x);
}
event->state = SyncpointEvent::State::Cancelled;
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_ctrl.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_ctrl.h
index b2b0f682..6ed9e624 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_ctrl.h
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_ctrl.h
@@ -41,12 +41,12 @@ namespace skyline {
/**
* @brief Removes any wait requests on a syncpoint event and resets its state
*/
- void Cancel(const std::shared_ptr &gpuState);
+ void Cancel(soc::host1x::Host1X &host1x);
/**
* @brief Asynchronously waits on a syncpoint event using the given fence
*/
- void Wait(const std::shared_ptr &gpuState, const Fence &fence);
+ void Wait(soc::host1x::Host1X &host1x, const Fence &fence);
};
/**
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_syncpoint.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_syncpoint.cpp
index 3e84092d..ee95b4a8 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_syncpoint.cpp
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_syncpoint.cpp
@@ -2,7 +2,7 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-#include
+#include
#include "nvhost_syncpoint.h"
namespace skyline::service::nvdrv {
@@ -28,7 +28,7 @@ namespace skyline::service::nvdrv {
}
u32 NvHostSyncpoint::FindFreeSyncpoint() {
- for (u32 i{1}; i < constant::MaxHwSyncpointCount; i++)
+ for (u32 i{1}; i < syncpoints.size(); i++)
if (!syncpoints[i].reserved)
return i;
@@ -71,7 +71,7 @@ namespace skyline::service::nvdrv {
if (!syncpoints.at(id).reserved)
throw exception("Cannot update an unreserved syncpoint!");
- syncpoints.at(id).counterMin = state.gpu->syncpoints.at(id).value.load();
+ syncpoints.at(id).counterMin = state.soc->host1x.syncpoints.at(id).value.load();
return syncpoints.at(id).counterMin;
}
}
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_syncpoint.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_syncpoint.h
index 072dc80f..55297674 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_syncpoint.h
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_syncpoint.h
@@ -3,7 +3,7 @@
#pragma once
-#include
+#include
namespace skyline::service::nvdrv {
/**
@@ -22,7 +22,7 @@ namespace skyline::service::nvdrv {
};
const DeviceState &state;
- std::array syncpoints{};
+ std::array syncpoints{};
std::mutex reservationLock;
/**
diff --git a/app/src/main/cpp/skyline/soc.h b/app/src/main/cpp/skyline/soc.h
new file mode 100644
index 00000000..f2201cf7
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc.h
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include "soc/gmmu.h"
+#include "soc/host1x.h"
+#include "soc/gm20b.h"
+
+namespace skyline::soc {
+ /**
+ * @brief An interface into all emulated components of the Tegra X1 SoC
+ * @note Refer to the Tegra X1 Processor Block Diagram (1.2) for more information
+ */
+ class SOC {
+ public:
+ gmmu::GraphicsMemoryManager gmmu; //!< The GPU address space manager, nvdrv reserves/maps/unmaps regions through it and the GPU engines read/write through it
+ host1x::Host1X host1x; //!< Holds the syncpoints that nvdrv waits on and that the GPU engines increment
+ gm20b::GM20B gm20b; //!< The GPU block, owns the individual engines and the GPFIFO that nvdrv pushes entries to
+
+ SOC(const DeviceState &state) : gmmu(state), gm20b(state) {} // host1x is not listed here so it is default-initialized
+ };
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b.h b/app/src/main/cpp/skyline/soc/gm20b.h
new file mode 100644
index 00000000..15389e99
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include "gm20b/engines/maxwell_3d.h"
+#include "gm20b/gpfifo.h"
+
+namespace skyline::soc::gm20b {
+ /**
+ * @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations
+ * @note We omit parts of components related to external access such as the GM20B Host; all accesses to the external components are done directly
+ */
+ class GM20B {
+ public:
+ engine::Engine fermi2D; //!< Bound to a subchannel when EngineID::Fermi2D is requested
+ engine::maxwell3d::Maxwell3D maxwell3D; //!< Bound to a subchannel when EngineID::Maxwell3D is requested
+ engine::Engine maxwellCompute; //!< Bound to a subchannel when EngineID::MaxwellCompute is requested
+ engine::Engine maxwellDma; //!< Bound to a subchannel when EngineID::MaxwellDma is requested
+ engine::Engine keplerMemory; //!< Bound to a subchannel when EngineID::KeplerMemory is requested
+ GPFIFO gpfifo; //!< The command FIFO that nvdrv initializes and pushes GpEntry spans into
+
+ GM20B(const DeviceState &state) : fermi2D(state), maxwell3D(state), maxwellCompute(state), maxwellDma(state), keplerMemory(state), gpfifo(state) {} // Initializers are listed in member declaration order, which is the order they actually run in
+ };
+}
diff --git a/app/src/main/cpp/skyline/gpu/engines/engine.h b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
similarity index 97%
rename from app/src/main/cpp/skyline/gpu/engines/engine.h
rename to app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
index 778bb951..fcf30e18 100644
--- a/app/src/main/cpp/skyline/gpu/engines/engine.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
@@ -7,7 +7,7 @@
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
-namespace skyline::gpu {
+namespace skyline::soc::gm20b {
enum class EngineID {
Fermi2D = 0x902D,
KeplerMemory = 0xA140,
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
new file mode 100644
index 00000000..f78cf8ff
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include "engine.h"
+
+namespace skyline::soc::gm20b::engine {
+ /**
+ * @brief The GPFIFO engine handles managing macros and semaphores
+ * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
+ */
+ class GPFIFO : public Engine {
+ public:
+ static constexpr u32 RegisterCount{0x40}; //!< The number of GPFIFO registers
+
+ private:
+ /**
+ * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65
+ */
+#pragma pack(push, 1)
+ union Registers {
+ std::array raw;
+
+ enum class SemaphoreOperation : u8 {
+ Acquire = 1,
+ Release = 2,
+ AcqGeq = 4,
+ AcqAnd = 8,
+ Reduction = 16,
+ };
+
+ enum class SemaphoreAcquireSwitch : u8 {
+ Disabled = 0,
+ Enabled = 1,
+ };
+
+ enum class SemaphoreReleaseWfi : u8 {
+ En = 0,
+ Dis = 1,
+ };
+
+ enum class SemaphoreReleaseSize : u8 {
+ SixteenBytes = 0,
+ FourBytes = 1,
+ };
+
+ enum class SemaphoreReduction : u8 {
+ Min = 0,
+ Max = 1,
+ Xor = 2,
+ And = 3,
+ Or = 4,
+ Add = 5,
+ Inc = 6,
+ Dec = 7,
+ };
+
+ enum class SemaphoreFormat : u8 {
+ Signed = 0,
+ Unsigned = 1,
+ };
+
+ enum class MemOpTlbInvalidatePdb : u8 {
+ One = 0,
+ All = 1,
+ };
+
+ enum class SyncpointOperation : u8 {
+ Wait = 0,
+ Incr = 1,
+ };
+
+ enum class SyncpointWaitSwitch : u8 {
+ Dis = 0,
+ En = 1,
+ };
+
+ enum class WfiScope : u8 {
+ CurrentScgType = 0,
+ All = 1,
+ };
+
+ enum class YieldOp : u8 {
+ Nop = 0,
+ PbdmaTimeslice = 1,
+ RunlistTimeslice = 2,
+ Tsg = 3,
+ };
+
+ struct {
+ struct {
+ u16 nvClass : 16;
+ u8 engine : 5;
+ u16 _pad_ : 11;
+ } setObject;
+
+ u32 illegal;
+ u32 nop;
+ u32 _pad0_;
+
+ struct {
+ struct {
+ u32 offsetUpper : 8;
+ u32 _pad0_ : 24;
+ };
+
+ struct {
+ u8 _pad1_ : 2;
+ u32 offsetLower : 30;
+ };
+
+ u32 payload;
+
+ struct {
+ SemaphoreOperation operation : 5;
+ u8 _pad2_ : 7;
+ SemaphoreAcquireSwitch acquireSwitch : 1;
+ u8 _pad3_ : 7;
+ SemaphoreReleaseWfi releaseWfi : 1;
+ u8 _pad4_ : 3;
+ SemaphoreReleaseSize releaseSize : 1;
+ u8 _pad5_ : 2;
+ SemaphoreReduction reduction : 4;
+ SemaphoreFormat format : 1;
+ };
+ } semaphore;
+
+ u32 nonStallInterrupt;
+ u32 fbFlush;
+ u32 _pad1_[2];
+ u32 memOpC;
+ u32 memOpD;
+ u32 _pad2_[6];
+ u32 setReference;
+ u32 _pad3_[7];
+
+ struct {
+ u32 payload;
+
+ struct {
+ SyncpointOperation operation : 1;
+ u8 _pad0_ : 3;
+ SyncpointWaitSwitch waitSwitch : 1;
+ u8 _pad1_ : 3;
+ u16 index : 12;
+ u16 _pad2_ : 12;
+ };
+ } syncpoint;
+
+ struct {
+ WfiScope scope : 1;
+ u32 _pad_ : 31;
+ } wfi;
+
+ u32 crcCheck;
+
+ struct {
+ YieldOp op : 2;
+ u32 _pad_ : 30;
+ } yield;
+ };
+ } registers{};
+ static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32)));
+#pragma pack(pop)
+
+ public:
+ GPFIFO(const DeviceState &state) : Engine(state) {}
+
+ void CallMethod(MethodParams params) override { // Logs the call and stores the argument into the register addressed by the method
+ state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument);
+
+ registers.raw[params.method] = params.argument; // Unchecked index: presumably the caller only forwards methods below RegisterCount (the GPFIFO dispatcher in this change checks that) - TODO confirm for any other caller
+ }
+ };
+}
diff --git a/app/src/main/cpp/skyline/gpu/macro_interpreter.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
similarity index 96%
rename from app/src/main/cpp/skyline/gpu/macro_interpreter.cpp
rename to app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
index e8e2912e..c3706840 100644
--- a/app/src/main/cpp/skyline/gpu/macro_interpreter.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
@@ -1,11 +1,10 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-#include "engines/maxwell_3d.h"
-#include "memory_manager.h"
-#include "macro_interpreter.h"
+#include
+#include
-namespace skyline::gpu {
+namespace skyline::soc::gm20b::engine::maxwell3d {
void MacroInterpreter::Execute(size_t offset, const std::vector &args) {
// Reset the interpreter state
registers = {};
@@ -28,9 +27,11 @@ namespace skyline::gpu {
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
+
case Opcode::Operation::AddImmediate:
HandleAssignment(opcode->assignmentOperation, opcode->dest, registers[opcode->srcA] + opcode->immediate);
break;
+
case Opcode::Operation::BitfieldReplace: {
u32 src{registers[opcode->srcB]};
u32 dest{registers[opcode->srcA]};
@@ -47,6 +48,7 @@ namespace skyline::gpu {
HandleAssignment(opcode->assignmentOperation, opcode->dest, dest);
break;
}
+
case Opcode::Operation::BitfieldExtractShiftLeftImmediate: {
u32 src{registers[opcode->srcB]};
u32 dest{registers[opcode->srcA]};
@@ -56,6 +58,7 @@ namespace skyline::gpu {
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
+
case Opcode::Operation::BitfieldExtractShiftLeftRegister: {
u32 src{registers[opcode->srcB]};
u32 dest{registers[opcode->srcA]};
@@ -65,17 +68,19 @@ namespace skyline::gpu {
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
+
case Opcode::Operation::ReadImmediate: {
u32 result{maxwell3D.registers.raw[registers[opcode->srcA] + opcode->immediate]};
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
+
case Opcode::Operation::Branch: {
if (delayedOpcode != nullptr)
throw exception("Cannot branch while inside a delay slot");
u32 value{registers[opcode->srcA]};
- bool branch{(opcode->branchCondition == Opcode::BranchCondition::Zero) ? (value == 0) : (value != 0)};
+ bool branch{(opcode->branchCondition == Opcode::BranchCondition::Zero) == (value == 0)};
if (branch) {
if (opcode->noDelay) {
@@ -91,6 +96,7 @@ namespace skyline::gpu {
}
break;
}
+
default:
throw exception("Unknown MME opcode encountered: 0x{:X}", static_cast(opcode->operation));
}
@@ -186,15 +192,14 @@ namespace skyline::gpu {
}
}
- FORCE_INLINE void MacroInterpreter::Send(u32 argument) {
- maxwell3D.CallMethod(MethodParams{methodAddress.address, argument, 0, true});
-
+ FORCE_INLINE void MacroInterpreter::Send(u32 pArgument) {
+ maxwell3D.CallMethod(MethodParams{methodAddress.address, pArgument, 0, true});
methodAddress.address += methodAddress.increment;
}
FORCE_INLINE void MacroInterpreter::WriteRegister(u8 reg, u32 value) {
// Register 0 should always be zero so block writes to it
- if (reg == 0)
+ if (reg == 0) [[unlikely]]
return;
registers[reg] = value;
diff --git a/app/src/main/cpp/skyline/gpu/macro_interpreter.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.h
similarity index 82%
rename from app/src/main/cpp/skyline/gpu/macro_interpreter.h
rename to app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.h
index 10207ff8..7c28fc35 100644
--- a/app/src/main/cpp/skyline/gpu/macro_interpreter.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.h
@@ -5,10 +5,8 @@
#include
-namespace skyline::gpu {
- namespace engine {
- class Maxwell3D;
- }
+namespace skyline::soc::gm20b::engine::maxwell3d {
+ class Maxwell3D; // A forward declaration of Maxwell3D as we don't want to import it here
/**
* @brief The MacroInterpreter class handles interpreting macros. Macros are small programs that run on the GPU and are used for things like instanced rendering.
@@ -105,14 +103,13 @@ namespace skyline::gpu {
};
};
- engine::Maxwell3D &maxwell3D;
+ Maxwell3D &maxwell3D; //!< A reference to the parent engine object
- std::array registers{};
-
- Opcode *opcode{};
- const u32 *argument{};
+ Opcode *opcode{}; //!< A pointer to the instruction that is currently being executed
+ std::array registers{}; //!< The state of all the general-purpose registers in the macro interpreter
+ const u32 *argument{}; //!< A pointer to the argument buffer for the program, it is read from sequentially
MethodAddress methodAddress{};
- bool carryFlag{};
+ bool carryFlag{}; //!< A flag representing if an arithmetic operation has set the most significant bit
/**
* @brief Steps forward one macro instruction, including delay slots
@@ -135,10 +132,13 @@ namespace skyline::gpu {
*/
void Send(u32 argument);
+ /**
+ * @brief Writes to the specified register with sanity checking
+ */
void WriteRegister(u8 reg, u32 value);
public:
- MacroInterpreter(engine::Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
+ MacroInterpreter(Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
/**
* @brief Executes a GPU macro from macro memory with the given arguments
diff --git a/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
similarity index 91%
rename from app/src/main/cpp/skyline/gpu/engines/maxwell_3d.cpp
rename to app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
index ec25bff5..bbda7e43 100644
--- a/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
@@ -1,10 +1,9 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
-#include
-#include "maxwell_3d.h"
+#include
-namespace skyline::gpu::engine {
+namespace skyline::soc::gm20b::engine::maxwell3d {
Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this) {
ResetRegs();
}
@@ -77,9 +76,9 @@ namespace skyline::gpu::engine {
state.logger->Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", params.method, params.argument);
// Methods that are greater than the register size are for macro control
- if (params.method > constant::Maxwell3DRegisterCounter) {
+ if (params.method >= RegisterCount) { // '>=' as method == RegisterCount is the first macro method (index 0) and would otherwise index one past the end of the register array
if (!(params.method & 1))
- macroInvocation.index = ((params.method - constant::Maxwell3DRegisterCounter) >> 1) % macroPositions.size();
+ macroInvocation.index = ((params.method - RegisterCount) >> 1) % macroPositions.size();
macroInvocation.arguments.push_back(params.argument);
@@ -100,6 +99,8 @@ namespace skyline::gpu::engine {
else if (shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodReplay)
params.argument = shadowRegisters.raw[params.method];
+ #define MAXWELL3D_OFFSET(field) U32_OFFSET(Registers, field)
+
switch (params.method) {
case MAXWELL3D_OFFSET(mme.instructionRamLoad):
if (registers.mme.instructionRamPointer >= macroCode.size())
@@ -118,7 +119,7 @@ namespace skyline::gpu::engine {
break;
case MAXWELL3D_OFFSET(syncpointAction):
state.logger->Debug("Increment syncpoint: {}", static_cast(registers.syncpointAction.id));
- state.gpu->syncpoints.at(registers.syncpointAction.id).Increment();
+ state.soc->host1x.syncpoints.at(registers.syncpointAction.id).Increment();
break;
case MAXWELL3D_OFFSET(semaphore.info):
switch (registers.semaphore.info.op) {
@@ -137,6 +138,8 @@ namespace skyline::gpu::engine {
registers.raw[0xD00] = 1;
break;
}
+
+ #undef MAXWELL3D_OFFSET
}
void Maxwell3D::HandleSemaphoreCounterOperation() {
@@ -158,7 +161,7 @@ namespace skyline::gpu::engine {
switch (registers.semaphore.info.structureSize) {
case Registers::SemaphoreInfo::StructureSize::OneWord:
- state.gpu->memoryManager.Write(static_cast(result), registers.semaphore.address.Pack());
+ state.soc->gmmu.Write(static_cast(result), registers.semaphore.address.Pack());
break;
case Registers::SemaphoreInfo::StructureSize::FourWords: {
// Convert the current nanosecond time to GPU ticks
@@ -168,7 +171,7 @@ namespace skyline::gpu::engine {
u64 nsTime{util::GetTimeNs()};
u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
- state.gpu->memoryManager.Write(FourWordResult{result, timestamp}, registers.semaphore.address.Pack());
+ state.soc->gmmu.Write(FourWordResult{result, timestamp}, registers.semaphore.address.Pack());
break;
}
}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
new file mode 100644
index 00000000..8b1b96f1
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include "engine.h"
+#include "maxwell/macro_interpreter.h"
+
+namespace skyline::soc::gm20b::engine::maxwell3d {
+ /**
+ * @brief The Maxwell 3D engine handles processing 3D graphics
+ */
+ class Maxwell3D : public Engine {
+ private:
+ std::array macroPositions{}; //!< The positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
+
+ struct {
+ u32 index;
+ std::vector arguments;
+ } macroInvocation{}; //!< Data for a macro that is pending execution
+
+ MacroInterpreter macroInterpreter;
+
+ void HandleSemaphoreCounterOperation();
+
+ void WriteSemaphoreResult(u64 result);
+
+ public:
+ static constexpr u32 RegisterCount{0xE00}; //!< The number of Maxwell 3D registers
+
+ /**
+ * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
+ */
+#pragma pack(push, 1)
+ union Registers {
+ std::array raw;
+
+ struct Address {
+ u32 high;
+ u32 low;
+
+ u64 Pack() {
+ return (static_cast(high) << 32) | low;
+ }
+ };
+ static_assert(sizeof(Address) == sizeof(u64));
+
+ enum class MmeShadowRamControl : u32 {
+ MethodTrack = 0,
+ MethodTrackWithFilter = 1,
+ MethodPassthrough = 2,
+ MethodReplay = 3,
+ };
+
+ struct ViewportTransform {
+ enum class Swizzle : u8 {
+ PositiveX = 0,
+ NegativeX = 1,
+ PositiveY = 2,
+ NegativeY = 3,
+ PositiveZ = 4,
+ NegativeZ = 5,
+ PositiveW = 6,
+ NegativeW = 7,
+ };
+
+ float scaleX;
+ float scaleY;
+ float scaleZ;
+ float translateX;
+ float translateY;
+ float translateZ;
+
+ struct {
+ Swizzle x : 3;
+ u8 _pad0_ : 1;
+ Swizzle y : 3;
+ u8 _pad1_ : 1;
+ Swizzle z : 3;
+ u8 _pad2_ : 1;
+ Swizzle w : 3;
+ u32 _pad3_ : 17;
+ } swizzles;
+
+ struct {
+ u8 x : 5;
+ u8 _pad0_ : 3;
+ u8 y : 5;
+ u32 _pad1_ : 19;
+ } subpixelPrecisionBias;
+ };
+ static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
+
+ struct Viewport {
+ struct {
+ u16 x;
+ u16 width;
+ };
+
+ struct {
+ u16 y;
+ u16 height;
+ };
+
+ float depthRangeNear;
+ float depthRangeFar;
+ };
+ static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
+
+ enum class PolygonMode : u32 {
+ Point = 0x1B00,
+ Line = 0x1B01,
+ Fill = 0x1B02,
+ };
+
+ union VertexAttribute {
+ u32 raw;
+
+ enum class Size : u8 {
+ Size_1x32 = 0x12,
+ Size_2x32 = 0x04,
+ Size_3x32 = 0x02,
+ Size_4x32 = 0x01,
+ Size_1x16 = 0x1B,
+ Size_2x16 = 0x0F,
+ Size_3x16 = 0x05,
+ Size_4x16 = 0x03,
+ Size_1x8 = 0x1D,
+ Size_2x8 = 0x18,
+ Size_3x8 = 0x13,
+ Size_4x8 = 0x0A,
+ Size_10_10_10_2 = 0x30,
+ Size_11_11_10 = 0x31,
+ };
+
+ enum class Type : u8 {
+ None = 0,
+ SNorm = 1,
+ UNorm = 2,
+ SInt = 3,
+ UInt = 4,
+ UScaled = 5,
+ SScaled = 6,
+ Float = 7,
+ };
+
+ struct {
+ u8 bufferId : 5;
+ u8 _pad0_ : 1;
+ bool fixed : 1;
+ u16 offset : 14;
+ Size size : 6;
+ Type type : 3;
+ u8 _pad1_ : 1;
+ bool bgra : 1;
+ };
+ };
+ static_assert(sizeof(VertexAttribute) == sizeof(u32));
+
+ enum class CompareOp : u32 {
+ Never = 1,
+ Less = 2,
+ Equal = 3,
+ LessOrEqual = 4,
+ Greater = 5,
+ NotEqual = 6,
+ GreaterOrEqual = 7,
+ Always = 8,
+
+ NeverGL = 0x200,
+ LessGL = 0x201,
+ EqualGL = 0x202,
+ LessOrEqualGL = 0x203,
+ GreaterGL = 0x204,
+ NotEqualGL = 0x205,
+ GreaterOrEqualGL = 0x206,
+ AlwaysGL = 0x207,
+ };
+
+ struct Blend {
+ enum class Op : u32 {
+ Add = 1,
+ Subtract = 2,
+ ReverseSubtract = 3,
+ Minimum = 4,
+ Maximum = 5,
+
+ AddGL = 0x8006,
+ SubtractGL = 0x8007,
+ ReverseSubtractGL = 0x8008,
+ MinimumGL = 0x800A,
+ MaximumGL = 0x800B,
+ };
+
+ enum class Factor : u32 {
+ Zero = 0x1,
+ One = 0x2,
+ SourceColor = 0x3,
+ OneMinusSourceColor = 0x4,
+ SourceAlpha = 0x5,
+ OneMinusSourceAlpha = 0x6,
+ DestAlpha = 0x7,
+ OneMinusDestAlpha = 0x8,
+ DestColor = 0x9,
+ OneMinusDestColor = 0xA,
+ SourceAlphaSaturate = 0xB,
+ Source1Color = 0x10,
+ OneMinusSource1Color = 0x11,
+ Source1Alpha = 0x12,
+ OneMinusSource1Alpha = 0x13,
+ ConstantColor = 0x61,
+ OneMinusConstantColor = 0x62,
+ ConstantAlpha = 0x63,
+ OneMinusConstantAlpha = 0x64,
+
+ ZeroGL = 0x4000,
+ OneGL = 0x4001,
+ SourceColorGL = 0x4300,
+ OneMinusSourceColorGL = 0x4301,
+ SourceAlphaGL = 0x4302,
+ OneMinusSourceAlphaGL = 0x4303,
+ DestAlphaGL = 0x4304,
+ OneMinusDestAlphaGL = 0x4305,
+ DestColorGL = 0x4306,
+ OneMinusDestColorGL = 0x4307,
+ SourceAlphaSaturateGL = 0x4308,
+ ConstantColorGL = 0xC001,
+ OneMinusConstantColorGL = 0xC002,
+ ConstantAlphaGL = 0xC003,
+ OneMinusConstantAlphaGL = 0xC004,
+ Source1ColorGL = 0xC900,
+ OneMinusSource1ColorGL = 0xC901,
+ Source1AlphaGL = 0xC902,
+ OneMinusSource1AlphaGL = 0xC903,
+ };
+
+ struct {
+ u32 seperateAlpha;
+ Op colorOp;
+ Factor colorSrcFactor;
+ Factor colorDestFactor;
+ Op alphaOp;
+ Factor alphaSrcFactor;
+ Factor alphaDestFactor;
+ u32 _pad_;
+ };
+ };
+ static_assert(sizeof(Blend) == (sizeof(u32) * 8));
+
+ enum class StencilOp : u32 {
+ Keep = 1,
+ Zero = 2,
+ Replace = 3,
+ IncrementAndClamp = 4,
+ DecrementAndClamp = 5,
+ Invert = 6,
+ IncrementAndWrap = 7,
+ DecrementAndWrap = 8,
+ };
+
+ enum class FrontFace : u32 {
+ Clockwise = 0x900,
+ CounterClockwise = 0x901,
+ };
+
+ enum class CullFace : u32 {
+ Front = 0x404,
+ Back = 0x405,
+ FrontAndBack = 0x408,
+ };
+
+ union ColorWriteMask {
+ u32 raw;
+
+ struct {
+ u8 r : 4;
+ u8 g : 4;
+ u8 b : 4;
+ u8 a : 4;
+ };
+ };
+ static_assert(sizeof(ColorWriteMask) == sizeof(u32));
+
+ struct SemaphoreInfo {
+ enum class Op : u8 {
+ Release = 0,
+ Acquire = 1,
+ Counter = 2,
+ Trap = 3,
+ };
+
+ enum class ReductionOp : u8 {
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ };
+
+ enum class Unit : u8 {
+ VFetch = 1,
+ VP = 2,
+ Rast = 4,
+ StrmOut = 5,
+ GP = 6,
+ ZCull = 7,
+ Prop = 10,
+ Crop = 15,
+ };
+
+ enum class SyncCondition : u8 {
+ NotEqual = 0,
+ GreaterThan = 1,
+ };
+
+ enum class Format : u8 {
+ U32 = 0,
+ I32 = 1,
+ };
+
+ enum class CounterType : u8 {
+ Zero = 0x0,
+ InputVertices = 0x1,
+ InputPrimitives = 0x3,
+ VertexShaderInvocations = 0x5,
+ GeometryShaderInvocations = 0x7,
+ GeometryShaderPrimitives = 0x9,
+ ZcullStats0 = 0xA,
+ TransformFeedbackPrimitivesWritten = 0xB,
+ ZcullStats1 = 0xC,
+ ZcullStats2 = 0xE,
+ ClipperInputPrimitives = 0xF,
+ ZcullStats3 = 0x10,
+ ClipperOutputPrimitives = 0x11,
+ PrimitivesGenerated = 0x12,
+ FragmentShaderInvocations = 0x13,
+ SamplesPassed = 0x15,
+ TransformFeedbackOffset = 0x1A,
+ TessControlShaderInvocations = 0x1B,
+ TessEvaluationShaderInvocations = 0x1D,
+ TessEvaluationShaderPrimitives = 0x1F,
+ };
+
+ enum class StructureSize : u8 {
+ FourWords = 0,
+ OneWord = 1,
+ };
+
+ Op op : 2;
+ bool flushDisable : 1;
+ bool reductionEnable : 1;
+ bool fenceEnable : 1;
+ u8 _pad0_ : 4;
+ ReductionOp reductionOp : 3;
+ Unit unit : 4;
+ SyncCondition syncCondition : 1;
+ Format format : 2;
+ u8 _pad1_ : 1;
+ bool awakenEnable : 1;
+ u8 _pad2_ : 2;
+ CounterType counterType : 5;
+ StructureSize structureSize : 1;
+ };
+ static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
+
+ enum class CoordOrigin : u8 {
+ LowerLeft = 0,
+ UpperLeft = 1,
+ };
+
+ struct {
+ u32 _pad0_[0x40]; // 0x0
+ u32 noOperation; // 0x40
+ u32 _pad1_[0x3]; // 0x41
+ u32 waitForIdle; // 0x44
+
+ struct {
+ u32 instructionRamPointer; // 0x45
+ u32 instructionRamLoad; // 0x46
+ u32 startAddressRamPointer; // 0x47
+ u32 startAddressRamLoad; // 0x48
+ MmeShadowRamControl shadowRamControl; // 0x49
+ } mme;
+
+ u32 _pad2_[0x68]; // 0x4A
+
+ struct {
+ u16 id : 12;
+ u8 _pad0_ : 4;
+ bool flushCache : 1;
+ u8 _pad1_ : 3;
+ bool increment : 1;
+ u16 _pad2_ : 11;
+ } syncpointAction; // 0xB2
+
+ u32 _pad3_[0x2C]; // 0xB3
+ u32 rasterizerEnable; // 0xDF
+ u32 _pad4_[0x1A0]; // 0xE0
+ std::array viewportTransform; // 0x280
+ std::array viewport; // 0x300
+ u32 _pad5_[0x2B]; // 0x340
+
+ struct {
+ PolygonMode front; // 0x36B
+ PolygonMode back; // 0x36C
+ } polygonMode;
+
+ u32 _pad6_[0x68]; // 0x36D
+
+ struct {
+ u32 compareRef; // 0x3D5
+ u32 writeMask; // 0x3D6
+ u32 compareMask; // 0x3D7
+ } stencilBackExtra;
+
+ u32 _pad7_[0x13]; // 0x3D8
+ u32 rtSeparateFragData; // 0x3EB
+ u32 _pad8_[0x6C]; // 0x3EC
+ std::array vertexAttributeState; // 0x458
+ u32 _pad9_[0x4B]; // 0x478
+ CompareOp depthTestFunc; // 0x4C3
+ float alphaTestRef; // 0x4C4
+ CompareOp alphaTestFunc; // 0x4C5
+ u32 drawTFBStride; // 0x4C6
+
+ struct {
+ float r; // 0x4C7
+ float g; // 0x4C8
+ float b; // 0x4C9
+ float a; // 0x4CA
+ } blendConstant;
+
+ u32 _pad10_[0x4]; // 0x4CB
+
+ struct {
+ u32 seperateAlpha; // 0x4CF
+ Blend::Op colorOp; // 0x4D0
+ Blend::Factor colorSrcFactor; // 0x4D1
+ Blend::Factor colorDestFactor; // 0x4D2
+ Blend::Op alphaOp; // 0x4D3
+ Blend::Factor alphaSrcFactor; // 0x4D4
+ u32 _pad_; // 0x4D5
+ Blend::Factor alphaDestFactor; // 0x4D6
+
+ u32 enableCommon; // 0x4D7
+ std::array enable; // 0x4D8 For each render target
+ } blend;
+
+ u32 stencilEnable; // 0x4E0
+
+ struct {
+ StencilOp failOp; // 0x4E1
+ StencilOp zFailOp; // 0x4E2
+ StencilOp zPassOp; // 0x4E3
+
+ struct {
+ CompareOp op; // 0x4E4
+ i32 ref; // 0x4E5
+ u32 mask; // 0x4E6
+ } compare;
+
+ u32 writeMask; // 0x4E7
+ } stencilFront;
+
+ u32 _pad11_[0x4]; // 0x4E8
+ float lineWidthSmooth; // 0x4EC
+ float lineWidthAliased; // 0x4ED
+ u32 _pad12_[0x1F]; // 0x4EE
+ u32 drawBaseVertex; // 0x50D
+ u32 drawBaseInstance; // 0x50E
+ u32 _pad13_[0x35]; // 0x50F
+ u32 clipDistanceEnable; // 0x544
+ u32 sampleCounterEnable; // 0x545
+ float pointSpriteSize; // 0x546
+ u32 zCullStatCountersEnable; // 0x547
+ u32 pointSpriteEnable; // 0x548
+ u32 _pad14_; // 0x549
+ u32 shaderExceptions; // 0x54A
+ u32 _pad15_[0x2]; // 0x54B
+ u32 multisampleEnable; // 0x54D
+ u32 depthTargetEnable; // 0x54E
+
+ struct {
+ bool alphaToCoverage : 1;
+ u8 _pad0_ : 3;
+ bool alphaToOne : 1;
+ u32 _pad1_ : 27;
+ } multisampleControl; // 0x54F
+
+ u32 _pad16_[0x7]; // 0x550
+
+ struct {
+ Address address; // 0x557
+ u32 maximumIndex; // 0x559
+ } texSamplerPool;
+
+ u32 _pad17_; // 0x55A
+ u32 polygonOffsetFactor; // 0x55B
+ u32 lineSmoothEnable; // 0x55C
+
+ struct {
+ Address address; // 0x55D
+ u32 maximumIndex; // 0x55F
+ } texHeaderPool;
+
+ u32 _pad18_[0x5]; // 0x560
+
+ u32 stencilTwoSideEnable; // 0x565
+
+ struct {
+ StencilOp failOp; // 0x566
+ StencilOp zFailOp; // 0x567
+ StencilOp zPassOp; // 0x568
+ CompareOp compareOp; // 0x569
+ } stencilBack;
+
+ u32 _pad19_[0x17]; // 0x56A
+
+ struct {
+ u8 _unk_ : 2;
+ CoordOrigin origin : 1;
+ u16 enable : 10;
+ u32 _pad_ : 19;
+ } pointCoordReplace; // 0x581
+
+ u32 _pad20_[0xC4]; // 0x582
+ u32 cullFaceEnable; // 0x646
+ FrontFace frontFace; // 0x647
+ CullFace cullFace; // 0x648
+ u32 pixelCentreImage; // 0x649
+ u32 _pad21_; // 0x64A
+ u32 viewportTransformEnable; // 0x64B
+ u32 _pad22_[0x34]; // 0x64C
+ std::array colorMask; // 0x680 For each render target
+ u32 _pad23_[0x38]; // 0x688
+
+ struct {
+ Address address; // 0x6C0
+ u32 payload; // 0x6C2
+ SemaphoreInfo info; // 0x6C3
+ } semaphore;
+
+ u32 _pad24_[0xBC]; // 0x6C4
+ std::array independentBlend; // 0x780 For each render target
+ u32 _pad25_[0x100]; // 0x7C0
+ u32 firmwareCall[0x20]; // 0x8C0
+ };
+ };
+ static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32)));
+#pragma pack(pop)
+
+ Registers registers{};
+ Registers shadowRegisters{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
+
+ std::array macroCode{}; //!< This stores GPU macros, the 256kb size is from Ryujinx
+
+ Maxwell3D(const DeviceState &state);
+
+ /**
+ * @brief Resets the Maxwell 3D registers to their default values
+ */
+ void ResetRegs();
+
+ void CallMethod(MethodParams params) override;
+ };
+}
diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
similarity index 89%
rename from app/src/main/cpp/skyline/gpu/gpfifo.cpp
rename to app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
index bd5e3afe..1b3a41c9 100644
--- a/app/src/main/cpp/skyline/gpu/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
@@ -4,30 +4,28 @@
#include
#include
#include
-#include
-#include
-#include "gpfifo.h"
+#include
-namespace skyline::gpu::gpfifo {
+namespace skyline::soc::gm20b {
void GPFIFO::Send(MethodParams params) {
state.logger->Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall);
if (params.method == 0) {
switch (static_cast(params.argument)) {
case EngineID::Fermi2D:
- subchannels.at(params.subChannel) = state.gpu->fermi2D;
+ subchannels.at(params.subChannel) = &state.soc->gm20b.fermi2D;
break;
case EngineID::KeplerMemory:
- subchannels.at(params.subChannel) = state.gpu->keplerMemory;
+ subchannels.at(params.subChannel) = &state.soc->gm20b.keplerMemory;
break;
case EngineID::Maxwell3D:
- subchannels.at(params.subChannel) = state.gpu->maxwell3D;
+ subchannels.at(params.subChannel) = &state.soc->gm20b.maxwell3D;
break;
case EngineID::MaxwellCompute:
- subchannels.at(params.subChannel) = state.gpu->maxwellCompute;
+ subchannels.at(params.subChannel) = &state.soc->gm20b.maxwellCompute;
break;
case EngineID::MaxwellDma:
- subchannels.at(params.subChannel) = state.gpu->maxwellDma;
+ subchannels.at(params.subChannel) = &state.soc->gm20b.maxwellDma;
break;
default:
throw exception("Unknown engine 0x{:X} cannot be bound to subchannel {}", params.argument, params.subChannel);
@@ -35,7 +33,7 @@ namespace skyline::gpu::gpfifo {
state.logger->Info("Bound GPU engine 0x{:X} to subchannel {}", params.argument, params.subChannel);
return;
- } else if (params.method < constant::GpfifoRegisterCount) {
+ } else if (params.method < engine::GPFIFO::RegisterCount) {
gpfifoEngine.CallMethod(params);
} else {
if (subchannels.at(params.subChannel) == nullptr)
@@ -58,7 +56,7 @@ namespace skyline::gpu::gpfifo {
}
pushBufferData.resize(gpEntry.size);
- state.gpu->memoryManager.Read(pushBufferData, gpEntry.Address());
+ state.soc->gmmu.Read(pushBufferData, gpEntry.Address());
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it
@@ -66,28 +64,29 @@ namespace skyline::gpu::gpfifo {
continue;
PushBufferMethodHeader methodHeader{.raw = *entry};
-
switch (methodHeader.secOp) {
case PushBufferMethodHeader::SecOp::IncMethod:
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{static_cast(methodHeader.methodAddress + i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
-
break;
+
case PushBufferMethodHeader::SecOp::NonIncMethod:
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
-
break;
+
case PushBufferMethodHeader::SecOp::OneInc:
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{static_cast(methodHeader.methodAddress + static_cast(i)), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
-
break;
+
case PushBufferMethodHeader::SecOp::ImmdDataMethod:
Send(MethodParams{methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true});
break;
+
case PushBufferMethodHeader::SecOp::EndPbSegment:
return;
+
default:
state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast(methodHeader.secOp));
break;
diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
new file mode 100644
index 00000000..2e601753
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include
+#include "engines/gpfifo.h"
+
+namespace skyline::soc::gm20b {
+ /**
+ * @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
+ * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
+ * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
+ */
+ struct GpEntry {
+ enum class Fetch : u8 {
+ Unconditional = 0,
+ Conditional = 1,
+ };
+
+ union {
+ u32 entry0;
+
+ struct {
+ Fetch fetch : 1;
+ u8 _pad_ : 1;
+ u32 get : 30;
+ };
+ };
+
+ enum class Opcode : u8 {
+ Nop = 0,
+ Illegal = 1,
+ Crc = 2,
+ PbCrc = 3,
+ };
+
+ enum class Priv : u8 {
+ User = 0,
+ Kernel = 1,
+ };
+
+ enum class Level : u8 {
+ Main = 0,
+ Subroutine = 1,
+ };
+
+ enum class Sync : u8 {
+ Proceed = 0,
+ Wait = 1,
+ };
+
+ union {
+ u32 entry1;
+
+ struct {
+ union {
+ u8 getHi;
+ Opcode opcode;
+ };
+
+ Priv priv : 1;
+ Level level : 1;
+ u32 size : 21;
+ Sync sync : 1;
+ };
+ };
+
+ constexpr u64 Address() const {
+ return (static_cast(getHi) << 32) | (static_cast(get) << 2);
+ }
+ };
+ static_assert(sizeof(GpEntry) == sizeof(u64));
+
+ /**
+ * @brief A single pushbuffer method header that describes a compressed method sequence
+ * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
+ * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
+ */
+ union PushBufferMethodHeader {
+ u32 raw;
+
+ enum class TertOp : u8 {
+ Grp0IncMethod = 0,
+ Grp0SetSubDevMask = 1,
+ Grp0StoreSubDevMask = 2,
+ Grp0UseSubDevMask = 3,
+ Grp2NonIncMethod = 0,
+ };
+
+ enum class SecOp : u8 {
+ Grp0UseTert = 0,
+ IncMethod = 1,
+ Grp2UseTert = 2,
+ NonIncMethod = 3,
+ ImmdDataMethod = 4,
+ OneInc = 5,
+ Reserved6 = 6,
+ EndPbSegment = 7,
+ };
+
+ u16 methodAddress : 12;
+ struct {
+ u8 _pad0_ : 4;
+ u16 subDeviceMask : 12;
+ };
+
+ struct {
+ u16 _pad1_ : 13;
+ u8 methodSubChannel : 3;
+ union {
+ TertOp tertOp : 3;
+ u16 methodCount : 13;
+ u16 immdData : 13;
+ };
+ };
+
+ struct {
+ u32 _pad2_ : 29;
+ SecOp secOp : 3;
+ };
+ };
+ static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
+
+ /**
+ * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
+ * @note This class doesn't perfectly map to any particular hardware component on the X1, it does a mix of the GPU Host PBDMA and handling the GPFIFO entries
+ * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
+ */
+ class GPFIFO {
+ const DeviceState &state;
+ engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
+ std::array subchannels;
+ std::optional> pushBuffers;
+ std::thread thread; //!< The thread that manages processing of pushbuffers
+ std::vector pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
+
+ /**
+ * @brief Sends a method call to the GPU hardware
+ */
+ void Send(MethodParams params);
+
+ /**
+ * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
+ */
+ void Process(GpEntry gpEntry);
+
+ public:
+ GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}
+
+ ~GPFIFO();
+
+ /**
+ * @param numBuffers The amount of push-buffers to allocate in the circular buffer
+ */
+ void Initialize(size_t numBuffers);
+
+ /**
+ * @brief Executes all pending entries in the FIFO
+ */
+ void Run();
+
+ /**
+ * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Run'
+ */
+ void Push(span entries);
+ };
+}
diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.cpp b/app/src/main/cpp/skyline/soc/gmmu.cpp
similarity index 59%
rename from app/src/main/cpp/skyline/gpu/memory_manager.cpp
rename to app/src/main/cpp/skyline/soc/gmmu.cpp
index d0c960a0..99e5e674 100644
--- a/app/src/main/cpp/skyline/gpu/memory_manager.cpp
+++ b/app/src/main/cpp/skyline/soc/gmmu.cpp
@@ -2,10 +2,12 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include
-#include "memory_manager.h"
+#include "gmmu.h"
-namespace skyline::gpu::vmm {
- MemoryManager::MemoryManager(const DeviceState &state) : state(state) {
+namespace skyline::soc::gmmu {
+ constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
+
+ GraphicsMemoryManager::GraphicsMemoryManager(const DeviceState &state) : state(state) {
constexpr u64 gpuAddressSpaceSize{1UL << 40}; //!< The size of the GPU address space
constexpr u64 gpuAddressSpaceBase{0x100000}; //!< The base of the GPU address space - must be non-zero
@@ -14,9 +16,9 @@ namespace skyline::gpu::vmm {
chunks.push_back(baseChunk);
}
- std::optional MemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
+ std::optional GraphicsMemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
auto chunk{std::find_if(chunks.begin(), chunks.end(), [desiredState, size, alignment](const ChunkDescriptor &chunk) -> bool {
- return (alignment ? util::IsAligned(chunk.virtAddr, alignment) : true) && chunk.size > size && chunk.state == desiredState;
+ return (alignment ? util::IsAligned(chunk.virtualAddress, alignment) : true) && chunk.size > size && chunk.state == desiredState;
})};
if (chunk != chunks.end())
@@ -25,12 +27,12 @@ namespace skyline::gpu::vmm {
return std::nullopt;
}
- u64 MemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
+ u64 GraphicsMemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
auto chunkEnd{chunks.end()};
for (auto chunk{chunks.begin()}; chunk != chunkEnd; chunk++) {
if (chunk->CanContain(newChunk)) {
auto oldChunk{*chunk};
- u64 newSize{newChunk.virtAddr - chunk->virtAddr};
+ u64 newSize{newChunk.virtualAddress - chunk->virtualAddress};
u64 extension{chunk->size - newSize - newChunk.size};
if (newSize == 0) {
@@ -41,16 +43,16 @@ namespace skyline::gpu::vmm {
}
if (extension)
- chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtAddr + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
+ chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtualAddress + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
- return newChunk.virtAddr;
- } else if (chunk->virtAddr + chunk->size > newChunk.virtAddr) {
- chunk->size = newChunk.virtAddr - chunk->virtAddr;
+ return newChunk.virtualAddress;
+ } else if (chunk->virtualAddress + chunk->size > newChunk.virtualAddress) {
+ chunk->size = newChunk.virtualAddress - chunk->virtualAddress;
// Deletes all chunks that are within the chunk being inserted and split the final one
auto tailChunk{std::next(chunk)};
while (tailChunk != chunkEnd) {
- if (tailChunk->virtAddr + tailChunk->size >= newChunk.virtAddr + newChunk.size)
+ if (tailChunk->virtualAddress + tailChunk->size >= newChunk.virtualAddress + newChunk.size)
break;
tailChunk = chunks.erase(tailChunk);
@@ -61,8 +63,8 @@ namespace skyline::gpu::vmm {
if (tailChunk == chunkEnd)
break;
- u64 chunkSliceOffset{newChunk.virtAddr + newChunk.size - tailChunk->virtAddr};
- tailChunk->virtAddr += chunkSliceOffset;
+ u64 chunkSliceOffset{newChunk.virtualAddress + newChunk.size - tailChunk->virtualAddress};
+ tailChunk->virtualAddress += chunkSliceOffset;
tailChunk->size -= chunkSliceOffset;
if (tailChunk->state == ChunkState::Mapped)
tailChunk->cpuPtr += chunkSliceOffset;
@@ -74,19 +76,19 @@ namespace skyline::gpu::vmm {
else
chunks.insert(std::next(headChunk), newChunk);
- return newChunk.virtAddr;
+ return newChunk.virtualAddress;
}
}
throw exception("Failed to insert chunk into GPU address space!");
}
- u64 MemoryManager::ReserveSpace(u64 size, u64 alignment) {
- size = util::AlignUp(size, constant::GpuPageSize);
+ u64 GraphicsMemoryManager::ReserveSpace(u64 size, u64 alignment) {
+ size = util::AlignUp(size, GpuPageSize);
- std::unique_lock lock(vmmMutex);
+ std::unique_lock lock(mutex);
auto newChunk{FindChunk(ChunkState::Unmapped, size, alignment)};
- if (!newChunk)
+ if (!newChunk) [[unlikely]]
return 0;
auto chunk{*newChunk};
@@ -96,22 +98,22 @@ namespace skyline::gpu::vmm {
return InsertChunk(chunk);
}
- u64 MemoryManager::ReserveFixed(u64 virtAddr, u64 size) {
- if (!util::IsAligned(virtAddr, constant::GpuPageSize))
+ u64 GraphicsMemoryManager::ReserveFixed(u64 virtualAddress, u64 size) {
+ if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return 0;
- size = util::AlignUp(size, constant::GpuPageSize);
+ size = util::AlignUp(size, GpuPageSize);
- std::unique_lock lock(vmmMutex);
- return InsertChunk(ChunkDescriptor(virtAddr, size, nullptr, ChunkState::Reserved));
+ std::unique_lock lock(mutex);
+ return InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Reserved));
}
- u64 MemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
- size = util::AlignUp(size, constant::GpuPageSize);
+ u64 GraphicsMemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
+ size = util::AlignUp(size, GpuPageSize);
- std::unique_lock lock(vmmMutex);
+ std::unique_lock lock(mutex);
auto mappedChunk{FindChunk(ChunkState::Unmapped, size)};
- if (!mappedChunk)
+ if (!mappedChunk) [[unlikely]]
return 0;
auto chunk{*mappedChunk};
@@ -122,23 +124,23 @@ namespace skyline::gpu::vmm {
return InsertChunk(chunk);
}
- u64 MemoryManager::MapFixed(u64 virtAddr, u8 *cpuPtr, u64 size) {
- if (!util::IsAligned(virtAddr, constant::GpuPageSize))
+ u64 GraphicsMemoryManager::MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size) {
+ if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return 0;
- size = util::AlignUp(size, constant::GpuPageSize);
+ size = util::AlignUp(size, GpuPageSize);
- std::unique_lock lock(vmmMutex);
- return InsertChunk(ChunkDescriptor(virtAddr, size, cpuPtr, ChunkState::Mapped));
+ std::unique_lock lock(mutex);
+ return InsertChunk(ChunkDescriptor(virtualAddress, size, cpuPtr, ChunkState::Mapped));
}
- bool MemoryManager::Unmap(u64 virtAddr, u64 size) {
- if (!util::IsAligned(virtAddr, constant::GpuPageSize))
+ bool GraphicsMemoryManager::Unmap(u64 virtualAddress, u64 size) {
+ if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return false;
try {
- std::unique_lock lock(vmmMutex);
- InsertChunk(ChunkDescriptor(virtAddr, size, nullptr, ChunkState::Unmapped));
+ std::unique_lock lock(mutex);
+ InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Unmapped));
} catch (const std::exception &e) {
return false;
}
@@ -146,20 +148,20 @@ namespace skyline::gpu::vmm {
return true;
}
- void MemoryManager::Read(u8 *destination, u64 virtAddr, u64 size) {
- std::shared_lock lock(vmmMutex);
+ void GraphicsMemoryManager::Read(u8 *destination, u64 virtualAddress, u64 size) {
+ std::shared_lock lock(mutex);
- auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtAddr, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
- return address < chunk.virtAddr;
+ auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
+ return address < chunk.virtualAddress;
})};
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
- throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
+ throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
chunk--;
u64 initialSize{size};
- u64 chunkOffset{virtAddr - chunk->virtAddr};
+ u64 chunkOffset{virtualAddress - chunk->virtualAddress};
u8 *source{chunk->cpuPtr + chunkOffset};
u64 sourceSize{std::min(chunk->size - chunkOffset, size)};
@@ -170,7 +172,7 @@ namespace skyline::gpu::vmm {
size -= sourceSize;
if (size) {
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
- throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
+ throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
source = chunk->cpuPtr;
sourceSize = std::min(chunk->size, size);
@@ -178,20 +180,20 @@ namespace skyline::gpu::vmm {
}
}
- void MemoryManager::Write(u8 *source, u64 virtAddr, u64 size) {
- std::shared_lock lock(vmmMutex);
+ void GraphicsMemoryManager::Write(u8 *source, u64 virtualAddress, u64 size) {
+ std::shared_lock lock(mutex);
- auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtAddr, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
- return address < chunk.virtAddr;
+ auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
+ return address < chunk.virtualAddress;
})};
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
- throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
+ throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
chunk--;
u64 initialSize{size};
- u64 chunkOffset{virtAddr - chunk->virtAddr};
+ u64 chunkOffset{virtualAddress - chunk->virtualAddress};
u8 *destination{chunk->cpuPtr + chunkOffset};
u64 destinationSize{std::min(chunk->size - chunkOffset, size)};
@@ -202,7 +204,7 @@ namespace skyline::gpu::vmm {
size -= destinationSize;
if (size) {
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
- throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
+ throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
destination = chunk->cpuPtr;
destinationSize = std::min(chunk->size, size);
diff --git a/app/src/main/cpp/skyline/soc/gmmu.h b/app/src/main/cpp/skyline/soc/gmmu.h
new file mode 100644
index 00000000..1d82a211
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gmmu.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include
+
+namespace skyline::soc::gmmu {
+ enum class ChunkState {
+ Unmapped, //!< The chunk is unmapped
+ Reserved, //!< The chunk is reserved
+ Mapped //!< The chunk is mapped and a CPU side address is present
+ };
+
+ struct ChunkDescriptor {
+ u64 virtualAddress; //!< The address of the chunk in the virtual address space
+ u64 size; //!< The size of the chunk in bytes
+ u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
+ ChunkState state;
+
+ ChunkDescriptor(u64 virtualAddress, u64 size, u8 *cpuPtr, ChunkState state) : virtualAddress(virtualAddress), size(size), cpuPtr(cpuPtr), state(state) {}
+
+ /**
+ * @return If the given chunk can be contained wholly within this chunk
+ */
+ inline bool CanContain(const ChunkDescriptor &chunk) {
+ return (chunk.virtualAddress >= virtualAddress) && ((size + virtualAddress) >= (chunk.size + chunk.virtualAddress));
+ }
+ };
+
+ /**
+ * @brief The GraphicsMemoryManager class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
+ * @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment
+ */
+ class GraphicsMemoryManager {
+ private:
+ const DeviceState &state;
+ std::vector chunks;
+ std::shared_mutex mutex;
+
+ /**
+ * @brief Finds a chunk in the virtual address space that is larger than the given size and meets the given state and alignment requirements
+ * @note mutex MUST be locked when calling this
+ * @param desiredState The state of the chunk to find
+ * @param size The minimum size of the chunk to find
+ * @param alignment The minimum alignment of the chunk to find
+ * @return The first applicable chunk
+ */
+ std::optional FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
+
+ /**
+ * @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
+ * @note mutex MUST be locked when calling this
+ * @param newChunk The chunk to insert
+ * @return The base virtual address of the inserted chunk
+ */
+ u64 InsertChunk(const ChunkDescriptor &newChunk);
+
+ public:
+ GraphicsMemoryManager(const DeviceState &state);
+
+ /**
+ * @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
+ * @param size The size of the region to reserve
+ * @param alignment The alignment of the region to reserve
+ * @return The base virtual address of the reserved region
+ */
+ u64 ReserveSpace(u64 size, u64 alignment);
+
+ /**
+ * @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
+ * @param virtualAddress The virtual base address of the region to allocate
+ * @param size The size of the region to allocate
+ * @return The base virtual address of the reserved region
+ */
+ u64 ReserveFixed(u64 virtualAddress, u64 size);
+
+ /**
+ * @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
+ * @param cpuPtr A pointer to the region to be mapped into the virtual address space
+ * @param size The size of the region to map
+ * @return The base virtual address of the mapped region
+ */
+ u64 MapAllocate(u8 *cpuPtr, u64 size);
+
+ /**
+ * @brief Maps a CPU memory region to a fixed region in the virtual address space
+ * @param virtualAddress The target virtual address of the region
+ * @param cpuPtr A pointer to the region to be mapped into the virtual address space
+ * @param size The size of the region to map
+ * @return The base virtual address of the mapped region
+ */
+ u64 MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size);
+
+ /**
+ * @brief Unmaps all chunks in the given region from the virtual address space
+ * @return Whether the operation succeeded
+ */
+ bool Unmap(u64 virtualAddress, u64 size);
+
+ void Read(u8 *destination, u64 virtualAddress, u64 size);
+
+ /**
+ * @brief Reads in a span from a region of the virtual address space
+ */
+ template
+ void Read(span destination, u64 virtualAddress) {
+ Read(reinterpret_cast(destination.data()), virtualAddress, destination.size_bytes());
+ }
+
+ /**
+ * @brief Reads in an object from a region of the virtual address space
+ * @tparam T The type of object to return
+ */
+ template
+ T Read(u64 virtualAddress) {
+ T obj;
+ Read(reinterpret_cast(&obj), virtualAddress, sizeof(T));
+ return obj;
+ }
+
+ void Write(u8 *source, u64 virtualAddress, u64 size);
+
+ /**
+ * @brief Writes out a span to a region of the virtual address space
+ */
+ template
+ void Write(span source, u64 virtualAddress) {
+ Write(reinterpret_cast(source.data()), virtualAddress, source.size_bytes());
+ }
+
+ /**
+ * @brief Writes out an object to a region of the virtual address space
+ */
+ template
+ void Write(T source, u64 virtualAddress) {
+ Write(reinterpret_cast(&source), virtualAddress, sizeof(T));
+ }
+ };
+}
diff --git a/app/src/main/cpp/skyline/soc/host1x.h b/app/src/main/cpp/skyline/soc/host1x.h
new file mode 100644
index 00000000..006c0a2c
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/host1x.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include "host1x/syncpoint.h"
+
+namespace skyline::soc::host1x {
+ /**
+ * @brief An abstraction for the graphics host, this handles DMA on behalf of the CPU when communicating to its clients alongside handling syncpts
+ * @note This is different from the GM20B Host, it serves a similar function and has an interface for accessing Host1X syncpts
+ */
+ class Host1X {
+ public:
+ std::array syncpoints{};
+ };
+}
diff --git a/app/src/main/cpp/skyline/gpu/syncpoint.cpp b/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp
similarity index 98%
rename from app/src/main/cpp/skyline/gpu/syncpoint.cpp
rename to app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp
index c48d438b..39ba28c7 100644
--- a/app/src/main/cpp/skyline/gpu/syncpoint.cpp
+++ b/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp
@@ -3,7 +3,7 @@
#include "syncpoint.h"
-namespace skyline::gpu {
+namespace skyline::soc::host1x {
u64 Syncpoint::RegisterWaiter(u32 threshold, const std::function &callback) {
if (value >= threshold) {
callback();
diff --git a/app/src/main/cpp/skyline/soc/host1x/syncpoint.h b/app/src/main/cpp/skyline/soc/host1x/syncpoint.h
new file mode 100644
index 00000000..1a0a73c0
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/host1x/syncpoint.h
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include
+
+namespace skyline::soc::host1x {
+ constexpr size_t SyncpointCount{192}; //!< The number of host1x syncpoints on T210
+
+ /**
+ * @brief The Syncpoint class represents a single syncpoint in the GPU which is used for GPU -> CPU synchronisation
+ */
+ class Syncpoint {
+ private:
+ struct Waiter {
+ u32 threshold; //!< The syncpoint value to wait on to be reached
+ std::function callback; //!< The callback to do after the wait has ended
+ };
+
+ std::mutex waiterLock; //!< Synchronizes insertions and deletions of waiters
+ std::map waiterMap;
+ u64 nextWaiterId{1};
+
+ public:
+ std::atomic value{};
+
+ /**
+ * @brief Registers a new waiter with a callback that will be called when the syncpoint reaches the target threshold
+ * @note The callback will be called immediately if the syncpoint has already reached the given threshold
+ * @return A persistent identifier that can be used to refer to the waiter, or 0 if the threshold has already been reached
+ */
+ u64 RegisterWaiter(u32 threshold, const std::function &callback);
+
+ /**
+ * @brief Removes a waiter given by 'id' from the pending waiter map
+ */
+ void DeregisterWaiter(u64 id);
+
+ /**
+ * @brief Increments the syncpoint by 1
+ * @return The new value of the syncpoint
+ */
+ u32 Increment();
+
+ /**
+ * @brief Waits for the syncpoint to reach given threshold
+ * @return false if the timeout was reached, otherwise true
+ */
+ bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout);
+ };
+}