diff --git a/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.cpp new file mode 100644 index 00000000..ea3b3705 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.cpp @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include +#include "maxwell_3d.h" + +namespace skyline::gpu::engine { + Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this) { + ResetRegs(); + } + + void Maxwell3D::ResetRegs() { + memset(®s, 0, sizeof(regs)); + + regs.rasterizerEnable = true; + + for (auto &transform : regs.viewportTransform) { + transform.swizzles.x = Regs::ViewportTransform::Swizzle::PositiveX; + transform.swizzles.y = Regs::ViewportTransform::Swizzle::PositiveY; + transform.swizzles.z = Regs::ViewportTransform::Swizzle::PositiveZ; + transform.swizzles.w = Regs::ViewportTransform::Swizzle::PositiveW; + } + + for (auto &viewport : regs.viewport) { + viewport.depthRangeFar = 1.0f; + viewport.depthRangeNear = 0.0f; + } + + regs.polygonMode.front = Regs::PolygonMode::Fill; + regs.polygonMode.back = Regs::PolygonMode::Fill; + + regs.stencilFront.failOp = regs.stencilFront.zFailOp = regs.stencilFront.zPassOp = Regs::StencilOp::Keep; + regs.stencilFront.func.op = Regs::CompareOp::Always; + regs.stencilFront.func.mask = 0xFFFFFFFF; + regs.stencilFront.mask = 0xFFFFFFFF; + + regs.stencilBack.stencilTwoSideEnable = true; + regs.stencilBack.failOp = regs.stencilBack.zFailOp = regs.stencilBack.zPassOp = Regs::StencilOp::Keep; + regs.stencilBack.funcOp = Regs::CompareOp::Always; + regs.stencilBackExtra.funcMask = 0xFFFFFFFF; + regs.stencilBackExtra.mask = 0xFFFFFFFF; + + regs.rtSeparateFragData = true; + + for (auto &attribute : regs.vertexAttributeState) + attribute.fixed = true; + + regs.depthTestFunc = Regs::CompareOp::Always; + + regs.blend.colorOp = regs.blend.alphaOp = Regs::Blend::Op::Add; + regs.blend.colorSrcFactor = regs.blend.alphaSrcFactor = Regs::Blend::Factor::One; + regs.blend.colorDestFactor = regs.blend.alphaDestFactor = Regs::Blend::Factor::Zero; + + regs.lineWidthSmooth = 1.0f; + regs.lineWidthAliased = 1.0f; + + regs.pointSpriteSize = 1.0f; + + regs.frontFace = Regs::FrontFace::CounterClockwise; + regs.cullFace = Regs::CullFace::Back; + + for (auto &mask : regs.colorMask) + mask.r = mask.g = mask.b = mask.a = 1; + + for (auto &blend : regs.independentBlend) { + blend.colorOp = blend.alphaOp = Regs::Blend::Op::Add; + blend.colorSrcFactor = blend.alphaSrcFactor = Regs::Blend::Factor::One; + blend.colorDestFactor = blend.alphaDestFactor = Regs::Blend::Factor::Zero; + } + } + + void Maxwell3D::CallMethod(MethodParams params) { + state.logger->Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", params.method, params.argument); + + // Methods that are greater than the register size are for macro control + if (params.method > constant::Maxwell3DRegisterSize) { + if (!(params.method & 1)) + macroInvocation.index = ((params.method - constant::Maxwell3DRegisterSize) >> 1) % macroPositions.size(); + + macroInvocation.arguments.push_back(params.argument); + + // Macros are always executed on the last method call in a pushbuffer entry + if (params.lastCall) { + macroInterpreter.Execute(macroPositions[macroInvocation.index], macroInvocation.arguments); + + macroInvocation.arguments.clear(); + macroInvocation.index = 0; + } + return; + } + + regs.raw[params.method] = params.argument; + + if (shadowRegs.mme.shadowRamControl == Regs::MmeShadowRamControl::MethodTrack || shadowRegs.mme.shadowRamControl == Regs::MmeShadowRamControl::MethodTrackWithFilter) + shadowRegs.raw[params.method] = params.argument; + else if (shadowRegs.mme.shadowRamControl == Regs::MmeShadowRamControl::MethodReplay) + params.argument = shadowRegs.raw[params.method]; + + switch (params.method) { + case MAXWELL3D_OFFSET(mme.instructionRamLoad): + if (regs.mme.instructionRamPointer >= macroCode.size()) + throw exception("Macro memory is full!"); + + macroCode[regs.mme.instructionRamPointer++] = params.argument; + break; + case MAXWELL3D_OFFSET(mme.startAddressRamLoad): + if (regs.mme.startAddressRamPointer >= macroPositions.size()) + throw exception("Maximum amount of macros reached!"); + + macroPositions[regs.mme.startAddressRamPointer++] = params.argument; + break; + case MAXWELL3D_OFFSET(mme.shadowRamControl): + shadowRegs.mme.shadowRamControl = static_cast(params.argument); + break; + case MAXWELL3D_OFFSET(syncpointAction): + state.gpu->syncpoints.at(regs.syncpointAction.id).Increment(); + break; + case MAXWELL3D_OFFSET(semaphore.info): + switch (regs.semaphore.info.op) { + case Regs::SemaphoreInfo::Op::Release: + WriteSemaphoreResult(regs.semaphore.payload); + break; + case Regs::SemaphoreInfo::Op::Counter: + HandleSemaphoreCounterOperation(); + break; + default: + state.logger->Warn("Unsupported semaphore operation: 0x{:X}", static_cast(regs.semaphore.info.op)); + break; + } + break; + case MAXWELL3D_OFFSET(firmwareCall[4]): + regs.raw[0xd00] = 1; + break; + } + } + + void Maxwell3D::HandleSemaphoreCounterOperation() { + switch (regs.semaphore.info.counterType) { + case Regs::SemaphoreInfo::CounterType::Zero: + WriteSemaphoreResult(0); + break; + default: + state.logger->Warn("Unsupported semaphore counter type: 0x{:X}", static_cast(regs.semaphore.info.counterType)); + break; + } + } + + void Maxwell3D::WriteSemaphoreResult(u64 result) { + struct FourWordResult { + u64 value; + u64 timestamp; + }; + + switch (regs.semaphore.info.structureSize) { + case Regs::SemaphoreInfo::StructureSize::OneWord: + state.gpu->memoryManager.Write(static_cast(result), regs.semaphore.address.Pack()); + break; + case Regs::SemaphoreInfo::StructureSize::FourWords: { + // Convert the current nanosecond time to GPU ticks + constexpr u64 NsToTickNumerator = 384; + constexpr u64 NsToTickDenominator = 625; + + u64 nsTime = util::GetTimeNs(); + u64 timestamp = (nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator; + + state.gpu->memoryManager.Write(FourWordResult{result, timestamp}, regs.semaphore.address.Pack()); + break; + } + } + } +} diff --git a/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.h b/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.h new file mode 100644 index 00000000..6a0eaf88 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/engines/maxwell_3d.h @@ -0,0 +1,560 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include +#include +#include +#include "engine.h" + +#define MAXWELL3D_OFFSET(field) U32_OFFSET(skyline::gpu::engine::Maxwell3D::Regs, field) + +namespace skyline { + namespace constant { + constexpr u32 Maxwell3DRegisterSize = 0xe00; //!< The size of the GPFIFO's register space in units of u32 + } + + namespace gpu::engine { + /** + * @brief The Maxwell 3D engine handles processing 3D graphics + */ + class Maxwell3D : public Engine { + private: + std::array macroPositions{}; //!< This holds the positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time + + struct { + u32 index; + std::vector arguments; + } macroInvocation{}; //!< This hold the index and arguments of the macro that is pending execution + + MacroInterpreter macroInterpreter; + + void HandleSemaphoreCounterOperation(); + + void WriteSemaphoreResult(u64 result); + + public: + /** + * @brief This holds the Maxwell3D engine's register space + * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478 + */ + union Regs { + struct Address { + u32 high; + u32 low; + + u64 Pack() { + return (static_cast(high) << 32) | low; + } + }; + static_assert(sizeof(Address) == sizeof(u64)); + + enum class MmeShadowRamControl : u32 { + MethodTrack = 0, + MethodTrackWithFilter = 1, + MethodPassthrough = 2, + MethodReplay = 3 + }; + + struct ViewportTransform { + enum class Swizzle : u8 { + PositiveX = 0, + NegativeX = 1, + PositiveY = 2, + NegativeY = 3, + PositiveZ = 4, + NegativeZ = 5, + PositiveW = 6, + NegativeW = 7, + }; + + float scaleX; + float scaleY; + float scaleZ; + float translateX; + float translateY; + float translateZ; + + struct __attribute__((__packed__)) { + Swizzle x : 3; + u8 _pad0_ : 1; + Swizzle y : 3; + u8 _pad1_ : 1; + Swizzle z : 3; + u8 _pad2_ : 1; + Swizzle w : 3; + u32 _pad3_ : 17; + } swizzles; + + struct __attribute__((__packed__)) { + u8 x : 5; + u8 _pad0_ : 3; + u8 y : 5; + u32 _pad1_ : 19; + } subpixelPrecisionBias; + }; + static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32))); + + struct Viewport { + struct { + u16 x; + u16 width; + }; + + struct { + u16 y; + u16 height; + }; + + float depthRangeNear; + float depthRangeFar; + }; + static_assert(sizeof(Viewport) == (0x4 * sizeof(u32))); + + enum class PolygonMode : u32 { + Point = 0x1b00, + Line = 0x1b01, + Fill = 0x1b02, + }; + + union VertexAttribute { + enum class Size : u8 { + Size_1x32 = 0x12, + Size_2x32 = 0x04, + Size_3x32 = 0x02, + Size_4x32 = 0x01, + Size_1x16 = 0x1b, + Size_2x16 = 0x0f, + Size_3x16 = 0x05, + Size_4x16 = 0x03, + Size_1x8 = 0x1d, + Size_2x8 = 0x18, + Size_3x8 = 0x13, + Size_4x8 = 0x0a, + Size_10_10_10_2 = 0x30, + Size_11_11_10 = 0x31, + }; + + enum class Type : u8 { + None = 0, + SNorm = 1, + UNorm = 2, + SInt = 3, + UInt = 4, + UScaled = 5, + SScaled = 6, + Float = 7, + }; + + struct __attribute__((__packed__)) { + u8 bufferId : 5; + u8 _pad0_ : 1; + bool fixed : 1; + u16 offset : 14; + Size size : 6; + Type type : 3; + u8 _pad1_ : 1; + bool bgra : 1; + }; + + u32 raw; + }; + static_assert(sizeof(VertexAttribute) == sizeof(u32)); + + enum class CompareOp : u32 { + Never = 1, + Less = 2, + Equal = 3, + LessOrEqual = 4, + Greater = 5, + NotEqual = 6, + GreaterOrEqual = 7, + Always = 8, + + NeverGL = 0x200, + LessGL = 0x201, + EqualGL = 0x202, + LessOrEqualGL = 0x203, + GreaterGL = 0x204, + NotEqualGL = 0x205, + GreaterOrEqualGL = 0x206, + AlwaysGL = 0x207, + }; + + struct Blend { + enum class Op : u32 { + Add = 1, + Subtract = 2, + ReverseSubtract = 3, + Minimum = 4, + Maximum = 5, + + AddGL = 0x8006, + SubtractGL = 0x8007, + ReverseSubtractGL = 0x8008, + MinimumGL = 0x800a, + MaximumGL = 0x800b + }; + + enum class Factor : u32 { + Zero = 0x1, + One = 0x2, + SourceColor = 0x3, + OneMinusSourceColor = 0x4, + SourceAlpha = 0x5, + OneMinusSourceAlpha = 0x6, + DestAlpha = 0x7, + OneMinusDestAlpha = 0x8, + DestColor = 0x9, + OneMinusDestColor = 0xa, + SourceAlphaSaturate = 0xb, + Source1Color = 0x10, + OneMinusSource1Color = 0x11, + Source1Alpha = 0x12, + OneMinusSource1Alpha = 0x13, + ConstantColor = 0x61, + OneMinusConstantColor = 0x62, + ConstantAlpha = 0x63, + OneMinusConstantAlpha = 0x64, + + ZeroGL = 0x4000, + OneGL = 0x4001, + SourceColorGL = 0x4300, + OneMinusSourceColorGL = 0x4301, + SourceAlphaGL = 0x4302, + OneMinusSourceAlphaGL = 0x4303, + DestAlphaGL = 0x4304, + OneMinusDestAlphaGL = 0x4305, + DestColorGL = 0x4306, + OneMinusDestColorGL = 0x4307, + SourceAlphaSaturateGL = 0x4308, + ConstantColorGL = 0xc001, + OneMinusConstantColorGL = 0xc002, + ConstantAlphaGL = 0xc003, + OneMinusConstantAlphaGL = 0xc004, + Source1ColorGL = 0xc900, + OneMinusSource1ColorGL = 0xc901, + Source1AlphaGL = 0xc902, + OneMinusSource1AlphaGL = 0xc903, + }; + + struct { + u32 seperateAlpha; + Op colorOp; + Factor colorSrcFactor; + Factor colorDestFactor; + Op alphaOp; + Factor alphaSrcFactor; + Factor alphaDestFactor; + u32 _pad_; + }; + }; + static_assert(sizeof(Blend) == (sizeof(u32) * 8)); + + enum class StencilOp : u32 { + Keep = 1, + Zero = 2, + Replace = 3, + IncrementAndClamp = 4, + DecrementAndClamp = 5, + Invert = 6, + IncrementAndWrap = 7, + DecrementAndWrap = 8, + }; + + enum class FrontFace : u32 { + Clockwise = 0x900, + CounterClockwise = 0x901, + }; + + enum class CullFace : u32 { + Front = 0x404, + Back = 0x405, + FrontAndBack = 0x408, + }; + + union ColorMask { + struct __attribute__((__packed__)) { + u8 r : 4; + u8 g : 4; + u8 b : 4; + u8 a : 4; + }; + + u32 raw; + }; + static_assert(sizeof(ColorMask) == sizeof(u32)); + + struct __attribute__((__packed__)) SemaphoreInfo { + enum class Op : u8 { + Release = 0, + Acquire = 1, + Counter = 2, + Trap = 3 + }; + + enum class ReductionOp : u8 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + }; + + enum class Unit : u8 { + VFetch = 1, + VP = 2, + Rast = 4, + StrmOut = 5, + GP = 6, + ZCull = 7, + Prop = 10, + Crop = 15, + }; + + enum class SyncCondition : u8 { + NotEqual = 0, + GreaterThan = 1, + }; + + enum class Format : u8 { + U32 = 0, + I32 = 1, + }; + + enum class CounterType : u8 { + Zero = 0x0, + InputVertices = 0x1, + InputPrimitives = 0x3, + VertexShaderInvocations = 0x5, + GeometryShaderInvocations = 0x7, + GeometryShaderPrimitives = 0x9, + ZcullStats0 = 0xa, + TransformFeedbackPrimitivesWritten = 0xb, + ZcullStats1 = 0xc, + ZcullStats2 = 0xe, + ClipperInputPrimitives = 0xf, + ZcullStats3 = 0x10, + ClipperOutputPrimitives = 0x11, + PrimitivesGenerated = 0x12, + FragmentShaderInvocations = 0x13, + SamplesPassed = 0x15, + TransformFeedbackOffset = 0x1a, + TessControlShaderInvocations = 0x1b, + TessEvaluationShaderInvocations = 0x1d, + TessEvaluationShaderPrimitives = 0x1f + }; + + enum class StructureSize : u8 { + FourWords = 0, + OneWord = 1, + }; + + Op op : 2; + bool flushDisable : 1; + bool reductionEnable : 1; + bool fenceEnable : 1; + u8 _pad0_ : 4; + ReductionOp reductionOp : 3; + Unit unit : 4; + SyncCondition syncCondition : 1; + Format format : 2; + u8 _pad1_ : 1; + bool awakenEnable : 1; + u8 _pad2_ : 2; + CounterType counterType : 5; + StructureSize structureSize : 1; + }; + static_assert(sizeof(SemaphoreInfo) == sizeof(u32)); + + struct { + u32 _pad0_[0x40]; // 0x0 + u32 noOperation; // 0x40 + u32 _pad1_[0x3]; // 0x41 + u32 waitForIdle; // 0x44 + + struct { + u32 instructionRamPointer; // 0x45 + u32 instructionRamLoad; // 0x46 + u32 startAddressRamPointer; // 0x47 + u32 startAddressRamLoad; // 0x48 + MmeShadowRamControl shadowRamControl; // 0x49 + } mme; + + u32 _pad2_[0x68]; // 0x4a + + struct { + u16 id : 12; + u8 _pad0_ : 4; + bool flushCache : 1; + u8 _pad1_ : 3; + bool increment : 1; + u16 _pad2_ : 11; + } syncpointAction; // 0xb2 + + u32 _pad3_[0x2c]; // 0xb3 + u32 rasterizerEnable; // 0xdf + u32 _pad4_[0x1a0]; // 0xe0 + std::array viewportTransform; // 0x280 + std::array viewport; // 0x300 + u32 _pad5_[0x2b]; // 0x340 + + struct { + PolygonMode front; // 0x36b + PolygonMode back; // 0x36c + } polygonMode; + + u32 _pad6_[0x68]; // 0x36d + + struct { + u32 funcRef; // 0x3d5 + u32 mask; // 0x3d6 + u32 funcMask; // 0x3d7 + } stencilBackExtra; + + u32 _pad7_[0x13]; // 0x3d8 + u32 rtSeparateFragData; // 0x3eb + u32 _pad8_[0x6c]; // 0x3ec + std::array vertexAttributeState; // 0x458 + u32 _pad9_[0x4b]; // 0x478 + CompareOp depthTestFunc; // 0x4c3 + float alphaTestRef; // 0x4c4 + CompareOp alphaTestFunc; // 0x4c5 + u32 drawTFBStride; // 0x4c6 + + struct { + float r; // 0x4c7 + float g; // 0x4c8 + float b; // 0x4c9 + float a; // 0x4ca + } blendConstant; + + u32 _pad10_[0x4]; // 0x4cb + + struct { + u32 seperateAlpha; // 0x4cf + Blend::Op colorOp; // 0x4d0 + Blend::Factor colorSrcFactor; // 0x4d1 + Blend::Factor colorDestFactor; // 0x4d2 + Blend::Op alphaOp; // 0x4d3 + Blend::Factor alphaSrcFactor; // 0x4d4 + u32 _pad_; // 0x4d5 + Blend::Factor alphaDestFactor; // 0x4d6 + + u32 enableCommon; // 0x4d7 + std::array enable; // 0x4d8 For each render target + } blend; + + u32 stencilEnable; // 0x4e0 + + struct { + StencilOp failOp; // 0x4e1 + StencilOp zFailOp; // 0x4e2 + StencilOp zPassOp; // 0x4e3 + + struct { + CompareOp op; // 0x4e4 + i32 ref; // 0x4e5 + u32 mask; // 0x4e6 + } func; + + u32 mask; // 0x4e7 + } stencilFront; + + u32 _pad11_[0x4]; // 0x4e8 + float lineWidthSmooth; // 0x4ec + float lineWidthAliased; // 0x4d + u32 _pad12_[0x1f]; // 0x4ee + u32 drawBaseVertex; // 0x50d + u32 drawBaseInstance; // 0x50e + u32 _pad13_[0x35]; // 0x50f + u32 clipDistanceEnable; // 0x544 + u32 sampleCounterEnable; // 0x545 + float pointSpriteSize; // 0x546 + u32 zCullStatCountersEnable; // 0x547 + u32 pointSpriteEnable; // 0x548 + u32 _pad14_; // 0x549 + u32 shaderExceptions; // 0x54a + u32 _pad15_[0x2]; // 0x54b + u32 multisampleEnable; // 0x54d + u32 depthTargetEnable; // 0x54e + + struct __attribute__((__packed__)) { + bool alphaToCoverage : 1; + u8 _pad0_ : 3; + bool alphaToOne : 1; + u32 _pad1_ : 27; + } multisampleControl; // 0x54f + + u32 _pad16_[0x7]; // 0x550 + + struct { + Address address; // 0x557 + u32 maximumIndex; // 0x559 + } texSamplerPool; + + u32 _pad17_; // 0x55a + u32 polygonOffsetFactor; // 0x55b + u32 lineSmoothEnable; // 0x55c + + struct { + Address address; // 0x55d + u32 maximumIndex; // 0x55f + } texHeaderPool; + + u32 _pad18_[0x5]; // 0x560 + + struct { + u32 stencilTwoSideEnable; // 0x565 + StencilOp failOp; // 0x566 + StencilOp zFailOp; // 0x567 + StencilOp zPassOp; // 0x568 + CompareOp funcOp; // 0x569 + } stencilBack; + + u32 _pad19_[0xdc]; // 0x56a + u32 cullFaceEnable; // 0x646 + FrontFace frontFace; // 0x647 + CullFace cullFace; // 0x648 + u32 pixelCentreImage; // 0x649 + u32 _pad20_[0x36]; // 0x64a + std::array colorMask; // 0x680 For each render target + u32 _pad21_[0x38]; // 0x688 + + struct { + Address address; // 0x6c0 + u32 payload; // 0x6c2 + SemaphoreInfo info; // 0x6c3 + } semaphore; + + u32 _pad22_[0xbc]; // 0x6c4 + std::array independentBlend; // 0x780 For each render target + u32 _pad23_[0x100]; // 0x7c0 + u32 firmwareCall[0x20]; // 0x8c0 + }; + + std::array raw; + }; + static_assert(sizeof(Regs) == (constant::Maxwell3DRegisterSize * sizeof(u32))); + + Regs regs{}; //!< The maxwell 3D register space + Regs shadowRegs{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register + + std::array macroCode{}; //!< This is used to store GPU macros, the 256kb size is from Ryujinx + + Maxwell3D(const DeviceState &state); + + /** + * @brief Resets the Maxwell 3D registers to their default values + */ + void ResetRegs(); + + void CallMethod(MethodParams params); + }; + } +} \ No newline at end of file diff --git a/app/src/main/cpp/skyline/gpu/macro_interpreter.cpp b/app/src/main/cpp/skyline/gpu/macro_interpreter.cpp new file mode 100644 index 00000000..faaadd4d --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/macro_interpreter.cpp @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include "engines/maxwell_3d.h" +#include "memory_manager.h" +#include "macro_interpreter.h" + +namespace skyline::gpu { + void MacroInterpreter::Execute(size_t offset, const std::vector &args) { + // Reset the interpreter state + registers = {}; + carryFlag = false; + methodAddress.raw = 0; + opcode = reinterpret_cast(&maxwell3D.macroCode[offset]); + argument = args.data(); + + // The first argument is stored in register 1 + registers[1] = *argument++; + + while (Step()); + } + + FORCE_INLINE bool MacroInterpreter::Step(Opcode *delayedOpcode) { + switch (opcode->operation) { + case Opcode::Operation::AluRegister: { + u32 result = HandleAlu(opcode->aluOperation, registers[opcode->srcA], registers[opcode->srcB]); + + HandleAssignment(opcode->assignmentOperation, opcode->dest, result); + break; + } + case Opcode::Operation::AddImmediate: + HandleAssignment(opcode->assignmentOperation, opcode->dest, registers[opcode->srcA] + opcode->immediate); + break; + case Opcode::Operation::BitfieldReplace: { + u32 src = registers[opcode->srcB]; + u32 dest = registers[opcode->srcA]; + + // Extract the source region + src = (src >> opcode->bitfield.srcBit) & opcode->bitfield.GetMask(); + + // Mask out the bits that we will replace + dest &= ~(opcode->bitfield.GetMask() << opcode->bitfield.destBit); + + // Replace the bitfield region in the destination with the region from the source + dest |= src << opcode->bitfield.destBit; + + HandleAssignment(opcode->assignmentOperation, opcode->dest, dest); + break; + } + case Opcode::Operation::BitfieldExtractShiftLeftImmediate: { + u32 src = registers[opcode->srcB]; + u32 dest = registers[opcode->srcA]; + + u32 result = ((src >> dest) & opcode->bitfield.GetMask()) << opcode->bitfield.destBit; + + HandleAssignment(opcode->assignmentOperation, opcode->dest, result); + break; + } + case Opcode::Operation::BitfieldExtractShiftLeftRegister: { + u32 src = registers[opcode->srcB]; + u32 dest = registers[opcode->srcA]; + + u32 result = ((src >> opcode->bitfield.srcBit) & opcode->bitfield.GetMask()) << dest; + + HandleAssignment(opcode->assignmentOperation, opcode->dest, result); + break; + } + case Opcode::Operation::ReadImmediate: { + u32 result = maxwell3D.regs.raw[registers[opcode->srcA] + opcode->immediate]; + HandleAssignment(opcode->assignmentOperation, opcode->dest, result); + break; + } + case Opcode::Operation::Branch: { + if (delayedOpcode != nullptr) + throw exception("Cannot branch while inside a delay slot"); + + u32 value = registers[opcode->srcA]; + bool branch = (opcode->branchCondition == Opcode::BranchCondition::Zero) ? (value == 0) : (value != 0); + + if (branch) { + if (opcode->noDelay) { + opcode += opcode->immediate; + return true; + } else { + Opcode* targetOpcode = opcode + opcode->immediate; + + // Step into delay slot + opcode++; + return Step(targetOpcode); + } + } + break; + } + } + + if (opcode->exit && (delayedOpcode == nullptr)) { + // Exit has a delay slot + opcode++; + Step(opcode); + return false; + } + + if (delayedOpcode != nullptr) + opcode = delayedOpcode; + else + opcode++; + + return true; + } + + FORCE_INLINE u32 MacroInterpreter::HandleAlu(Opcode::AluOperation operation, u32 srcA, u32 srcB) { + switch (operation) { + case Opcode::AluOperation::Add: { + u64 result = static_cast(srcA) + srcB; + + carryFlag = result >> 32; + return static_cast(result); + } + case Opcode::AluOperation::AddWithCarry: { + u64 result = static_cast(srcA) + srcB + carryFlag; + + carryFlag = result >> 32; + return static_cast(result); + } + case Opcode::AluOperation::Subtract: { + u64 result = static_cast(srcA) - srcB; + + carryFlag = result & 0xffffffff; + return static_cast(result); + } + case Opcode::AluOperation::SubtractWithBorrow: { + u64 result = static_cast(srcA) - srcB - !carryFlag; + + carryFlag = result & 0xffffffff; + return static_cast(result); + } + case Opcode::AluOperation::BitwiseXor: + return srcA ^ srcB; + case Opcode::AluOperation::BitwiseOr: + return srcA | srcB; + case Opcode::AluOperation::BitwiseAnd: + return srcA & srcB; + case Opcode::AluOperation::BitwiseAndNot: + return srcA & ~srcB; + case Opcode::AluOperation::BitwiseNand: + return ~(srcA & srcB); + } + } + + FORCE_INLINE void MacroInterpreter::HandleAssignment(Opcode::AssignmentOperation operation, u8 reg, u32 result) { + switch (operation) { + case Opcode::AssignmentOperation::IgnoreAndFetch: + WriteRegister(reg, *argument++); + break; + case Opcode::AssignmentOperation::Move: + WriteRegister(reg, result); + break; + case Opcode::AssignmentOperation::MoveAndSetMethod: + WriteRegister(reg, result); + methodAddress.raw = result; + break; + case Opcode::AssignmentOperation::FetchAndSend: + WriteRegister(reg, *argument++); + Send(result); + break; + case Opcode::AssignmentOperation::MoveAndSend: + WriteRegister(reg, result); + Send(result); + break; + case Opcode::AssignmentOperation::FetchAndSetMethod: + WriteRegister(reg, *argument++); + methodAddress.raw = result; + break; + case Opcode::AssignmentOperation::MoveAndSetMethodThenFetchAndSend: + WriteRegister(reg, result); + methodAddress.raw = result; + Send(*argument++); + break; + case Opcode::AssignmentOperation::MoveAndSetMethodThenSendHigh: + WriteRegister(reg, result); + methodAddress.raw = result; + Send(methodAddress.increment); + break; + } + } + + FORCE_INLINE void MacroInterpreter::Send(u32 argument) { + maxwell3D.CallMethod(MethodParams{methodAddress.address, argument, 0, true}); + + methodAddress.address += methodAddress.increment; + } + + FORCE_INLINE void MacroInterpreter::WriteRegister(u8 reg, u32 value) { + // Register 0 should always be zero so block writes to it + if (reg == 0) + return; + + registers[reg] = value; + } +} diff --git a/app/src/main/cpp/skyline/gpu/macro_interpreter.h b/app/src/main/cpp/skyline/gpu/macro_interpreter.h new file mode 100644 index 00000000..ea042c7c --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/macro_interpreter.h @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include + +namespace skyline::gpu { + namespace engine { + class Maxwell3D; + } + + class MacroInterpreter { + private: + /** + * @brief This holds a single macro opcode + */ + union Opcode { + enum class Operation : u8 { + AluRegister = 0, + AddImmediate = 1, + BitfieldReplace = 2, + BitfieldExtractShiftLeftImmediate = 3, + BitfieldExtractShiftLeftRegister = 4, + ReadImmediate = 5, + Branch = 7, + }; + + enum class AssignmentOperation : u8 { + IgnoreAndFetch = 0, + Move = 1, + MoveAndSetMethod = 2, + FetchAndSend = 3, + MoveAndSend = 4, + FetchAndSetMethod = 5, + MoveAndSetMethodThenFetchAndSend = 6, + MoveAndSetMethodThenSendHigh = 7, + }; + + enum class AluOperation : u8 { + Add = 0, + AddWithCarry = 1, + Subtract = 2, + SubtractWithBorrow = 3, + BitwiseXor = 8, + BitwiseOr = 9, + BitwiseAnd = 10, + BitwiseAndNot = 11, + BitwiseNand = 12, + }; + + enum class BranchCondition : u8 { + Zero = 0, + NonZero = 1, + }; + + struct __attribute__((__packed__)) { + Operation operation : 3; + u8 _pad0_ : 1; + AssignmentOperation assignmentOperation : 3; + }; + + struct __attribute__((__packed__)) { + u8 _pad1_ : 4; + BranchCondition branchCondition : 1; + u8 noDelay : 1; + u8 _pad2_ : 1; + u8 exit : 1; + u8 dest : 3; + u8 srcA : 3; + u8 srcB : 3; + AluOperation aluOperation : 5; + }; + + struct __attribute__((__packed__)) { + u16 _pad3_ : 14; + i32 immediate : 18; + }; + + struct __attribute__((__packed__)) { + u32 _pad_ : 17; + u8 srcBit : 5; + u8 size : 5; + u8 destBit : 5; + + u32 GetMask() { + return (1 << size) - 1; + } + } bitfield; + + u32 raw; + }; + static_assert(sizeof(Opcode) == sizeof(u32)); + + /** + * @brief This holds information about the Maxwell 3D method to be called in 'Send' + */ + union MethodAddress { + struct { + u16 address : 12; + u8 increment : 6; + }; + + u32 raw; + }; + + engine::Maxwell3D &maxwell3D; + + std::array registers{}; + + Opcode *opcode{}; + const u32 *argument{}; + MethodAddress methodAddress{}; + bool carryFlag{}; + + /** + * @brief Steps forward one macro instruction, including delay slots + * @param delayedOpcode The target opcode to be jumped to after executing the instruction + */ + bool Step(Opcode *delayedOpcode = nullptr); + + /** + * @brief Performs an ALU operation on the given source values and returns the result as a u32 + */ + u32 HandleAlu(Opcode::AluOperation operation, u32 srcA, u32 srcB); + + /** + * @brief Handles an opcode's assignment operation + */ + void HandleAssignment(Opcode::AssignmentOperation operation, u8 reg, u32 result); + + /** + * @brief Sends a method call to the Maxwell 3D + */ + void Send(u32 argument); + + void WriteRegister(u8 reg, u32 value); + + public: + MacroInterpreter(engine::Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {} + + /** + * @brief Executes a GPU macro from macro memory with the given arguments + */ + void Execute(size_t offset, const std::vector &args); + }; +} diff --git a/app/src/main/cpp/skyline/nce/guest.cpp b/app/src/main/cpp/skyline/nce/guest.cpp index 206cc022..450b8b69 100644 --- a/app/src/main/cpp/skyline/nce/guest.cpp +++ b/app/src/main/cpp/skyline/nce/guest.cpp @@ -9,8 +9,6 @@ #include #include "guest_common.h" -#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage) - namespace skyline::guest { FORCE_INLINE void SaveCtxStack() { asm("SUB SP, SP, #240\n\t" diff --git a/app/src/main/cpp/skyline/nce/guest_common.h b/app/src/main/cpp/skyline/nce/guest_common.h index 05bcc34e..03e2ab18 100644 --- a/app/src/main/cpp/skyline/nce/guest_common.h +++ b/app/src/main/cpp/skyline/nce/guest_common.h @@ -5,6 +5,8 @@ #include +#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage) + namespace skyline { using u128 = __uint128_t; //!< Unsigned 128-bit integer using u64 = __uint64_t; //!< Unsigned 64-bit integer