Implement the basis of the Maxwell 3D engine together with a macro

interpreter.

The Maxwell 3D engine handles all 3D rendering; currently only the
non-rendering-related methods are implemented. Macros are small pieces of
code that run on the GPU and allow methods to be quickly called for
things like instanced drawing.
This commit is contained in:
Billy Laws 2020-08-09 14:39:27 +01:00 committed by ◱ PixelyIon
parent 68d5a48df1
commit 7503496bb0
6 changed files with 1082 additions and 2 deletions

View File

@ -0,0 +1,172 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <gpu/syncpoint.h>
#include "maxwell_3d.h"
namespace skyline::gpu::engine {
/**
 * @brief Constructs the engine, gives the macro interpreter a reference to it and loads the default register values through ResetRegs
 */
Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this) {
ResetRegs();
}
void Maxwell3D::ResetRegs() {
memset(&regs, 0, sizeof(regs));
regs.rasterizerEnable = true;
for (auto &transform : regs.viewportTransform) {
transform.swizzles.x = Regs::ViewportTransform::Swizzle::PositiveX;
transform.swizzles.y = Regs::ViewportTransform::Swizzle::PositiveY;
transform.swizzles.z = Regs::ViewportTransform::Swizzle::PositiveZ;
transform.swizzles.w = Regs::ViewportTransform::Swizzle::PositiveW;
}
for (auto &viewport : regs.viewport) {
viewport.depthRangeFar = 1.0f;
viewport.depthRangeNear = 0.0f;
}
regs.polygonMode.front = Regs::PolygonMode::Fill;
regs.polygonMode.back = Regs::PolygonMode::Fill;
regs.stencilFront.failOp = regs.stencilFront.zFailOp = regs.stencilFront.zPassOp = Regs::StencilOp::Keep;
regs.stencilFront.func.op = Regs::CompareOp::Always;
regs.stencilFront.func.mask = 0xFFFFFFFF;
regs.stencilFront.mask = 0xFFFFFFFF;
regs.stencilBack.stencilTwoSideEnable = true;
regs.stencilBack.failOp = regs.stencilBack.zFailOp = regs.stencilBack.zPassOp = Regs::StencilOp::Keep;
regs.stencilBack.funcOp = Regs::CompareOp::Always;
regs.stencilBackExtra.funcMask = 0xFFFFFFFF;
regs.stencilBackExtra.mask = 0xFFFFFFFF;
regs.rtSeparateFragData = true;
for (auto &attribute : regs.vertexAttributeState)
attribute.fixed = true;
regs.depthTestFunc = Regs::CompareOp::Always;
regs.blend.colorOp = regs.blend.alphaOp = Regs::Blend::Op::Add;
regs.blend.colorSrcFactor = regs.blend.alphaSrcFactor = Regs::Blend::Factor::One;
regs.blend.colorDestFactor = regs.blend.alphaDestFactor = Regs::Blend::Factor::Zero;
regs.lineWidthSmooth = 1.0f;
regs.lineWidthAliased = 1.0f;
regs.pointSpriteSize = 1.0f;
regs.frontFace = Regs::FrontFace::CounterClockwise;
regs.cullFace = Regs::CullFace::Back;
for (auto &mask : regs.colorMask)
mask.r = mask.g = mask.b = mask.a = 1;
for (auto &blend : regs.independentBlend) {
blend.colorOp = blend.alphaOp = Regs::Blend::Op::Add;
blend.colorSrcFactor = blend.alphaSrcFactor = Regs::Blend::Factor::One;
blend.colorDestFactor = blend.alphaDestFactor = Regs::Blend::Factor::Zero;
}
}
/**
 * @brief Handles a single method call made to the Maxwell 3D engine
 * @param params The method index, its argument and whether this is the last call of the pushbuffer entry
 * @note Methods at or past the end of the register space are macro calls, their arguments are collected and the macro is run once the last call arrives
 * @note Any other method is written into the register space (subject to the shadow RAM mode) and then dispatched to its handler if it has one
 */
void Maxwell3D::CallMethod(MethodParams params) {
    state.logger->Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", params.method, params.argument);

    // Methods at or past the end of the register space are for macro control
    // 'regs.raw' only has Maxwell3DRegisterSize entries so the comparison has to be inclusive, otherwise the first macro method would be written one element past the end of the register array
    if (params.method >= constant::Maxwell3DRegisterSize) {
        // An even method selects the macro to run, odd methods only supply further arguments
        if (!(params.method & 1))
            macroInvocation.index = ((params.method - constant::Maxwell3DRegisterSize) >> 1) % macroPositions.size();

        macroInvocation.arguments.push_back(params.argument);

        // Macros are always executed on the last method call in a pushbuffer entry
        if (params.lastCall) {
            macroInterpreter.Execute(macroPositions[macroInvocation.index], macroInvocation.arguments);

            macroInvocation.arguments.clear();
            macroInvocation.index = 0;
        }

        return;
    }

    // The shadow RAM mode is applied before the register write so that a replayed value is what actually ends up in the register the handlers below read from
    if (shadowRegs.mme.shadowRamControl == Regs::MmeShadowRamControl::MethodTrack || shadowRegs.mme.shadowRamControl == Regs::MmeShadowRamControl::MethodTrackWithFilter)
        shadowRegs.raw[params.method] = params.argument;
    else if (shadowRegs.mme.shadowRamControl == Regs::MmeShadowRamControl::MethodReplay)
        params.argument = shadowRegs.raw[params.method];

    regs.raw[params.method] = params.argument;

    switch (params.method) {
        case MAXWELL3D_OFFSET(mme.instructionRamLoad):
            // Appends one word of macro code at the instruction RAM pointer and advances the pointer
            if (regs.mme.instructionRamPointer >= macroCode.size())
                throw exception("Macro memory is full!");

            macroCode[regs.mme.instructionRamPointer++] = params.argument;
            break;
        case MAXWELL3D_OFFSET(mme.startAddressRamLoad):
            // Records the start position of a macro at the start address RAM pointer and advances the pointer
            if (regs.mme.startAddressRamPointer >= macroPositions.size())
                throw exception("Maximum amount of macros reached!");

            macroPositions[regs.mme.startAddressRamPointer++] = params.argument;
            break;
        case MAXWELL3D_OFFSET(mme.shadowRamControl):
            // The mode is kept in the shadow registers as that is where it is read from above
            shadowRegs.mme.shadowRamControl = static_cast<Regs::MmeShadowRamControl>(params.argument);
            break;
        case MAXWELL3D_OFFSET(syncpointAction):
            state.gpu->syncpoints.at(regs.syncpointAction.id).Increment();
            break;
        case MAXWELL3D_OFFSET(semaphore.info):
            switch (regs.semaphore.info.op) {
                case Regs::SemaphoreInfo::Op::Release:
                    WriteSemaphoreResult(regs.semaphore.payload);
                    break;
                case Regs::SemaphoreInfo::Op::Counter:
                    HandleSemaphoreCounterOperation();
                    break;
                default:
                    state.logger->Warn("Unsupported semaphore operation: 0x{:X}", static_cast<u8>(regs.semaphore.info.op));
                    break;
            }
            break;
        case MAXWELL3D_OFFSET(firmwareCall[4]):
            // NOTE(review): presumably register 0xd00 is where the guest looks for the result of this firmware call - confirm
            regs.raw[0xd00] = 1;
            break;
    }
}
/**
 * @brief Performs the semaphore counter operation selected by the semaphore info register
 * @note Only the Zero counter type is handled, it writes a result of 0, any other type is logged as unsupported
 */
void Maxwell3D::HandleSemaphoreCounterOperation() {
    const auto counterType = regs.semaphore.info.counterType;

    if (counterType == Regs::SemaphoreInfo::CounterType::Zero) {
        WriteSemaphoreResult(0);
        return;
    }

    state.logger->Warn("Unsupported semaphore counter type: 0x{:X}", static_cast<u8>(counterType));
}
/**
 * @brief Writes a semaphore result to the address held in the semaphore registers
 * @param result The value to write, it is truncated to 32 bits when the structure size is a single word
 * @note The four word structure holds the result followed by a timestamp in GPU ticks
 */
void Maxwell3D::WriteSemaphoreResult(u64 result) {
    struct FourWordResult {
        u64 value;
        u64 timestamp;
    };

    const auto structureSize = regs.semaphore.info.structureSize;

    if (structureSize == Regs::SemaphoreInfo::StructureSize::OneWord) {
        state.gpu->memoryManager.Write<u32>(static_cast<u32>(result), regs.semaphore.address.Pack());
    } else if (structureSize == Regs::SemaphoreInfo::StructureSize::FourWords) {
        // Convert the current nanosecond time to GPU ticks, this is a multiplication by 384/625
        constexpr u64 NsToTickNumerator = 384;
        constexpr u64 NsToTickDenominator = 625;

        u64 nsTime = util::GetTimeNs();

        // The quotient and the remainder are scaled separately so that the multiplication is not applied to the full nanosecond value
        u64 wholeTicks = (nsTime / NsToTickDenominator) * NsToTickNumerator;
        u64 remainderTicks = ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator;
        u64 timestamp = wholeTicks + remainderTicks;

        state.gpu->memoryManager.Write<FourWordResult>(FourWordResult{result, timestamp}, regs.semaphore.address.Pack());
    }
}
}

View File

@ -0,0 +1,560 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <array>
#include <common.h>
#include <gpu/texture.h>
#include <gpu/macro_interpreter.h>
#include "engine.h"
// Forwards the given Maxwell3D::Regs field to U32_OFFSET, the result is used as a case label when switching on a method index
// NOTE(review): U32_OFFSET is defined elsewhere, presumably it yields the field's offset in units of u32 - confirm
#define MAXWELL3D_OFFSET(field) U32_OFFSET(skyline::gpu::engine::Maxwell3D::Regs, field)
namespace skyline {
namespace constant {
// Maxwell3D::Regs::raw has exactly this many entries, Maxwell3D::CallMethod treats methods beyond the register space as macro calls
constexpr u32 Maxwell3DRegisterSize = 0xe00; //!< The size of the GPFIFO's register space in units of u32
}
namespace gpu::engine {
/**
 * @brief The Maxwell 3D engine handles processing 3D graphics
 * @note It owns the register space, the shadow register space, the macro code memory and the macro interpreter that runs uploaded macros
 */
class Maxwell3D : public Engine {
private:
std::array<size_t, 0x80> macroPositions{}; //!< This holds the positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
struct {
u32 index;
std::vector<u32> arguments;
} macroInvocation{}; //!< This holds the index and arguments of the macro that is pending execution
MacroInterpreter macroInterpreter; //!< The interpreter used to execute macros, it is constructed with a reference to this engine
/**
 * @brief Handles the Counter semaphore operation, only the Zero counter type is implemented
 */
void HandleSemaphoreCounterOperation();
/**
 * @brief Writes the given result to the address in the semaphore registers, as a single u32 or as a value and timestamp pair depending on the structure size
 */
void WriteSemaphoreResult(u64 result);
public:
/**
* @brief This holds the Maxwell3D engine's register space
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
*/
union Regs {
/**
 * @brief A 64-bit address that is stored as two 32-bit registers with the upper half first
 */
struct Address {
u32 high;
u32 low;
// Combines both halves into a single 64-bit value
u64 Pack() {
return (static_cast<u64>(high) << 32) | low;
}
};
static_assert(sizeof(Address) == sizeof(u64));
/**
 * @brief The values that can be written to the shadowRamControl register
 * @note CallMethod copies arguments into the shadow registers for both MethodTrack values and substitutes the argument with the shadow value for MethodReplay
 */
enum class MmeShadowRamControl : u32 {
MethodTrack = 0,
MethodTrackWithFilter = 1,
MethodPassthrough = 2,
MethodReplay = 3
};
/**
 * @brief The scale, translation, swizzle and subpixel precision bias registers of a single viewport, 8 registers in total
 */
struct ViewportTransform {
enum class Swizzle : u8 {
PositiveX = 0,
NegativeX = 1,
PositiveY = 2,
NegativeY = 3,
PositiveZ = 4,
NegativeZ = 5,
PositiveW = 6,
NegativeW = 7,
};
float scaleX;
float scaleY;
float scaleZ;
float translateX;
float translateY;
float translateZ;
// One 3-bit swizzle per component with a padding bit after each of the first three, the remaining bits of the register are padding
struct __attribute__((__packed__)) {
Swizzle x : 3;
u8 _pad0_ : 1;
Swizzle y : 3;
u8 _pad1_ : 1;
Swizzle z : 3;
u8 _pad2_ : 1;
Swizzle w : 3;
u32 _pad3_ : 17;
} swizzles;
struct __attribute__((__packed__)) {
u8 x : 5;
u8 _pad0_ : 3;
u8 y : 5;
u32 _pad1_ : 19;
} subpixelPrecisionBias;
};
static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
/**
 * @brief The position, dimensions and depth range registers of a single viewport, 4 registers in total
 */
struct Viewport {
struct {
u16 x;
u16 width;
};
struct {
u16 y;
u16 height;
};
float depthRangeNear;
float depthRangeFar;
};
static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
enum class PolygonMode : u32 {
Point = 0x1b00,
Line = 0x1b01,
Fill = 0x1b02,
};
/**
 * @brief The state of a single vertex attribute packed into one register
 */
union VertexAttribute {
enum class Size : u8 {
Size_1x32 = 0x12,
Size_2x32 = 0x04,
Size_3x32 = 0x02,
Size_4x32 = 0x01,
Size_1x16 = 0x1b,
Size_2x16 = 0x0f,
Size_3x16 = 0x05,
Size_4x16 = 0x03,
Size_1x8 = 0x1d,
Size_2x8 = 0x18,
Size_3x8 = 0x13,
Size_4x8 = 0x0a,
Size_10_10_10_2 = 0x30,
Size_11_11_10 = 0x31,
};
enum class Type : u8 {
None = 0,
SNorm = 1,
UNorm = 2,
SInt = 3,
UInt = 4,
UScaled = 5,
SScaled = 6,
Float = 7,
};
struct __attribute__((__packed__)) {
u8 bufferId : 5;
u8 _pad0_ : 1;
bool fixed : 1;
u16 offset : 14;
Size size : 6;
Type type : 3;
u8 _pad1_ : 1;
bool bgra : 1;
};
u32 raw;
};
static_assert(sizeof(VertexAttribute) == sizeof(u32));
/**
 * @brief The comparison functions used by the depth, alpha and stencil tests
 * @note Every function has two encodings, a small one (1 to 8) and one with a 'GL' suffix (0x200 to 0x207)
 */
enum class CompareOp : u32 {
Never = 1,
Less = 2,
Equal = 3,
LessOrEqual = 4,
Greater = 5,
NotEqual = 6,
GreaterOrEqual = 7,
Always = 8,
NeverGL = 0x200,
LessGL = 0x201,
EqualGL = 0x202,
LessOrEqualGL = 0x203,
GreaterGL = 0x204,
NotEqualGL = 0x205,
GreaterOrEqualGL = 0x206,
AlwaysGL = 0x207,
};
/**
 * @brief The blend state of a single render target, 8 registers in total
 * @note The operations and factors each have a second set of encodings with a 'GL' suffix
 */
struct Blend {
enum class Op : u32 {
Add = 1,
Subtract = 2,
ReverseSubtract = 3,
Minimum = 4,
Maximum = 5,
AddGL = 0x8006,
SubtractGL = 0x8007,
ReverseSubtractGL = 0x8008,
MinimumGL = 0x800a,
MaximumGL = 0x800b
};
enum class Factor : u32 {
Zero = 0x1,
One = 0x2,
SourceColor = 0x3,
OneMinusSourceColor = 0x4,
SourceAlpha = 0x5,
OneMinusSourceAlpha = 0x6,
DestAlpha = 0x7,
OneMinusDestAlpha = 0x8,
DestColor = 0x9,
OneMinusDestColor = 0xa,
SourceAlphaSaturate = 0xb,
Source1Color = 0x10,
OneMinusSource1Color = 0x11,
Source1Alpha = 0x12,
OneMinusSource1Alpha = 0x13,
ConstantColor = 0x61,
OneMinusConstantColor = 0x62,
ConstantAlpha = 0x63,
OneMinusConstantAlpha = 0x64,
ZeroGL = 0x4000,
OneGL = 0x4001,
SourceColorGL = 0x4300,
OneMinusSourceColorGL = 0x4301,
SourceAlphaGL = 0x4302,
OneMinusSourceAlphaGL = 0x4303,
DestAlphaGL = 0x4304,
OneMinusDestAlphaGL = 0x4305,
DestColorGL = 0x4306,
OneMinusDestColorGL = 0x4307,
SourceAlphaSaturateGL = 0x4308,
ConstantColorGL = 0xc001,
OneMinusConstantColorGL = 0xc002,
ConstantAlphaGL = 0xc003,
OneMinusConstantAlphaGL = 0xc004,
Source1ColorGL = 0xc900,
OneMinusSource1ColorGL = 0xc901,
Source1AlphaGL = 0xc902,
OneMinusSource1AlphaGL = 0xc903,
};
// Unlike the shared 'blend' registers further below, the padding register here comes after alphaDestFactor
struct {
u32 seperateAlpha;
Op colorOp;
Factor colorSrcFactor;
Factor colorDestFactor;
Op alphaOp;
Factor alphaSrcFactor;
Factor alphaDestFactor;
u32 _pad_;
};
};
static_assert(sizeof(Blend) == (sizeof(u32) * 8));
enum class StencilOp : u32 {
Keep = 1,
Zero = 2,
Replace = 3,
IncrementAndClamp = 4,
DecrementAndClamp = 5,
Invert = 6,
IncrementAndWrap = 7,
DecrementAndWrap = 8,
};
enum class FrontFace : u32 {
Clockwise = 0x900,
CounterClockwise = 0x901,
};
enum class CullFace : u32 {
Front = 0x404,
Back = 0x405,
FrontAndBack = 0x408,
};
/**
 * @brief The colour write mask of a single render target, every channel occupies a 4-bit field
 */
union ColorMask {
struct __attribute__((__packed__)) {
u8 r : 4;
u8 g : 4;
u8 b : 4;
u8 a : 4;
};
u32 raw;
};
static_assert(sizeof(ColorMask) == sizeof(u32));
/**
 * @brief The layout of the semaphore info register, writing to it is what triggers a semaphore operation in CallMethod
 */
struct __attribute__((__packed__)) SemaphoreInfo {
enum class Op : u8 {
Release = 0,
Acquire = 1,
Counter = 2,
Trap = 3
};
enum class ReductionOp : u8 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
};
enum class Unit : u8 {
VFetch = 1,
VP = 2,
Rast = 4,
StrmOut = 5,
GP = 6,
ZCull = 7,
Prop = 10,
Crop = 15,
};
enum class SyncCondition : u8 {
NotEqual = 0,
GreaterThan = 1,
};
enum class Format : u8 {
U32 = 0,
I32 = 1,
};
enum class CounterType : u8 {
Zero = 0x0,
InputVertices = 0x1,
InputPrimitives = 0x3,
VertexShaderInvocations = 0x5,
GeometryShaderInvocations = 0x7,
GeometryShaderPrimitives = 0x9,
ZcullStats0 = 0xa,
TransformFeedbackPrimitivesWritten = 0xb,
ZcullStats1 = 0xc,
ZcullStats2 = 0xe,
ClipperInputPrimitives = 0xf,
ZcullStats3 = 0x10,
ClipperOutputPrimitives = 0x11,
PrimitivesGenerated = 0x12,
FragmentShaderInvocations = 0x13,
SamplesPassed = 0x15,
TransformFeedbackOffset = 0x1a,
TessControlShaderInvocations = 0x1b,
TessEvaluationShaderInvocations = 0x1d,
TessEvaluationShaderPrimitives = 0x1f
};
enum class StructureSize : u8 {
FourWords = 0,
OneWord = 1,
};
Op op : 2;
bool flushDisable : 1;
bool reductionEnable : 1;
bool fenceEnable : 1;
u8 _pad0_ : 4;
ReductionOp reductionOp : 3;
Unit unit : 4;
SyncCondition syncCondition : 1;
Format format : 2;
u8 _pad1_ : 1;
bool awakenEnable : 1;
u8 _pad2_ : 2;
CounterType counterType : 5;
StructureSize structureSize : 1;
};
static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
// The named view of the register space, the trailing comment on every member is its register index (in units of u32) and the '_padN_' arrays fill the gaps between the registers that are declared
struct {
u32 _pad0_[0x40]; // 0x0
u32 noOperation; // 0x40
u32 _pad1_[0x3]; // 0x41
u32 waitForIdle; // 0x44
struct {
u32 instructionRamPointer; // 0x45
u32 instructionRamLoad; // 0x46
u32 startAddressRamPointer; // 0x47
u32 startAddressRamLoad; // 0x48
MmeShadowRamControl shadowRamControl; // 0x49
} mme;
u32 _pad2_[0x68]; // 0x4a
struct {
u16 id : 12;
u8 _pad0_ : 4;
bool flushCache : 1;
u8 _pad1_ : 3;
bool increment : 1;
u16 _pad2_ : 11;
} syncpointAction; // 0xb2
u32 _pad3_[0x2c]; // 0xb3
u32 rasterizerEnable; // 0xdf
u32 _pad4_[0x1a0]; // 0xe0
std::array<ViewportTransform, 0x10> viewportTransform; // 0x280
std::array<Viewport, 0x10> viewport; // 0x300
u32 _pad5_[0x2b]; // 0x340
struct {
PolygonMode front; // 0x36b
PolygonMode back; // 0x36c
} polygonMode;
u32 _pad6_[0x68]; // 0x36d
struct {
u32 funcRef; // 0x3d5
u32 mask; // 0x3d6
u32 funcMask; // 0x3d7
} stencilBackExtra;
u32 _pad7_[0x13]; // 0x3d8
u32 rtSeparateFragData; // 0x3eb
u32 _pad8_[0x6c]; // 0x3ec
std::array<VertexAttribute, 0x20> vertexAttributeState; // 0x458
u32 _pad9_[0x4b]; // 0x478
CompareOp depthTestFunc; // 0x4c3
float alphaTestRef; // 0x4c4
CompareOp alphaTestFunc; // 0x4c5
u32 drawTFBStride; // 0x4c6
struct {
float r; // 0x4c7
float g; // 0x4c8
float b; // 0x4c9
float a; // 0x4ca
} blendConstant;
u32 _pad10_[0x4]; // 0x4cb
struct {
u32 seperateAlpha; // 0x4cf
Blend::Op colorOp; // 0x4d0
Blend::Factor colorSrcFactor; // 0x4d1
Blend::Factor colorDestFactor; // 0x4d2
Blend::Op alphaOp; // 0x4d3
Blend::Factor alphaSrcFactor; // 0x4d4
u32 _pad_; // 0x4d5
Blend::Factor alphaDestFactor; // 0x4d6
u32 enableCommon; // 0x4d7
std::array<u32, 8> enable; // 0x4d8 For each render target
} blend;
u32 stencilEnable; // 0x4e0
struct {
StencilOp failOp; // 0x4e1
StencilOp zFailOp; // 0x4e2
StencilOp zPassOp; // 0x4e3
struct {
CompareOp op; // 0x4e4
i32 ref; // 0x4e5
u32 mask; // 0x4e6
} func;
u32 mask; // 0x4e7
} stencilFront;
u32 _pad11_[0x4]; // 0x4e8
float lineWidthSmooth; // 0x4ec
float lineWidthAliased; // 0x4ed
u32 _pad12_[0x1f]; // 0x4ee
u32 drawBaseVertex; // 0x50d
u32 drawBaseInstance; // 0x50e
u32 _pad13_[0x35]; // 0x50f
u32 clipDistanceEnable; // 0x544
u32 sampleCounterEnable; // 0x545
float pointSpriteSize; // 0x546
u32 zCullStatCountersEnable; // 0x547
u32 pointSpriteEnable; // 0x548
u32 _pad14_; // 0x549
u32 shaderExceptions; // 0x54a
u32 _pad15_[0x2]; // 0x54b
u32 multisampleEnable; // 0x54d
u32 depthTargetEnable; // 0x54e
struct __attribute__((__packed__)) {
bool alphaToCoverage : 1;
u8 _pad0_ : 3;
bool alphaToOne : 1;
u32 _pad1_ : 27;
} multisampleControl; // 0x54f
u32 _pad16_[0x7]; // 0x550
struct {
Address address; // 0x557
u32 maximumIndex; // 0x559
} texSamplerPool;
u32 _pad17_; // 0x55a
u32 polygonOffsetFactor; // 0x55b
u32 lineSmoothEnable; // 0x55c
struct {
Address address; // 0x55d
u32 maximumIndex; // 0x55f
} texHeaderPool;
u32 _pad18_[0x5]; // 0x560
struct {
u32 stencilTwoSideEnable; // 0x565
StencilOp failOp; // 0x566
StencilOp zFailOp; // 0x567
StencilOp zPassOp; // 0x568
CompareOp funcOp; // 0x569
} stencilBack;
u32 _pad19_[0xdc]; // 0x56a
u32 cullFaceEnable; // 0x646
FrontFace frontFace; // 0x647
CullFace cullFace; // 0x648
u32 pixelCentreImage; // 0x649
u32 _pad20_[0x36]; // 0x64a
std::array<ColorMask, 8> colorMask; // 0x680 For each render target
u32 _pad21_[0x38]; // 0x688
struct {
Address address; // 0x6c0
u32 payload; // 0x6c2
SemaphoreInfo info; // 0x6c3
} semaphore;
u32 _pad22_[0xbc]; // 0x6c4
std::array<Blend, 8> independentBlend; // 0x780 For each render target
u32 _pad23_[0x100]; // 0x7c0
u32 firmwareCall[0x20]; // 0x8c0
};
// The same register space viewed as a flat array that is indexed by method, this is what CallMethod writes into
std::array<u32, constant::Maxwell3DRegisterSize> raw;
};
static_assert(sizeof(Regs) == (constant::Maxwell3DRegisterSize * sizeof(u32)));
Regs regs{}; //!< The maxwell 3D register space
Regs shadowRegs{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
std::array<u32, 0x10000> macroCode{}; //!< This is used to store GPU macros, the 256kb size is from Ryujinx
Maxwell3D(const DeviceState &state);
/**
* @brief Resets the Maxwell 3D registers to their default values
*/
void ResetRegs();
/**
 * @brief Handles a method call made to the engine, this is either a register write or a call into a macro
 */
void CallMethod(MethodParams params);
};
}
}

View File

@ -0,0 +1,201 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <kernel/types/KProcess.h>
#include "engines/maxwell_3d.h"
#include "memory_manager.h"
#include "macro_interpreter.h"
namespace skyline::gpu {
void MacroInterpreter::Execute(size_t offset, const std::vector<u32> &args) {
// Reset the interpreter state
registers = {};
carryFlag = false;
methodAddress.raw = 0;
opcode = reinterpret_cast<Opcode *>(&maxwell3D.macroCode[offset]);
argument = args.data();
// The first argument is stored in register 1
registers[1] = *argument++;
while (Step());
}
/**
 * @brief Executes the instruction that 'opcode' points to and then moves 'opcode' to the instruction that should run next
 * @param delayedOpcode When not null the current instruction is running inside a delay slot and 'opcode' is set to this pointer once it has been executed
 * @return false once the macro has exited, true if there is another instruction to step into
 * @throws exception If a branch instruction is found inside a delay slot
 */
FORCE_INLINE bool MacroInterpreter::Step(Opcode *delayedOpcode) {
switch (opcode->operation) {
case Opcode::Operation::AluRegister: {
u32 result = HandleAlu(opcode->aluOperation, registers[opcode->srcA], registers[opcode->srcB]);
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
case Opcode::Operation::AddImmediate:
HandleAssignment(opcode->assignmentOperation, opcode->dest, registers[opcode->srcA] + opcode->immediate);
break;
case Opcode::Operation::BitfieldReplace: {
u32 src = registers[opcode->srcB];
u32 dest = registers[opcode->srcA];
// Extract the source region
src = (src >> opcode->bitfield.srcBit) & opcode->bitfield.GetMask();
// Mask out the bits that we will replace
dest &= ~(opcode->bitfield.GetMask() << opcode->bitfield.destBit);
// Replace the bitfield region in the destination with the region from the source
dest |= src << opcode->bitfield.destBit;
HandleAssignment(opcode->assignmentOperation, opcode->dest, dest);
break;
}
case Opcode::Operation::BitfieldExtractShiftLeftImmediate: {
// The value of register srcA is the right shift amount here, the left shift amount comes from the opcode's destBit field
u32 src = registers[opcode->srcB];
u32 dest = registers[opcode->srcA];
u32 result = ((src >> dest) & opcode->bitfield.GetMask()) << opcode->bitfield.destBit;
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
case Opcode::Operation::BitfieldExtractShiftLeftRegister: {
// The right shift amount comes from the opcode's srcBit field here, the value of register srcA is the left shift amount
u32 src = registers[opcode->srcB];
u32 dest = registers[opcode->srcA];
u32 result = ((src >> opcode->bitfield.srcBit) & opcode->bitfield.GetMask()) << dest;
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
case Opcode::Operation::ReadImmediate: {
// Reads the Maxwell 3D register at index (register srcA + immediate)
// NOTE(review): the index is not bounds checked against the size of the register space
u32 result = maxwell3D.regs.raw[registers[opcode->srcA] + opcode->immediate];
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
case Opcode::Operation::Branch: {
if (delayedOpcode != nullptr)
throw exception("Cannot branch while inside a delay slot");
u32 value = registers[opcode->srcA];
bool branch = (opcode->branchCondition == Opcode::BranchCondition::Zero) ? (value == 0) : (value != 0);
if (branch) {
if (opcode->noDelay) {
// The branch target is relative to the branch instruction itself
opcode += opcode->immediate;
return true;
} else {
// The target has to be computed before 'opcode' is advanced into the delay slot
Opcode* targetOpcode = opcode + opcode->immediate;
// Step into delay slot
opcode++;
return Step(targetOpcode);
}
}
break;
}
}
// The exit flag is ignored inside a delay slot as the check requires delayedOpcode to be null
if (opcode->exit && (delayedOpcode == nullptr)) {
// Exit has a delay slot
opcode++;
// The non-null pointer marks the nested call as a delay slot, its return value is not needed as the macro is over either way
Step(opcode);
return false;
}
if (delayedOpcode != nullptr)
opcode = delayedOpcode;
else
opcode++;
return true;
}
/**
 * @brief Performs an ALU operation on the given source values and returns the result as a u32
 * @param operation The ALU operation to perform
 * @param srcA The left hand operand
 * @param srcB The right hand operand
 * @return The lower 32 bits of the result
 * @throws exception If the operation is not one of the known ALU operations
 * @note The additions and subtractions update the carry flag, for subtractions the flag is set when no borrow occurred as that is how SubtractWithBorrow consumes it
 */
FORCE_INLINE u32 MacroInterpreter::HandleAlu(Opcode::AluOperation operation, u32 srcA, u32 srcB) {
    // Operations are done in 64 bits so that a carry out of or a borrow into bit 32 can be observed
    constexpr u64 carryBoundary = 0x100000000;

    switch (operation) {
        case Opcode::AluOperation::Add: {
            u64 result = static_cast<u64>(srcA) + srcB;
            carryFlag = result >> 32;
            return static_cast<u32>(result);
        }
        case Opcode::AluOperation::AddWithCarry: {
            u64 result = static_cast<u64>(srcA) + srcB + carryFlag;
            carryFlag = result >> 32;
            return static_cast<u32>(result);
        }
        case Opcode::AluOperation::Subtract: {
            u64 result = static_cast<u64>(srcA) - srcB;
            // A borrow wraps the 64-bit result around which sets its upper half, the flag is set when that did not happen
            carryFlag = result < carryBoundary;
            return static_cast<u32>(result);
        }
        case Opcode::AluOperation::SubtractWithBorrow: {
            // A clear carry flag means that the previous subtraction borrowed
            u64 result = static_cast<u64>(srcA) - srcB - !carryFlag;
            carryFlag = result < carryBoundary;
            return static_cast<u32>(result);
        }
        case Opcode::AluOperation::BitwiseXor:
            return srcA ^ srcB;
        case Opcode::AluOperation::BitwiseOr:
            return srcA | srcB;
        case Opcode::AluOperation::BitwiseAnd:
            return srcA & srcB;
        case Opcode::AluOperation::BitwiseAndNot:
            return srcA & ~srcB;
        case Opcode::AluOperation::BitwiseNand:
            return ~(srcA & srcB);
        default:
            // The operation is decoded from a 5-bit field so values without an enumerator can reach this function
            throw exception("Unknown macro ALU operation");
    }
}
/**
 * @brief Handles an opcode's assignment operation
 * @param operation Selects what is written to the register, whether the method address is updated and what (if anything) is sent to the Maxwell 3D
 * @param reg The index of the destination register
 * @param result The value that was computed by the instruction
 */
FORCE_INLINE void MacroInterpreter::HandleAssignment(Opcode::AssignmentOperation operation, u8 reg, u32 result) {
    using Assignment = Opcode::AssignmentOperation;

    // Takes the next macro argument off the argument list
    auto fetchArgument = [this]() -> u32 { return *argument++; };
    // Replaces the method address and increment that are used by Send
    auto setMethod = [this](u32 value) { methodAddress.raw = value; };

    switch (operation) {
        // Operations that load the register with the next macro argument
        case Assignment::IgnoreAndFetch:
            WriteRegister(reg, fetchArgument());
            break;
        case Assignment::FetchAndSend:
            WriteRegister(reg, fetchArgument());
            Send(result);
            break;
        case Assignment::FetchAndSetMethod:
            WriteRegister(reg, fetchArgument());
            setMethod(result);
            break;

        // Operations that load the register with the instruction's result
        case Assignment::Move:
            WriteRegister(reg, result);
            break;
        case Assignment::MoveAndSend:
            WriteRegister(reg, result);
            Send(result);
            break;
        case Assignment::MoveAndSetMethod:
            WriteRegister(reg, result);
            setMethod(result);
            break;
        case Assignment::MoveAndSetMethodThenFetchAndSend:
            WriteRegister(reg, result);
            setMethod(result);
            Send(fetchArgument());
            break;
        case Assignment::MoveAndSetMethodThenSendHigh:
            WriteRegister(reg, result);
            setMethod(result);
            // The value that is sent is the increment field of the method address that was just set
            Send(methodAddress.increment);
            break;
    }
}
/**
 * @brief Sends a method call to the Maxwell 3D using the current method address and then advances the address by its increment
 * @param argument The argument to call the method with
 */
FORCE_INLINE void MacroInterpreter::Send(u32 argument) {
    MethodParams call{methodAddress.address, argument, 0, true};
    maxwell3D.CallMethod(call);

    methodAddress.address += methodAddress.increment;
}
/**
 * @brief Writes a value to a macro register, writes to register 0 are dropped
 */
FORCE_INLINE void MacroInterpreter::WriteRegister(u8 reg, u32 value) {
    // Register 0 should always be zero so only the other registers are writable
    if (reg != 0)
        registers[reg] = value;
}
}

View File

@ -0,0 +1,147 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::gpu {
namespace engine {
class Maxwell3D;
}
/**
 * @brief The macro interpreter executes the macros that have been uploaded to the Maxwell 3D engine's macro memory
 */
class MacroInterpreter {
  private:
    /**
     * @brief This holds a single macro opcode
     * @note Every anonymous struct is a different view of the same 32-bit word, the padding members skip the bits that belong to other views
     */
    union Opcode {
        enum class Operation : u8 {
            AluRegister = 0,
            AddImmediate = 1,
            BitfieldReplace = 2,
            BitfieldExtractShiftLeftImmediate = 3,
            BitfieldExtractShiftLeftRegister = 4,
            ReadImmediate = 5,
            Branch = 7,
        };

        enum class AssignmentOperation : u8 {
            IgnoreAndFetch = 0,
            Move = 1,
            MoveAndSetMethod = 2,
            FetchAndSend = 3,
            MoveAndSend = 4,
            FetchAndSetMethod = 5,
            MoveAndSetMethodThenFetchAndSend = 6,
            MoveAndSetMethodThenSendHigh = 7,
        };

        enum class AluOperation : u8 {
            Add = 0,
            AddWithCarry = 1,
            Subtract = 2,
            SubtractWithBorrow = 3,
            BitwiseXor = 8,
            BitwiseOr = 9,
            BitwiseAnd = 10,
            BitwiseAndNot = 11,
            BitwiseNand = 12,
        };

        enum class BranchCondition : u8 {
            Zero = 0,
            NonZero = 1,
        };

        struct __attribute__((__packed__)) {
            Operation operation : 3;
            u8 _pad0_ : 1;
            AssignmentOperation assignmentOperation : 3;
        };

        struct __attribute__((__packed__)) {
            u8 _pad1_ : 4;
            BranchCondition branchCondition : 1;
            u8 noDelay : 1;
            u8 _pad2_ : 1;
            u8 exit : 1;
            u8 dest : 3;
            u8 srcA : 3;
            u8 srcB : 3;
            AluOperation aluOperation : 5;
        };

        struct __attribute__((__packed__)) {
            u16 _pad3_ : 14;
            i32 immediate : 18;
        };

        struct __attribute__((__packed__)) {
            u32 _pad_ : 17;
            u8 srcBit : 5;
            u8 size : 5;
            u8 destBit : 5;

            /**
             * @return A mask with the lowest 'size' bits set
             * @note The shift is done on an unsigned value as a size of 31 would overflow a signed int
             */
            u32 GetMask() const {
                return (1U << size) - 1;
            }
        } bitfield;

        u32 raw;
    };
    static_assert(sizeof(Opcode) == sizeof(u32));

    /**
     * @brief This holds information about the Maxwell 3D method to be called in 'Send'
     * @note The struct has to be packed so that 'increment' directly follows 'address' at bit 12, without it the u8 bit-field is not allowed to straddle a byte boundary and is moved up to bit 16
     */
    union MethodAddress {
        struct __attribute__((__packed__)) {
            u16 address : 12;
            u8 increment : 6;
        };
        u32 raw;
    };
    static_assert(sizeof(MethodAddress) == sizeof(u32));

    engine::Maxwell3D &maxwell3D; //!< The engine whose macro memory is executed and whose methods are called by 'Send'
    std::array<u32, 8> registers{}; //!< The macro registers, writes to register 0 are dropped by 'WriteRegister'
    Opcode *opcode{}; //!< The opcode that will be executed by the next call to 'Step'
    const u32 *argument{}; //!< The next macro argument to be fetched
    MethodAddress methodAddress{};
    bool carryFlag{}; //!< The carry flag that is written and read by the ALU's additions and subtractions

    /**
     * @brief Steps forward one macro instruction, including delay slots
     * @param delayedOpcode The target opcode to be jumped to after executing the instruction
     * @return If there are further instructions to execute, false once the macro has exited
     */
    bool Step(Opcode *delayedOpcode = nullptr);

    /**
     * @brief Performs an ALU operation on the given source values and returns the result as a u32
     */
    u32 HandleAlu(Opcode::AluOperation operation, u32 srcA, u32 srcB);

    /**
     * @brief Handles an opcode's assignment operation
     */
    void HandleAssignment(Opcode::AssignmentOperation operation, u8 reg, u32 result);

    /**
     * @brief Sends a method call to the Maxwell 3D
     */
    void Send(u32 argument);

    /**
     * @brief Writes a value to a macro register, writes to register 0 are dropped
     */
    void WriteRegister(u8 reg, u32 value);

  public:
    MacroInterpreter(engine::Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}

    /**
     * @brief Executes a GPU macro from macro memory with the given arguments
     */
    void Execute(size_t offset, const std::vector<u32> &args);
};
}

View File

@ -9,8 +9,6 @@
#include <asm/unistd.h>
#include "guest_common.h"
#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage)
namespace skyline::guest {
FORCE_INLINE void SaveCtxStack() {
asm("SUB SP, SP, #240\n\t"

View File

@ -5,6 +5,8 @@
#include <cstdint>
#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage)
namespace skyline {
using u128 = __uint128_t; //!< Unsigned 128-bit integer
using u64 = __uint64_t; //!< Unsigned 64-bit integer