mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-22 21:01:14 +01:00
Implement the basis of the Maxwell 3D engine together with a macro
interpreter. The Maxwell 3D engine handles all 3D rendering; currently, only non-rendering-related methods are implemented. Macros are small pieces of code that run on the GPU and allow methods to be called quickly for things like instanced drawing.
This commit is contained in:
parent
68d5a48df1
commit
7503496bb0
172
app/src/main/cpp/skyline/gpu/engines/maxwell_3d.cpp
Normal file
172
app/src/main/cpp/skyline/gpu/engines/maxwell_3d.cpp
Normal file
@ -0,0 +1,172 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <gpu.h>
|
||||
#include <gpu/syncpoint.h>
|
||||
#include "maxwell_3d.h"
|
||||
|
||||
namespace skyline::gpu::engine {
|
||||
/**
 * @brief Constructs the engine, binds the macro interpreter to it and loads the default register values
 */
Maxwell3D::Maxwell3D(const DeviceState &state)
    : Engine(state),
      macroInterpreter(*this) {
    ResetRegs();
}
|
||||
|
||||
/**
 * @brief Clears the whole register file and then writes the non-zero defaults set below
 */
void Maxwell3D::ResetRegs() {
    // Everything that is not explicitly assigned below starts out as zero
    memset(&regs, 0, sizeof(regs));

    regs.rasterizerEnable = true;

    // Identity swizzle for every viewport
    for (auto &transform : regs.viewportTransform) {
        transform.swizzles.x = Regs::ViewportTransform::Swizzle::PositiveX;
        transform.swizzles.y = Regs::ViewportTransform::Swizzle::PositiveY;
        transform.swizzles.z = Regs::ViewportTransform::Swizzle::PositiveZ;
        transform.swizzles.w = Regs::ViewportTransform::Swizzle::PositiveW;
    }

    for (auto &viewport : regs.viewport) {
        viewport.depthRangeFar = 1.0f;
        viewport.depthRangeNear = 0.0f;
    }

    regs.polygonMode.front = Regs::PolygonMode::Fill;
    regs.polygonMode.back = Regs::PolygonMode::Fill;

    // Front-facing stencil state
    regs.stencilFront.failOp = regs.stencilFront.zFailOp = regs.stencilFront.zPassOp = Regs::StencilOp::Keep;
    regs.stencilFront.func.op = Regs::CompareOp::Always;
    regs.stencilFront.func.mask = 0xFFFFFFFF;
    regs.stencilFront.mask = 0xFFFFFFFF;

    // Back-facing stencil state, its masks live in a separate register group
    regs.stencilBack.stencilTwoSideEnable = true;
    regs.stencilBack.failOp = regs.stencilBack.zFailOp = regs.stencilBack.zPassOp = Regs::StencilOp::Keep;
    regs.stencilBack.funcOp = Regs::CompareOp::Always;
    regs.stencilBackExtra.funcMask = 0xFFFFFFFF;
    regs.stencilBackExtra.mask = 0xFFFFFFFF;

    regs.rtSeparateFragData = true;

    for (auto &attribute : regs.vertexAttributeState)
        attribute.fixed = true;

    regs.depthTestFunc = Regs::CompareOp::Always;

    // Common blend state: result = source * 1 + destination * 0
    regs.blend.colorOp = regs.blend.alphaOp = Regs::Blend::Op::Add;
    regs.blend.colorSrcFactor = regs.blend.alphaSrcFactor = Regs::Blend::Factor::One;
    regs.blend.colorDestFactor = regs.blend.alphaDestFactor = Regs::Blend::Factor::Zero;

    regs.lineWidthSmooth = 1.0f;
    regs.lineWidthAliased = 1.0f;

    regs.pointSpriteSize = 1.0f;

    regs.frontFace = Regs::FrontFace::CounterClockwise;
    regs.cullFace = Regs::CullFace::Back;

    // Enable writes to every colour channel of every render target
    for (auto &mask : regs.colorMask)
        mask.r = mask.g = mask.b = mask.a = 1;

    // Per render target blend state mirrors the common blend state
    for (auto &blend : regs.independentBlend) {
        blend.colorOp = blend.alphaOp = Regs::Blend::Op::Add;
        blend.colorSrcFactor = blend.alphaSrcFactor = Regs::Blend::Factor::One;
        blend.colorDestFactor = blend.alphaDestFactor = Regs::Blend::Factor::Zero;
    }
}
|
||||
|
||||
/**
 * @brief Handles a single method call directed at the Maxwell 3D engine
 * @param params The method index, its argument and whether this is the last call of the pushbuffer entry
 * @note Methods at or past the end of the register space queue arguments for a macro and run it on the last call, everything else is a register write
 */
void Maxwell3D::CallMethod(MethodParams params) {
    state.logger->Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", params.method, params.argument);

    // Methods at or beyond the end of the register space are for macro control, the comparison has to be
    // inclusive as the register space only spans [0, Maxwell3DRegisterSize) and the first macro method sits directly after it
    if (params.method >= constant::Maxwell3DRegisterSize) {
        // Even methods select the macro to run, odd methods only supply further arguments
        if (!(params.method & 1))
            macroInvocation.index = ((params.method - constant::Maxwell3DRegisterSize) >> 1) % macroPositions.size();

        macroInvocation.arguments.push_back(params.argument);

        // Macros are always executed on the last method call in a pushbuffer entry
        if (params.lastCall) {
            macroInterpreter.Execute(macroPositions[macroInvocation.index], macroInvocation.arguments);

            macroInvocation.arguments.clear();
            macroInvocation.index = 0;
        }

        return;
    }

    // Apply the shadow RAM mode first: tracking records the argument while replaying substitutes the recorded one
    const auto shadowRamControl = shadowRegs.mme.shadowRamControl;
    if (shadowRamControl == Regs::MmeShadowRamControl::MethodTrack || shadowRamControl == Regs::MmeShadowRamControl::MethodTrackWithFilter)
        shadowRegs.raw[params.method] = params.argument;
    else if (shadowRamControl == Regs::MmeShadowRamControl::MethodReplay)
        params.argument = shadowRegs.raw[params.method];

    // The register is written after the shadow RAM step so that it holds the same value the handlers below act on
    regs.raw[params.method] = params.argument;

    switch (params.method) {
        case MAXWELL3D_OFFSET(mme.instructionRamLoad):
            if (regs.mme.instructionRamPointer >= macroCode.size())
                throw exception("Macro memory is full!");

            macroCode[regs.mme.instructionRamPointer++] = params.argument;
            break;
        case MAXWELL3D_OFFSET(mme.startAddressRamLoad):
            if (regs.mme.startAddressRamPointer >= macroPositions.size())
                throw exception("Maximum amount of macros reached!");

            macroPositions[regs.mme.startAddressRamPointer++] = params.argument;
            break;
        case MAXWELL3D_OFFSET(mme.shadowRamControl):
            shadowRegs.mme.shadowRamControl = static_cast<Regs::MmeShadowRamControl>(params.argument);
            break;
        case MAXWELL3D_OFFSET(syncpointAction):
            state.gpu->syncpoints.at(regs.syncpointAction.id).Increment();
            break;
        case MAXWELL3D_OFFSET(semaphore.info):
            switch (regs.semaphore.info.op) {
                case Regs::SemaphoreInfo::Op::Release:
                    WriteSemaphoreResult(regs.semaphore.payload);
                    break;
                case Regs::SemaphoreInfo::Op::Counter:
                    HandleSemaphoreCounterOperation();
                    break;
                default:
                    state.logger->Warn("Unsupported semaphore operation: 0x{:X}", static_cast<u8>(regs.semaphore.info.op));
                    break;
            }
            break;
        case MAXWELL3D_OFFSET(firmwareCall[4]):
            // NOTE(review): the meaning of raw register 0xd00 is not documented in this file - confirm against the reference
            regs.raw[0xd00] = 1;
            break;
    }
}
|
||||
|
||||
void Maxwell3D::HandleSemaphoreCounterOperation() {
|
||||
switch (regs.semaphore.info.counterType) {
|
||||
case Regs::SemaphoreInfo::CounterType::Zero:
|
||||
WriteSemaphoreResult(0);
|
||||
break;
|
||||
default:
|
||||
state.logger->Warn("Unsupported semaphore counter type: 0x{:X}", static_cast<u8>(regs.semaphore.info.counterType));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Writes a semaphore result to the GPU address held in the semaphore registers
 * @param result The value to report, it is truncated to 32 bits for the one word layout
 * @note The four word layout additionally carries the current time converted to GPU ticks
 */
void Maxwell3D::WriteSemaphoreResult(u64 result) {
    using StructureSize = Regs::SemaphoreInfo::StructureSize;

    // Layout of the result written for StructureSize::FourWords
    struct FourWordResult {
        u64 value;
        u64 timestamp;
    };

    const auto structureSize = regs.semaphore.info.structureSize;

    if (structureSize == StructureSize::OneWord) {
        state.gpu->memoryManager.Write<u32>(static_cast<u32>(result), regs.semaphore.address.Pack());
    } else if (structureSize == StructureSize::FourWords) {
        // Nanoseconds are scaled to GPU ticks by 384/625, the whole and remainder parts are scaled
        // separately so that the multiplication cannot overflow
        constexpr u64 tickNumerator = 384;
        constexpr u64 tickDenominator = 625;

        const u64 nanoseconds = util::GetTimeNs();
        const u64 wholeTicks = (nanoseconds / tickDenominator) * tickNumerator;
        const u64 remainderTicks = ((nanoseconds % tickDenominator) * tickNumerator) / tickDenominator;

        state.gpu->memoryManager.Write<FourWordResult>(FourWordResult{result, wholeTicks + remainderTicks}, regs.semaphore.address.Pack());
    }
}
|
||||
}
|
560
app/src/main/cpp/skyline/gpu/engines/maxwell_3d.h
Normal file
560
app/src/main/cpp/skyline/gpu/engines/maxwell_3d.h
Normal file
@ -0,0 +1,560 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <common.h>
|
||||
#include <gpu/texture.h>
|
||||
#include <gpu/macro_interpreter.h>
|
||||
#include "engine.h"
|
||||
|
||||
#define MAXWELL3D_OFFSET(field) U32_OFFSET(skyline::gpu::engine::Maxwell3D::Regs, field)
|
||||
|
||||
namespace skyline {
|
||||
namespace constant {
|
||||
constexpr u32 Maxwell3DRegisterSize = 0xe00; //!< The size of the GPFIFO's register space in units of u32
|
||||
}
|
||||
|
||||
namespace gpu::engine {
|
||||
/**
|
||||
* @brief The Maxwell 3D engine handles processing 3D graphics
|
||||
*/
|
||||
class Maxwell3D : public Engine {
|
||||
private:
|
||||
std::array<size_t, 0x80> macroPositions{}; //!< This holds the positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
|
||||
|
||||
struct {
|
||||
u32 index;
|
||||
std::vector<u32> arguments;
|
||||
} macroInvocation{}; //!< This hold the index and arguments of the macro that is pending execution
|
||||
|
||||
MacroInterpreter macroInterpreter;
|
||||
|
||||
void HandleSemaphoreCounterOperation();
|
||||
|
||||
void WriteSemaphoreResult(u64 result);
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief This holds the Maxwell3D engine's register space
|
||||
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
|
||||
*/
|
||||
union Regs {
|
||||
struct Address {
|
||||
u32 high;
|
||||
u32 low;
|
||||
|
||||
u64 Pack() {
|
||||
return (static_cast<u64>(high) << 32) | low;
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Address) == sizeof(u64));
|
||||
|
||||
enum class MmeShadowRamControl : u32 {
|
||||
MethodTrack = 0,
|
||||
MethodTrackWithFilter = 1,
|
||||
MethodPassthrough = 2,
|
||||
MethodReplay = 3
|
||||
};
|
||||
|
||||
struct ViewportTransform {
|
||||
enum class Swizzle : u8 {
|
||||
PositiveX = 0,
|
||||
NegativeX = 1,
|
||||
PositiveY = 2,
|
||||
NegativeY = 3,
|
||||
PositiveZ = 4,
|
||||
NegativeZ = 5,
|
||||
PositiveW = 6,
|
||||
NegativeW = 7,
|
||||
};
|
||||
|
||||
float scaleX;
|
||||
float scaleY;
|
||||
float scaleZ;
|
||||
float translateX;
|
||||
float translateY;
|
||||
float translateZ;
|
||||
|
||||
struct __attribute__((__packed__)) {
|
||||
Swizzle x : 3;
|
||||
u8 _pad0_ : 1;
|
||||
Swizzle y : 3;
|
||||
u8 _pad1_ : 1;
|
||||
Swizzle z : 3;
|
||||
u8 _pad2_ : 1;
|
||||
Swizzle w : 3;
|
||||
u32 _pad3_ : 17;
|
||||
} swizzles;
|
||||
|
||||
struct __attribute__((__packed__)) {
|
||||
u8 x : 5;
|
||||
u8 _pad0_ : 3;
|
||||
u8 y : 5;
|
||||
u32 _pad1_ : 19;
|
||||
} subpixelPrecisionBias;
|
||||
};
|
||||
static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
|
||||
|
||||
struct Viewport {
|
||||
struct {
|
||||
u16 x;
|
||||
u16 width;
|
||||
};
|
||||
|
||||
struct {
|
||||
u16 y;
|
||||
u16 height;
|
||||
};
|
||||
|
||||
float depthRangeNear;
|
||||
float depthRangeFar;
|
||||
};
|
||||
static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
|
||||
|
||||
enum class PolygonMode : u32 {
|
||||
Point = 0x1b00,
|
||||
Line = 0x1b01,
|
||||
Fill = 0x1b02,
|
||||
};
|
||||
|
||||
union VertexAttribute {
|
||||
enum class Size : u8 {
|
||||
Size_1x32 = 0x12,
|
||||
Size_2x32 = 0x04,
|
||||
Size_3x32 = 0x02,
|
||||
Size_4x32 = 0x01,
|
||||
Size_1x16 = 0x1b,
|
||||
Size_2x16 = 0x0f,
|
||||
Size_3x16 = 0x05,
|
||||
Size_4x16 = 0x03,
|
||||
Size_1x8 = 0x1d,
|
||||
Size_2x8 = 0x18,
|
||||
Size_3x8 = 0x13,
|
||||
Size_4x8 = 0x0a,
|
||||
Size_10_10_10_2 = 0x30,
|
||||
Size_11_11_10 = 0x31,
|
||||
};
|
||||
|
||||
enum class Type : u8 {
|
||||
None = 0,
|
||||
SNorm = 1,
|
||||
UNorm = 2,
|
||||
SInt = 3,
|
||||
UInt = 4,
|
||||
UScaled = 5,
|
||||
SScaled = 6,
|
||||
Float = 7,
|
||||
};
|
||||
|
||||
struct __attribute__((__packed__)) {
|
||||
u8 bufferId : 5;
|
||||
u8 _pad0_ : 1;
|
||||
bool fixed : 1;
|
||||
u16 offset : 14;
|
||||
Size size : 6;
|
||||
Type type : 3;
|
||||
u8 _pad1_ : 1;
|
||||
bool bgra : 1;
|
||||
};
|
||||
|
||||
u32 raw;
|
||||
};
|
||||
static_assert(sizeof(VertexAttribute) == sizeof(u32));
|
||||
|
||||
enum class CompareOp : u32 {
|
||||
Never = 1,
|
||||
Less = 2,
|
||||
Equal = 3,
|
||||
LessOrEqual = 4,
|
||||
Greater = 5,
|
||||
NotEqual = 6,
|
||||
GreaterOrEqual = 7,
|
||||
Always = 8,
|
||||
|
||||
NeverGL = 0x200,
|
||||
LessGL = 0x201,
|
||||
EqualGL = 0x202,
|
||||
LessOrEqualGL = 0x203,
|
||||
GreaterGL = 0x204,
|
||||
NotEqualGL = 0x205,
|
||||
GreaterOrEqualGL = 0x206,
|
||||
AlwaysGL = 0x207,
|
||||
};
|
||||
|
||||
struct Blend {
|
||||
enum class Op : u32 {
|
||||
Add = 1,
|
||||
Subtract = 2,
|
||||
ReverseSubtract = 3,
|
||||
Minimum = 4,
|
||||
Maximum = 5,
|
||||
|
||||
AddGL = 0x8006,
|
||||
SubtractGL = 0x8007,
|
||||
ReverseSubtractGL = 0x8008,
|
||||
MinimumGL = 0x800a,
|
||||
MaximumGL = 0x800b
|
||||
};
|
||||
|
||||
enum class Factor : u32 {
|
||||
Zero = 0x1,
|
||||
One = 0x2,
|
||||
SourceColor = 0x3,
|
||||
OneMinusSourceColor = 0x4,
|
||||
SourceAlpha = 0x5,
|
||||
OneMinusSourceAlpha = 0x6,
|
||||
DestAlpha = 0x7,
|
||||
OneMinusDestAlpha = 0x8,
|
||||
DestColor = 0x9,
|
||||
OneMinusDestColor = 0xa,
|
||||
SourceAlphaSaturate = 0xb,
|
||||
Source1Color = 0x10,
|
||||
OneMinusSource1Color = 0x11,
|
||||
Source1Alpha = 0x12,
|
||||
OneMinusSource1Alpha = 0x13,
|
||||
ConstantColor = 0x61,
|
||||
OneMinusConstantColor = 0x62,
|
||||
ConstantAlpha = 0x63,
|
||||
OneMinusConstantAlpha = 0x64,
|
||||
|
||||
ZeroGL = 0x4000,
|
||||
OneGL = 0x4001,
|
||||
SourceColorGL = 0x4300,
|
||||
OneMinusSourceColorGL = 0x4301,
|
||||
SourceAlphaGL = 0x4302,
|
||||
OneMinusSourceAlphaGL = 0x4303,
|
||||
DestAlphaGL = 0x4304,
|
||||
OneMinusDestAlphaGL = 0x4305,
|
||||
DestColorGL = 0x4306,
|
||||
OneMinusDestColorGL = 0x4307,
|
||||
SourceAlphaSaturateGL = 0x4308,
|
||||
ConstantColorGL = 0xc001,
|
||||
OneMinusConstantColorGL = 0xc002,
|
||||
ConstantAlphaGL = 0xc003,
|
||||
OneMinusConstantAlphaGL = 0xc004,
|
||||
Source1ColorGL = 0xc900,
|
||||
OneMinusSource1ColorGL = 0xc901,
|
||||
Source1AlphaGL = 0xc902,
|
||||
OneMinusSource1AlphaGL = 0xc903,
|
||||
};
|
||||
|
||||
struct {
|
||||
u32 seperateAlpha;
|
||||
Op colorOp;
|
||||
Factor colorSrcFactor;
|
||||
Factor colorDestFactor;
|
||||
Op alphaOp;
|
||||
Factor alphaSrcFactor;
|
||||
Factor alphaDestFactor;
|
||||
u32 _pad_;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(Blend) == (sizeof(u32) * 8));
|
||||
|
||||
enum class StencilOp : u32 {
|
||||
Keep = 1,
|
||||
Zero = 2,
|
||||
Replace = 3,
|
||||
IncrementAndClamp = 4,
|
||||
DecrementAndClamp = 5,
|
||||
Invert = 6,
|
||||
IncrementAndWrap = 7,
|
||||
DecrementAndWrap = 8,
|
||||
};
|
||||
|
||||
enum class FrontFace : u32 {
|
||||
Clockwise = 0x900,
|
||||
CounterClockwise = 0x901,
|
||||
};
|
||||
|
||||
enum class CullFace : u32 {
|
||||
Front = 0x404,
|
||||
Back = 0x405,
|
||||
FrontAndBack = 0x408,
|
||||
};
|
||||
|
||||
union ColorMask {
|
||||
struct __attribute__((__packed__)) {
|
||||
u8 r : 4;
|
||||
u8 g : 4;
|
||||
u8 b : 4;
|
||||
u8 a : 4;
|
||||
};
|
||||
|
||||
u32 raw;
|
||||
};
|
||||
static_assert(sizeof(ColorMask) == sizeof(u32));
|
||||
|
||||
struct __attribute__((__packed__)) SemaphoreInfo {
|
||||
enum class Op : u8 {
|
||||
Release = 0,
|
||||
Acquire = 1,
|
||||
Counter = 2,
|
||||
Trap = 3
|
||||
};
|
||||
|
||||
enum class ReductionOp : u8 {
|
||||
Add = 0,
|
||||
Min = 1,
|
||||
Max = 2,
|
||||
Inc = 3,
|
||||
Dec = 4,
|
||||
And = 5,
|
||||
Or = 6,
|
||||
Xor = 7,
|
||||
};
|
||||
|
||||
enum class Unit : u8 {
|
||||
VFetch = 1,
|
||||
VP = 2,
|
||||
Rast = 4,
|
||||
StrmOut = 5,
|
||||
GP = 6,
|
||||
ZCull = 7,
|
||||
Prop = 10,
|
||||
Crop = 15,
|
||||
};
|
||||
|
||||
enum class SyncCondition : u8 {
|
||||
NotEqual = 0,
|
||||
GreaterThan = 1,
|
||||
};
|
||||
|
||||
enum class Format : u8 {
|
||||
U32 = 0,
|
||||
I32 = 1,
|
||||
};
|
||||
|
||||
enum class CounterType : u8 {
|
||||
Zero = 0x0,
|
||||
InputVertices = 0x1,
|
||||
InputPrimitives = 0x3,
|
||||
VertexShaderInvocations = 0x5,
|
||||
GeometryShaderInvocations = 0x7,
|
||||
GeometryShaderPrimitives = 0x9,
|
||||
ZcullStats0 = 0xa,
|
||||
TransformFeedbackPrimitivesWritten = 0xb,
|
||||
ZcullStats1 = 0xc,
|
||||
ZcullStats2 = 0xe,
|
||||
ClipperInputPrimitives = 0xf,
|
||||
ZcullStats3 = 0x10,
|
||||
ClipperOutputPrimitives = 0x11,
|
||||
PrimitivesGenerated = 0x12,
|
||||
FragmentShaderInvocations = 0x13,
|
||||
SamplesPassed = 0x15,
|
||||
TransformFeedbackOffset = 0x1a,
|
||||
TessControlShaderInvocations = 0x1b,
|
||||
TessEvaluationShaderInvocations = 0x1d,
|
||||
TessEvaluationShaderPrimitives = 0x1f
|
||||
};
|
||||
|
||||
enum class StructureSize : u8 {
|
||||
FourWords = 0,
|
||||
OneWord = 1,
|
||||
};
|
||||
|
||||
Op op : 2;
|
||||
bool flushDisable : 1;
|
||||
bool reductionEnable : 1;
|
||||
bool fenceEnable : 1;
|
||||
u8 _pad0_ : 4;
|
||||
ReductionOp reductionOp : 3;
|
||||
Unit unit : 4;
|
||||
SyncCondition syncCondition : 1;
|
||||
Format format : 2;
|
||||
u8 _pad1_ : 1;
|
||||
bool awakenEnable : 1;
|
||||
u8 _pad2_ : 2;
|
||||
CounterType counterType : 5;
|
||||
StructureSize structureSize : 1;
|
||||
};
|
||||
static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
|
||||
|
||||
struct {
|
||||
u32 _pad0_[0x40]; // 0x0
|
||||
u32 noOperation; // 0x40
|
||||
u32 _pad1_[0x3]; // 0x41
|
||||
u32 waitForIdle; // 0x44
|
||||
|
||||
struct {
|
||||
u32 instructionRamPointer; // 0x45
|
||||
u32 instructionRamLoad; // 0x46
|
||||
u32 startAddressRamPointer; // 0x47
|
||||
u32 startAddressRamLoad; // 0x48
|
||||
MmeShadowRamControl shadowRamControl; // 0x49
|
||||
} mme;
|
||||
|
||||
u32 _pad2_[0x68]; // 0x4a
|
||||
|
||||
struct {
|
||||
u16 id : 12;
|
||||
u8 _pad0_ : 4;
|
||||
bool flushCache : 1;
|
||||
u8 _pad1_ : 3;
|
||||
bool increment : 1;
|
||||
u16 _pad2_ : 11;
|
||||
} syncpointAction; // 0xb2
|
||||
|
||||
u32 _pad3_[0x2c]; // 0xb3
|
||||
u32 rasterizerEnable; // 0xdf
|
||||
u32 _pad4_[0x1a0]; // 0xe0
|
||||
std::array<ViewportTransform, 0x10> viewportTransform; // 0x280
|
||||
std::array<Viewport, 0x10> viewport; // 0x300
|
||||
u32 _pad5_[0x2b]; // 0x340
|
||||
|
||||
struct {
|
||||
PolygonMode front; // 0x36b
|
||||
PolygonMode back; // 0x36c
|
||||
} polygonMode;
|
||||
|
||||
u32 _pad6_[0x68]; // 0x36d
|
||||
|
||||
struct {
|
||||
u32 funcRef; // 0x3d5
|
||||
u32 mask; // 0x3d6
|
||||
u32 funcMask; // 0x3d7
|
||||
} stencilBackExtra;
|
||||
|
||||
u32 _pad7_[0x13]; // 0x3d8
|
||||
u32 rtSeparateFragData; // 0x3eb
|
||||
u32 _pad8_[0x6c]; // 0x3ec
|
||||
std::array<VertexAttribute, 0x20> vertexAttributeState; // 0x458
|
||||
u32 _pad9_[0x4b]; // 0x478
|
||||
CompareOp depthTestFunc; // 0x4c3
|
||||
float alphaTestRef; // 0x4c4
|
||||
CompareOp alphaTestFunc; // 0x4c5
|
||||
u32 drawTFBStride; // 0x4c6
|
||||
|
||||
struct {
|
||||
float r; // 0x4c7
|
||||
float g; // 0x4c8
|
||||
float b; // 0x4c9
|
||||
float a; // 0x4ca
|
||||
} blendConstant;
|
||||
|
||||
u32 _pad10_[0x4]; // 0x4cb
|
||||
|
||||
struct {
|
||||
u32 seperateAlpha; // 0x4cf
|
||||
Blend::Op colorOp; // 0x4d0
|
||||
Blend::Factor colorSrcFactor; // 0x4d1
|
||||
Blend::Factor colorDestFactor; // 0x4d2
|
||||
Blend::Op alphaOp; // 0x4d3
|
||||
Blend::Factor alphaSrcFactor; // 0x4d4
|
||||
u32 _pad_; // 0x4d5
|
||||
Blend::Factor alphaDestFactor; // 0x4d6
|
||||
|
||||
u32 enableCommon; // 0x4d7
|
||||
std::array<u32, 8> enable; // 0x4d8 For each render target
|
||||
} blend;
|
||||
|
||||
u32 stencilEnable; // 0x4e0
|
||||
|
||||
struct {
|
||||
StencilOp failOp; // 0x4e1
|
||||
StencilOp zFailOp; // 0x4e2
|
||||
StencilOp zPassOp; // 0x4e3
|
||||
|
||||
struct {
|
||||
CompareOp op; // 0x4e4
|
||||
i32 ref; // 0x4e5
|
||||
u32 mask; // 0x4e6
|
||||
} func;
|
||||
|
||||
u32 mask; // 0x4e7
|
||||
} stencilFront;
|
||||
|
||||
u32 _pad11_[0x4]; // 0x4e8
|
||||
float lineWidthSmooth; // 0x4ec
|
||||
float lineWidthAliased; // 0x4d
|
||||
u32 _pad12_[0x1f]; // 0x4ee
|
||||
u32 drawBaseVertex; // 0x50d
|
||||
u32 drawBaseInstance; // 0x50e
|
||||
u32 _pad13_[0x35]; // 0x50f
|
||||
u32 clipDistanceEnable; // 0x544
|
||||
u32 sampleCounterEnable; // 0x545
|
||||
float pointSpriteSize; // 0x546
|
||||
u32 zCullStatCountersEnable; // 0x547
|
||||
u32 pointSpriteEnable; // 0x548
|
||||
u32 _pad14_; // 0x549
|
||||
u32 shaderExceptions; // 0x54a
|
||||
u32 _pad15_[0x2]; // 0x54b
|
||||
u32 multisampleEnable; // 0x54d
|
||||
u32 depthTargetEnable; // 0x54e
|
||||
|
||||
struct __attribute__((__packed__)) {
|
||||
bool alphaToCoverage : 1;
|
||||
u8 _pad0_ : 3;
|
||||
bool alphaToOne : 1;
|
||||
u32 _pad1_ : 27;
|
||||
} multisampleControl; // 0x54f
|
||||
|
||||
u32 _pad16_[0x7]; // 0x550
|
||||
|
||||
struct {
|
||||
Address address; // 0x557
|
||||
u32 maximumIndex; // 0x559
|
||||
} texSamplerPool;
|
||||
|
||||
u32 _pad17_; // 0x55a
|
||||
u32 polygonOffsetFactor; // 0x55b
|
||||
u32 lineSmoothEnable; // 0x55c
|
||||
|
||||
struct {
|
||||
Address address; // 0x55d
|
||||
u32 maximumIndex; // 0x55f
|
||||
} texHeaderPool;
|
||||
|
||||
u32 _pad18_[0x5]; // 0x560
|
||||
|
||||
struct {
|
||||
u32 stencilTwoSideEnable; // 0x565
|
||||
StencilOp failOp; // 0x566
|
||||
StencilOp zFailOp; // 0x567
|
||||
StencilOp zPassOp; // 0x568
|
||||
CompareOp funcOp; // 0x569
|
||||
} stencilBack;
|
||||
|
||||
u32 _pad19_[0xdc]; // 0x56a
|
||||
u32 cullFaceEnable; // 0x646
|
||||
FrontFace frontFace; // 0x647
|
||||
CullFace cullFace; // 0x648
|
||||
u32 pixelCentreImage; // 0x649
|
||||
u32 _pad20_[0x36]; // 0x64a
|
||||
std::array<ColorMask, 8> colorMask; // 0x680 For each render target
|
||||
u32 _pad21_[0x38]; // 0x688
|
||||
|
||||
struct {
|
||||
Address address; // 0x6c0
|
||||
u32 payload; // 0x6c2
|
||||
SemaphoreInfo info; // 0x6c3
|
||||
} semaphore;
|
||||
|
||||
u32 _pad22_[0xbc]; // 0x6c4
|
||||
std::array<Blend, 8> independentBlend; // 0x780 For each render target
|
||||
u32 _pad23_[0x100]; // 0x7c0
|
||||
u32 firmwareCall[0x20]; // 0x8c0
|
||||
};
|
||||
|
||||
std::array<u32, constant::Maxwell3DRegisterSize> raw;
|
||||
};
|
||||
static_assert(sizeof(Regs) == (constant::Maxwell3DRegisterSize * sizeof(u32)));
|
||||
|
||||
Regs regs{}; //!< The maxwell 3D register space
|
||||
Regs shadowRegs{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
|
||||
|
||||
std::array<u32, 0x10000> macroCode{}; //!< This is used to store GPU macros, the 256kb size is from Ryujinx
|
||||
|
||||
Maxwell3D(const DeviceState &state);
|
||||
|
||||
/**
|
||||
* @brief Resets the Maxwell 3D registers to their default values
|
||||
*/
|
||||
void ResetRegs();
|
||||
|
||||
void CallMethod(MethodParams params);
|
||||
};
|
||||
}
|
||||
}
|
201
app/src/main/cpp/skyline/gpu/macro_interpreter.cpp
Normal file
201
app/src/main/cpp/skyline/gpu/macro_interpreter.cpp
Normal file
@ -0,0 +1,201 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <kernel/types/KProcess.h>
|
||||
#include "engines/maxwell_3d.h"
|
||||
#include "memory_manager.h"
|
||||
#include "macro_interpreter.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
void MacroInterpreter::Execute(size_t offset, const std::vector<u32> &args) {
|
||||
// Reset the interpreter state
|
||||
registers = {};
|
||||
carryFlag = false;
|
||||
methodAddress.raw = 0;
|
||||
opcode = reinterpret_cast<Opcode *>(&maxwell3D.macroCode[offset]);
|
||||
argument = args.data();
|
||||
|
||||
// The first argument is stored in register 1
|
||||
registers[1] = *argument++;
|
||||
|
||||
while (Step());
|
||||
}
|
||||
|
||||
/**
 * @brief Executes the instruction at the current opcode pointer and then advances the pointer
 * @param delayedOpcode When set, the current instruction is a delay slot and execution continues at this opcode afterwards
 * @return If the macro should keep running, false once an exit (and its delay slot) has been executed
 */
FORCE_INLINE bool MacroInterpreter::Step(Opcode *delayedOpcode) {
    const bool inDelaySlot = delayedOpcode != nullptr;

    switch (opcode->operation) {
        case Opcode::Operation::AluRegister: {
            const u32 aluResult = HandleAlu(opcode->aluOperation, registers[opcode->srcA], registers[opcode->srcB]);
            HandleAssignment(opcode->assignmentOperation, opcode->dest, aluResult);
            break;
        }

        case Opcode::Operation::AddImmediate:
            HandleAssignment(opcode->assignmentOperation, opcode->dest, registers[opcode->srcA] + opcode->immediate);
            break;

        case Opcode::Operation::BitfieldReplace: {
            const u32 mask = opcode->bitfield.GetMask();

            // Pull the field out of the source register
            const u32 field = (registers[opcode->srcB] >> opcode->bitfield.srcBit) & mask;

            // Clear the region of the destination that is being replaced and insert the field there
            u32 merged = registers[opcode->srcA];
            merged &= ~(mask << opcode->bitfield.destBit);
            merged |= field << opcode->bitfield.destBit;

            HandleAssignment(opcode->assignmentOperation, opcode->dest, merged);
            break;
        }

        case Opcode::Operation::BitfieldExtractShiftLeftImmediate: {
            const u32 value = registers[opcode->srcB];
            const u32 shift = registers[opcode->srcA];

            // The source position comes from a register while the destination position is immediate
            const u32 extracted = ((value >> shift) & opcode->bitfield.GetMask()) << opcode->bitfield.destBit;

            HandleAssignment(opcode->assignmentOperation, opcode->dest, extracted);
            break;
        }

        case Opcode::Operation::BitfieldExtractShiftLeftRegister: {
            const u32 value = registers[opcode->srcB];
            const u32 shift = registers[opcode->srcA];

            // The source position is immediate while the destination position comes from a register
            const u32 extracted = ((value >> opcode->bitfield.srcBit) & opcode->bitfield.GetMask()) << shift;

            HandleAssignment(opcode->assignmentOperation, opcode->dest, extracted);
            break;
        }

        case Opcode::Operation::ReadImmediate: {
            const u32 registerValue = maxwell3D.regs.raw[registers[opcode->srcA] + opcode->immediate];
            HandleAssignment(opcode->assignmentOperation, opcode->dest, registerValue);
            break;
        }

        case Opcode::Operation::Branch: {
            if (inDelaySlot)
                throw exception("Cannot branch while inside a delay slot");

            const u32 condition = registers[opcode->srcA];
            const bool branchOnZero = opcode->branchCondition == Opcode::BranchCondition::Zero;
            const bool taken = (condition == 0) == branchOnZero;

            if (taken) {
                if (opcode->noDelay) {
                    // Jump straight to the target
                    opcode += opcode->immediate;
                    return true;
                }

                // Run the instruction after the branch as a delay slot, then continue at the target
                Opcode *branchTarget = opcode + opcode->immediate;
                opcode++;
                return Step(branchTarget);
            }
            break;
        }
    }

    if (opcode->exit && !inDelaySlot) {
        // Exit has a delay slot, run it before stopping
        opcode++;
        Step(opcode);
        return false;
    }

    opcode = inDelaySlot ? delayedOpcode : opcode + 1;

    return true;
}
|
||||
|
||||
/**
 * @brief Performs an ALU operation on the two source values, updating the carry flag for arithmetic operations
 * @param operation The ALU operation to perform
 * @param srcA The left hand operand
 * @param srcB The right hand operand
 * @return The lower 32 bits of the result
 * @throws exception If the operation is not one of the handled encodings
 */
FORCE_INLINE u32 MacroInterpreter::HandleAlu(Opcode::AluOperation operation, u32 srcA, u32 srcB) {
    switch (operation) {
        case Opcode::AluOperation::Add: {
            u64 result = static_cast<u64>(srcA) + srcB;

            // Anything past bit 31 is the carry out
            carryFlag = (result >> 32) != 0;
            return static_cast<u32>(result);
        }
        case Opcode::AluOperation::AddWithCarry: {
            u64 result = static_cast<u64>(srcA) + srcB + carryFlag;

            carryFlag = (result >> 32) != 0;
            return static_cast<u32>(result);
        }
        case Opcode::AluOperation::Subtract: {
            u64 result = static_cast<u64>(srcA) - srcB;

            // The flag is 'no borrow' as SubtractWithBorrow subtracts an extra one whenever it is clear,
            // a borrow wraps the 64-bit result around and so sets its upper half
            carryFlag = (result >> 32) == 0;
            return static_cast<u32>(result);
        }
        case Opcode::AluOperation::SubtractWithBorrow: {
            u64 result = static_cast<u64>(srcA) - srcB - !carryFlag;

            carryFlag = (result >> 32) == 0;
            return static_cast<u32>(result);
        }
        case Opcode::AluOperation::BitwiseXor:
            return srcA ^ srcB;
        case Opcode::AluOperation::BitwiseOr:
            return srcA | srcB;
        case Opcode::AluOperation::BitwiseAnd:
            return srcA & srcB;
        case Opcode::AluOperation::BitwiseAndNot:
            return srcA & ~srcB;
        case Opcode::AluOperation::BitwiseNand:
            return ~(srcA & srcB);
    }

    // The operation is decoded from a 5-bit field so unlisted encodings can reach this point,
    // fail loudly rather than flowing off the end of the function
    throw exception("Unsupported macro ALU operation");
}
|
||||
|
||||
/**
 * @brief Applies an instruction's assignment operation, which decides what ends up in the destination register,
 *        if the method address is updated and if a method call is sent to the Maxwell 3D
 * @param operation The assignment operation to apply
 * @param reg The index of the destination register
 * @param result The value computed by the instruction
 */
FORCE_INLINE void MacroInterpreter::HandleAssignment(Opcode::AssignmentOperation operation, u8 reg, u32 result) {
    using Assignment = Opcode::AssignmentOperation;

    // Consumes the next macro argument
    const auto fetchArgument = [this]() -> u32 { return *argument++; };

    switch (operation) {
        case Assignment::IgnoreAndFetch:
            WriteRegister(reg, fetchArgument());
            break;

        case Assignment::Move:
            WriteRegister(reg, result);
            break;

        case Assignment::MoveAndSetMethod:
            WriteRegister(reg, result);
            methodAddress.raw = result;
            break;

        case Assignment::FetchAndSend:
            WriteRegister(reg, fetchArgument());
            Send(result);
            break;

        case Assignment::MoveAndSend:
            WriteRegister(reg, result);
            Send(result);
            break;

        case Assignment::FetchAndSetMethod:
            WriteRegister(reg, fetchArgument());
            methodAddress.raw = result;
            break;

        case Assignment::MoveAndSetMethodThenFetchAndSend:
            WriteRegister(reg, result);
            methodAddress.raw = result;
            Send(fetchArgument());
            break;

        case Assignment::MoveAndSetMethodThenSendHigh:
            WriteRegister(reg, result);
            methodAddress.raw = result;
            // The value sent is the increment field of the method address that was just set
            Send(methodAddress.increment);
            break;
    }
}
|
||||
|
||||
/**
 * @brief Calls the Maxwell 3D method at the current method address and then advances the address by its increment
 * @param argument The argument to pass to the method
 */
FORCE_INLINE void MacroInterpreter::Send(u32 argument) {
    MethodParams methodCall{methodAddress.address, argument, 0, true};
    maxwell3D.CallMethod(methodCall);

    methodAddress.address = methodAddress.address + methodAddress.increment;
}
|
||||
|
||||
/**
 * @brief Writes a value to a macro register, writes to register 0 are dropped
 * @param reg The index of the register to write
 * @param value The value to store
 */
FORCE_INLINE void MacroInterpreter::WriteRegister(u8 reg, u32 value) {
    // Register 0 should always read as zero so it is never written
    if (reg != 0)
        registers[reg] = value;
}
|
||||
}
|
147
app/src/main/cpp/skyline/gpu/macro_interpreter.h
Normal file
147
app/src/main/cpp/skyline/gpu/macro_interpreter.h
Normal file
@ -0,0 +1,147 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline::gpu {
|
||||
namespace engine {
|
||||
class Maxwell3D;
|
||||
}
|
||||
|
||||
class MacroInterpreter {
|
||||
private:
|
||||
/**
|
||||
* @brief This holds a single macro opcode
|
||||
*/
|
||||
union Opcode {
|
||||
enum class Operation : u8 {
|
||||
AluRegister = 0,
|
||||
AddImmediate = 1,
|
||||
BitfieldReplace = 2,
|
||||
BitfieldExtractShiftLeftImmediate = 3,
|
||||
BitfieldExtractShiftLeftRegister = 4,
|
||||
ReadImmediate = 5,
|
||||
Branch = 7,
|
||||
};
|
||||
|
||||
enum class AssignmentOperation : u8 {
|
||||
IgnoreAndFetch = 0,
|
||||
Move = 1,
|
||||
MoveAndSetMethod = 2,
|
||||
FetchAndSend = 3,
|
||||
MoveAndSend = 4,
|
||||
FetchAndSetMethod = 5,
|
||||
MoveAndSetMethodThenFetchAndSend = 6,
|
||||
MoveAndSetMethodThenSendHigh = 7,
|
||||
};
|
||||
|
||||
enum class AluOperation : u8 {
|
||||
Add = 0,
|
||||
AddWithCarry = 1,
|
||||
Subtract = 2,
|
||||
SubtractWithBorrow = 3,
|
||||
BitwiseXor = 8,
|
||||
BitwiseOr = 9,
|
||||
BitwiseAnd = 10,
|
||||
BitwiseAndNot = 11,
|
||||
BitwiseNand = 12,
|
||||
};
|
||||
|
||||
enum class BranchCondition : u8 {
|
||||
Zero = 0,
|
||||
NonZero = 1,
|
||||
};
|
||||
|
||||
struct __attribute__((__packed__)) {
|
||||
Operation operation : 3;
|
||||
u8 _pad0_ : 1;
|
||||
AssignmentOperation assignmentOperation : 3;
|
||||
};
|
||||
|
||||
struct __attribute__((__packed__)) {
|
||||
u8 _pad1_ : 4;
|
||||
BranchCondition branchCondition : 1;
|
||||
u8 noDelay : 1;
|
||||
u8 _pad2_ : 1;
|
||||
u8 exit : 1;
|
||||
u8 dest : 3;
|
||||
u8 srcA : 3;
|
||||
u8 srcB : 3;
|
||||
AluOperation aluOperation : 5;
|
||||
};
|
||||
|
||||
struct __attribute__((__packed__)) {
|
||||
u16 _pad3_ : 14;
|
||||
i32 immediate : 18;
|
||||
};
|
||||
|
||||
struct __attribute__((__packed__)) {
|
||||
u32 _pad_ : 17;
|
||||
u8 srcBit : 5;
|
||||
u8 size : 5;
|
||||
u8 destBit : 5;
|
||||
|
||||
u32 GetMask() {
|
||||
return (1 << size) - 1;
|
||||
}
|
||||
} bitfield;
|
||||
|
||||
u32 raw;
|
||||
};
|
||||
static_assert(sizeof(Opcode) == sizeof(u32));
|
||||
|
||||
/**
|
||||
* @brief This holds information about the Maxwell 3D method to be called in 'Send'
|
||||
*/
|
||||
union MethodAddress {
|
||||
struct {
|
||||
u16 address : 12;
|
||||
u8 increment : 6;
|
||||
};
|
||||
|
||||
u32 raw;
|
||||
};
|
||||
|
||||
engine::Maxwell3D &maxwell3D;
|
||||
|
||||
std::array<u32, 8> registers{};
|
||||
|
||||
Opcode *opcode{};
|
||||
const u32 *argument{};
|
||||
MethodAddress methodAddress{};
|
||||
bool carryFlag{};
|
||||
|
||||
/**
|
||||
* @brief Steps forward one macro instruction, including delay slots
|
||||
* @param delayedOpcode The target opcode to be jumped to after executing the instruction
|
||||
*/
|
||||
bool Step(Opcode *delayedOpcode = nullptr);
|
||||
|
||||
/**
|
||||
* @brief Performs an ALU operation on the given source values and returns the result as a u32
|
||||
*/
|
||||
u32 HandleAlu(Opcode::AluOperation operation, u32 srcA, u32 srcB);
|
||||
|
||||
/**
|
||||
* @brief Handles an opcode's assignment operation
|
||||
*/
|
||||
void HandleAssignment(Opcode::AssignmentOperation operation, u8 reg, u32 result);
|
||||
|
||||
/**
|
||||
* @brief Sends a method call to the Maxwell 3D
|
||||
*/
|
||||
void Send(u32 argument);
|
||||
|
||||
void WriteRegister(u8 reg, u32 value);
|
||||
|
||||
public:
|
||||
MacroInterpreter(engine::Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
|
||||
|
||||
/**
|
||||
* @brief Executes a GPU macro from macro memory with the given arguments
|
||||
*/
|
||||
void Execute(size_t offset, const std::vector<u32> &args);
|
||||
};
|
||||
}
|
@ -9,8 +9,6 @@
|
||||
#include <asm/unistd.h>
|
||||
#include "guest_common.h"
|
||||
|
||||
#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage)
|
||||
|
||||
namespace skyline::guest {
|
||||
FORCE_INLINE void SaveCtxStack() {
|
||||
asm("SUB SP, SP, #240\n\t"
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage)
|
||||
|
||||
namespace skyline {
|
||||
using u128 = __uint128_t; //!< Unsigned 128-bit integer
|
||||
using u64 = __uint64_t; //!< Unsigned 64-bit integer
|
||||
|
Loading…
x
Reference in New Issue
Block a user