Introduce GraphicsContext and Implement Viewport Transform + Scissors

This commit is contained in:
PixelyIon 2021-07-23 08:27:11 +05:30
parent bc378ad135
commit 190fde110f
4 changed files with 633 additions and 413 deletions

View File

@ -0,0 +1,81 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <vulkan/vulkan_raii.hpp>
#include <soc/gm20b/engines/maxwell/types.h>
namespace skyline::gpu::context {
namespace maxwell3d = soc::gm20b::engine::maxwell3d::type;
/**
* @brief Host-equivalent context for state of the Maxwell3D engine on the guest
*/
class GraphicsContext {
private:
GPU &gpu;
std::array<vk::Viewport, maxwell3d::ViewportCount> viewports;
std::array<vk::Rect2D, maxwell3d::ViewportCount> scissors;
constexpr static vk::Rect2D DefaultScissor{
.extent = {
.height = std::numeric_limits<i32>::max(),
.width = std::numeric_limits<i32>::max(),
}
}; //!< A scissor which displays the entire viewport, utilized when the viewport scissor is disabled
public:
GraphicsContext(GPU &gpu) : gpu(gpu) {
scissors.fill(DefaultScissor);
}
/* Viewport Transforms */
/**
* @url https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#vertexpostproc-viewport
* @note Comments are written in the way of getting the same viewport transformations to be done on the host rather than deriving the host structure values from the guest submitted values, fundamentally the same thing but it is consistent with not assuming a certain guest API
*/
void SetViewportX(size_t index, float scale, float translate) {
auto &viewport{viewports.at(index)};
viewport.x = scale - translate; // Counteract the addition of the half of the width (o_x) to the host translation
viewport.width = scale * 2.0f; // Counteract the division of the width (p_x) by 2 for the host scale
}
void SetViewportY(size_t index, float scale, float translate) {
auto &viewport{viewports.at(index)};
viewport.y = scale - translate; // Counteract the addition of the half of the height (p_y/2 is center) to the host translation (o_y)
viewport.height = scale * 2.0f; // Counteract the division of the height (p_y) by 2 for the host scale
}
void SetViewportZ(size_t index, float scale, float translate) {
auto &viewport{viewports.at(index)};
viewport.minDepth = translate; // minDepth (o_z) directly corresponds to the host translation
viewport.maxDepth = scale + translate; // Counteract the subtraction of the maxDepth (p_z - o_z) by minDepth (o_z) for the host scale
}
/* Viewport Scissors */
void SetScissor(size_t index, std::optional<maxwell3d::Scissor> scissor) {
scissors.at(index) = scissor ? vk::Rect2D{
.offset.x = scissor->horizontal.minimum,
.extent.width = scissor->horizontal.maximum,
.offset.y = scissor->vertical.minimum,
.extent.height = scissor->horizontal.maximum,
} : DefaultScissor;
}
void SetScissorHorizontal(size_t index, maxwell3d::Scissor::ScissorBounds bounds) {
auto &scissor{scissors.at(index)};
scissor.offset.x = bounds.minimum;
scissor.extent.width = bounds.maximum;
}
void SetScissorVertical(size_t index, maxwell3d::Scissor::ScissorBounds bounds) {
auto &scissor{scissors.at(index)};
scissor.offset.y = bounds.minimum;
scissor.extent.height = bounds.maximum;
}
};
}

View File

@ -0,0 +1,386 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::soc::gm20b::engine::maxwell3d::type {
#pragma pack(push, 1)
/**
 * @brief A 40-bit GMMU virtual address with register-packing
 * @note The upper word is stored first, followed by the lower word
 */
struct Address {
    u32 high; //!< The upper 32 bits of the address
    u32 low; //!< The lower 32 bits of the address

    /**
     * @return Both words combined into a single 64-bit address
     * @note This is const-qualified as it only reads the members, so it can be called on const register snapshots as well
     */
    u64 Pack() const {
        return (static_cast<u64>(high) << 32) | low;
    }
};
static_assert(sizeof(Address) == sizeof(u64));
/**
* @brief The mode which determines how register writes interact with the shadow copy of the registers (shadow RAM)
*/
enum class MmeShadowRamControl : u32 {
MethodTrack = 0, //!< Tracks all writes to registers in shadow RAM
MethodTrackWithFilter = 1, //!< Tracks all writes to registers in shadow RAM with a filter
MethodPassthrough = 2, //!< Does nothing, no write tracking or hooking
MethodReplay = 3, //!< Replays older tracked writes for any new writes to registers, discarding the contents of the new write
};
constexpr static size_t ViewportCount{16}; //!< The number of viewports on Maxwell 3D, this is the array size of any per-viewport parameter such as transforms, scissors, etc
/**
* @brief The transformations applied on any primitive sent to a viewport
* @note The static_assert after this structure pins its size to 8 32-bit words
*/
struct ViewportTransform {
float scaleX; //!< Scales all X-axis primitive coordinates by this factor
float scaleY; //!< Scales all Y-axis primitive coordinates by this factor
float scaleZ; //!< Scales all Z-axis primitive coordinates by this factor
float translateX; //!< Translates all X-axis primitive coordinates by this value
float translateY; //!< Translates all Y-axis primitive coordinates by this value
float translateZ; //!< Translates all Z-axis primitive coordinates by this value
/**
* @brief A component swizzle applied to primitive coordinates prior to clipping/perspective divide with optional negation
* @note This functionality is exposed via GL_NV_viewport_swizzle (OpenGL) and VK_NV_viewport_swizzle (Vulkan)
*/
enum class Swizzle : u8 {
PositiveX = 0,
NegativeX = 1,
PositiveY = 2,
NegativeY = 3,
PositiveZ = 4,
NegativeZ = 5,
PositiveW = 6,
NegativeW = 7,
};
// One swizzle selector per output component, the _padN_ members are unused filler that brings the bitfields to 32 bits in total
struct {
Swizzle x : 3;
u8 _pad0_ : 1;
Swizzle y : 3;
u8 _pad1_ : 1;
Swizzle z : 3;
u8 _pad2_ : 1;
Swizzle w : 3;
u32 _pad3_ : 17;
} swizzles;
/**
* @brief The amount of subpixel bits on screen-space axes that bias if a pixel is inside a primitive for conservative rasterization
* @note This functionality is exposed via GL_NV_conservative_raster (OpenGL) using SubpixelPrecisionBiasNV
*/
struct {
u8 x : 5;
u8 _pad0_ : 3;
u8 y : 5;
u32 _pad1_ : 19;
} subpixelPrecisionBias;
};
static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
/**
* @brief The offset and extent of the viewport for transformation of coordinates from NDC-space (Normalized Device Coordinates) to screen-space
* @note This is effectively unused since all this data can be derived from the viewport transform, this misses crucial data that the transform has such as depth range order and viewport axis inverse transformations
*/
struct Viewport {
// Horizontal position and size, both members share a single 32-bit word
struct {
u16 x;
u16 width;
};
// Vertical position and size, both members share a single 32-bit word
struct {
u16 y;
u16 height;
};
float depthRangeNear; //!< The near end of the depth range
float depthRangeFar; //!< The far end of the depth range
};
static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
/**
* @brief The method used to rasterize polygons, not to be confused with the primitive type
* @note This functionality is exposed via glPolygonMode (OpenGL)
* @note NOTE(review): The values look like the corresponding OpenGL enumerants (GL_POINT/GL_LINE/GL_FILL) - confirm
*/
enum class PolygonMode : u32 {
Point = 0x1B00, //!< Draw a point for every vertex
Line = 0x1B01, //!< Draw a line between all vertices
Fill = 0x1B02, //!< Fill the area bounded by the vertices
};
/**
* @brief A scissor which is used to reject all writes to non-masked regions
* @note All coordinates are in screen-space as defined by the viewport
* @note The bounds are a lower/higher bound pair per dimension rather than an offset and a size
*/
struct Scissor {
u32 enable; //!< Rejects non-masked writes when enabled and allows all writes otherwise
struct ScissorBounds {
u16 minimum; //!< The lower bound of the masked region in a dimension
u16 maximum; //!< The higher bound of the masked region in a dimension
} horizontal, vertical;
u32 next; // NOTE(review): The purpose of this word isn't evident from this file, it brings the structure to the 4 words required by the static_assert below
};
static_assert(sizeof(Scissor) == (0x4 * sizeof(u32)));
/**
* @brief The description of a single vertex attribute packed into one 32-bit register
* @note The attribute can either be accessed as the raw register value or through the individual bitfields
*/
union VertexAttribute {
u32 raw; //!< The entire register value
// The size of the attribute, presumably named as <component count>x<bits per component> with packed formats listing the bits of every component - TODO confirm
enum class Size : u8 {
Size_1x32 = 0x12,
Size_2x32 = 0x04,
Size_3x32 = 0x02,
Size_4x32 = 0x01,
Size_1x16 = 0x1B,
Size_2x16 = 0x0F,
Size_3x16 = 0x05,
Size_4x16 = 0x03,
Size_1x8 = 0x1D,
Size_2x8 = 0x18,
Size_3x8 = 0x13,
Size_4x8 = 0x0A,
Size_10_10_10_2 = 0x30,
Size_11_11_10 = 0x31,
};
// The numeric interpretation of the components of the attribute
enum class Type : u8 {
None = 0,
SNorm = 1,
UNorm = 2,
SInt = 3,
UInt = 4,
UScaled = 5,
SScaled = 6,
Float = 7,
};
struct {
u8 bufferId : 5;
u8 _pad0_ : 1;
bool fixed : 1;
u16 offset : 14;
Size size : 6;
Type type : 3;
u8 _pad1_ : 1;
bool bgra : 1;
};
};
static_assert(sizeof(VertexAttribute) == sizeof(u32));
/**
* @brief The comparison operation used by tests such as the depth, alpha and stencil tests
* @note Every operation has two encodings, a compact one (1-8) and one suffixed with GL (0x200-0x207)
*/
enum class CompareOp : u32 {
Never = 1,
Less = 2,
Equal = 3,
LessOrEqual = 4,
Greater = 5,
NotEqual = 6,
GreaterOrEqual = 7,
Always = 8,
NeverGL = 0x200,
LessGL = 0x201,
EqualGL = 0x202,
LessOrEqualGL = 0x203,
GreaterGL = 0x204,
NotEqualGL = 0x205,
GreaterOrEqualGL = 0x206,
AlwaysGL = 0x207,
};
/**
* @brief The blending state of a render target: an operation alongside source and destination factors for both color and alpha
* @note The static_assert after this structure pins its size to 8 32-bit words
*/
struct Blend {
// The operation which combines the source and destination values, every operation has a compact encoding and one suffixed with GL
enum class Op : u32 {
Add = 1,
Subtract = 2,
ReverseSubtract = 3,
Minimum = 4,
Maximum = 5,
AddGL = 0x8006,
SubtractGL = 0x8007,
ReverseSubtractGL = 0x8008,
MinimumGL = 0x800A,
MaximumGL = 0x800B,
};
// The factor a source or destination value is multiplied by prior to the operation, every factor has a compact encoding and one suffixed with GL
enum class Factor : u32 {
Zero = 0x1,
One = 0x2,
SourceColor = 0x3,
OneMinusSourceColor = 0x4,
SourceAlpha = 0x5,
OneMinusSourceAlpha = 0x6,
DestAlpha = 0x7,
OneMinusDestAlpha = 0x8,
DestColor = 0x9,
OneMinusDestColor = 0xA,
SourceAlphaSaturate = 0xB,
Source1Color = 0x10,
OneMinusSource1Color = 0x11,
Source1Alpha = 0x12,
OneMinusSource1Alpha = 0x13,
ConstantColor = 0x61,
OneMinusConstantColor = 0x62,
ConstantAlpha = 0x63,
OneMinusConstantAlpha = 0x64,
ZeroGL = 0x4000,
OneGL = 0x4001,
SourceColorGL = 0x4300,
OneMinusSourceColorGL = 0x4301,
SourceAlphaGL = 0x4302,
OneMinusSourceAlphaGL = 0x4303,
DestAlphaGL = 0x4304,
OneMinusDestAlphaGL = 0x4305,
DestColorGL = 0x4306,
OneMinusDestColorGL = 0x4307,
SourceAlphaSaturateGL = 0x4308,
ConstantColorGL = 0xC001,
OneMinusConstantColorGL = 0xC002,
ConstantAlphaGL = 0xC003,
OneMinusConstantAlphaGL = 0xC004,
Source1ColorGL = 0xC900,
OneMinusSource1ColorGL = 0xC901,
Source1AlphaGL = 0xC902,
OneMinusSource1AlphaGL = 0xC903,
};
// The register layout, one 32-bit word per member
struct {
u32 seperateAlpha;
Op colorOp;
Factor colorSrcFactor;
Factor colorDestFactor;
Op alphaOp;
Factor alphaSrcFactor;
Factor alphaDestFactor;
u32 _pad_;
};
};
static_assert(sizeof(Blend) == (sizeof(u32) * 8));
/**
* @brief The action performed on a stencil value, used for the failOp/zFailOp/zPassOp stencil registers
*/
enum class StencilOp : u32 {
Keep = 1,
Zero = 2,
Replace = 3,
IncrementAndClamp = 4,
DecrementAndClamp = 5,
Invert = 6,
IncrementAndWrap = 7,
DecrementAndWrap = 8,
};
/**
* @brief The winding order of vertices which makes a polygon front-facing
*/
enum class FrontFace : u32 {
Clockwise = 0x900,
CounterClockwise = 0x901,
};
/**
* @brief The polygon face(s) that are selected for culling
*/
enum class CullFace : u32 {
Front = 0x404,
Back = 0x405,
FrontAndBack = 0x408,
};
/**
* @brief The per-channel write mask of a render target packed into one 32-bit register
* @note The mask can either be accessed as the raw register value or through the per-channel bitfields
*/
union ColorWriteMask {
u32 raw; //!< The entire register value
// One 4-bit field per color channel, presumably non-zero when writes to the channel are allowed - TODO confirm
struct {
u8 r : 4;
u8 g : 4;
u8 b : 4;
u8 a : 4;
};
};
static_assert(sizeof(ColorWriteMask) == sizeof(u32));
/**
* @brief The control word of a semaphore operation packed into one 32-bit register
* @note The static_assert after this structure pins its size to a single 32-bit word
*/
struct SemaphoreInfo {
// The kind of semaphore operation that is requested
enum class Op : u8 {
Release = 0,
Acquire = 1,
Counter = 2,
Trap = 3,
};
// The reduction applied when 'reductionEnable' is set - presumably, TODO confirm
enum class ReductionOp : u8 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
};
// The GPU unit associated with the operation
enum class Unit : u8 {
VFetch = 1,
VP = 2,
Rast = 4,
StrmOut = 5,
GP = 6,
ZCull = 7,
Prop = 10,
Crop = 15,
};
enum class SyncCondition : u8 {
NotEqual = 0,
GreaterThan = 1,
};
// The signedness of the 32-bit semaphore value
enum class Format : u8 {
U32 = 0,
I32 = 1,
};
// The counter that is reported by an Op::Counter operation
enum class CounterType : u8 {
Zero = 0x0,
InputVertices = 0x1,
InputPrimitives = 0x3,
VertexShaderInvocations = 0x5,
GeometryShaderInvocations = 0x7,
GeometryShaderPrimitives = 0x9,
ZcullStats0 = 0xA,
TransformFeedbackPrimitivesWritten = 0xB,
ZcullStats1 = 0xC,
ZcullStats2 = 0xE,
ClipperInputPrimitives = 0xF,
ZcullStats3 = 0x10,
ClipperOutputPrimitives = 0x11,
PrimitivesGenerated = 0x12,
FragmentShaderInvocations = 0x13,
SamplesPassed = 0x15,
TransformFeedbackOffset = 0x1A,
TessControlShaderInvocations = 0x1B,
TessEvaluationShaderInvocations = 0x1D,
TessEvaluationShaderPrimitives = 0x1F,
};
// The size of the result structure that is written back for the semaphore
enum class StructureSize : u8 {
FourWords = 0,
OneWord = 1,
};
// The bitfields below add up to exactly 32 bits, the _padN_ members are unused filler
Op op : 2;
bool flushDisable : 1;
bool reductionEnable : 1;
bool fenceEnable : 1;
u8 _pad0_ : 4;
ReductionOp reductionOp : 3;
Unit unit : 4;
SyncCondition syncCondition : 1;
Format format : 2;
u8 _pad1_ : 1;
bool awakenEnable : 1;
u8 _pad2_ : 2;
CounterType counterType : 5;
StructureSize structureSize : 1;
};
static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
/**
* @brief The corner at which the origin of a coordinate system is located
*/
enum class CoordOrigin : u8 {
LowerLeft = 0,
UpperLeft = 1,
};
#pragma pack(pop)
}

View File

@ -4,7 +4,7 @@
#include <soc.h>
namespace skyline::soc::gm20b::engine::maxwell3d {
Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this) {
Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this), context(*state.gpu) {
ResetRegs();
}
@ -13,29 +13,29 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
registers.rasterizerEnable = true;
for (auto &transform : registers.viewportTransform) {
transform.swizzles.x = Registers::ViewportTransform::Swizzle::PositiveX;
transform.swizzles.y = Registers::ViewportTransform::Swizzle::PositiveY;
transform.swizzles.z = Registers::ViewportTransform::Swizzle::PositiveZ;
transform.swizzles.w = Registers::ViewportTransform::Swizzle::PositiveW;
for (auto &transform : registers.viewportTransforms) {
transform.swizzles.x = type::ViewportTransform::Swizzle::PositiveX;
transform.swizzles.y = type::ViewportTransform::Swizzle::PositiveY;
transform.swizzles.z = type::ViewportTransform::Swizzle::PositiveZ;
transform.swizzles.w = type::ViewportTransform::Swizzle::PositiveW;
}
for (auto &viewport : registers.viewport) {
for (auto &viewport : registers.viewports) {
viewport.depthRangeFar = 1.0f;
viewport.depthRangeNear = 0.0f;
}
registers.polygonMode.front = Registers::PolygonMode::Fill;
registers.polygonMode.back = Registers::PolygonMode::Fill;
registers.polygonMode.front = type::PolygonMode::Fill;
registers.polygonMode.back = type::PolygonMode::Fill;
registers.stencilFront.failOp = registers.stencilFront.zFailOp = registers.stencilFront.zPassOp = Registers::StencilOp::Keep;
registers.stencilFront.compare.op = Registers::CompareOp::Always;
registers.stencilFront.failOp = registers.stencilFront.zFailOp = registers.stencilFront.zPassOp = type::StencilOp::Keep;
registers.stencilFront.compare.op = type::CompareOp::Always;
registers.stencilFront.compare.mask = 0xFFFFFFFF;
registers.stencilFront.writeMask = 0xFFFFFFFF;
registers.stencilTwoSideEnable = true;
registers.stencilBack.failOp = registers.stencilBack.zFailOp = registers.stencilBack.zPassOp = Registers::StencilOp::Keep;
registers.stencilBack.compareOp = Registers::CompareOp::Always;
registers.stencilBack.failOp = registers.stencilBack.zFailOp = registers.stencilBack.zPassOp = type::StencilOp::Keep;
registers.stencilBack.compareOp = type::CompareOp::Always;
registers.stencilBackExtra.compareMask = 0xFFFFFFFF;
registers.stencilBackExtra.writeMask = 0xFFFFFFFF;
@ -44,11 +44,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
for (auto &attribute : registers.vertexAttributeState)
attribute.fixed = true;
registers.depthTestFunc = Registers::CompareOp::Always;
registers.depthTestFunc = type::CompareOp::Always;
registers.blend.colorOp = registers.blend.alphaOp = Registers::Blend::Op::Add;
registers.blend.colorSrcFactor = registers.blend.alphaSrcFactor = Registers::Blend::Factor::One;
registers.blend.colorDestFactor = registers.blend.alphaDestFactor = Registers::Blend::Factor::Zero;
registers.blend.colorOp = registers.blend.alphaOp = type::Blend::Op::Add;
registers.blend.colorSrcFactor = registers.blend.alphaSrcFactor = type::Blend::Factor::One;
registers.blend.colorDestFactor = registers.blend.alphaDestFactor = type::Blend::Factor::Zero;
registers.lineWidthSmooth = 1.0f;
registers.lineWidthAliased = 1.0f;
@ -57,16 +57,16 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
registers.pointSpriteSize = 1.0f;
registers.pointCoordReplace.enable = true;
registers.frontFace = Registers::FrontFace::CounterClockwise;
registers.cullFace = Registers::CullFace::Back;
registers.frontFace = type::FrontFace::CounterClockwise;
registers.cullFace = type::CullFace::Back;
for (auto &mask : registers.colorMask)
mask.r = mask.g = mask.b = mask.a = 1;
for (auto &blend : registers.independentBlend) {
blend.colorOp = blend.alphaOp = Registers::Blend::Op::Add;
blend.colorSrcFactor = blend.alphaSrcFactor = Registers::Blend::Factor::One;
blend.colorDestFactor = blend.alphaDestFactor = Registers::Blend::Factor::Zero;
blend.colorOp = blend.alphaOp = type::Blend::Op::Add;
blend.colorSrcFactor = blend.alphaSrcFactor = type::Blend::Factor::One;
blend.colorDestFactor = blend.alphaDestFactor = type::Blend::Factor::Zero;
}
registers.viewportTransformEnable = true;
@ -104,9 +104,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
registers.raw[method] = argument;
if (shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodTrack || shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodTrackWithFilter)
if (shadowRegisters.mme.shadowRamControl == type::MmeShadowRamControl::MethodTrack || shadowRegisters.mme.shadowRamControl == type::MmeShadowRamControl::MethodTrackWithFilter)
shadowRegisters.raw[method] = argument;
else if (shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodReplay)
else if (shadowRegisters.mme.shadowRamControl == type::MmeShadowRamControl::MethodReplay)
argument = shadowRegisters.raw[method];
switch (method) {
@ -120,32 +120,113 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
registers.mme.instructionRamPointer %= macroCode.size();
break;
case MAXWELL3D_OFFSET(mme.startAddressRamLoad):
if (registers.mme.startAddressRamPointer >= macroPositions.size())
throw exception("Maximum amount of macros reached!");
macroPositions[registers.mme.startAddressRamPointer++] = argument;
break;
case MAXWELL3D_OFFSET(mme.shadowRamControl):
shadowRegisters.mme.shadowRamControl = static_cast<Registers::MmeShadowRamControl>(argument);
shadowRegisters.mme.shadowRamControl = static_cast<type::MmeShadowRamControl>(argument);
break;
case MAXWELL3D_OFFSET(syncpointAction):
state.logger->Debug("Increment syncpoint: {}", static_cast<u16>(registers.syncpointAction.id));
state.soc->host1x.syncpoints.at(registers.syncpointAction.id).Increment();
break;
// Emits the case labels which forward a write to any viewport transform scale/translate register of viewport 'index' to the host context
// The scale and translation of an axis are always forwarded together as the host viewport values are derived from the pair
// Each axis must forward its own registers: the Z-axis callback passes scaleZ/translateZ rather than the Y-axis values
// No '//' comments are placed inside the macro as they would swallow the line continuations
#define VIEWPORT_TRANSFORM_CALLBACKS(index) \
    case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleX): \
    case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateX): \
        context.SetViewportX(index, registers.viewportTransforms[index].scaleX, registers.viewportTransforms[index].translateX); \
        break; \
    case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleY): \
    case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateY): \
        context.SetViewportY(index, registers.viewportTransforms[index].scaleY, registers.viewportTransforms[index].translateY); \
        break; \
    case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleZ): \
    case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateZ): \
        context.SetViewportZ(index, registers.viewportTransforms[index].scaleZ, registers.viewportTransforms[index].translateZ); \
        break
VIEWPORT_TRANSFORM_CALLBACKS(0);
VIEWPORT_TRANSFORM_CALLBACKS(1);
VIEWPORT_TRANSFORM_CALLBACKS(2);
VIEWPORT_TRANSFORM_CALLBACKS(3);
VIEWPORT_TRANSFORM_CALLBACKS(4);
VIEWPORT_TRANSFORM_CALLBACKS(5);
VIEWPORT_TRANSFORM_CALLBACKS(6);
VIEWPORT_TRANSFORM_CALLBACKS(7);
VIEWPORT_TRANSFORM_CALLBACKS(8);
VIEWPORT_TRANSFORM_CALLBACKS(9);
VIEWPORT_TRANSFORM_CALLBACKS(10);
VIEWPORT_TRANSFORM_CALLBACKS(11);
VIEWPORT_TRANSFORM_CALLBACKS(12);
VIEWPORT_TRANSFORM_CALLBACKS(13);
VIEWPORT_TRANSFORM_CALLBACKS(14);
VIEWPORT_TRANSFORM_CALLBACKS(15);
static_assert(type::ViewportCount == 16);
#undef VIEWPORT_TRANSFORM_CALLBACKS
// Emits the case labels which forward a write to the scissor registers of viewport 'index' to the host context
// A write to 'enable' passes the entire scissor when the written argument is non-zero and an empty optional otherwise
// Writes to 'horizontal' or 'vertical' only forward the bounds of the corresponding dimension
// No '//' comments are placed inside the macro as they would swallow the line continuations
#define SCISSOR_CALLBACKS(index) \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, enable): \
context.SetScissor(index, argument ? registers.scissors[index] : std::optional<type::Scissor>{}); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, horizontal): \
context.SetScissorHorizontal(index, registers.scissors[index].horizontal); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, vertical): \
context.SetScissorVertical(index, registers.scissors[index].vertical); \
break
SCISSOR_CALLBACKS(0);
SCISSOR_CALLBACKS(1);
SCISSOR_CALLBACKS(2);
SCISSOR_CALLBACKS(3);
SCISSOR_CALLBACKS(4);
SCISSOR_CALLBACKS(5);
SCISSOR_CALLBACKS(6);
SCISSOR_CALLBACKS(7);
SCISSOR_CALLBACKS(8);
SCISSOR_CALLBACKS(9);
SCISSOR_CALLBACKS(10);
SCISSOR_CALLBACKS(11);
SCISSOR_CALLBACKS(12);
SCISSOR_CALLBACKS(13);
SCISSOR_CALLBACKS(14);
SCISSOR_CALLBACKS(15);
static_assert(type::ViewportCount == 16);
#undef SCISSOR_CALLBACKS
case MAXWELL3D_OFFSET(semaphore.info):
switch (registers.semaphore.info.op) {
case Registers::SemaphoreInfo::Op::Release:
case type::SemaphoreInfo::Op::Release:
WriteSemaphoreResult(registers.semaphore.payload);
break;
case Registers::SemaphoreInfo::Op::Counter:
HandleSemaphoreCounterOperation();
case type::SemaphoreInfo::Op::Counter: {
switch (registers.semaphore.info.counterType) {
case type::SemaphoreInfo::CounterType::Zero:
WriteSemaphoreResult(0);
break;
default:
state.logger->Warn("Unsupported semaphore counter type: 0x{:X}", static_cast<u8>(registers.semaphore.info.counterType));
break;
}
break;
}
default:
state.logger->Warn("Unsupported semaphore operation: 0x{:X}", static_cast<u8>(registers.semaphore.info.op));
break;
}
break;
case MAXWELL3D_OFFSET(firmwareCall[4]):
registers.raw[0xD00] = 1;
break;
@ -154,17 +235,6 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
}
}
// Handles a semaphore counter operation based on the counter type held in the semaphore info register
// Only the 'Zero' counter is supported which writes back a result of 0, any other counter type is logged as a warning and no result is written
void Maxwell3D::HandleSemaphoreCounterOperation() {
switch (registers.semaphore.info.counterType) {
case Registers::SemaphoreInfo::CounterType::Zero:
WriteSemaphoreResult(0);
break;
default:
state.logger->Warn("Unsupported semaphore counter type: 0x{:X}", static_cast<u8>(registers.semaphore.info.counterType));
break;
}
}
void Maxwell3D::WriteSemaphoreResult(u64 result) {
struct FourWordResult {
u64 value;
@ -172,10 +242,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
};
switch (registers.semaphore.info.structureSize) {
case Registers::SemaphoreInfo::StructureSize::OneWord:
case type::SemaphoreInfo::StructureSize::OneWord:
state.soc->gm20b.gmmu.Write<u32>(registers.semaphore.address.Pack(), static_cast<u32>(result));
break;
case Registers::SemaphoreInfo::StructureSize::FourWords: {
case type::SemaphoreInfo::StructureSize::FourWords: {
// Convert the current nanosecond time to GPU ticks
constexpr u64 NsToTickNumerator{384};
constexpr u64 NsToTickDenominator{625};

View File

@ -3,10 +3,19 @@
#pragma once
#include <gpu/context/graphics_context.h>
#include "engine.h"
#include "maxwell/macro_interpreter.h"
// Offsets of members of the Maxwell3D register file in units of 32-bit words, these are usable as method numbers and as indices into Registers::raw
// Every expansion is parenthesized (as is the 'index' argument) so that the macros can safely be used inside larger expressions
// All components are word offsets: MAXWELL3D_STRUCT_OFFSET uses U32_OFFSET for the member rather than a byte-granular offsetof
#define MAXWELL3D_OFFSET(field) (U32_OFFSET(Registers, field))
#define MAXWELL3D_STRUCT_OFFSET(field, member) (MAXWELL3D_OFFSET(field) + U32_OFFSET(typeof(Registers::field), member))
#define MAXWELL3D_ARRAY_OFFSET(field, index) (MAXWELL3D_OFFSET(field) + ((sizeof(typeof(Registers::field[0])) / sizeof(u32)) * (index)))
#define MAXWELL3D_ARRAY_STRUCT_OFFSET(field, index, member) (MAXWELL3D_ARRAY_OFFSET(field, index) + U32_OFFSET(typeof(Registers::field[0]), member))
#define MAXWELL3D_ARRAY_STRUCT_STRUCT_OFFSET(field, index, member, submember) (MAXWELL3D_ARRAY_STRUCT_OFFSET(field, index, member) + U32_OFFSET(typeof(Registers::field[0].member), submember))
namespace skyline::gpu::context {
// Forward declaration of the host graphics context, this header also includes <gpu/context/graphics_context.h> for it
class GraphicsContext;
}
namespace skyline::soc::gm20b::engine::maxwell3d {
/**
@ -23,356 +32,25 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
MacroInterpreter macroInterpreter;
void HandleSemaphoreCounterOperation();
gpu::context::GraphicsContext context;
/**
* @brief Writes back a semaphore result to the guest with an auto-generated timestamp (if required)
* @note If the semaphore is OneWord then the result will be downcasted to a 32-bit unsigned integer
*/
void WriteSemaphoreResult(u64 result);
public:
static constexpr u32 RegisterCount{0xE00}; //!< The number of Maxwell 3D registers
/**
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def
* @note To ease the extension of this structure, padding may follow both _padN_ and _padN_M_ formats
*/
#pragma pack(push, 1)
union Registers {
std::array<u32, RegisterCount> raw;
struct Address {
u32 high;
u32 low;
u64 Pack() {
return (static_cast<u64>(high) << 32) | low;
}
};
static_assert(sizeof(Address) == sizeof(u64));
enum class MmeShadowRamControl : u32 {
MethodTrack = 0,
MethodTrackWithFilter = 1,
MethodPassthrough = 2,
MethodReplay = 3,
};
struct ViewportTransform {
enum class Swizzle : u8 {
PositiveX = 0,
NegativeX = 1,
PositiveY = 2,
NegativeY = 3,
PositiveZ = 4,
NegativeZ = 5,
PositiveW = 6,
NegativeW = 7,
};
float scaleX;
float scaleY;
float scaleZ;
float translateX;
float translateY;
float translateZ;
struct {
Swizzle x : 3;
u8 _pad0_ : 1;
Swizzle y : 3;
u8 _pad1_ : 1;
Swizzle z : 3;
u8 _pad2_ : 1;
Swizzle w : 3;
u32 _pad3_ : 17;
} swizzles;
struct {
u8 x : 5;
u8 _pad0_ : 3;
u8 y : 5;
u32 _pad1_ : 19;
} subpixelPrecisionBias;
};
static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
struct Viewport {
struct {
u16 x;
u16 width;
};
struct {
u16 y;
u16 height;
};
float depthRangeNear;
float depthRangeFar;
};
static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
enum class PolygonMode : u32 {
Point = 0x1B00,
Line = 0x1B01,
Fill = 0x1B02,
};
union VertexAttribute {
u32 raw;
enum class Size : u8 {
Size_1x32 = 0x12,
Size_2x32 = 0x04,
Size_3x32 = 0x02,
Size_4x32 = 0x01,
Size_1x16 = 0x1B,
Size_2x16 = 0x0F,
Size_3x16 = 0x05,
Size_4x16 = 0x03,
Size_1x8 = 0x1D,
Size_2x8 = 0x18,
Size_3x8 = 0x13,
Size_4x8 = 0x0A,
Size_10_10_10_2 = 0x30,
Size_11_11_10 = 0x31,
};
enum class Type : u8 {
None = 0,
SNorm = 1,
UNorm = 2,
SInt = 3,
UInt = 4,
UScaled = 5,
SScaled = 6,
Float = 7,
};
struct {
u8 bufferId : 5;
u8 _pad0_ : 1;
bool fixed : 1;
u16 offset : 14;
Size size : 6;
Type type : 3;
u8 _pad1_ : 1;
bool bgra : 1;
};
};
static_assert(sizeof(VertexAttribute) == sizeof(u32));
enum class CompareOp : u32 {
Never = 1,
Less = 2,
Equal = 3,
LessOrEqual = 4,
Greater = 5,
NotEqual = 6,
GreaterOrEqual = 7,
Always = 8,
NeverGL = 0x200,
LessGL = 0x201,
EqualGL = 0x202,
LessOrEqualGL = 0x203,
GreaterGL = 0x204,
NotEqualGL = 0x205,
GreaterOrEqualGL = 0x206,
AlwaysGL = 0x207,
};
struct Blend {
enum class Op : u32 {
Add = 1,
Subtract = 2,
ReverseSubtract = 3,
Minimum = 4,
Maximum = 5,
AddGL = 0x8006,
SubtractGL = 0x8007,
ReverseSubtractGL = 0x8008,
MinimumGL = 0x800A,
MaximumGL = 0x800B,
};
enum class Factor : u32 {
Zero = 0x1,
One = 0x2,
SourceColor = 0x3,
OneMinusSourceColor = 0x4,
SourceAlpha = 0x5,
OneMinusSourceAlpha = 0x6,
DestAlpha = 0x7,
OneMinusDestAlpha = 0x8,
DestColor = 0x9,
OneMinusDestColor = 0xA,
SourceAlphaSaturate = 0xB,
Source1Color = 0x10,
OneMinusSource1Color = 0x11,
Source1Alpha = 0x12,
OneMinusSource1Alpha = 0x13,
ConstantColor = 0x61,
OneMinusConstantColor = 0x62,
ConstantAlpha = 0x63,
OneMinusConstantAlpha = 0x64,
ZeroGL = 0x4000,
OneGL = 0x4001,
SourceColorGL = 0x4300,
OneMinusSourceColorGL = 0x4301,
SourceAlphaGL = 0x4302,
OneMinusSourceAlphaGL = 0x4303,
DestAlphaGL = 0x4304,
OneMinusDestAlphaGL = 0x4305,
DestColorGL = 0x4306,
OneMinusDestColorGL = 0x4307,
SourceAlphaSaturateGL = 0x4308,
ConstantColorGL = 0xC001,
OneMinusConstantColorGL = 0xC002,
ConstantAlphaGL = 0xC003,
OneMinusConstantAlphaGL = 0xC004,
Source1ColorGL = 0xC900,
OneMinusSource1ColorGL = 0xC901,
Source1AlphaGL = 0xC902,
OneMinusSource1AlphaGL = 0xC903,
};
struct {
u32 seperateAlpha;
Op colorOp;
Factor colorSrcFactor;
Factor colorDestFactor;
Op alphaOp;
Factor alphaSrcFactor;
Factor alphaDestFactor;
u32 _pad_;
};
};
static_assert(sizeof(Blend) == (sizeof(u32) * 8));
enum class StencilOp : u32 {
Keep = 1,
Zero = 2,
Replace = 3,
IncrementAndClamp = 4,
DecrementAndClamp = 5,
Invert = 6,
IncrementAndWrap = 7,
DecrementAndWrap = 8,
};
enum class FrontFace : u32 {
Clockwise = 0x900,
CounterClockwise = 0x901,
};
enum class CullFace : u32 {
Front = 0x404,
Back = 0x405,
FrontAndBack = 0x408,
};
union ColorWriteMask {
u32 raw;
struct {
u8 r : 4;
u8 g : 4;
u8 b : 4;
u8 a : 4;
};
};
static_assert(sizeof(ColorWriteMask) == sizeof(u32));
struct SemaphoreInfo {
enum class Op : u8 {
Release = 0,
Acquire = 1,
Counter = 2,
Trap = 3,
};
enum class ReductionOp : u8 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
};
enum class Unit : u8 {
VFetch = 1,
VP = 2,
Rast = 4,
StrmOut = 5,
GP = 6,
ZCull = 7,
Prop = 10,
Crop = 15,
};
enum class SyncCondition : u8 {
NotEqual = 0,
GreaterThan = 1,
};
enum class Format : u8 {
U32 = 0,
I32 = 1,
};
enum class CounterType : u8 {
Zero = 0x0,
InputVertices = 0x1,
InputPrimitives = 0x3,
VertexShaderInvocations = 0x5,
GeometryShaderInvocations = 0x7,
GeometryShaderPrimitives = 0x9,
ZcullStats0 = 0xA,
TransformFeedbackPrimitivesWritten = 0xB,
ZcullStats1 = 0xC,
ZcullStats2 = 0xE,
ClipperInputPrimitives = 0xF,
ZcullStats3 = 0x10,
ClipperOutputPrimitives = 0x11,
PrimitivesGenerated = 0x12,
FragmentShaderInvocations = 0x13,
SamplesPassed = 0x15,
TransformFeedbackOffset = 0x1A,
TessControlShaderInvocations = 0x1B,
TessEvaluationShaderInvocations = 0x1D,
TessEvaluationShaderPrimitives = 0x1F,
};
enum class StructureSize : u8 {
FourWords = 0,
OneWord = 1,
};
Op op : 2;
bool flushDisable : 1;
bool reductionEnable : 1;
bool fenceEnable : 1;
u8 _pad0_ : 4;
ReductionOp reductionOp : 3;
Unit unit : 4;
SyncCondition syncCondition : 1;
Format format : 2;
u8 _pad1_ : 1;
bool awakenEnable : 1;
u8 _pad2_ : 2;
CounterType counterType : 5;
StructureSize structureSize : 1;
};
static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
enum class CoordOrigin : u8 {
LowerLeft = 0,
UpperLeft = 1,
};
struct {
u32 _pad0_[0x40]; // 0x0
u32 noOperation; // 0x40
@ -384,7 +62,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 instructionRamLoad; // 0x46
u32 startAddressRamPointer; // 0x47
u32 startAddressRamLoad; // 0x48
MmeShadowRamControl shadowRamControl; // 0x49
type::MmeShadowRamControl shadowRamControl; // 0x49
} mme;
u32 _pad2_[0x68]; // 0x4A
@ -401,16 +79,20 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 _pad3_[0x2C]; // 0xB3
u32 rasterizerEnable; // 0xDF
u32 _pad4_[0x1A0]; // 0xE0
std::array<ViewportTransform, 0x10> viewportTransform; // 0x280
std::array<Viewport, 0x10> viewport; // 0x300
std::array<type::ViewportTransform, type::ViewportCount> viewportTransforms; // 0x280
std::array<type::Viewport, type::ViewportCount> viewports; // 0x300
u32 _pad5_[0x2B]; // 0x340
struct {
PolygonMode front; // 0x36B
PolygonMode back; // 0x36C
type::PolygonMode front; // 0x36B
type::PolygonMode back; // 0x36C
} polygonMode;
u32 _pad6_[0x68]; // 0x36D
u32 _pad6_[0x13]; // 0x36D
std::array<type::Scissor, type::ViewportCount> scissors; // 0x380
u32 _pad6_1_[0x15]; // 0x3C0
struct {
u32 compareRef; // 0x3D5
@ -421,11 +103,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 _pad7_[0x13]; // 0x3D8
u32 rtSeparateFragData; // 0x3EB
u32 _pad8_[0x6C]; // 0x3EC
std::array<VertexAttribute, 0x20> vertexAttributeState; // 0x458
std::array<type::VertexAttribute, 0x20> vertexAttributeState; // 0x458
u32 _pad9_[0x4B]; // 0x478
CompareOp depthTestFunc; // 0x4C3
type::CompareOp depthTestFunc; // 0x4C3
float alphaTestRef; // 0x4C4
CompareOp alphaTestFunc; // 0x4C5
type::CompareOp alphaTestFunc; // 0x4C5
u32 drawTFBStride; // 0x4C6
struct {
@ -439,13 +121,13 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
struct {
u32 seperateAlpha; // 0x4CF
Blend::Op colorOp; // 0x4D0
Blend::Factor colorSrcFactor; // 0x4D1
Blend::Factor colorDestFactor; // 0x4D2
Blend::Op alphaOp; // 0x4D3
Blend::Factor alphaSrcFactor; // 0x4D4
type::Blend::Op colorOp; // 0x4D0
type::Blend::Factor colorSrcFactor; // 0x4D1
type::Blend::Factor colorDestFactor; // 0x4D2
type::Blend::Op alphaOp; // 0x4D3
type::Blend::Factor alphaSrcFactor; // 0x4D4
u32 _pad_; // 0x4D5
Blend::Factor alphaDestFactor; // 0x4D6
type::Blend::Factor alphaDestFactor; // 0x4D6
u32 enableCommon; // 0x4D7
std::array<u32, 8> enable; // 0x4D8 For each render target
@ -454,12 +136,12 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 stencilEnable; // 0x4E0
struct {
StencilOp failOp; // 0x4E1
StencilOp zFailOp; // 0x4E2
StencilOp zPassOp; // 0x4E3
type::StencilOp failOp; // 0x4E1
type::StencilOp zFailOp; // 0x4E2
type::StencilOp zPassOp; // 0x4E3
struct {
CompareOp op; // 0x4E4
type::CompareOp op; // 0x4E4
i32 ref; // 0x4E5
u32 mask; // 0x4E6
} compare;
@ -495,7 +177,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 _pad16_[0x7]; // 0x550
struct {
Address address; // 0x557
type::Address address; // 0x557
u32 maximumIndex; // 0x559
} texSamplerPool;
@ -504,7 +186,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 lineSmoothEnable; // 0x55C
struct {
Address address; // 0x55D
type::Address address; // 0x55D
u32 maximumIndex; // 0x55F
} texHeaderPool;
@ -513,40 +195,40 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 stencilTwoSideEnable; // 0x565
struct {
StencilOp failOp; // 0x566
StencilOp zFailOp; // 0x567
StencilOp zPassOp; // 0x568
CompareOp compareOp; // 0x569
type::StencilOp failOp; // 0x566
type::StencilOp zFailOp; // 0x567
type::StencilOp zPassOp; // 0x568
type::CompareOp compareOp; // 0x569
} stencilBack;
u32 _pad19_[0x17]; // 0x56A
struct {
u8 _unk_ : 2;
CoordOrigin origin : 1;
type::CoordOrigin origin : 1;
u16 enable : 10;
u32 _pad_ : 19;
} pointCoordReplace; // 0x581
u32 _pad20_[0xC4]; // 0x582
u32 cullFaceEnable; // 0x646
FrontFace frontFace; // 0x647
CullFace cullFace; // 0x648
type::FrontFace frontFace; // 0x647
type::CullFace cullFace; // 0x648
u32 pixelCentreImage; // 0x649
u32 _pad21_; // 0x64A
u32 viewportTransformEnable; // 0x64B
u32 _pad22_[0x34]; // 0x64A
std::array<ColorWriteMask, 8> colorMask; // 0x680 For each render target
std::array<type::ColorWriteMask, 8> colorMask; // 0x680 For each render target
u32 _pad23_[0x38]; // 0x688
struct {
Address address; // 0x6C0
type::Address address; // 0x6C0
u32 payload; // 0x6C2
SemaphoreInfo info; // 0x6C3
type::SemaphoreInfo info; // 0x6C3
} semaphore;
u32 _pad24_[0xBC]; // 0x6C4
std::array<Blend, 8> independentBlend; // 0x780 For each render target
std::array<type::Blend, 8> independentBlend; // 0x780 For each render target
u32 _pad25_[0x100]; // 0x7C0
u32 firmwareCall[0x20]; // 0x8C0
};
@ -555,9 +237,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
#pragma pack(pop)
Registers registers{};
Registers shadowRegisters{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
Registers shadowRegisters{}; //!< A shadow-copy of the registers, their function is controlled by the 'shadowRamControl' register
std::array<u32, 0x2000> macroCode{}; //!< This stores GPU macros, writes to it will wraparound on overflow
std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
Maxwell3D(const DeviceState &state);