Move Guest GPU into SoC Directory

We decided to restructure Skyline to draw a layer of separation between the guest and host GPU. We're reserving the `gpu` namespace and directory purely for the host GPU and creating a new `soc` directory and namespace for emulation of parts of the X1 SoC; this is currently limited to the guest GPU but will be expanded to contain components like the audio DSP down the line.
This commit is contained in:
PixelyIon 2021-03-25 01:39:21 +05:30 committed by ◱ Mark
parent 0ea6d9bee5
commit 3f7373209a
33 changed files with 1319 additions and 1289 deletions

View File

@ -7,22 +7,6 @@
<option name="FORMATTER_OFF_TAG" value="@fmt:off" />
<option name="SOFT_MARGINS" value="80,140" />
<JetCodeStyleSettings>
<option name="PACKAGES_TO_USE_STAR_IMPORTS">
<value>
<package name="java.util" alias="false" withSubpackages="false" />
<package name="kotlinx.android.synthetic" alias="false" withSubpackages="true" />
<package name="io.ktor" alias="false" withSubpackages="true" />
</value>
</option>
<option name="PACKAGES_IMPORT_LAYOUT">
<value>
<package name="" alias="false" withSubpackages="true" />
<package name="java" alias="false" withSubpackages="true" />
<package name="javax" alias="false" withSubpackages="true" />
<package name="kotlin" alias="false" withSubpackages="true" />
<package name="" alias="true" withSubpackages="true" />
</value>
</option>
<option name="SPACE_BEFORE_TYPE_COLON" value="true" />
<option name="CODE_STYLE_DEFAULTS" value="KOTLIN_OFFICIAL" />
</JetCodeStyleSettings>

View File

@ -171,7 +171,7 @@
</inspection_tool>
<inspection_tool class="CheckedExceptionClass" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="ClangTidy" enabled="true" level="WARNING" enabled_by_default="true">
<option name="clangTidyChecks" value="-*,bugprone-argument-comment,bugprone-assert-side-effect,bugprone-bad-signal-to-kill-thread,bugprone-branch-clone,bugprone-copy-constructor-init,bugprone-dangling-handle,bugprone-dynamic-static-initializers,bugprone-fold-init-type,bugprone-forward-declaration-namespace,bugprone-forwarding-reference-overload,bugprone-inaccurate-erase,bugprone-incorrect-roundings,bugprone-integer-division,bugprone-lambda-function-name,bugprone-macro-parentheses,bugprone-macro-repeated-side-effects,bugprone-misplaced-operator-in-strlen-in-alloc,bugprone-misplaced-pointer-arithmetic-in-alloc,bugprone-misplaced-widening-cast,bugprone-move-forwarding-reference,bugprone-multiple-statement-macro,bugprone-no-escape,bugprone-not-null-terminated-result,bugprone-parent-virtual-call,bugprone-posix-return,bugprone-reserved-identifier,bugprone-sizeof-container,bugprone-sizeof-expression,bugprone-spuriously-wake-up-functions,bugprone-string-constructor,bugprone-string-integer-assignment,bugprone-string-literal-with-embedded-nul,bugprone-suspicious-enum-usage,bugprone-suspicious-include,bugprone-suspicious-memset-usage,bugprone-suspicious-missing-comma,bugprone-suspicious-semicolon,bugprone-suspicious-string-compare,bugprone-swapped-arguments,bugprone-terminating-continue,bugprone-throw-keyword-missing,bugprone-too-small-loop-variable,bugprone-undefined-memory-manipulation,bugprone-undelegated-constructor,bugprone-unhandled-self-assignment,bugprone-unused-raii,bugprone-unused-return-value,bugprone-use-after-move,bugprone-virtual-near-miss,cert-dcl21-cpp,cert-dcl58-cpp,cert-err34-c,cert-err52-cpp,cert-err60-cpp,cert-flp30-c,cert-msc50-cpp,cert-msc51-cpp,cert-str34-c,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-pro-type-static-cast-downcast,cppcoreguidelines-slicing,google-default-arguments,google-explicit-constructor,google-runtime-operator,hicpp-exception-baseclass,hicpp-multiway-paths-covered,misc-misplaced-
const,misc-new-delete-overloads,misc-no-recursion,misc-non-copyable-objects,misc-throw-by-value-catch-by-reference,misc-unconventional-assign-operator,misc-uniqueptr-reset-release,modernize-avoid-bind,modernize-concat-nested-namespaces,modernize-deprecated-headers,modernize-deprecated-ios-base-aliases,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-pass-by-value,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-disallow-copy-and-assign-macro,modernize-replace-random-shuffle,modernize-return-braced-init-list,modernize-shrink-to-fit,modernize-unary-static-assert,modernize-use-auto,modernize-use-bool-literals,modernize-use-emplace,modernize-use-equals-default,modernize-use-equals-delete,modernize-use-nodiscard,modernize-use-noexcept,modernize-use-nullptr,modernize-use-override,modernize-use-transparent-functors,modernize-use-uncaught-exceptions,mpi-buffer-deref,mpi-type-mismatch,openmp-use-default-none,performance-faster-string-find,performance-for-range-copy,performance-implicit-conversion-in-loop,performance-inefficient-algorithm,performance-inefficient-string-concatenation,performance-inefficient-vector-operation,performance-move-const-arg,performance-move-constructor-init,performance-no-automatic-move,performance-noexcept-move-constructor,performance-trivially-destructible,performance-type-promotion-in-math-fn,performance-unnecessary-copy-initialization,performance-unnecessary-value-param,portability-simd-intrinsics,readability-avoid-const-params-in-decls,readability-const-return-type,readability-container-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-inconsistent-declaration-parameter-name,readability-make-member-function-const,readability-misleading-indentation,readability-misplaced-array-index,readability-non-const-parameter,readability-redundant-control-flow,readability-redundant-declaration,readab
ility-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-simplify-subscript-expr,readability-static-accessed-through-instance,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,readability-use-anyofallof" />
<option name="clangTidyChecks" value="-*,bugprone-argument-comment,bugprone-assert-side-effect,bugprone-bad-signal-to-kill-thread,bugprone-branch-clone,bugprone-copy-constructor-init,bugprone-dangling-handle,bugprone-dynamic-static-initializers,bugprone-fold-init-type,bugprone-forward-declaration-namespace,bugprone-forwarding-reference-overload,bugprone-inaccurate-erase,bugprone-incorrect-roundings,bugprone-integer-division,bugprone-lambda-function-name,bugprone-macro-parentheses,bugprone-macro-repeated-side-effects,bugprone-misplaced-operator-in-strlen-in-alloc,bugprone-misplaced-pointer-arithmetic-in-alloc,bugprone-misplaced-widening-cast,bugprone-move-forwarding-reference,bugprone-multiple-statement-macro,bugprone-no-escape,bugprone-not-null-terminated-result,bugprone-parent-virtual-call,bugprone-posix-return,bugprone-reserved-identifier,bugprone-sizeof-container,bugprone-sizeof-expression,bugprone-spuriously-wake-up-functions,bugprone-string-constructor,bugprone-string-integer-assignment,bugprone-string-literal-with-embedded-nul,bugprone-suspicious-enum-usage,bugprone-suspicious-include,bugprone-suspicious-memset-usage,bugprone-suspicious-missing-comma,bugprone-suspicious-semicolon,bugprone-suspicious-string-compare,bugprone-swapped-arguments,bugprone-terminating-continue,bugprone-throw-keyword-missing,bugprone-too-small-loop-variable,bugprone-undefined-memory-manipulation,bugprone-undelegated-constructor,bugprone-unhandled-self-assignment,bugprone-unused-raii,bugprone-unused-return-value,bugprone-use-after-move,bugprone-virtual-near-miss,cert-dcl21-cpp,cert-dcl58-cpp,cert-err34-c,cert-err52-cpp,cert-err58-cpp,cert-err60-cpp,cert-flp30-c,cert-msc50-cpp,cert-msc51-cpp,cert-str34-c,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-pro-type-member-init,cppcoreguidelines-pro-type-static-cast-downcast,google-default-arguments,google-explicit-constructor,google-runtime-operator,hicpp-exception-baseclass,hicpp-multiway-p
aths-covered,misc-misplaced-const,misc-new-delete-overloads,misc-no-recursion,misc-non-copyable-objects,misc-throw-by-value-catch-by-reference,misc-unconventional-assign-operator,misc-uniqueptr-reset-release,modernize-avoid-bind,modernize-concat-nested-namespaces,modernize-deprecated-ios-base-aliases,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-pass-by-value,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-disallow-copy-and-assign-macro,modernize-replace-random-shuffle,modernize-return-braced-init-list,modernize-shrink-to-fit,modernize-unary-static-assert,modernize-use-auto,modernize-use-bool-literals,modernize-use-emplace,modernize-use-equals-default,modernize-use-equals-delete,modernize-use-nodiscard,modernize-use-noexcept,modernize-use-nullptr,modernize-use-override,modernize-use-transparent-functors,modernize-use-uncaught-exceptions,mpi-buffer-deref,mpi-type-mismatch,openmp-use-default-none,performance-faster-string-find,performance-for-range-copy,performance-implicit-conversion-in-loop,performance-inefficient-algorithm,performance-inefficient-string-concatenation,performance-inefficient-vector-operation,performance-move-const-arg,performance-move-constructor-init,performance-no-automatic-move,performance-noexcept-move-constructor,performance-trivially-destructible,performance-type-promotion-in-math-fn,performance-unnecessary-copy-initialization,performance-unnecessary-value-param,portability-simd-intrinsics,readability-avoid-const-params-in-decls,readability-const-return-type,readability-container-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-inconsistent-declaration-parameter-name,readability-make-member-function-const,readability-misleading-indentation,readability-misplaced-array-index,readability-non-const-parameter,readability-redundant-control-flow,readability-redundant-declaration,readabi
lity-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-simplify-subscript-expr,readability-static-accessed-through-instance,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,readability-use-anyofallof" />
</inspection_tool>
<inspection_tool class="ClassComplexity" enabled="true" level="WARNING" enabled_by_default="true">
<option name="m_limit" value="80" />

View File

@ -67,12 +67,12 @@ add_library(skyline SHARED
${source_DIR}/skyline/audio/resampler.cpp
${source_DIR}/skyline/audio/adpcm_decoder.cpp
${source_DIR}/skyline/gpu/presentation_engine.cpp
${source_DIR}/skyline/gpu/macro_interpreter.cpp
${source_DIR}/skyline/gpu/memory_manager.cpp
${source_DIR}/skyline/gpu/gpfifo.cpp
${source_DIR}/skyline/gpu/syncpoint.cpp
${source_DIR}/skyline/gpu/texture.cpp
${source_DIR}/skyline/gpu/engines/maxwell_3d.cpp
${source_DIR}/skyline/soc/gmmu.cpp
${source_DIR}/skyline/soc/host1x/syncpoint.cpp
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
${source_DIR}/skyline/input/npad.cpp
${source_DIR}/skyline/input/npad_device.cpp
${source_DIR}/skyline/input/touch.cpp

View File

@ -4,6 +4,7 @@
#include <android/log.h>
#include "common.h"
#include "nce.h"
#include "soc.h"
#include "gpu.h"
#include "audio.h"
#include "input.h"
@ -55,6 +56,7 @@ namespace skyline {
DeviceState::DeviceState(kernel::OS *os, std::shared_ptr<JvmManager> jvmManager, std::shared_ptr<Settings> settings, std::shared_ptr<Logger> logger)
: os(os), jvm(std::move(jvmManager)), settings(std::move(settings)), logger(std::move(logger)) {
// We assign these later as they use the state in their constructor and we don't want null pointers
soc = std::make_shared<soc::SOC>(*this);
gpu = std::make_shared<gpu::GPU>(*this);
audio = std::make_shared<audio::Audio>(*this);
nce = std::make_shared<nce::NCE>(*this);

View File

@ -27,7 +27,7 @@
#include <frozen/string.h>
#include <jni.h>
#define FORCE_INLINE __attribute__((always_inline)) inline // NOLINT(cppcoreguidelines-macro-usage)
#define FORCE_INLINE __attribute__((always_inline)) // NOLINT(cppcoreguidelines-macro-usage)
namespace fmt {
/**
@ -605,6 +605,9 @@ namespace skyline {
struct ThreadContext;
}
class JvmManager;
namespace soc {
class SOC;
}
namespace gpu {
class GPU;
}
@ -637,6 +640,7 @@ namespace skyline {
std::shared_ptr<Settings> settings;
std::shared_ptr<Logger> logger;
std::shared_ptr<loader::Loader> loader;
std::shared_ptr<soc::SOC> soc;
std::shared_ptr<gpu::GPU> gpu;
std::shared_ptr<audio::Audio> audio;
std::shared_ptr<nce::NCE> nce;

View File

@ -3,30 +3,16 @@
#pragma once
#include "gpu/gpfifo.h"
#include "gpu/syncpoint.h"
#include "gpu/engines/maxwell_3d.h"
#include "gpu/presentation_engine.h"
namespace skyline::gpu {
/**
* @brief A common interfaces to the GPU where all objects relevant to it are present
* @brief An interface to host GPU structures, anything concerning host GPU/Presentation APIs is encapsulated by this
*/
class GPU {
private:
const DeviceState &state;
public:
PresentationEngine presentation;
vmm::MemoryManager memoryManager;
std::shared_ptr<engine::Engine> fermi2D;
std::shared_ptr<engine::Maxwell3D> maxwell3D;
std::shared_ptr<engine::Engine> maxwellCompute;
std::shared_ptr<engine::Engine> maxwellDma;
std::shared_ptr<engine::Engine> keplerMemory;
std::array<Syncpoint, constant::MaxHwSyncpointCount> syncpoints{};
gpfifo::GPFIFO gpfifo;
GPU(const DeviceState &state) : state(state), presentation(state), memoryManager(state), gpfifo(state), fermi2D(std::make_shared<engine::Engine>(state)), keplerMemory(std::make_shared<engine::Engine>(state)), maxwell3D(std::make_shared<engine::Maxwell3D>(state)), maxwellCompute(std::make_shared<engine::Engine>(state)), maxwellDma(std::make_shared<engine::Engine>(state)) {}
GPU(const DeviceState &state) : presentation(state) {}
};
}

View File

@ -1,179 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "engine.h"
namespace skyline {
namespace constant {
constexpr u32 GpfifoRegisterCount{0x40}; //!< The number of GPFIFO registers
}
namespace gpu::engine {
/**
* @brief The GPFIFO engine handles managing macros and semaphores
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
*/
class GPFIFO : public Engine {
private:
/**
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65
*/
#pragma pack(push, 1)
union Registers {
std::array<u32, constant::GpfifoRegisterCount> raw;
enum class SemaphoreOperation : u8 {
Acquire = 1,
Release = 2,
AcqGeq = 4,
AcqAnd = 8,
Reduction = 16,
};
enum class SemaphoreAcquireSwitch : u8 {
Disabled = 0,
Enabled = 1,
};
enum class SemaphoreReleaseWfi : u8 {
En = 0,
Dis = 1,
};
enum class SemaphoreReleaseSize : u8 {
SixteenBytes = 0,
FourBytes = 1,
};
enum class SemaphoreReduction : u8 {
Min = 0,
Max = 1,
Xor = 2,
And = 3,
Or = 4,
Add = 5,
Inc = 6,
Dec = 7,
};
enum class SemaphoreFormat : u8 {
Signed = 0,
Unsigned = 1,
};
enum class MemOpTlbInvalidatePdb : u8 {
One = 0,
All = 1,
};
enum class SyncpointOperation : u8 {
Wait = 0,
Incr = 1,
};
enum class SyncpointWaitSwitch : u8 {
Dis = 0,
En = 1,
};
enum class WfiScope : u8 {
CurrentScgType = 0,
All = 1,
};
enum class YieldOp : u8 {
Nop = 0,
PbdmaTimeslice = 1,
RunlistTimeslice = 2,
Tsg = 3,
};
struct {
struct {
u16 nvClass : 16;
u8 engine : 5;
u16 _pad_ : 11;
} setObject;
u32 illegal;
u32 nop;
u32 _pad0_;
struct {
struct {
u32 offsetUpper : 8;
u32 _pad0_ : 24;
};
struct {
u8 _pad1_ : 2;
u32 offsetLower : 30;
};
u32 payload;
struct {
SemaphoreOperation operation : 5;
u8 _pad2_ : 7;
SemaphoreAcquireSwitch acquireSwitch : 1;
u8 _pad3_ : 7;
SemaphoreReleaseWfi releaseWfi : 1;
u8 _pad4_ : 3;
SemaphoreReleaseSize releaseSize : 1;
u8 _pad5_ : 2;
SemaphoreReduction reduction : 4;
SemaphoreFormat format : 1;
};
} semaphore;
u32 nonStallInterrupt;
u32 fbFlush;
u32 _pad1_[2];
u32 memOpC;
u32 memOpD;
u32 _pad2_[6];
u32 setReference;
u32 _pad3_[7];
struct {
u32 payload;
struct {
SyncpointOperation operation : 1;
u8 _pad0_ : 3;
SyncpointWaitSwitch waitSwitch : 1;
u8 _pad1_ : 3;
u16 index : 12;
u16 _pad2_ : 12;
};
} syncpoint;
struct {
WfiScope scope : 1;
u32 _pad_ : 31;
} wfi;
u32 crcCheck;
struct {
YieldOp op : 2;
u32 _pad_ : 30;
} yield;
};
} registers{};
static_assert(sizeof(Registers) == (constant::GpfifoRegisterCount * sizeof(u32)));
#pragma pack(pop)
public:
GPFIFO(const DeviceState &state) : Engine(state) {}
/**
 * @brief Logs the method call and stores the argument into the register selected by the method index
 * @param params Supplies 'method' (used as the index into the raw register array) and 'argument' (the value written)
 * @note The index is bounds-checked via std::array::at, a method index outside of the register array throws std::out_of_range rather than writing past the end of the register file
 */
void CallMethod(MethodParams params) override {
    state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument);
    registers.raw.at(params.method) = params.argument;
}
};
}
}

View File

@ -1,575 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <gpu/macro_interpreter.h>
#include "engine.h"
#define MAXWELL3D_OFFSET(field) U32_OFFSET(skyline::gpu::engine::Maxwell3D::Registers, field)
namespace skyline {
namespace constant {
constexpr u32 Maxwell3DRegisterCounter{0xE00}; //!< The number of Maxwell 3D registers
}
namespace gpu::engine {
/**
* @brief The Maxwell 3D engine handles processing 3D graphics
*/
class Maxwell3D : public Engine {
private:
std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
struct {
u32 index;
std::vector<u32> arguments;
} macroInvocation{}; //!< Data for a macro that is pending execution
MacroInterpreter macroInterpreter;
void HandleSemaphoreCounterOperation();
void WriteSemaphoreResult(u64 result);
public:
/**
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
*/
#pragma pack(push, 1)
union Registers {
std::array<u32, constant::Maxwell3DRegisterCounter> raw;
/**
 * @brief A 64-bit address held as two consecutive 32-bit registers, with the upper half stored first
 */
struct Address {
    u32 high; //!< The upper 32 bits of the address
    u32 low; //!< The lower 32 bits of the address

    /**
     * @return The full 64-bit address with 'high' in bits 32-63 and 'low' in bits 0-31
     * @note This doesn't modify the object, so it is const-qualified and usable on const register views
     */
    constexpr u64 Pack() const {
        return (static_cast<u64>(high) << 32) | low;
    }
};
static_assert(sizeof(Address) == sizeof(u64));
enum class MmeShadowRamControl : u32 {
MethodTrack = 0,
MethodTrackWithFilter = 1,
MethodPassthrough = 2,
MethodReplay = 3,
};
struct ViewportTransform {
enum class Swizzle : u8 {
PositiveX = 0,
NegativeX = 1,
PositiveY = 2,
NegativeY = 3,
PositiveZ = 4,
NegativeZ = 5,
PositiveW = 6,
NegativeW = 7,
};
float scaleX;
float scaleY;
float scaleZ;
float translateX;
float translateY;
float translateZ;
struct {
Swizzle x : 3;
u8 _pad0_ : 1;
Swizzle y : 3;
u8 _pad1_ : 1;
Swizzle z : 3;
u8 _pad2_ : 1;
Swizzle w : 3;
u32 _pad3_ : 17;
} swizzles;
struct {
u8 x : 5;
u8 _pad0_ : 3;
u8 y : 5;
u32 _pad1_ : 19;
} subpixelPrecisionBias;
};
static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
struct Viewport {
struct {
u16 x;
u16 width;
};
struct {
u16 y;
u16 height;
};
float depthRangeNear;
float depthRangeFar;
};
static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
enum class PolygonMode : u32 {
Point = 0x1B00,
Line = 0x1B01,
Fill = 0x1B02,
};
union VertexAttribute {
u32 raw;
enum class Size : u8 {
Size_1x32 = 0x12,
Size_2x32 = 0x04,
Size_3x32 = 0x02,
Size_4x32 = 0x01,
Size_1x16 = 0x1B,
Size_2x16 = 0x0F,
Size_3x16 = 0x05,
Size_4x16 = 0x03,
Size_1x8 = 0x1D,
Size_2x8 = 0x18,
Size_3x8 = 0x13,
Size_4x8 = 0x0A,
Size_10_10_10_2 = 0x30,
Size_11_11_10 = 0x31,
};
enum class Type : u8 {
None = 0,
SNorm = 1,
UNorm = 2,
SInt = 3,
UInt = 4,
UScaled = 5,
SScaled = 6,
Float = 7,
};
struct {
u8 bufferId : 5;
u8 _pad0_ : 1;
bool fixed : 1;
u16 offset : 14;
Size size : 6;
Type type : 3;
u8 _pad1_ : 1;
bool bgra : 1;
};
};
static_assert(sizeof(VertexAttribute) == sizeof(u32));
enum class CompareOp : u32 {
Never = 1,
Less = 2,
Equal = 3,
LessOrEqual = 4,
Greater = 5,
NotEqual = 6,
GreaterOrEqual = 7,
Always = 8,
NeverGL = 0x200,
LessGL = 0x201,
EqualGL = 0x202,
LessOrEqualGL = 0x203,
GreaterGL = 0x204,
NotEqualGL = 0x205,
GreaterOrEqualGL = 0x206,
AlwaysGL = 0x207,
};
struct Blend {
enum class Op : u32 {
Add = 1,
Subtract = 2,
ReverseSubtract = 3,
Minimum = 4,
Maximum = 5,
AddGL = 0x8006,
SubtractGL = 0x8007,
ReverseSubtractGL = 0x8008,
MinimumGL = 0x800A,
MaximumGL = 0x800B,
};
enum class Factor : u32 {
Zero = 0x1,
One = 0x2,
SourceColor = 0x3,
OneMinusSourceColor = 0x4,
SourceAlpha = 0x5,
OneMinusSourceAlpha = 0x6,
DestAlpha = 0x7,
OneMinusDestAlpha = 0x8,
DestColor = 0x9,
OneMinusDestColor = 0xA,
SourceAlphaSaturate = 0xB,
Source1Color = 0x10,
OneMinusSource1Color = 0x11,
Source1Alpha = 0x12,
OneMinusSource1Alpha = 0x13,
ConstantColor = 0x61,
OneMinusConstantColor = 0x62,
ConstantAlpha = 0x63,
OneMinusConstantAlpha = 0x64,
ZeroGL = 0x4000,
OneGL = 0x4001,
SourceColorGL = 0x4300,
OneMinusSourceColorGL = 0x4301,
SourceAlphaGL = 0x4302,
OneMinusSourceAlphaGL = 0x4303,
DestAlphaGL = 0x4304,
OneMinusDestAlphaGL = 0x4305,
DestColorGL = 0x4306,
OneMinusDestColorGL = 0x4307,
SourceAlphaSaturateGL = 0x4308,
ConstantColorGL = 0xC001,
OneMinusConstantColorGL = 0xC002,
ConstantAlphaGL = 0xC003,
OneMinusConstantAlphaGL = 0xC004,
Source1ColorGL = 0xC900,
OneMinusSource1ColorGL = 0xC901,
Source1AlphaGL = 0xC902,
OneMinusSource1AlphaGL = 0xC903,
};
struct {
u32 seperateAlpha;
Op colorOp;
Factor colorSrcFactor;
Factor colorDestFactor;
Op alphaOp;
Factor alphaSrcFactor;
Factor alphaDestFactor;
u32 _pad_;
};
};
static_assert(sizeof(Blend) == (sizeof(u32) * 8));
enum class StencilOp : u32 {
Keep = 1,
Zero = 2,
Replace = 3,
IncrementAndClamp = 4,
DecrementAndClamp = 5,
Invert = 6,
IncrementAndWrap = 7,
DecrementAndWrap = 8,
};
enum class FrontFace : u32 {
Clockwise = 0x900,
CounterClockwise = 0x901,
};
enum class CullFace : u32 {
Front = 0x404,
Back = 0x405,
FrontAndBack = 0x408,
};
union ColorWriteMask {
u32 raw;
struct {
u8 r : 4;
u8 g : 4;
u8 b : 4;
u8 a : 4;
};
};
static_assert(sizeof(ColorWriteMask) == sizeof(u32));
struct SemaphoreInfo {
enum class Op : u8 {
Release = 0,
Acquire = 1,
Counter = 2,
Trap = 3,
};
enum class ReductionOp : u8 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
};
enum class Unit : u8 {
VFetch = 1,
VP = 2,
Rast = 4,
StrmOut = 5,
GP = 6,
ZCull = 7,
Prop = 10,
Crop = 15,
};
enum class SyncCondition : u8 {
NotEqual = 0,
GreaterThan = 1,
};
enum class Format : u8 {
U32 = 0,
I32 = 1,
};
enum class CounterType : u8 {
Zero = 0x0,
InputVertices = 0x1,
InputPrimitives = 0x3,
VertexShaderInvocations = 0x5,
GeometryShaderInvocations = 0x7,
GeometryShaderPrimitives = 0x9,
ZcullStats0 = 0xA,
TransformFeedbackPrimitivesWritten = 0xB,
ZcullStats1 = 0xC,
ZcullStats2 = 0xE,
ClipperInputPrimitives = 0xF,
ZcullStats3 = 0x10,
ClipperOutputPrimitives = 0x11,
PrimitivesGenerated = 0x12,
FragmentShaderInvocations = 0x13,
SamplesPassed = 0x15,
TransformFeedbackOffset = 0x1A,
TessControlShaderInvocations = 0x1B,
TessEvaluationShaderInvocations = 0x1D,
TessEvaluationShaderPrimitives = 0x1F,
};
enum class StructureSize : u8 {
FourWords = 0,
OneWord = 1,
};
Op op : 2;
bool flushDisable : 1;
bool reductionEnable : 1;
bool fenceEnable : 1;
u8 _pad0_ : 4;
ReductionOp reductionOp : 3;
Unit unit : 4;
SyncCondition syncCondition : 1;
Format format : 2;
u8 _pad1_ : 1;
bool awakenEnable : 1;
u8 _pad2_ : 2;
CounterType counterType : 5;
StructureSize structureSize : 1;
};
static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
enum class CoordOrigin : u8 {
LowerLeft = 0,
UpperLeft = 1,
};
/**
* @brief The named layout of the registers, overlaid on 'raw' by the enclosing union
* @note The trailing comment on each field is its offset into 'raw' in units of u32, '_padN_' members only exist to place the following field at its offset
*/
struct {
u32 _pad0_[0x40]; // 0x0
u32 noOperation; // 0x40
u32 _pad1_[0x3]; // 0x41
u32 waitForIdle; // 0x44
struct {
u32 instructionRamPointer; // 0x45
u32 instructionRamLoad; // 0x46
u32 startAddressRamPointer; // 0x47
u32 startAddressRamLoad; // 0x48
MmeShadowRamControl shadowRamControl; // 0x49
} mme;
u32 _pad2_[0x68]; // 0x4A
struct {
u16 id : 12;
u8 _pad0_ : 4;
bool flushCache : 1;
u8 _pad1_ : 3;
bool increment : 1;
u16 _pad2_ : 11;
} syncpointAction; // 0xB2
u32 _pad3_[0x2C]; // 0xB3
u32 rasterizerEnable; // 0xDF
u32 _pad4_[0x1A0]; // 0xE0
std::array<ViewportTransform, 0x10> viewportTransform; // 0x280
std::array<Viewport, 0x10> viewport; // 0x300
u32 _pad5_[0x2B]; // 0x340
struct {
PolygonMode front; // 0x36B
PolygonMode back; // 0x36C
} polygonMode;
u32 _pad6_[0x68]; // 0x36D
struct {
u32 compareRef; // 0x3D5
u32 writeMask; // 0x3D6
u32 compareMask; // 0x3D7
} stencilBackExtra;
u32 _pad7_[0x13]; // 0x3D8
u32 rtSeparateFragData; // 0x3EB
u32 _pad8_[0x6C]; // 0x3EC
std::array<VertexAttribute, 0x20> vertexAttributeState; // 0x458
u32 _pad9_[0x4B]; // 0x478
CompareOp depthTestFunc; // 0x4C3
float alphaTestRef; // 0x4C4
CompareOp alphaTestFunc; // 0x4C5
u32 drawTFBStride; // 0x4C6
struct {
float r; // 0x4C7
float g; // 0x4C8
float b; // 0x4C9
float a; // 0x4CA
} blendConstant;
u32 _pad10_[0x4]; // 0x4CB
struct {
u32 seperateAlpha; // 0x4CF
Blend::Op colorOp; // 0x4D0
Blend::Factor colorSrcFactor; // 0x4D1
Blend::Factor colorDestFactor; // 0x4D2
Blend::Op alphaOp; // 0x4D3
Blend::Factor alphaSrcFactor; // 0x4D4
u32 _pad_; // 0x4D5
Blend::Factor alphaDestFactor; // 0x4D6
u32 enableCommon; // 0x4D7
std::array<u32, 8> enable; // 0x4D8 For each render target
} blend;
u32 stencilEnable; // 0x4E0
struct {
StencilOp failOp; // 0x4E1
StencilOp zFailOp; // 0x4E2
StencilOp zPassOp; // 0x4E3
struct {
CompareOp op; // 0x4E4
i32 ref; // 0x4E5
u32 mask; // 0x4E6
} compare;
u32 writeMask; // 0x4E7
} stencilFront;
u32 _pad11_[0x4]; // 0x4E8
float lineWidthSmooth; // 0x4EC
float lineWidthAliased; // 0x4ED
u32 _pad12_[0x1F]; // 0x4EE
u32 drawBaseVertex; // 0x50D
u32 drawBaseInstance; // 0x50E
u32 _pad13_[0x35]; // 0x50F
u32 clipDistanceEnable; // 0x544
u32 sampleCounterEnable; // 0x545
float pointSpriteSize; // 0x546
u32 zCullStatCountersEnable; // 0x547
u32 pointSpriteEnable; // 0x548
u32 _pad14_; // 0x549
u32 shaderExceptions; // 0x54A
u32 _pad15_[0x2]; // 0x54B
u32 multisampleEnable; // 0x54D
u32 depthTargetEnable; // 0x54E
struct {
bool alphaToCoverage : 1;
u8 _pad0_ : 3;
bool alphaToOne : 1;
u32 _pad1_ : 27;
} multisampleControl; // 0x54F
u32 _pad16_[0x7]; // 0x550
struct {
Address address; // 0x557
u32 maximumIndex; // 0x559
} texSamplerPool;
u32 _pad17_; // 0x55A
u32 polygonOffsetFactor; // 0x55B
u32 lineSmoothEnable; // 0x55C
struct {
Address address; // 0x55D
u32 maximumIndex; // 0x55F
} texHeaderPool;
u32 _pad18_[0x5]; // 0x560
u32 stencilTwoSideEnable; // 0x565
struct {
StencilOp failOp; // 0x566
StencilOp zFailOp; // 0x567
StencilOp zPassOp; // 0x568
CompareOp compareOp; // 0x569
} stencilBack;
u32 _pad19_[0x17]; // 0x56A
struct {
u8 _unk_ : 2;
CoordOrigin origin : 1;
u16 enable : 10;
u32 _pad_ : 19;
} pointCoordReplace; // 0x581
u32 _pad20_[0xC4]; // 0x582
u32 cullFaceEnable; // 0x646
FrontFace frontFace; // 0x647
CullFace cullFace; // 0x648
u32 pixelCentreImage; // 0x649
u32 _pad21_; // 0x64A
u32 viewportTransformEnable; // 0x64B
u32 _pad22_[0x34]; // 0x64C
std::array<ColorWriteMask, 8> colorMask; // 0x680 For each render target
u32 _pad23_[0x38]; // 0x688
struct {
Address address; // 0x6C0
u32 payload; // 0x6C2
SemaphoreInfo info; // 0x6C3
} semaphore;
u32 _pad24_[0xBC]; // 0x6C4
std::array<Blend, 8> independentBlend; // 0x780 For each render target
u32 _pad25_[0x100]; // 0x7C0
u32 firmwareCall[0x20]; // 0x8C0
};
};
static_assert(sizeof(Registers) == (constant::Maxwell3DRegisterCounter * sizeof(u32)));
#pragma pack(pop)
Registers registers{};
Registers shadowRegisters{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
std::array<u32, 0x10000> macroCode{}; //!< This stores GPU macros, the 256kb size is from Ryujinx
Maxwell3D(const DeviceState &state);
/**
* @brief Resets the Maxwell 3D registers to their default values
*/
void ResetRegs();
void CallMethod(MethodParams params) override;
};
}
}

View File

@ -1,170 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common/circular_queue.h>
#include "engines/gpfifo.h"
#include "memory_manager.h"
namespace skyline::gpu {
namespace gpfifo {
/**
* @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
* @note The entry is made up of two 32-bit words, each of which can be accessed either raw ('entry0'/'entry1') or through the bitfields overlaid on it
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
*/
struct GpEntry {
enum class Fetch : u8 {
Unconditional = 0,
Conditional = 1,
};
union {
u32 entry0; //!< The first word of the entry as a raw value
struct {
Fetch fetch : 1;
u8 _pad_ : 1;
u32 get : 30; //!< The lower part of the address without its bottom two bits, Address() shifts this left by 2
};
};
enum class Opcode : u8 {
Nop = 0,
Illegal = 1,
Crc = 2,
PbCrc = 3,
};
enum class Priv : u8 {
User = 0,
Kernel = 1,
};
enum class Level : u8 {
Main = 0,
Subroutine = 1,
};
enum class Sync : u8 {
Proceed = 0,
Wait = 1,
};
union {
u32 entry1; //!< The second word of the entry as a raw value
struct {
union {
u8 getHi; //!< The upper part of the address, Address() places this at bit 32 and above
Opcode opcode; //!< Shares its storage with 'getHi' (NOTE(review): which of the two is meaningful for a given entry isn't determined by anything in this struct - confirm against the referenced documentation)
};
Priv priv : 1;
Level level : 1;
u32 size : 21; //!< NOTE(review): presumably the length of the referenced pushbuffer, the unit isn't established here - confirm
Sync sync : 1;
};
};
/**
* @return The 64-bit address formed from 'getHi' shifted left by 32 combined with 'get' shifted left by 2
*/
constexpr u64 Address() const {
return (static_cast<u64>(getHi) << 32) | (static_cast<u64>(get) << 2);
}
};
static_assert(sizeof(GpEntry) == sizeof(u64));
/**
* @brief A single pushbuffer method header that describes a compressed method sequence
* @note This is a union, 'raw' and every bitfield group below are alternative views of the same 32-bit word with '_padN_' members skipping over the bits that belong to another view
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
*/
union PushBufferMethodHeader {
u32 raw; //!< The entire header as a raw value
// Grp0IncMethod and Grp2NonIncMethod intentionally share the value 0
// NOTE(review): the names suggest that which one applies depends on whether SecOp is Grp0UseTert or Grp2UseTert - confirm against the referenced documentation
enum class TertOp : u8 {
Grp0IncMethod = 0,
Grp0SetSubDevMask = 1,
Grp0StoreSubDevMask = 2,
Grp0UseSubDevMask = 3,
Grp2NonIncMethod = 0,
};
enum class SecOp : u8 {
Grp0UseTert = 0,
IncMethod = 1,
Grp2UseTert = 2,
NonIncMethod = 3,
ImmdDataMethod = 4,
OneInc = 5,
Reserved6 = 6,
EndPbSegment = 7,
};
u16 methodAddress : 12;
struct {
u8 _pad0_ : 4;
u16 subDeviceMask : 12;
};
struct {
u16 _pad1_ : 13;
u8 methodSubChannel : 3;
// The three members below overlap one another, only one of them is meaningful for any given header
union {
TertOp tertOp : 3;
u16 methodCount : 13;
u16 immdData : 13;
};
};
struct {
u32 _pad2_ : 29;
SecOp secOp : 3;
};
};
static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
/**
* @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
*/
class GPFIFO {
const DeviceState &state;
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
std::array<std::shared_ptr<engine::Engine>, 8> subchannels; //!< One engine slot per subchannel (NOTE(review): presumably the engine currently bound to that subchannel - confirm against the implementation)
std::optional<CircularQueue<GpEntry>> pushBuffers; //!< The queue of GP entries, this is empty until it is constructed (NOTE(review): presumably by Initialize - confirm)
std::thread thread; //!< The thread that manages processing of pushbuffers
std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
/**
* @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
*/
void Process(GpEntry gpEntry);
/**
* @brief Sends a method call to the GPU hardware
*/
void Send(MethodParams params);
public:
GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}
~GPFIFO();
/**
* @param numBuffers The amount of push-buffers to allocate in the circular buffer
*/
void Initialize(size_t numBuffers);
/**
* @brief Executes all pending entries in the FIFO
*/
void Run();
/**
* @brief Pushes a list of entries to the FIFO for later execution
* @note No 'Step' method is declared on this class, the pushed entries are presumably consumed by Run() - confirm against the implementation
*/
void Push(span<GpEntry> entries);
};
}
}

View File

@ -1,145 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline {
namespace constant {
constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
}
namespace gpu::vmm {
/**
* @brief The mapping state of a chunk of the virtual address space
*/
enum class ChunkState {
Unmapped, //!< The chunk is unmapped
Reserved, //!< The chunk is reserved
Mapped //!< The chunk is mapped and a CPU side address is present
};
/**
 * @brief Describes a single contiguous chunk of the virtual address space alongside its backing (if any)
 */
struct ChunkDescriptor {
    u64 virtAddr; //!< The address of the chunk in the virtual address space
    u64 size; //!< The size of the chunk in bytes
    u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
    ChunkState state;

    ChunkDescriptor(u64 virtAddr, u64 size, u8 *cpuPtr, ChunkState state) : virtAddr(virtAddr), size(size), cpuPtr(cpuPtr), state(state) {}

    /**
     * @param chunk The chunk to test against this one
     * @return If the given chunk can be contained wholly within this chunk
     * @note Offsets are compared rather than end addresses as 'virtAddr + size' can wrap around for chunks that end near the top of the 64-bit address space
     */
    bool CanContain(const ChunkDescriptor &chunk) const {
        // A chunk that starts before us or is larger than us can never fit, this also keeps the subtractions below from underflowing
        if (chunk.virtAddr < virtAddr || chunk.size > size)
            return false;

        // Equivalent to (chunk.virtAddr + chunk.size) <= (virtAddr + size) but immune to overflow
        return (chunk.virtAddr - virtAddr) <= (size - chunk.size);
    }
};
/**
* @brief The MemoryManager class handles mapping between a virtual address space and an application's address space
*/
class MemoryManager {
private:
const DeviceState &state;
std::vector<ChunkDescriptor> chunks; //!< The chunks that the virtual address space is currently divided into
std::shared_mutex vmmMutex; //!< Must be held while calling FindChunk or InsertChunk
/**
* @brief Finds a chunk in the virtual address space that meets the given requirements
* @note vmmMutex MUST be locked when calling this
* @param desiredState The state of the chunk to find
* @param size The minimum size of the chunk to find
* @param alignment The minimum alignment of the chunk to find
* @return The first applicable chunk
*/
std::optional<ChunkDescriptor> FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
/**
* @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
* @note vmmMutex MUST be locked when calling this
* @param newChunk The chunk to insert
* @return The base virtual address of the inserted chunk
*/
u64 InsertChunk(const ChunkDescriptor &newChunk);
public:
MemoryManager(const DeviceState &state);
/**
* @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
* @param size The size of the region to reserve
* @param alignment The alignment of the region to reserve
* @return The base virtual address of the reserved region
*/
u64 ReserveSpace(u64 size, u64 alignment);
/**
* @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
* @param virtAddr The virtual base address of the region to allocate
* @param size The size of the region to allocate
* @return The base virtual address of the reserved region
*/
u64 ReserveFixed(u64 virtAddr, u64 size);
/**
* @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
* @param size The size of the region to map
* @return The base virtual address of the mapped region
*/
u64 MapAllocate(u8 *cpuPtr, u64 size);
/**
* @brief Maps a CPU memory region to a fixed region in the virtual address space
* @param virtAddr The target virtual address of the region
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
* @param size The size of the region to map
* @return The base virtual address of the mapped region
*/
u64 MapFixed(u64 virtAddr, u8 *cpuPtr, u64 size);
/**
* @brief Unmaps all chunks in the given region from the virtual address space
* @param virtAddr The virtual base address of the region to unmap
* @param size The size of the region to unmap
* @return Whether the operation succeeded
*/
bool Unmap(u64 virtAddr, u64 size);
/**
* @brief Reads in a region of the virtual address space into a raw buffer
* @param destination The buffer to read into, the span/object overloads below pass a buffer of exactly 'size' bytes
* @param virtAddr The virtual address to start reading from
* @param size The amount of bytes to read
*/
void Read(u8 *destination, u64 virtAddr, u64 size);
/**
* @brief Reads in a span from a region of the virtual address space
*/
template<typename T>
void Read(span <T> destination, u64 virtAddr) {
Read(reinterpret_cast<u8 *>(destination.data()), virtAddr, destination.size_bytes());
}
/**
* @brief Reads in an object from a region of the virtual address space
* @tparam T The type of object to return
*/
template<typename T>
T Read(u64 virtAddr) {
T obj;
Read(reinterpret_cast<u8 *>(&obj), virtAddr, sizeof(T));
return obj;
}
/**
* @brief Writes out a raw buffer to a region of the virtual address space
* @param source The buffer to write from, the span/object overloads below pass a buffer of exactly 'size' bytes
* @param virtAddr The virtual address to start writing to
* @param size The amount of bytes to write
*/
void Write(u8 *source, u64 virtAddr, u64 size);
/**
* @brief Writes out a span to a region of the virtual address space
*/
template<typename T>
void Write(span <T> source, u64 virtAddr) {
Write(reinterpret_cast<u8 *>(source.data()), virtAddr, source.size_bytes());
}
/**
* @brief Writes out an object to a region of the virtual address space
* @note The object is taken by value, the bytes of that local copy are what is written
*/
template<typename T>
void Write(T source, u64 virtAddr) {
Write(reinterpret_cast<u8 *>(&source), virtAddr, sizeof(T));
}
};
}
}

View File

@ -9,7 +9,7 @@ extern skyline::u16 Fps;
extern skyline::u32 FrameTime;
namespace skyline::gpu {
PresentationEngine::PresentationEngine(const DeviceState &state) : state(state), vsyncEvent(std::make_shared<kernel::type::KEvent>(state, true)), bufferEvent(std::make_shared<kernel::type::KEvent>(state, true)), presentationTrack(static_cast<uint64_t>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
PresentationEngine::PresentationEngine(const DeviceState &state) : state(state), vsyncEvent(std::make_shared<kernel::type::KEvent>(state, true)), bufferEvent(std::make_shared<kernel::type::KEvent>(state, true)), presentationTrack(static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
auto desc{presentationTrack.Serialize()};
desc.set_name("Presentation");
perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc);

View File

@ -1,56 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline {
namespace constant {
constexpr size_t MaxHwSyncpointCount{192}; //!< The maximum number of host1x syncpoints on T210
}
namespace gpu {
/**
* @brief The Syncpoint class represents a single syncpoint in the GPU which is used for GPU -> CPU synchronisation
*/
class Syncpoint {
private:
/**
* @brief A pending request to be notified once the syncpoint reaches a certain value
*/
struct Waiter {
u32 threshold; //!< The syncpoint value to wait on to be reached
std::function<void()> callback; //!< The callback to do after the wait has ended
};
std::mutex waiterLock; //!< Synchronizes insertions and deletions of waiters
std::map<u64, Waiter> waiterMap; //!< The pending waiters, keyed by the identifier that RegisterWaiter returns
u64 nextWaiterId{1}; //!< Starts at 1 as 0 is what RegisterWaiter returns when no waiter had to be registered (presumably the next identifier to hand out, TODO confirm)
public:
std::atomic<u32> value{}; //!< The current value of the syncpoint, it is public and is loaded directly by other components
/**
* @brief Registers a new waiter with a callback that will be called when the syncpoint reaches the target threshold
* @note The callback will be called immediately if the syncpoint has already reached the given threshold
* @return A persistent identifier that can be used to refer to the waiter, or 0 if the threshold has already been reached
*/
u64 RegisterWaiter(u32 threshold, const std::function<void()> &callback);
/**
* @brief Removes a waiter given by 'id' from the pending waiter map
*/
void DeregisterWaiter(u64 id);
/**
* @brief Increments the syncpoint by 1
* @return The new value of the syncpoint
*/
u32 Increment();
/**
* @brief Waits for the syncpoint to reach given threshold
* @param threshold The syncpoint value to wait for
* @param timeout The maximum duration to wait for
* @return false if the timeout was reached, otherwise true
*/
bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout);
};
}
}

View File

@ -1,7 +1,7 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <soc.h>
#include <services/nvdrv/driver.h>
#include "nvmap.h"
#include "nvhost_as_gpu.h"
@ -36,9 +36,9 @@ namespace skyline::service::nvdrv::device {
u64 size{static_cast<u64>(region.pages) * static_cast<u64>(region.pageSize)};
if (region.flags.fixed)
region.offset = state.gpu->memoryManager.ReserveFixed(region.offset, size);
region.offset = state.soc->gmmu.ReserveFixed(region.offset, size);
else
region.offset = state.gpu->memoryManager.ReserveSpace(size, region.align);
region.offset = state.soc->gmmu.ReserveSpace(size, region.align);
if (region.offset == 0) {
state.logger->Warn("Failed to allocate GPU address space region!");
@ -56,7 +56,7 @@ namespace skyline::service::nvdrv::device {
// Non-fixed regions are unmapped so that they can be used by future non-fixed mappings
if (!region.fixed)
if (!state.gpu->memoryManager.Unmap(offset, region.size))
if (!state.soc->gmmu.Unmap(offset, region.size))
state.logger->Warn("Failed to unmap region at 0x{:X}", offset);
regionMap.erase(offset);
@ -94,7 +94,7 @@ namespace skyline::service::nvdrv::device {
u64 gpuAddress{data.offset + data.bufferOffset};
u8 *cpuPtr{region->second.ptr + data.bufferOffset};
if (!state.gpu->memoryManager.MapFixed(gpuAddress, cpuPtr, data.mappingSize)) {
if (!state.soc->gmmu.MapFixed(gpuAddress, cpuPtr, data.mappingSize)) {
state.logger->Warn("Failed to remap GPU address space region: 0x{:X}", gpuAddress);
return NvStatus::BadParameter;
}
@ -110,9 +110,9 @@ namespace skyline::service::nvdrv::device {
u64 size{data.mappingSize ? data.mappingSize : mapping->size};
if (data.flags.fixed)
data.offset = state.gpu->memoryManager.MapFixed(data.offset, cpuPtr, size);
data.offset = state.soc->gmmu.MapFixed(data.offset, cpuPtr, size);
else
data.offset = state.gpu->memoryManager.MapAllocate(cpuPtr, size);
data.offset = state.soc->gmmu.MapAllocate(cpuPtr, size);
if (data.offset == 0) {
state.logger->Warn("Failed to map GPU address space region!");
@ -184,7 +184,7 @@ namespace skyline::service::nvdrv::device {
u8 *cpuPtr{mapping->ptr + (static_cast<u64>(entry.mapOffset) << MinAlignmentShift)};
u64 size{static_cast<u64>(entry.pages) << MinAlignmentShift};
state.gpu->memoryManager.MapFixed(virtAddr, cpuPtr, size);
state.soc->gmmu.MapFixed(virtAddr, cpuPtr, size);
} catch (const std::out_of_range &) {
state.logger->Warn("Invalid NvMap handle: 0x{:X}", entry.nvmapHandle);
return NvStatus::BadParameter;

View File

@ -1,8 +1,8 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <soc.h>
#include <kernel/types/KProcess.h>
#include <gpu.h>
#include <services/nvdrv/driver.h>
#include "nvhost_channel.h"
@ -25,7 +25,7 @@ namespace skyline::service::nvdrv::device {
NvStatus NvHostChannel::SubmitGpfifo(IoctlType type, span<u8> buffer, span<u8> inlineBuffer) {
struct Data {
gpu::gpfifo::GpEntry *entries; // In
soc::gm20b::GpEntry *entries; // In
u32 numEntries; // In
union {
struct __attribute__((__packed__)) {
@ -53,9 +53,9 @@ namespace skyline::service::nvdrv::device {
throw exception("Waiting on a fence through SubmitGpfifo is unimplemented");
}
state.gpu->gpfifo.Push([&]() {
state.soc->gm20b.gpfifo.Push([&]() {
if (type == IoctlType::Ioctl2)
return inlineBuffer.cast<gpu::gpfifo::GpEntry>();
return inlineBuffer.cast<soc::gm20b::GpEntry>();
else
return span(data.entries, data.numEntries);
}());
@ -110,7 +110,7 @@ namespace skyline::service::nvdrv::device {
u32 _res_[3]; // In
} &data = buffer.as<Data>();
state.gpu->gpfifo.Initialize(data.numEntries);
state.soc->gm20b.gpfifo.Initialize(data.numEntries);
auto driver{nvdrv::driver.lock()};
channelFence.UpdateValue(driver->hostSyncpoint);

View File

@ -2,7 +2,7 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2019-2020 Ryujinx Team and Contributors
#include <gpu.h>
#include <soc.h>
#include <kernel/types/KProcess.h>
#include <services/nvdrv/driver.h>
#include "nvhost_ctrl.h"
@ -46,20 +46,20 @@ namespace skyline::service::nvdrv::device {
state = State::Signalled;
}
void SyncpointEvent::Cancel(const std::shared_ptr<gpu::GPU> &gpuState) {
void SyncpointEvent::Cancel(soc::host1x::Host1X &host1x) {
std::lock_guard lock(mutex);
gpuState->syncpoints.at(fence.id).DeregisterWaiter(waiterId);
host1x.syncpoints.at(fence.id).DeregisterWaiter(waiterId);
Signal();
event->ResetSignal();
}
void SyncpointEvent::Wait(const std::shared_ptr<gpu::GPU> &gpuState, const Fence &pFence) {
void SyncpointEvent::Wait(soc::host1x::Host1X &host1x, const Fence &pFence) {
std::lock_guard lock(mutex);
fence = pFence;
state = State::Waiting;
waiterId = gpuState->syncpoints.at(fence.id).RegisterWaiter(fence.value, [this] { Signal(); });
waiterId = host1x.syncpoints.at(fence.id).RegisterWaiter(fence.value, [this] { Signal(); });
}
NvHostCtrl::NvHostCtrl(const DeviceState &state) : NvDevice(state) {}
@ -105,7 +105,7 @@ namespace skyline::service::nvdrv::device {
SyncpointEventValue value; // InOut
} &data = buffer.as<Data>();
if (data.fence.id >= constant::MaxHwSyncpointCount)
if (data.fence.id >= soc::host1x::SyncpointCount)
return NvStatus::BadValue;
if (data.timeout == 0)
@ -149,7 +149,7 @@ namespace skyline::service::nvdrv::device {
if (event->state == SyncpointEvent::State::Cancelled || event->state == SyncpointEvent::State::Available || event->state == SyncpointEvent::State::Signalled) {
state.logger->Debug("Waiting on syncpoint event: {} with fence: ({}, {})", eventSlot, data.fence.id, data.fence.value);
event->Wait(state.gpu, data.fence);
event->Wait(state.soc->host1x, data.fence);
data.value.val = 0;
@ -189,7 +189,7 @@ namespace skyline::service::nvdrv::device {
if (event->state == SyncpointEvent::State::Waiting) {
event->state = SyncpointEvent::State::Cancelling;
state.logger->Debug("Cancelling waiting syncpoint event: {}", eventSlot);
event->Cancel(state.gpu);
event->Cancel(state.soc->host1x);
}
event->state = SyncpointEvent::State::Cancelled;

View File

@ -41,12 +41,12 @@ namespace skyline {
/**
* @brief Removes any wait requests on a syncpoint event and resets its state
*/
void Cancel(const std::shared_ptr<gpu::GPU> &gpuState);
void Cancel(soc::host1x::Host1X &host1x);
/**
* @brief Asynchronously waits on a syncpoint event using the given fence
*/
void Wait(const std::shared_ptr<gpu::GPU> &gpuState, const Fence &fence);
void Wait(soc::host1x::Host1X &host1x, const Fence &fence);
};
/**

View File

@ -2,7 +2,7 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <soc.h>
#include "nvhost_syncpoint.h"
namespace skyline::service::nvdrv {
@ -28,7 +28,7 @@ namespace skyline::service::nvdrv {
}
u32 NvHostSyncpoint::FindFreeSyncpoint() {
for (u32 i{1}; i < constant::MaxHwSyncpointCount; i++)
for (u32 i{1}; i < syncpoints.size(); i++)
if (!syncpoints[i].reserved)
return i;
@ -71,7 +71,7 @@ namespace skyline::service::nvdrv {
if (!syncpoints.at(id).reserved)
throw exception("Cannot update an unreserved syncpoint!");
syncpoints.at(id).counterMin = state.gpu->syncpoints.at(id).value.load();
syncpoints.at(id).counterMin = state.soc->host1x.syncpoints.at(id).value.load();
return syncpoints.at(id).counterMin;
}
}

View File

@ -3,7 +3,7 @@
#pragma once
#include <gpu/syncpoint.h>
#include <soc/host1x.h>
namespace skyline::service::nvdrv {
/**
@ -22,7 +22,7 @@ namespace skyline::service::nvdrv {
};
const DeviceState &state;
std::array<SyncpointInfo, skyline::constant::MaxHwSyncpointCount> syncpoints{};
std::array<SyncpointInfo, soc::host1x::SyncpointCount> syncpoints{};
std::mutex reservationLock;
/**

View File

@ -0,0 +1,23 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "soc/gmmu.h"
#include "soc/host1x.h"
#include "soc/gm20b.h"
namespace skyline::soc {
/**
* @brief An interface into all emulated components of the Tegra X1 SoC
* @note Refer to the Tegra X1 Processor Block Diagram (1.2) for more information
* @note Members are constructed in declaration order: gmmu, host1x and then gm20b
*/
class SOC {
public:
gmmu::GraphicsMemoryManager gmmu; //!< The manager for the GPU virtual address space
host1x::Host1X host1x; //!< Default-constructed as it is absent from the constructor's initializer list
gm20b::GM20B gm20b; //!< The GPU block
SOC(const DeviceState &state) : gmmu(state), gm20b(state) {}
};
}

View File

@ -0,0 +1,25 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "gm20b/engines/maxwell_3d.h"
#include "gm20b/gpfifo.h"
namespace skyline::soc::gm20b {
/**
 * @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations
 * @note We omit parts of components related to external access such as the GM20B Host, all accesses to the external components are done directly
 */
class GM20B {
  public:
    engine::Engine fermi2D;
    engine::maxwell3d::Maxwell3D maxwell3D;
    engine::Engine maxwellCompute;
    engine::Engine maxwellDma;
    engine::Engine keplerMemory;
    GPFIFO gpfifo;

    /**
     * @note The initializer list mirrors the declaration order above as that is the order members are constructed in, regardless of how the list itself is written
     */
    GM20B(const DeviceState &state) : fermi2D(state), maxwell3D(state), maxwellCompute(state), maxwellDma(state), keplerMemory(state), gpfifo(state) {}
};
}

View File

@ -7,7 +7,7 @@
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
namespace skyline::gpu {
namespace skyline::soc::gm20b {
enum class EngineID {
Fermi2D = 0x902D,
KeplerMemory = 0xA140,

View File

@ -0,0 +1,176 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "engine.h"
namespace skyline::soc::gm20b::engine {
/**
 * @brief The GPFIFO engine handles managing macros and semaphores
 * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
 */
class GPFIFO : public Engine {
  public:
    static constexpr u32 RegisterCount{0x40}; //!< The number of GPFIFO registers

  private:
    /**
     * @brief The register file of the engine, accessible either as raw words or through named fields
     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65
     */
    #pragma pack(push, 1)
    union Registers {
        std::array<u32, RegisterCount> raw;

        enum class SemaphoreOperation : u8 {
            Acquire = 1,
            Release = 2,
            AcqGeq = 4,
            AcqAnd = 8,
            Reduction = 16,
        };

        enum class SemaphoreAcquireSwitch : u8 {
            Disabled = 0,
            Enabled = 1,
        };

        enum class SemaphoreReleaseWfi : u8 {
            En = 0,
            Dis = 1,
        };

        enum class SemaphoreReleaseSize : u8 {
            SixteenBytes = 0,
            FourBytes = 1,
        };

        enum class SemaphoreReduction : u8 {
            Min = 0,
            Max = 1,
            Xor = 2,
            And = 3,
            Or = 4,
            Add = 5,
            Inc = 6,
            Dec = 7,
        };

        enum class SemaphoreFormat : u8 {
            Signed = 0,
            Unsigned = 1,
        };

        enum class MemOpTlbInvalidatePdb : u8 {
            One = 0,
            All = 1,
        };

        enum class SyncpointOperation : u8 {
            Wait = 0,
            Incr = 1,
        };

        enum class SyncpointWaitSwitch : u8 {
            Dis = 0,
            En = 1,
        };

        enum class WfiScope : u8 {
            CurrentScgType = 0,
            All = 1,
        };

        enum class YieldOp : u8 {
            Nop = 0,
            PbdmaTimeslice = 1,
            RunlistTimeslice = 2,
            Tsg = 3,
        };

        // The named view of the registers, every field group below occupies whole 32-bit words
        struct {
            struct {
                u16 nvClass : 16;
                u8 engine : 5;
                u16 _pad_ : 11;
            } setObject;

            u32 illegal;
            u32 nop;
            u32 _pad0_;

            struct {
                struct {
                    u32 offsetUpper : 8;
                    u32 _pad0_ : 24;
                };

                struct {
                    u8 _pad1_ : 2;
                    u32 offsetLower : 30;
                };

                u32 payload;

                struct {
                    SemaphoreOperation operation : 5;
                    u8 _pad2_ : 7;
                    SemaphoreAcquireSwitch acquireSwitch : 1;
                    u8 _pad3_ : 7;
                    SemaphoreReleaseWfi releaseWfi : 1;
                    u8 _pad4_ : 3;
                    SemaphoreReleaseSize releaseSize : 1;
                    u8 _pad5_ : 2;
                    SemaphoreReduction reduction : 4;
                    SemaphoreFormat format : 1;
                };
            } semaphore;

            u32 nonStallInterrupt;
            u32 fbFlush;
            u32 _pad1_[2];
            u32 memOpC;
            u32 memOpD;
            u32 _pad2_[6];
            u32 setReference;
            u32 _pad3_[7];

            struct {
                u32 payload;

                struct {
                    SyncpointOperation operation : 1;
                    u8 _pad0_ : 3;
                    SyncpointWaitSwitch waitSwitch : 1;
                    u8 _pad1_ : 3;
                    u16 index : 12;
                    u16 _pad2_ : 12;
                };
            } syncpoint;

            struct {
                WfiScope scope : 1;
                u32 _pad_ : 31;
            } wfi;

            u32 crcCheck;

            struct {
                YieldOp op : 2;
                u32 _pad_ : 30;
            } yield;
        };
    } registers{};
    static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32)));
    #pragma pack(pop)

  public:
    GPFIFO(const DeviceState &state) : Engine(state) {}

    /**
     * @brief Logs the method call and stores its argument into the register addressed by the method
     * @param params The method call, 'method' is used as the register index and 'argument' as the value to store
     * @throws std::out_of_range If the method lies outside the register file, an unchecked index would otherwise write out of bounds
     */
    void CallMethod(MethodParams params) override {
        state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument);

        // Bounds-checked as the method index is not validated in this function
        registers.raw.at(params.method) = params.argument;
    }
};
}

View File

@ -1,11 +1,10 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "engines/maxwell_3d.h"
#include "memory_manager.h"
#include "macro_interpreter.h"
#include <soc/gmmu.h>
#include <soc/gm20b/engines/maxwell_3d.h>
namespace skyline::gpu {
namespace skyline::soc::gm20b::engine::maxwell3d {
void MacroInterpreter::Execute(size_t offset, const std::vector<u32> &args) {
// Reset the interpreter state
registers = {};
@ -28,9 +27,11 @@ namespace skyline::gpu {
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
case Opcode::Operation::AddImmediate:
HandleAssignment(opcode->assignmentOperation, opcode->dest, registers[opcode->srcA] + opcode->immediate);
break;
case Opcode::Operation::BitfieldReplace: {
u32 src{registers[opcode->srcB]};
u32 dest{registers[opcode->srcA]};
@ -47,6 +48,7 @@ namespace skyline::gpu {
HandleAssignment(opcode->assignmentOperation, opcode->dest, dest);
break;
}
case Opcode::Operation::BitfieldExtractShiftLeftImmediate: {
u32 src{registers[opcode->srcB]};
u32 dest{registers[opcode->srcA]};
@ -56,6 +58,7 @@ namespace skyline::gpu {
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
case Opcode::Operation::BitfieldExtractShiftLeftRegister: {
u32 src{registers[opcode->srcB]};
u32 dest{registers[opcode->srcA]};
@ -65,17 +68,19 @@ namespace skyline::gpu {
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
case Opcode::Operation::ReadImmediate: {
u32 result{maxwell3D.registers.raw[registers[opcode->srcA] + opcode->immediate]};
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
case Opcode::Operation::Branch: {
if (delayedOpcode != nullptr)
throw exception("Cannot branch while inside a delay slot");
u32 value{registers[opcode->srcA]};
bool branch{(opcode->branchCondition == Opcode::BranchCondition::Zero) ? (value == 0) : (value != 0)};
bool branch{(opcode->branchCondition == Opcode::BranchCondition::Zero) == (value == 0)};
if (branch) {
if (opcode->noDelay) {
@ -91,6 +96,7 @@ namespace skyline::gpu {
}
break;
}
default:
throw exception("Unknown MME opcode encountered: 0x{:X}", static_cast<u8>(opcode->operation));
}
@ -186,15 +192,14 @@ namespace skyline::gpu {
}
}
FORCE_INLINE void MacroInterpreter::Send(u32 argument) {
maxwell3D.CallMethod(MethodParams{methodAddress.address, argument, 0, true});
FORCE_INLINE void MacroInterpreter::Send(u32 pArgument) {
maxwell3D.CallMethod(MethodParams{methodAddress.address, pArgument, 0, true});
methodAddress.address += methodAddress.increment;
}
FORCE_INLINE void MacroInterpreter::WriteRegister(u8 reg, u32 value) {
// Register 0 should always be zero so block writes to it
if (reg == 0)
if (reg == 0) [[unlikely]]
return;
registers[reg] = value;

View File

@ -5,10 +5,8 @@
#include <common.h>
namespace skyline::gpu {
namespace engine {
class Maxwell3D;
}
namespace skyline::soc::gm20b::engine::maxwell3d {
class Maxwell3D; // A forward declaration of Maxwell3D as we don't want to import it here
/**
* @brief The MacroInterpreter class handles interpreting macros. Macros are small programs that run on the GPU and are used for things like instanced rendering.
@ -105,14 +103,13 @@ namespace skyline::gpu {
};
};
engine::Maxwell3D &maxwell3D;
Maxwell3D &maxwell3D; //!< A reference to the parent engine object
std::array<u32, 8> registers{};
Opcode *opcode{};
const u32 *argument{};
Opcode *opcode{}; //!< A pointer to the instruction that is currently being executed
std::array<u32, 8> registers{}; //!< The state of all the general-purpose registers in the macro interpreter
const u32 *argument{}; //!< A pointer to the argument buffer for the program, it is read from sequentially
MethodAddress methodAddress{};
bool carryFlag{};
bool carryFlag{}; //!< A flag representing if an arithmetic operation has set the most significant bit
/**
* @brief Steps forward one macro instruction, including delay slots
@ -135,10 +132,13 @@ namespace skyline::gpu {
*/
void Send(u32 argument);
/**
* @brief Writes to the specified register with sanity checking
*/
void WriteRegister(u8 reg, u32 value);
public:
MacroInterpreter(engine::Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
MacroInterpreter(Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
/**
* @brief Executes a GPU macro from macro memory with the given arguments

View File

@ -1,10 +1,9 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include "maxwell_3d.h"
#include <soc.h>
namespace skyline::gpu::engine {
namespace skyline::soc::gm20b::engine::maxwell3d {
Maxwell3D::Maxwell3D(const DeviceState &state) : Engine(state), macroInterpreter(*this) {
ResetRegs();
}
@ -77,9 +76,9 @@ namespace skyline::gpu::engine {
state.logger->Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", params.method, params.argument);
// Methods that are greater than the register size are for macro control
if (params.method > constant::Maxwell3DRegisterCounter) {
if (params.method > RegisterCount) {
if (!(params.method & 1))
macroInvocation.index = ((params.method - constant::Maxwell3DRegisterCounter) >> 1) % macroPositions.size();
macroInvocation.index = ((params.method - RegisterCount) >> 1) % macroPositions.size();
macroInvocation.arguments.push_back(params.argument);
@ -100,6 +99,8 @@ namespace skyline::gpu::engine {
else if (shadowRegisters.mme.shadowRamControl == Registers::MmeShadowRamControl::MethodReplay)
params.argument = shadowRegisters.raw[params.method];
#define MAXWELL3D_OFFSET(field) U32_OFFSET(Registers, field)
switch (params.method) {
case MAXWELL3D_OFFSET(mme.instructionRamLoad):
if (registers.mme.instructionRamPointer >= macroCode.size())
@ -118,7 +119,7 @@ namespace skyline::gpu::engine {
break;
case MAXWELL3D_OFFSET(syncpointAction):
state.logger->Debug("Increment syncpoint: {}", static_cast<u16>(registers.syncpointAction.id));
state.gpu->syncpoints.at(registers.syncpointAction.id).Increment();
state.soc->host1x.syncpoints.at(registers.syncpointAction.id).Increment();
break;
case MAXWELL3D_OFFSET(semaphore.info):
switch (registers.semaphore.info.op) {
@ -137,6 +138,8 @@ namespace skyline::gpu::engine {
registers.raw[0xD00] = 1;
break;
}
#undef MAXWELL3D_OFFSET
}
void Maxwell3D::HandleSemaphoreCounterOperation() {
@ -158,7 +161,7 @@ namespace skyline::gpu::engine {
switch (registers.semaphore.info.structureSize) {
case Registers::SemaphoreInfo::StructureSize::OneWord:
state.gpu->memoryManager.Write<u32>(static_cast<u32>(result), registers.semaphore.address.Pack());
state.soc->gmmu.Write<u32>(static_cast<u32>(result), registers.semaphore.address.Pack());
break;
case Registers::SemaphoreInfo::StructureSize::FourWords: {
// Convert the current nanosecond time to GPU ticks
@ -168,7 +171,7 @@ namespace skyline::gpu::engine {
u64 nsTime{util::GetTimeNs()};
u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
state.gpu->memoryManager.Write<FourWordResult>(FourWordResult{result, timestamp}, registers.semaphore.address.Pack());
state.soc->gmmu.Write<FourWordResult>(FourWordResult{result, timestamp}, registers.semaphore.address.Pack());
break;
}
}

View File

@ -0,0 +1,569 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "engine.h"
#include "maxwell/macro_interpreter.h"
namespace skyline::soc::gm20b::engine::maxwell3d {
/**
* @brief The Maxwell 3D engine handles processing 3D graphics
*/
class Maxwell3D : public Engine {
private:
std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
struct {
u32 index;
std::vector<u32> arguments;
} macroInvocation{}; //!< Data for a macro that is pending execution
MacroInterpreter macroInterpreter;
void HandleSemaphoreCounterOperation();
void WriteSemaphoreResult(u64 result);
public:
static constexpr u32 RegisterCount{0xE00}; //!< The number of Maxwell 3D registers
/**
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def#L478
*/
#pragma pack(push, 1)
union Registers {
std::array<u32, RegisterCount> raw;
/**
 * @brief A 64-bit address that is stored as two 32-bit register words with the upper word first
 */
struct Address {
    u32 high;
    u32 low;

    /**
     * @return The address combined into a single 64-bit value, 'high' forms the upper 32 bits and 'low' the lower 32 bits
     * @note This is const-qualified as it only reads the two words, it can therefore also be called on const register state
     */
    u64 Pack() const {
        return (static_cast<u64>(high) << 32) | low;
    }
};
static_assert(sizeof(Address) == sizeof(u64));
enum class MmeShadowRamControl : u32 {
MethodTrack = 0,
MethodTrackWithFilter = 1,
MethodPassthrough = 2,
MethodReplay = 3,
};
struct ViewportTransform {
enum class Swizzle : u8 {
PositiveX = 0,
NegativeX = 1,
PositiveY = 2,
NegativeY = 3,
PositiveZ = 4,
NegativeZ = 5,
PositiveW = 6,
NegativeW = 7,
};
float scaleX;
float scaleY;
float scaleZ;
float translateX;
float translateY;
float translateZ;
struct {
Swizzle x : 3;
u8 _pad0_ : 1;
Swizzle y : 3;
u8 _pad1_ : 1;
Swizzle z : 3;
u8 _pad2_ : 1;
Swizzle w : 3;
u32 _pad3_ : 17;
} swizzles;
struct {
u8 x : 5;
u8 _pad0_ : 3;
u8 y : 5;
u32 _pad1_ : 19;
} subpixelPrecisionBias;
};
static_assert(sizeof(ViewportTransform) == (0x8 * sizeof(u32)));
struct Viewport {
struct {
u16 x;
u16 width;
};
struct {
u16 y;
u16 height;
};
float depthRangeNear;
float depthRangeFar;
};
static_assert(sizeof(Viewport) == (0x4 * sizeof(u32)));
enum class PolygonMode : u32 {
Point = 0x1B00,
Line = 0x1B01,
Fill = 0x1B02,
};
union VertexAttribute {
u32 raw;
enum class Size : u8 {
Size_1x32 = 0x12,
Size_2x32 = 0x04,
Size_3x32 = 0x02,
Size_4x32 = 0x01,
Size_1x16 = 0x1B,
Size_2x16 = 0x0F,
Size_3x16 = 0x05,
Size_4x16 = 0x03,
Size_1x8 = 0x1D,
Size_2x8 = 0x18,
Size_3x8 = 0x13,
Size_4x8 = 0x0A,
Size_10_10_10_2 = 0x30,
Size_11_11_10 = 0x31,
};
enum class Type : u8 {
None = 0,
SNorm = 1,
UNorm = 2,
SInt = 3,
UInt = 4,
UScaled = 5,
SScaled = 6,
Float = 7,
};
struct {
u8 bufferId : 5;
u8 _pad0_ : 1;
bool fixed : 1;
u16 offset : 14;
Size size : 6;
Type type : 3;
u8 _pad1_ : 1;
bool bgra : 1;
};
};
static_assert(sizeof(VertexAttribute) == sizeof(u32));
enum class CompareOp : u32 {
Never = 1,
Less = 2,
Equal = 3,
LessOrEqual = 4,
Greater = 5,
NotEqual = 6,
GreaterOrEqual = 7,
Always = 8,
NeverGL = 0x200,
LessGL = 0x201,
EqualGL = 0x202,
LessOrEqualGL = 0x203,
GreaterGL = 0x204,
NotEqualGL = 0x205,
GreaterOrEqualGL = 0x206,
AlwaysGL = 0x207,
};
struct Blend {
enum class Op : u32 {
Add = 1,
Subtract = 2,
ReverseSubtract = 3,
Minimum = 4,
Maximum = 5,
AddGL = 0x8006,
SubtractGL = 0x8007,
ReverseSubtractGL = 0x8008,
MinimumGL = 0x800A,
MaximumGL = 0x800B,
};
enum class Factor : u32 {
Zero = 0x1,
One = 0x2,
SourceColor = 0x3,
OneMinusSourceColor = 0x4,
SourceAlpha = 0x5,
OneMinusSourceAlpha = 0x6,
DestAlpha = 0x7,
OneMinusDestAlpha = 0x8,
DestColor = 0x9,
OneMinusDestColor = 0xA,
SourceAlphaSaturate = 0xB,
Source1Color = 0x10,
OneMinusSource1Color = 0x11,
Source1Alpha = 0x12,
OneMinusSource1Alpha = 0x13,
ConstantColor = 0x61,
OneMinusConstantColor = 0x62,
ConstantAlpha = 0x63,
OneMinusConstantAlpha = 0x64,
ZeroGL = 0x4000,
OneGL = 0x4001,
SourceColorGL = 0x4300,
OneMinusSourceColorGL = 0x4301,
SourceAlphaGL = 0x4302,
OneMinusSourceAlphaGL = 0x4303,
DestAlphaGL = 0x4304,
OneMinusDestAlphaGL = 0x4305,
DestColorGL = 0x4306,
OneMinusDestColorGL = 0x4307,
SourceAlphaSaturateGL = 0x4308,
ConstantColorGL = 0xC001,
OneMinusConstantColorGL = 0xC002,
ConstantAlphaGL = 0xC003,
OneMinusConstantAlphaGL = 0xC004,
Source1ColorGL = 0xC900,
OneMinusSource1ColorGL = 0xC901,
Source1AlphaGL = 0xC902,
OneMinusSource1AlphaGL = 0xC903,
};
struct {
u32 seperateAlpha;
Op colorOp;
Factor colorSrcFactor;
Factor colorDestFactor;
Op alphaOp;
Factor alphaSrcFactor;
Factor alphaDestFactor;
u32 _pad_;
};
};
static_assert(sizeof(Blend) == (sizeof(u32) * 8));
enum class StencilOp : u32 {
Keep = 1,
Zero = 2,
Replace = 3,
IncrementAndClamp = 4,
DecrementAndClamp = 5,
Invert = 6,
IncrementAndWrap = 7,
DecrementAndWrap = 8,
};
enum class FrontFace : u32 {
Clockwise = 0x900,
CounterClockwise = 0x901,
};
enum class CullFace : u32 {
Front = 0x404,
Back = 0x405,
FrontAndBack = 0x408,
};
union ColorWriteMask {
u32 raw;
struct {
u8 r : 4;
u8 g : 4;
u8 b : 4;
u8 a : 4;
};
};
static_assert(sizeof(ColorWriteMask) == sizeof(u32));
// Bitfield layout of the semaphore 'info' register (the last member of the 'semaphore' register group)
// The declared fields add up to 29 bits; the static_assert following this struct pins the total size to one u32
struct SemaphoreInfo {
// The operation performed by the semaphore method
enum class Op : u8 {
Release = 0,
Acquire = 1,
Counter = 2,
Trap = 3,
};
// The reduction applied when 'reductionEnable' is set (presumably against the value already in memory - confirm)
enum class ReductionOp : u8 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
};
// GPU unit selector, the values are sparse
enum class Unit : u8 {
VFetch = 1,
VP = 2,
Rast = 4,
StrmOut = 5,
GP = 6,
ZCull = 7,
Prop = 10,
Crop = 15,
};
enum class SyncCondition : u8 {
NotEqual = 0,
GreaterThan = 1,
};
// Signedness of the semaphore payload
enum class Format : u8 {
U32 = 0,
I32 = 1,
};
// The counter selected by 'counterType', the values are sparse
enum class CounterType : u8 {
Zero = 0x0,
InputVertices = 0x1,
InputPrimitives = 0x3,
VertexShaderInvocations = 0x5,
GeometryShaderInvocations = 0x7,
GeometryShaderPrimitives = 0x9,
ZcullStats0 = 0xA,
TransformFeedbackPrimitivesWritten = 0xB,
ZcullStats1 = 0xC,
ZcullStats2 = 0xE,
ClipperInputPrimitives = 0xF,
ZcullStats3 = 0x10,
ClipperOutputPrimitives = 0x11,
PrimitivesGenerated = 0x12,
FragmentShaderInvocations = 0x13,
SamplesPassed = 0x15,
TransformFeedbackOffset = 0x1A,
TessControlShaderInvocations = 0x1B,
TessEvaluationShaderInvocations = 0x1D,
TessEvaluationShaderPrimitives = 0x1F,
};
// Size of the structure associated with the semaphore (presumably what is written back to memory - confirm)
enum class StructureSize : u8 {
FourWords = 0,
OneWord = 1,
};
// The order and widths of the fields below define the register layout, do not reorder them
Op op : 2;
bool flushDisable : 1;
bool reductionEnable : 1;
bool fenceEnable : 1;
u8 _pad0_ : 4;
ReductionOp reductionOp : 3;
Unit unit : 4;
SyncCondition syncCondition : 1;
Format format : 2;
u8 _pad1_ : 1;
bool awakenEnable : 1;
u8 _pad2_ : 2;
CounterType counterType : 5;
StructureSize structureSize : 1;
};
static_assert(sizeof(SemaphoreInfo) == sizeof(u32));
// Origin corner of a coordinate system; used by the 1-bit 'origin' field of the 'pointCoordReplace' register
enum class CoordOrigin : u8 {
LowerLeft = 0,
UpperLeft = 1,
};
struct {
// Named view of the register file: the trailing comment on every member is its register index in u32 words (not a byte offset)
// The '_padN_' arrays fill the gaps between the named registers so that each member lands on its index
u32 _pad0_[0x40]; // 0x0
u32 noOperation; // 0x40
u32 _pad1_[0x3]; // 0x41
u32 waitForIdle; // 0x44
struct {
u32 instructionRamPointer; // 0x45
u32 instructionRamLoad; // 0x46
u32 startAddressRamPointer; // 0x47
u32 startAddressRamLoad; // 0x48
MmeShadowRamControl shadowRamControl; // 0x49
} mme;
u32 _pad2_[0x68]; // 0x4A
struct {
u16 id : 12;
u8 _pad0_ : 4;
bool flushCache : 1;
u8 _pad1_ : 3;
bool increment : 1;
u16 _pad2_ : 11;
} syncpointAction; // 0xB2
u32 _pad3_[0x2C]; // 0xB3
u32 rasterizerEnable; // 0xDF
u32 _pad4_[0x1A0]; // 0xE0
std::array<ViewportTransform, 0x10> viewportTransform; // 0x280
std::array<Viewport, 0x10> viewport; // 0x300
u32 _pad5_[0x2B]; // 0x340
struct {
PolygonMode front; // 0x36B
PolygonMode back; // 0x36C
} polygonMode;
u32 _pad6_[0x68]; // 0x36D
struct {
u32 compareRef; // 0x3D5
u32 writeMask; // 0x3D6
u32 compareMask; // 0x3D7
} stencilBackExtra;
u32 _pad7_[0x13]; // 0x3D8
u32 rtSeparateFragData; // 0x3EB
u32 _pad8_[0x6C]; // 0x3EC
std::array<VertexAttribute, 0x20> vertexAttributeState; // 0x458
u32 _pad9_[0x4B]; // 0x478
CompareOp depthTestFunc; // 0x4C3
float alphaTestRef; // 0x4C4
CompareOp alphaTestFunc; // 0x4C5
u32 drawTFBStride; // 0x4C6
struct {
float r; // 0x4C7
float g; // 0x4C8
float b; // 0x4C9
float a; // 0x4CA
} blendConstant;
u32 _pad10_[0x4]; // 0x4CB
struct {
u32 seperateAlpha; // 0x4CF
Blend::Op colorOp; // 0x4D0
Blend::Factor colorSrcFactor; // 0x4D1
Blend::Factor colorDestFactor; // 0x4D2
Blend::Op alphaOp; // 0x4D3
Blend::Factor alphaSrcFactor; // 0x4D4
u32 _pad_; // 0x4D5
Blend::Factor alphaDestFactor; // 0x4D6
u32 enableCommon; // 0x4D7
std::array<u32, 8> enable; // 0x4D8 For each render target
} blend;
u32 stencilEnable; // 0x4E0
struct {
StencilOp failOp; // 0x4E1
StencilOp zFailOp; // 0x4E2
StencilOp zPassOp; // 0x4E3
struct {
CompareOp op; // 0x4E4
i32 ref; // 0x4E5
u32 mask; // 0x4E6
} compare;
u32 writeMask; // 0x4E7
} stencilFront;
u32 _pad11_[0x4]; // 0x4E8
float lineWidthSmooth; // 0x4EC
float lineWidthAliased; // 0x4ED
u32 _pad12_[0x1F]; // 0x4EE
u32 drawBaseVertex; // 0x50D
u32 drawBaseInstance; // 0x50E
u32 _pad13_[0x35]; // 0x50F
u32 clipDistanceEnable; // 0x544
u32 sampleCounterEnable; // 0x545
float pointSpriteSize; // 0x546
u32 zCullStatCountersEnable; // 0x547
u32 pointSpriteEnable; // 0x548
u32 _pad14_; // 0x549
u32 shaderExceptions; // 0x54A
u32 _pad15_[0x2]; // 0x54B
u32 multisampleEnable; // 0x54D
u32 depthTargetEnable; // 0x54E
struct {
bool alphaToCoverage : 1;
u8 _pad0_ : 3;
bool alphaToOne : 1;
u32 _pad1_ : 27;
} multisampleControl; // 0x54F
u32 _pad16_[0x7]; // 0x550
struct {
Address address; // 0x557
u32 maximumIndex; // 0x559
} texSamplerPool;
u32 _pad17_; // 0x55A
u32 polygonOffsetFactor; // 0x55B
u32 lineSmoothEnable; // 0x55C
struct {
Address address; // 0x55D
u32 maximumIndex; // 0x55F
} texHeaderPool;
u32 _pad18_[0x5]; // 0x560
u32 stencilTwoSideEnable; // 0x565
struct {
StencilOp failOp; // 0x566
StencilOp zFailOp; // 0x567
StencilOp zPassOp; // 0x568
CompareOp compareOp; // 0x569
} stencilBack;
u32 _pad19_[0x17]; // 0x56A
struct {
u8 _unk_ : 2;
CoordOrigin origin : 1;
u16 enable : 10;
u32 _pad_ : 19;
} pointCoordReplace; // 0x581
u32 _pad20_[0xC4]; // 0x582
u32 cullFaceEnable; // 0x646
FrontFace frontFace; // 0x647
CullFace cullFace; // 0x648
u32 pixelCentreImage; // 0x649
u32 _pad21_; // 0x64A
u32 viewportTransformEnable; // 0x64B
u32 _pad22_[0x34]; // 0x64C
std::array<ColorWriteMask, 8> colorMask; // 0x680 For each render target
u32 _pad23_[0x38]; // 0x688
struct {
Address address; // 0x6C0
u32 payload; // 0x6C2
SemaphoreInfo info; // 0x6C3
} semaphore;
u32 _pad24_[0xBC]; // 0x6C4
std::array<Blend, 8> independentBlend; // 0x780 For each render target
u32 _pad25_[0x100]; // 0x7C0
u32 firmwareCall[0x20]; // 0x8C0
};
};
static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32)));
#pragma pack(pop)
Registers registers{};
Registers shadowRegisters{}; //!< The shadow registers, their function is controlled by the 'shadowRamControl' register
std::array<u32, 0x10000> macroCode{}; //!< This stores GPU macros, the 256kb size is from Ryujinx
Maxwell3D(const DeviceState &state);
/**
* @brief Resets the Maxwell 3D registers to their default values
*/
void ResetRegs();
void CallMethod(MethodParams params) override;
};
}

View File

@ -4,30 +4,28 @@
#include <common/signal.h>
#include <loader/loader.h>
#include <kernel/types/KProcess.h>
#include <gpu.h>
#include <gpu/engines/maxwell_3d.h>
#include "gpfifo.h"
#include <soc.h>
namespace skyline::gpu::gpfifo {
namespace skyline::soc::gm20b {
void GPFIFO::Send(MethodParams params) {
state.logger->Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall);
if (params.method == 0) {
switch (static_cast<EngineID>(params.argument)) {
case EngineID::Fermi2D:
subchannels.at(params.subChannel) = state.gpu->fermi2D;
subchannels.at(params.subChannel) = &state.soc->gm20b.fermi2D;
break;
case EngineID::KeplerMemory:
subchannels.at(params.subChannel) = state.gpu->keplerMemory;
subchannels.at(params.subChannel) = &state.soc->gm20b.keplerMemory;
break;
case EngineID::Maxwell3D:
subchannels.at(params.subChannel) = state.gpu->maxwell3D;
subchannels.at(params.subChannel) = &state.soc->gm20b.maxwell3D;
break;
case EngineID::MaxwellCompute:
subchannels.at(params.subChannel) = state.gpu->maxwellCompute;
subchannels.at(params.subChannel) = &state.soc->gm20b.maxwellCompute;
break;
case EngineID::MaxwellDma:
subchannels.at(params.subChannel) = state.gpu->maxwellDma;
subchannels.at(params.subChannel) = &state.soc->gm20b.maxwellDma;
break;
default:
throw exception("Unknown engine 0x{:X} cannot be bound to subchannel {}", params.argument, params.subChannel);
@ -35,7 +33,7 @@ namespace skyline::gpu::gpfifo {
state.logger->Info("Bound GPU engine 0x{:X} to subchannel {}", params.argument, params.subChannel);
return;
} else if (params.method < constant::GpfifoRegisterCount) {
} else if (params.method < engine::GPFIFO::RegisterCount) {
gpfifoEngine.CallMethod(params);
} else {
if (subchannels.at(params.subChannel) == nullptr)
@ -58,7 +56,7 @@ namespace skyline::gpu::gpfifo {
}
pushBufferData.resize(gpEntry.size);
state.gpu->memoryManager.Read<u32>(pushBufferData, gpEntry.Address());
state.soc->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it
@ -66,28 +64,29 @@ namespace skyline::gpu::gpfifo {
continue;
PushBufferMethodHeader methodHeader{.raw = *entry};
switch (methodHeader.secOp) {
case PushBufferMethodHeader::SecOp::IncMethod:
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{static_cast<u16>(methodHeader.methodAddress + i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
break;
case PushBufferMethodHeader::SecOp::NonIncMethod:
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
break;
case PushBufferMethodHeader::SecOp::OneInc:
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{static_cast<u16>(methodHeader.methodAddress + static_cast<bool>(i)), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
break;
case PushBufferMethodHeader::SecOp::ImmdDataMethod:
Send(MethodParams{methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true});
break;
case PushBufferMethodHeader::SecOp::EndPbSegment:
return;
default:
state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
break;

View File

@ -0,0 +1,168 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common/circular_queue.h>
#include "engines/gpfifo.h"
namespace skyline::soc::gm20b {
/**
* @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
*/
struct GpEntry {
enum class Fetch : u8 {
Unconditional = 0,
Conditional = 1,
};
// First word of the entry
union {
u32 entry0; //!< The first word as a raw value
struct {
Fetch fetch : 1;
u8 _pad_ : 1;
u32 get : 30; //!< Low part of the pushbuffer address, stored without its two lowest bits (Address() shifts it left by 2)
};
};
enum class Opcode : u8 {
Nop = 0,
Illegal = 1,
Crc = 2,
PbCrc = 3,
};
enum class Priv : u8 {
User = 0,
Kernel = 1,
};
enum class Level : u8 {
Main = 0,
Subroutine = 1,
};
enum class Sync : u8 {
Proceed = 0,
Wait = 1,
};
// Second word of the entry
union {
u32 entry1; //!< The second word as a raw value
struct {
// The lowest byte is shared: it is read either as the high address bits or as an opcode
// NOTE(review): which interpretation applies is not decided anywhere in this struct - confirm against the linked manuals
union {
u8 getHi; //!< High part of the pushbuffer address (Address() places it at bit 32 and above)
Opcode opcode;
};
Priv priv : 1;
Level level : 1;
u32 size : 21; //!< Size of the pushbuffer; GPFIFO::Process resizes its vector of u32 to this value, so it counts words
Sync sync : 1;
};
};
/**
* @return The GPU virtual address of the pushbuffer, assembled from 'getHi' (bits 32+) and 'get' (bits 2-31)
*/
constexpr u64 Address() const {
return (static_cast<u64>(getHi) << 32) | (static_cast<u64>(get) << 2);
}
};
static_assert(sizeof(GpEntry) == sizeof(u64));
/**
* @brief A single pushbuffer method header that describes a compressed method sequence
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
*/
union PushBufferMethodHeader {
u32 raw; //!< The header as a raw word, every struct below is an overlapping view of the same 32 bits
// NOTE(review): Grp0IncMethod and Grp2NonIncMethod share the value 0, presumably 'secOp' (Grp0UseTert vs Grp2UseTert) selects which group applies - confirm
enum class TertOp : u8 {
Grp0IncMethod = 0,
Grp0SetSubDevMask = 1,
Grp0StoreSubDevMask = 2,
Grp0UseSubDevMask = 3,
Grp2NonIncMethod = 0,
};
// Selects how the header is interpreted; GPFIFO::Process switches on this value
enum class SecOp : u8 {
Grp0UseTert = 0,
IncMethod = 1,
Grp2UseTert = 2,
NonIncMethod = 3,
ImmdDataMethod = 4,
OneInc = 5,
Reserved6 = 6,
EndPbSegment = 7,
};
u16 methodAddress : 12; //!< The lowest 12 bits of the header
// View: skips 4 bits, then a 12-bit sub-device mask
struct {
u8 _pad0_ : 4;
u16 subDeviceMask : 12;
};
// View: skips 13 bits, then the subchannel followed by a 13-bit field whose meaning depends on the operation
struct {
u16 _pad1_ : 13;
u8 methodSubChannel : 3;
union {
TertOp tertOp : 3;
u16 methodCount : 13; //!< Number of argument words following the header (used as the loop bound in GPFIFO::Process)
u16 immdData : 13; //!< Inline argument, passed directly as the method argument for SecOp::ImmdDataMethod
};
};
// View: skips 29 bits, leaving the top 3 bits for the secondary opcode
struct {
u32 _pad2_ : 29;
SecOp secOp : 3;
};
};
static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
/**
 * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
 * @note This class doesn't perfectly map to any particular hardware component on the X1, it combines the role of the GPU Host PBDMA with the handling of the GPFIFO entries
 * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
 */
class GPFIFO {
    const DeviceState &state;
    engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
    std::array<engine::Engine*, 8> subchannels{}; //!< The engine bound to each subchannel, these are non-owning and must start out as nullptr since Send() treats nullptr as "no engine bound"
    std::optional<CircularQueue<GpEntry>> pushBuffers; //!< The queue of submitted GP entries, empty until Initialize() has been called
    std::thread thread; //!< The thread that manages processing of pushbuffers
    std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations

    /**
     * @brief Sends a method call to the GPU hardware
     */
    void Send(MethodParams params);

    /**
     * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
     */
    void Process(GpEntry gpEntry);

  public:
    GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}

    ~GPFIFO();

    /**
     * @param numBuffers The amount of push-buffers to allocate in the circular buffer
     */
    void Initialize(size_t numBuffers);

    /**
     * @brief Executes all pending entries in the FIFO
     */
    void Run();

    /**
     * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Run'
     */
    void Push(span<GpEntry> entries);
};
}

View File

@ -2,10 +2,12 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <kernel/types/KProcess.h>
#include "memory_manager.h"
#include "gmmu.h"
namespace skyline::gpu::vmm {
MemoryManager::MemoryManager(const DeviceState &state) : state(state) {
namespace skyline::soc::gmmu {
constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
GraphicsMemoryManager::GraphicsMemoryManager(const DeviceState &state) : state(state) {
constexpr u64 gpuAddressSpaceSize{1UL << 40}; //!< The size of the GPU address space
constexpr u64 gpuAddressSpaceBase{0x100000}; //!< The base of the GPU address space - must be non-zero
@ -14,9 +16,9 @@ namespace skyline::gpu::vmm {
chunks.push_back(baseChunk);
}
std::optional<ChunkDescriptor> MemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
std::optional<ChunkDescriptor> GraphicsMemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
auto chunk{std::find_if(chunks.begin(), chunks.end(), [desiredState, size, alignment](const ChunkDescriptor &chunk) -> bool {
return (alignment ? util::IsAligned(chunk.virtAddr, alignment) : true) && chunk.size > size && chunk.state == desiredState;
return (alignment ? util::IsAligned(chunk.virtualAddress, alignment) : true) && chunk.size > size && chunk.state == desiredState;
})};
if (chunk != chunks.end())
@ -25,12 +27,12 @@ namespace skyline::gpu::vmm {
return std::nullopt;
}
u64 MemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
u64 GraphicsMemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
auto chunkEnd{chunks.end()};
for (auto chunk{chunks.begin()}; chunk != chunkEnd; chunk++) {
if (chunk->CanContain(newChunk)) {
auto oldChunk{*chunk};
u64 newSize{newChunk.virtAddr - chunk->virtAddr};
u64 newSize{newChunk.virtualAddress - chunk->virtualAddress};
u64 extension{chunk->size - newSize - newChunk.size};
if (newSize == 0) {
@ -41,16 +43,16 @@ namespace skyline::gpu::vmm {
}
if (extension)
chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtAddr + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtualAddress + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
return newChunk.virtAddr;
} else if (chunk->virtAddr + chunk->size > newChunk.virtAddr) {
chunk->size = newChunk.virtAddr - chunk->virtAddr;
return newChunk.virtualAddress;
} else if (chunk->virtualAddress + chunk->size > newChunk.virtualAddress) {
chunk->size = newChunk.virtualAddress - chunk->virtualAddress;
// Deletes all chunks that are within the chunk being inserted and split the final one
auto tailChunk{std::next(chunk)};
while (tailChunk != chunkEnd) {
if (tailChunk->virtAddr + tailChunk->size >= newChunk.virtAddr + newChunk.size)
if (tailChunk->virtualAddress + tailChunk->size >= newChunk.virtualAddress + newChunk.size)
break;
tailChunk = chunks.erase(tailChunk);
@ -61,8 +63,8 @@ namespace skyline::gpu::vmm {
if (tailChunk == chunkEnd)
break;
u64 chunkSliceOffset{newChunk.virtAddr + newChunk.size - tailChunk->virtAddr};
tailChunk->virtAddr += chunkSliceOffset;
u64 chunkSliceOffset{newChunk.virtualAddress + newChunk.size - tailChunk->virtualAddress};
tailChunk->virtualAddress += chunkSliceOffset;
tailChunk->size -= chunkSliceOffset;
if (tailChunk->state == ChunkState::Mapped)
tailChunk->cpuPtr += chunkSliceOffset;
@ -74,19 +76,19 @@ namespace skyline::gpu::vmm {
else
chunks.insert(std::next(headChunk), newChunk);
return newChunk.virtAddr;
return newChunk.virtualAddress;
}
}
throw exception("Failed to insert chunk into GPU address space!");
}
u64 MemoryManager::ReserveSpace(u64 size, u64 alignment) {
size = util::AlignUp(size, constant::GpuPageSize);
u64 GraphicsMemoryManager::ReserveSpace(u64 size, u64 alignment) {
size = util::AlignUp(size, GpuPageSize);
std::unique_lock lock(vmmMutex);
std::unique_lock lock(mutex);
auto newChunk{FindChunk(ChunkState::Unmapped, size, alignment)};
if (!newChunk)
if (!newChunk) [[unlikely]]
return 0;
auto chunk{*newChunk};
@ -96,22 +98,22 @@ namespace skyline::gpu::vmm {
return InsertChunk(chunk);
}
u64 MemoryManager::ReserveFixed(u64 virtAddr, u64 size) {
if (!util::IsAligned(virtAddr, constant::GpuPageSize))
u64 GraphicsMemoryManager::ReserveFixed(u64 virtualAddress, u64 size) {
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return 0;
size = util::AlignUp(size, constant::GpuPageSize);
size = util::AlignUp(size, GpuPageSize);
std::unique_lock lock(vmmMutex);
return InsertChunk(ChunkDescriptor(virtAddr, size, nullptr, ChunkState::Reserved));
std::unique_lock lock(mutex);
return InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Reserved));
}
u64 MemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
size = util::AlignUp(size, constant::GpuPageSize);
u64 GraphicsMemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
size = util::AlignUp(size, GpuPageSize);
std::unique_lock lock(vmmMutex);
std::unique_lock lock(mutex);
auto mappedChunk{FindChunk(ChunkState::Unmapped, size)};
if (!mappedChunk)
if (!mappedChunk) [[unlikely]]
return 0;
auto chunk{*mappedChunk};
@ -122,23 +124,23 @@ namespace skyline::gpu::vmm {
return InsertChunk(chunk);
}
u64 MemoryManager::MapFixed(u64 virtAddr, u8 *cpuPtr, u64 size) {
if (!util::IsAligned(virtAddr, constant::GpuPageSize))
u64 GraphicsMemoryManager::MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size) {
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return 0;
size = util::AlignUp(size, constant::GpuPageSize);
size = util::AlignUp(size, GpuPageSize);
std::unique_lock lock(vmmMutex);
return InsertChunk(ChunkDescriptor(virtAddr, size, cpuPtr, ChunkState::Mapped));
std::unique_lock lock(mutex);
return InsertChunk(ChunkDescriptor(virtualAddress, size, cpuPtr, ChunkState::Mapped));
}
bool MemoryManager::Unmap(u64 virtAddr, u64 size) {
if (!util::IsAligned(virtAddr, constant::GpuPageSize))
bool GraphicsMemoryManager::Unmap(u64 virtualAddress, u64 size) {
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
return false;
try {
std::unique_lock lock(vmmMutex);
InsertChunk(ChunkDescriptor(virtAddr, size, nullptr, ChunkState::Unmapped));
std::unique_lock lock(mutex);
InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Unmapped));
} catch (const std::exception &e) {
return false;
}
@ -146,20 +148,20 @@ namespace skyline::gpu::vmm {
return true;
}
void MemoryManager::Read(u8 *destination, u64 virtAddr, u64 size) {
std::shared_lock lock(vmmMutex);
void GraphicsMemoryManager::Read(u8 *destination, u64 virtualAddress, u64 size) {
std::shared_lock lock(mutex);
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtAddr, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
return address < chunk.virtAddr;
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
return address < chunk.virtualAddress;
})};
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
chunk--;
u64 initialSize{size};
u64 chunkOffset{virtAddr - chunk->virtAddr};
u64 chunkOffset{virtualAddress - chunk->virtualAddress};
u8 *source{chunk->cpuPtr + chunkOffset};
u64 sourceSize{std::min(chunk->size - chunkOffset, size)};
@ -170,7 +172,7 @@ namespace skyline::gpu::vmm {
size -= sourceSize;
if (size) {
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
source = chunk->cpuPtr;
sourceSize = std::min(chunk->size, size);
@ -178,20 +180,20 @@ namespace skyline::gpu::vmm {
}
}
void MemoryManager::Write(u8 *source, u64 virtAddr, u64 size) {
std::shared_lock lock(vmmMutex);
void GraphicsMemoryManager::Write(u8 *source, u64 virtualAddress, u64 size) {
std::shared_lock lock(mutex);
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtAddr, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
return address < chunk.virtAddr;
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
return address < chunk.virtualAddress;
})};
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
chunk--;
u64 initialSize{size};
u64 chunkOffset{virtAddr - chunk->virtAddr};
u64 chunkOffset{virtualAddress - chunk->virtualAddress};
u8 *destination{chunk->cpuPtr + chunkOffset};
u64 destinationSize{std::min(chunk->size - chunkOffset, size)};
@ -202,7 +204,7 @@ namespace skyline::gpu::vmm {
size -= destinationSize;
if (size) {
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtAddr, size);
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
destination = chunk->cpuPtr;
destinationSize = std::min(chunk->size, size);

View File

@ -0,0 +1,140 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::soc::gmmu {
// The state of a chunk of the GPU virtual address space, stored in ChunkDescriptor::state
enum class ChunkState {
Unmapped, //!< The chunk is unmapped
Reserved, //!< The chunk is reserved
Mapped //!< The chunk is mapped and a CPU side address is present
};
/**
 * @brief Describes a contiguous chunk of the GPU virtual address space alongside its mapping state
 */
struct ChunkDescriptor {
    u64 virtualAddress; //!< The address of the chunk in the virtual address space
    u64 size; //!< The size of the chunk in bytes
    u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
    ChunkState state; //!< Whether the chunk is unmapped, reserved or mapped

    ChunkDescriptor(u64 virtualAddress, u64 size, u8 *cpuPtr, ChunkState state) : virtualAddress(virtualAddress), size(size), cpuPtr(cpuPtr), state(state) {}

    /**
     * @note This only compares address ranges, the state and CPU pointer of either chunk are not considered
     * @return If the given chunk can be contained wholly within this chunk
     */
    inline bool CanContain(const ChunkDescriptor &chunk) const {
        return (chunk.virtualAddress >= virtualAddress) && ((size + virtualAddress) >= (chunk.size + chunk.virtualAddress));
    }
};
/**
* @brief The GraphicsMemoryManager class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment
*/
class GraphicsMemoryManager {
private:
const DeviceState &state;
std::vector<ChunkDescriptor> chunks; //!< The chunks making up the address space; Read/Write search this with std::upper_bound on virtualAddress, so it must stay sorted by address
std::shared_mutex mutex; //!< Guards 'chunks': taken exclusively by the reserve/map/unmap functions and shared by Read/Write
/**
* @brief Finds a chunk in the virtual address space that meets the given requirements
* @note mutex MUST be locked when calling this
* @note The size comparison in the implementation is strict, a chunk of exactly 'size' bytes is not matched
* @param desiredState The state of the chunk to find
* @param size The size that the chunk to find must exceed
* @param alignment The minimum alignment of the chunk to find, 0 disables the alignment check
* @return The first applicable chunk, or an empty optional if there is none
*/
std::optional<ChunkDescriptor> FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
/**
* @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
* @note mutex MUST be locked when calling this
* @param newChunk The chunk to insert
* @return The base virtual address of the inserted chunk
*/
u64 InsertChunk(const ChunkDescriptor &newChunk);
public:
GraphicsMemoryManager(const DeviceState &state);
/**
* @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
* @param size The size of the region to reserve
* @param alignment The alignment of the region to reserve
* @return The base virtual address of the reserved region, or 0 if no suitable chunk was found
*/
u64 ReserveSpace(u64 size, u64 alignment);
/**
* @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
* @param virtualAddress The virtual base address of the region to allocate
* @param size The size of the region to allocate
* @return The base virtual address of the reserved region, or 0 if the address is not aligned to the GPU page size
*/
u64 ReserveFixed(u64 virtualAddress, u64 size);
/**
* @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
* @param size The size of the region to map
* @return The base virtual address of the mapped region, or 0 if no suitable chunk was found
*/
u64 MapAllocate(u8 *cpuPtr, u64 size);
/**
* @brief Maps a CPU memory region to a fixed region in the virtual address space
* @param virtualAddress The target virtual address of the region
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
* @param size The size of the region to map
* @return The base virtual address of the mapped region, or 0 if the address is not aligned to the GPU page size
*/
u64 MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size);
/**
* @brief Unmaps all chunks in the given region from the virtual address space
* @return Whether the operation succeeded
*/
bool Unmap(u64 virtualAddress, u64 size);
/**
* @brief Reads 'size' bytes starting at the given virtual address into 'destination'
* @note The implementation throws an exception if it reaches a chunk that is not mapped
*/
void Read(u8 *destination, u64 virtualAddress, u64 size);
/**
* @brief Reads in a span from a region of the virtual address space
*/
template<typename T>
void Read(span <T> destination, u64 virtualAddress) {
Read(reinterpret_cast<u8 *>(destination.data()), virtualAddress, destination.size_bytes());
}
/**
* @brief Reads in an object from a region of the virtual address space
* @tparam T The type of object to return
*/
template<typename T>
T Read(u64 virtualAddress) {
T obj;
Read(reinterpret_cast<u8 *>(&obj), virtualAddress, sizeof(T));
return obj;
}
/**
* @brief Writes 'size' bytes from 'source' to the given virtual address
* @note The implementation throws an exception if it reaches a chunk that is not mapped
*/
void Write(u8 *source, u64 virtualAddress, u64 size);
/**
* @brief Writes out a span to a region of the virtual address space
*/
template<typename T>
void Write(span <T> source, u64 virtualAddress) {
Write(reinterpret_cast<u8 *>(source.data()), virtualAddress, source.size_bytes());
}
/**
* @brief Writes out an object to a region of the virtual address space
*/
template<typename T>
void Write(T source, u64 virtualAddress) {
Write(reinterpret_cast<u8 *>(&source), virtualAddress, sizeof(T));
}
};
}

View File

@ -0,0 +1,17 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "host1x/syncpoint.h"
namespace skyline::soc::host1x {
/**
* @brief An abstraction for the graphics host, this handles DMA on behalf of the CPU when communicating to its clients alongside handling syncpts
* @note This is different from the GM20B Host, it serves a similar function and has an interface for accessing Host1X syncpts
*/
class Host1X {
public:
std::array<Syncpoint, SyncpointCount> syncpoints{}; //!< One Syncpoint object per Host1X syncpoint, indexed by syncpoint ID
};
}

View File

@ -3,7 +3,7 @@
#include "syncpoint.h"
namespace skyline::gpu {
namespace skyline::soc::host1x {
u64 Syncpoint::RegisterWaiter(u32 threshold, const std::function<void()> &callback) {
if (value >= threshold) {
callback();

View File

@ -0,0 +1,52 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::soc::host1x {
constexpr size_t SyncpointCount{192}; //!< The number of host1x syncpoints on T210
/**
* @brief The Syncpoint class represents a single syncpoint in the GPU which is used for GPU -> CPU synchronisation
*/
class Syncpoint {
private:
struct Waiter {
u32 threshold; //!< The syncpoint value to wait on to be reached
std::function<void()> callback; //!< The callback to do after the wait has ended
};
std::mutex waiterLock; //!< Synchronizes insertions and deletions of waiters
std::map<u64, Waiter> waiterMap; //!< The pending waiters, keyed by the identifier returned from RegisterWaiter
u64 nextWaiterId{1}; //!< Starts at 1 as RegisterWaiter returns 0 to signal that the threshold was already reached (presumably the ID handed to the next waiter - confirm in syncpoint.cpp)
public:
std::atomic<u32> value{}; //!< The current value of the syncpoint, compared against waiter thresholds
/**
* @brief Registers a new waiter with a callback that will be called when the syncpoint reaches the target threshold
* @note The callback will be called immediately if the syncpoint has already reached the given threshold
* @return A persistent identifier that can be used to refer to the waiter, or 0 if the threshold has already been reached
*/
u64 RegisterWaiter(u32 threshold, const std::function<void()> &callback);
/**
* @brief Removes a waiter given by 'id' from the pending waiter map
*/
void DeregisterWaiter(u64 id);
/**
* @brief Increments the syncpoint by 1
* @return The new value of the syncpoint
*/
u32 Increment();
/**
* @brief Waits for the syncpoint to reach given threshold
* @return false if the timeout was reached, otherwise true
*/
bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout);
};
}