Update for new hades

This commit is contained in:
Billy Laws 2022-11-24 19:03:06 +00:00
parent a16383fd4b
commit bba07fb101
10 changed files with 76 additions and 62 deletions

View File

@ -33,7 +33,7 @@ namespace skyline::dirty {
* @tparam OverlapPoolSize Size of the pool used to store handles when there are multiple bound to the same subresource * @tparam OverlapPoolSize Size of the pool used to store handles when there are multiple bound to the same subresource
* @note This class is *NOT* thread-safe * @note This class is *NOT* thread-safe
*/ */
template<size_t ManagedResourceSize, size_t Granularity, size_t OverlapPoolSize = 0x1000> template<size_t ManagedResourceSize, size_t Granularity, size_t OverlapPoolSize = 0x2000>
class Manager { class Manager {
private: private:
struct BindingState { struct BindingState {

View File

@ -218,7 +218,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
viewport.offsetX, viewport.offsetY, viewport.scaleX, viewport.scaleY, viewport.swizzle, viewport.offsetX, viewport.offsetY, viewport.scaleX, viewport.scaleY, viewport.swizzle,
viewportClip, viewportClip,
windowOrigin, windowOrigin,
viewportScaleOffsetEnable); viewportScaleOffsetEnable,
surfaceClip);
} }
ViewportState::ViewportState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index) : engine{manager, dirtyHandle, engine}, index{index} {} ViewportState::ViewportState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine, u32 index) : engine{manager, dirtyHandle, engine}, index{index} {}
@ -258,15 +259,21 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (index != 0 && !ctx.gpu.traits.supportsMultipleViewports) if (index != 0 && !ctx.gpu.traits.supportsMultipleViewports)
return; return;
if (!engine->viewportScaleOffsetEnable) if (!engine->viewportScaleOffsetEnable) {
// https://github.com/Ryujinx/Ryujinx/pull/3328 builder.SetViewport(index, vk::Viewport{
Logger::Warn("Viewport scale/offset disable is unimplemented"); .x = static_cast<float>(engine->surfaceClip.horizontal.x),
.y = static_cast<float>(engine->surfaceClip.vertical.y),
if (engine->viewport.scaleX == 0.0f || engine->viewport.scaleY == 0.0f) .width = engine->surfaceClip.horizontal.width ? static_cast<float>(engine->surfaceClip.horizontal.width) : 1.0f,
.height = engine->surfaceClip.vertical.height ? static_cast<float>(engine->surfaceClip.vertical.height) : 1.0f,
.minDepth = 0.0f,
.maxDepth = 1.0f,
});
} else if (engine->viewport.scaleX == 0.0f || engine->viewport.scaleY == 0.0f) {
builder.SetViewport(index, ConvertViewport(engine->viewport0, engine->viewportClip0, engine->windowOrigin, engine->viewportScaleOffsetEnable)); builder.SetViewport(index, ConvertViewport(engine->viewport0, engine->viewportClip0, engine->windowOrigin, engine->viewportScaleOffsetEnable));
else } else {
builder.SetViewport(index, ConvertViewport(engine->viewport, engine->viewportClip, engine->windowOrigin, engine->viewportScaleOffsetEnable)); builder.SetViewport(index, ConvertViewport(engine->viewport, engine->viewportClip, engine->windowOrigin, engine->viewportScaleOffsetEnable));
} }
}
/* Scissor */ /* Scissor */
void ScissorState::EngineRegisters::DirtyBind(DirtyManager &manager, dirty::Handle handle) const { void ScissorState::EngineRegisters::DirtyBind(DirtyManager &manager, dirty::Handle handle) const {

View File

@ -92,6 +92,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
const engine::ViewportClip &viewportClip; const engine::ViewportClip &viewportClip;
const engine::WindowOrigin &windowOrigin; const engine::WindowOrigin &windowOrigin;
const u32 &viewportScaleOffsetEnable; const u32 &viewportScaleOffsetEnable;
const engine::SurfaceClip &surfaceClip;
void DirtyBind(DirtyManager &manager, dirty::Handle handle) const; void DirtyBind(DirtyManager &manager, dirty::Handle handle) const;
}; };

View File

@ -59,6 +59,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
bool alphaTestEnable : 1; bool alphaTestEnable : 1;
bool depthClampEnable : 1; // Use SetDepthClampEnable bool depthClampEnable : 1; // Use SetDepthClampEnable
bool dynamicStateActive : 1; bool dynamicStateActive : 1;
bool viewportTransformEnable : 1;
}; };
u32 patchSize; u32 patchSize;

View File

@ -192,6 +192,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
ConvertCompilerShaderStage(static_cast<PipelineStage>(i)), ConvertCompilerShaderStage(static_cast<PipelineStage>(i)),
shaderBinaries[i].binary, shaderBinaries[i].baseOffset, shaderBinaries[i].binary, shaderBinaries[i].baseOffset,
packedState.bindlessTextureConstantBufferSlotSelect, packedState.bindlessTextureConstantBufferSlotSelect,
packedState.viewportTransformEnable,
[&](u32 index, u32 offset) { [&](u32 index, u32 offset) {
size_t shaderStage{i > 0 ? (i - 1) : 0}; size_t shaderStage{i > 0 ? (i - 1) : 0};
return constantBuffers[shaderStage][index].Read<int>(ctx.executor, offset); return constantBuffers[shaderStage][index].Read<int>(ctx.executor, offset);

View File

@ -444,7 +444,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
/* Global Shader Config State */ /* Global Shader Config State */
void GlobalShaderConfigState::EngineRegisters::DirtyBind(DirtyManager &manager, dirty::Handle handle) const { void GlobalShaderConfigState::EngineRegisters::DirtyBind(DirtyManager &manager, dirty::Handle handle) const {
manager.Bind(handle, postVtgShaderAttributeSkipMask, bindlessTexture, apiMandatedEarlyZ); manager.Bind(handle, postVtgShaderAttributeSkipMask, bindlessTexture, apiMandatedEarlyZ, viewportScaleOffsetEnable);
} }
GlobalShaderConfigState::GlobalShaderConfigState(const EngineRegisters &engine) : engine{engine} {} GlobalShaderConfigState::GlobalShaderConfigState(const EngineRegisters &engine) : engine{engine} {}
@ -453,6 +453,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
packedState.postVtgShaderAttributeSkipMask = engine.postVtgShaderAttributeSkipMask; packedState.postVtgShaderAttributeSkipMask = engine.postVtgShaderAttributeSkipMask;
packedState.bindlessTextureConstantBufferSlotSelect = engine.bindlessTexture.constantBufferSlotSelect; packedState.bindlessTextureConstantBufferSlotSelect = engine.bindlessTexture.constantBufferSlotSelect;
packedState.apiMandatedEarlyZ = engine.apiMandatedEarlyZ; packedState.apiMandatedEarlyZ = engine.apiMandatedEarlyZ;
packedState.viewportTransformEnable = engine.viewportScaleOffsetEnable;
} }
/* Pipeline State */ /* Pipeline State */

View File

@ -258,6 +258,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
const std::array<u32, 8> &postVtgShaderAttributeSkipMask; const std::array<u32, 8> &postVtgShaderAttributeSkipMask;
const engine_common::BindlessTexture &bindlessTexture; const engine_common::BindlessTexture &bindlessTexture;
const u32 &apiMandatedEarlyZ; const u32 &apiMandatedEarlyZ;
const u32 &viewportScaleOffsetEnable;
void DirtyBind(DirtyManager &manager, dirty::Handle handle) const; void DirtyBind(DirtyManager &manager, dirty::Handle handle) const;
}; };

View File

@ -32,6 +32,7 @@ namespace skyline::gpu {
.support_float16 = traits.supportsFloat16, .support_float16 = traits.supportsFloat16,
.support_int64 = traits.supportsInt64, .support_int64 = traits.supportsInt64,
.needs_demote_reorder = false, .needs_demote_reorder = false,
.support_snorm_render_buffer = true
}; };
constexpr u32 TegraX1WarpSize{32}; //!< The amount of threads in a warp on the Tegra X1 constexpr u32 TegraX1WarpSize{32}; //!< The amount of threads in a warp on the Tegra X1
@ -90,14 +91,21 @@ namespace skyline::gpu {
span<u8> binary; span<u8> binary;
u32 baseOffset; u32 baseOffset;
u32 textureBufferIndex; u32 textureBufferIndex;
bool viewportTransformEnabled;
ShaderManager::ConstantBufferRead constantBufferRead; ShaderManager::ConstantBufferRead constantBufferRead;
ShaderManager::GetTextureType getTextureType; ShaderManager::GetTextureType getTextureType;
public: public:
std::vector<ShaderManager::ConstantBufferWord> constantBufferWords; GraphicsEnvironment(const std::array<u32, 8> &postVtgShaderAttributeSkipMask,
std::vector<ShaderManager::CachedTextureType> textureTypes; Shader::Stage pStage,
span<u8> pBinary, u32 baseOffset,
GraphicsEnvironment(const std::array<u32, 8> &postVtgShaderAttributeSkipMask, Shader::Stage pStage, span<u8> pBinary, u32 baseOffset, u32 textureBufferIndex, ShaderManager::ConstantBufferRead constantBufferRead, ShaderManager::GetTextureType getTextureType) : binary{pBinary}, baseOffset{baseOffset}, textureBufferIndex{textureBufferIndex}, constantBufferRead{std::move(constantBufferRead)}, getTextureType{std::move(getTextureType)} { u32 textureBufferIndex,
bool viewportTransformEnabled,
ShaderManager::ConstantBufferRead constantBufferRead, ShaderManager::GetTextureType getTextureType)
: binary{pBinary}, baseOffset{baseOffset},
textureBufferIndex{textureBufferIndex},
viewportTransformEnabled{viewportTransformEnabled},
constantBufferRead{std::move(constantBufferRead)}, getTextureType{std::move(getTextureType)} {
gp_passthrough_mask = postVtgShaderAttributeSkipMask; gp_passthrough_mask = postVtgShaderAttributeSkipMask;
stage = pStage; stage = pStage;
sph = *reinterpret_cast<Shader::ProgramHeader *>(binary.data()); sph = *reinterpret_cast<Shader::ProgramHeader *>(binary.data());
@ -112,15 +120,19 @@ namespace skyline::gpu {
} }
[[nodiscard]] u32 ReadCbufValue(u32 index, u32 offset) final { [[nodiscard]] u32 ReadCbufValue(u32 index, u32 offset) final {
auto value{constantBufferRead(index, offset)}; return constantBufferRead(index, offset);
constantBufferWords.emplace_back(index, offset, value); }
return value;
[[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) final {
throw exception("ReadTexturePixelFormat not implemented");
} }
[[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) final { [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) final {
auto type{getTextureType(handle)}; return getTextureType(handle);
textureTypes.emplace_back(handle, type); }
return type;
[[nodiscard]] u32 ReadViewportTransformState() final {
return viewportTransformEnabled ? 1 : 0; // Only relevant for graphics shaders
} }
[[nodiscard]] u32 TextureBoundBuffer() const final { [[nodiscard]] u32 TextureBoundBuffer() const final {
@ -156,11 +168,7 @@ namespace skyline::gpu {
ShaderManager::ConstantBufferRead constantBufferRead; ShaderManager::ConstantBufferRead constantBufferRead;
ShaderManager::GetTextureType getTextureType; ShaderManager::GetTextureType getTextureType;
public: public:
std::vector<ShaderManager::ConstantBufferWord> constantBufferWords;
std::vector<ShaderManager::CachedTextureType> textureTypes;
ComputeEnvironment(span<u8> pBinary, ComputeEnvironment(span<u8> pBinary,
u32 baseOffset, u32 baseOffset,
u32 textureBufferIndex, u32 textureBufferIndex,
@ -187,15 +195,19 @@ namespace skyline::gpu {
} }
[[nodiscard]] u32 ReadCbufValue(u32 index, u32 offset) final { [[nodiscard]] u32 ReadCbufValue(u32 index, u32 offset) final {
auto value{constantBufferRead(index, offset)}; return constantBufferRead(index, offset);
constantBufferWords.emplace_back(index, offset, value); }
return value;
[[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) final {
throw exception("ReadTexturePixelFormat not implemented");
} }
[[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) final { [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) final {
auto type{getTextureType(handle)}; return getTextureType(handle);
textureTypes.emplace_back(handle, type); }
return type;
[[nodiscard]] u32 ReadViewportTransformState() final {
return 0; // Only relevant for graphics shaders
} }
[[nodiscard]] u32 TextureBoundBuffer() const final { [[nodiscard]] u32 TextureBoundBuffer() const final {
@ -240,6 +252,14 @@ namespace skyline::gpu {
throw exception("Not implemented"); throw exception("Not implemented");
} }
[[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) final {
throw exception("Not implemented");
}
[[nodiscard]] u32 ReadViewportTransformState() final {
throw exception("Not implemented");
}
[[nodiscard]] u32 TextureBoundBuffer() const final { [[nodiscard]] u32 TextureBoundBuffer() const final {
throw exception("Not implemented"); throw exception("Not implemented");
} }
@ -259,14 +279,15 @@ namespace skyline::gpu {
void Dump(u64 hash) final {} void Dump(u64 hash) final {}
}; };
constexpr ShaderManager::ConstantBufferWord::ConstantBufferWord(u32 index, u32 offset, u32 value) : index(index), offset(offset), value(value) {} Shader::IR::Program ShaderManager::ParseGraphicsShader(const std::array<u32, 8> &postVtgShaderAttributeSkipMask,
Shader::Stage stage,
constexpr ShaderManager::CachedTextureType::CachedTextureType(u32 handle, Shader::TextureType type) : handle(handle), type(type) {} span<u8> binary, u32 baseOffset,
u32 textureConstantBufferIndex,
Shader::IR::Program ShaderManager::ParseGraphicsShader(const std::array<u32, 8> &postVtgShaderAttributeSkipMask, Shader::Stage stage, span<u8> binary, u32 baseOffset, u32 textureConstantBufferIndex, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType) { bool viewportTransformEnabled,
const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType) {
std::scoped_lock lock{poolMutex}; std::scoped_lock lock{poolMutex};
GraphicsEnvironment environment{postVtgShaderAttributeSkipMask, stage, binary, baseOffset, textureConstantBufferIndex, constantBufferRead, getTextureType}; GraphicsEnvironment environment{postVtgShaderAttributeSkipMask, stage, binary, baseOffset, textureConstantBufferIndex, viewportTransformEnabled, constantBufferRead, getTextureType};
Shader::Maxwell::Flow::CFG cfg{environment, flowBlockPool, Shader::Maxwell::Location{static_cast<u32>(baseOffset + sizeof(Shader::ProgramHeader))}}; Shader::Maxwell::Flow::CFG cfg{environment, flowBlockPool, Shader::Maxwell::Location{static_cast<u32>(baseOffset + sizeof(Shader::ProgramHeader))}};
return Shader::Maxwell::TranslateProgram(instructionPool, blockPool, environment, cfg, hostTranslateInfo); return Shader::Maxwell::TranslateProgram(instructionPool, blockPool, environment, cfg, hostTranslateInfo);
} }
@ -278,7 +299,11 @@ namespace skyline::gpu {
return Shader::Maxwell::MergeDualVertexPrograms(vertexA, vertexB, env); return Shader::Maxwell::MergeDualVertexPrograms(vertexA, vertexB, env);
} }
Shader::IR::Program ShaderManager::ParseComputeShader(span<u8> binary, u32 baseOffset, u32 textureConstantBufferIndex, u32 localMemorySize, u32 sharedMemorySize, std::array<u32, 3> workgroupDimensions, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType) { Shader::IR::Program ShaderManager::ParseComputeShader(span<u8> binary, u32 baseOffset,
u32 textureConstantBufferIndex,
u32 localMemorySize, u32 sharedMemorySize,
std::array<u32, 3> workgroupDimensions,
const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType) {
std::scoped_lock lock{poolMutex}; std::scoped_lock lock{poolMutex};
ComputeEnvironment environment{binary, baseOffset, textureConstantBufferIndex, localMemorySize, sharedMemorySize, workgroupDimensions, constantBufferRead, getTextureType}; ComputeEnvironment environment{binary, baseOffset, textureConstantBufferIndex, localMemorySize, sharedMemorySize, workgroupDimensions, constantBufferRead, getTextureType};

View File

@ -31,37 +31,14 @@ namespace skyline::gpu {
public: public:
using ConstantBufferRead = std::function<u32(u32 index, u32 offset)>; //!< A function which reads a constant buffer at the specified offset and returns the value using ConstantBufferRead = std::function<u32(u32 index, u32 offset)>; //!< A function which reads a constant buffer at the specified offset and returns the value
/**
* @brief A single u32 word from a constant buffer with the offset it was read from, utilized to ensure constant buffer state is consistent
*/
struct ConstantBufferWord {
u32 index; //!< The index of the constant buffer
u32 offset; //!< The offset of the constant buffer word
u32 value; //!< The contents of the word
constexpr ConstantBufferWord(u32 index, u32 offset, u32 value);
constexpr bool operator==(const ConstantBufferWord &other) const = default;
};
using GetTextureType = std::function<Shader::TextureType(u32 handle)>; //!< A function which determines the type of a texture from its handle by checking the corresponding TIC using GetTextureType = std::function<Shader::TextureType(u32 handle)>; //!< A function which determines the type of a texture from its handle by checking the corresponding TIC
struct CachedTextureType {
u32 handle;
Shader::TextureType type;
constexpr CachedTextureType(u32 handle, Shader::TextureType type);
constexpr bool operator==(const CachedTextureType &other) const = default;
};
ShaderManager(const DeviceState &state, GPU &gpu); ShaderManager(const DeviceState &state, GPU &gpu);
/** /**
* @return A shader program that corresponds to all the supplied state including the current state of the constant buffers * @return A shader program that corresponds to all the supplied state including the current state of the constant buffers
*/ */
Shader::IR::Program ParseGraphicsShader(const std::array<u32, 8> &postVtgShaderAttributeSkipMask, Shader::Stage stage, span<u8> binary, u32 baseOffset, u32 textureConstantBufferIndex, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType); Shader::IR::Program ParseGraphicsShader(const std::array<u32, 8> &postVtgShaderAttributeSkipMask, Shader::Stage stage, span<u8> binary, u32 baseOffset, u32 textureConstantBufferIndex, bool viewportTransformEnabled, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType);
/** /**
* @brief Combines the VertexA and VertexB shader programs into a single program * @brief Combines the VertexA and VertexB shader programs into a single program

View File

@ -24,7 +24,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
.depthStencilRegisters = {*registers.depthTestEnable, *registers.depthWriteEnable, *registers.depthFunc, *registers.depthBoundsTestEnable, *registers.stencilTestEnable, *registers.twoSidedStencilTestEnable, *registers.stencilOps, *registers.stencilBack, *registers.alphaTestEnable, *registers.alphaFunc, *registers.alphaRef}, .depthStencilRegisters = {*registers.depthTestEnable, *registers.depthWriteEnable, *registers.depthFunc, *registers.depthBoundsTestEnable, *registers.stencilTestEnable, *registers.twoSidedStencilTestEnable, *registers.stencilOps, *registers.stencilBack, *registers.alphaTestEnable, *registers.alphaFunc, *registers.alphaRef},
.colorBlendRegisters = {*registers.logicOp, *registers.singleCtWriteControl, *registers.ctWrites, *registers.blendStatePerTargetEnable, *registers.blendPerTargets, *registers.blend}, .colorBlendRegisters = {*registers.logicOp, *registers.singleCtWriteControl, *registers.ctWrites, *registers.blendStatePerTargetEnable, *registers.blendPerTargets, *registers.blend},
.transformFeedbackRegisters = {*registers.streamOutputEnable, *registers.streamOutControls, *registers.streamOutLayoutSelect}, .transformFeedbackRegisters = {*registers.streamOutputEnable, *registers.streamOutControls, *registers.streamOutLayoutSelect},
.globalShaderConfigRegisters = {*registers.postVtgShaderAttributeSkipMask, *registers.bindlessTexture, *registers.apiMandatedEarlyZEnable}, .globalShaderConfigRegisters = {*registers.postVtgShaderAttributeSkipMask, *registers.bindlessTexture, *registers.apiMandatedEarlyZEnable, *registers.viewportScaleOffsetEnable},
.ctSelect = *registers.ctSelect .ctSelect = *registers.ctSelect
}; };
} }
@ -35,7 +35,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
.vertexBuffersRegisters = util::MergeInto<REGTYPE(VertexBufferState), type::VertexStreamCount>(*registers.vertexStreams, *registers.vertexStreamLimits), .vertexBuffersRegisters = util::MergeInto<REGTYPE(VertexBufferState), type::VertexStreamCount>(*registers.vertexStreams, *registers.vertexStreamLimits),
.indexBufferRegisters = {*registers.indexBuffer}, .indexBufferRegisters = {*registers.indexBuffer},
.transformFeedbackBuffersRegisters = util::MergeInto<REGTYPE(TransformFeedbackBufferState), type::StreamOutBufferCount>(*registers.streamOutBuffers, *registers.streamOutputEnable), .transformFeedbackBuffersRegisters = util::MergeInto<REGTYPE(TransformFeedbackBufferState), type::StreamOutBufferCount>(*registers.streamOutBuffers, *registers.streamOutputEnable),
.viewportsRegisters = util::MergeInto<REGTYPE(ViewportState), type::ViewportCount>(registers.viewports[0], registers.viewportClips[0], *registers.viewports, *registers.viewportClips, *registers.windowOrigin, *registers.viewportScaleOffsetEnable), .viewportsRegisters = util::MergeInto<REGTYPE(ViewportState), type::ViewportCount>(registers.viewports[0], registers.viewportClips[0], *registers.viewports, *registers.viewportClips, *registers.windowOrigin, *registers.viewportScaleOffsetEnable, *registers.surfaceClip),
.scissorsRegisters = util::MergeInto<REGTYPE(ScissorState), type::ViewportCount>(*registers.scissors), .scissorsRegisters = util::MergeInto<REGTYPE(ScissorState), type::ViewportCount>(*registers.scissors),
.lineWidthRegisters = {*registers.lineWidth, *registers.lineWidthAliased, *registers.aliasedLineWidthEnable}, .lineWidthRegisters = {*registers.lineWidth, *registers.lineWidthAliased, *registers.aliasedLineWidthEnable},
.depthBiasRegisters = {*registers.depthBias, *registers.depthBiasClamp, *registers.slopeScaleDepthBias}, .depthBiasRegisters = {*registers.depthBias, *registers.depthBiasClamp, *registers.slopeScaleDepthBias},