support rasterization kill

This commit is contained in:
Samuliak 2024-09-07 11:00:10 +02:00
parent 5e50592937
commit 3dc233fb56
5 changed files with 124 additions and 80 deletions

View File

@ -501,9 +501,28 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash2 = 0;
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
if (g_renderer->GetType() == RendererAPI::Metal && usesGeometryShader)
if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader)
vsHash += _activeFetchShader->mtlShaderHashObject;
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// HACK: ignore DX_RASTERIZATION_KILL and keep rasterization enabled when VPORT_X_OFFSET_ENA is not set
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
if (rasterizationEnabled)
vsHash += 51ULL;
}
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp;

View File

@ -20,6 +20,8 @@
#define _CRLF "\r\n"
static bool rasterizationEnabled;
void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext, StringBuf* src, LatteParsedFetchShaderAttribute_t* attrib);
/*
@ -3108,6 +3110,9 @@ static void _emitExportGPRReadCode(LatteDecompilerShaderContext* shaderContext,
static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
if (!rasterizationEnabled)
return;
StringBuf* src = shaderContext->shaderSource;
src->add("// export" _CRLF);
if(shaderContext->shaderType == LatteConst::ShaderType::Vertex )
@ -3332,6 +3337,9 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La
return;
}
if (!rasterizationEnabled)
return;
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex)
{
if (cfInstruction->memWriteElemSize != 3)
@ -3861,6 +3869,23 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
{
bool isRectVertexShader = (static_cast<LattePrimitiveMode>(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS);
// Rasterization
rasterizationEnabled = true;
if (shader->shaderType == LatteConst::ShaderType::Vertex)
{
rasterizationEnabled = !shaderContext->contextRegistersNew->PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// HACK: ignore DX_RASTERIZATION_KILL and keep rasterization enabled when VPORT_X_OFFSET_ENA is not set
if (!shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
const auto& polygonControlReg = shaderContext->contextRegistersNew->PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
}
StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end)
shaderContext->shaderSource = src;
@ -3874,7 +3899,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add("#include <metal_stdlib>" _CRLF);
src->add("using namespace metal;" _CRLF);
// header part (definitions for inputs and outputs)
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader);
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, rasterizationEnabled);
// helper functions
LatteDecompiler_emitHelperFunctions(shaderContext, src);
const char* functionType = "";
@ -4010,7 +4035,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
else
{
functionType = "vertex";
if (rasterizationEnabled)
outputTypeName = "VertexOut";
else
outputTypeName = "void";
}
break;
case LatteConst::ShaderType::Geometry:
@ -4048,6 +4076,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
}
else
{
if (rasterizationEnabled)
src->addFmt("{} out;" _CRLF, outputTypeName);
}
// variable definition
@ -4285,9 +4314,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
//if(shader->shaderType == LatteConst::ShaderType::Geometry)
// src->add("EndPrimitive();" _CRLF);
// vertex shader should write renderstate point size at the end if required but not modified by shader
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
if (shaderContext->analyzer.outputPointSize && !shaderContext->analyzer.writesPointSize)
{
if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader)
if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader && rasterizationEnabled)
src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
}
@ -4325,13 +4354,15 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
}
}
// TODO: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?)
if ((shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) || shader->shaderType == LatteConst::ShaderType::Geometry)
if (rasterizationEnabled)
{
if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader)
src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF);
// Return
if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel)
src->add("return out;" _CRLF);
}
// end of shader main
src->add("}" _CRLF);

View File

@ -262,7 +262,7 @@ namespace LatteDecompiler
src->add("};" _CRLF _CRLF);
}
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled)
{
auto src = decompilerContext->shaderSource;
@ -300,7 +300,7 @@ namespace LatteDecompiler
if (!decompilerContext->options->usesGeometryShader)
{
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && rasterizationEnabled)
_emitVSOutputs(decompilerContext, isRectVertexShader);
}
else
@ -351,7 +351,7 @@ namespace LatteDecompiler
}
}
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled)
{
auto src = decompilerContext->shaderSource;
@ -410,7 +410,7 @@ namespace LatteDecompiler
// uniform buffers
_emitUniformBuffers(decompilerContext);
// inputs and outputs
_emitInputsAndOutputs(decompilerContext, isRectVertexShader);
_emitInputsAndOutputs(decompilerContext, isRectVertexShader, rasterizationEnabled);
if (dump_shaders_enabled)
decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF);

View File

@ -9,6 +9,7 @@
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cemu/Logging/CemuLogging.h"
#include "config/ActiveSettings.h"
static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
@ -188,8 +189,31 @@ extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async;
template<typename T>
void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr)
void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// HACK: ignore DX_RASTERIZATION_KILL and keep rasterization enabled when VPORT_X_OFFSET_ENA is not set
// TODO: include this in the hash?
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
// Culling both front and back faces effectively disables rasterization
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
desc->setRasterizationEnabled(rasterizationEnabled);
if (rasterizationEnabled)
{
auto pixelShaderMtl = static_cast<RendererShaderMtl*>(pixelShader->shader);
desc->setFragmentFunction(pixelShaderMtl->GetFunction());
}
// Color attachments
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
@ -284,7 +308,7 @@ MetalPipelineCache::~MetalPipelineCache()
m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error);
if (error)
{
debug_printf("failed to serialize binary archive: %s\n", error->localizedDescription()->utf8String());
cemuLog_log(LogType::Force, "failed to serialize binary archive: {}", error->localizedDescription()->utf8String());
error->release();
}
m_binaryArchive->release();
@ -362,17 +386,15 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
layout->setStride(bufferStride);
}
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
auto vertexShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
// Render pipeline state
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(mtlVertexShader->GetFunction());
desc->setFragmentFunction(mtlPixelShader->GetFunction());
desc->setVertexFunction(vertexShaderMtl->GetFunction());
// TODO: don't always set the vertex descriptor?
desc->setVertexDescriptor(vertexDescriptor);
SetFragmentState(desc, lastUsedFBO, activeFBO, lcr);
SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr);
TryLoadBinaryArchive();
@ -391,9 +413,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
#endif
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error);
//static uint32 oldPipelineCount = 0;
//static uint32 newPipelineCount = 0;
// Pipeline wasn't found in the binary archive, we need to compile it
if (error)
{
@ -407,7 +426,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error);
if (error)
{
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String());
error->release();
}
else
@ -419,19 +438,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
m_binaryArchive->addRenderPipelineFunctions(desc, &error);
if (error)
{
debug_printf("error saving render pipeline functions: %s\n", error->localizedDescription()->utf8String());
cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", error->localizedDescription()->utf8String());
error->release();
}
}
}
//newPipelineCount++;
}
//else
//{
// oldPipelineCount++;
//}
//debug_printf("%u pipelines were found in the binary archive, %u new were created\n", oldPipelineCount, newPipelineCount);
desc->release();
vertexDescriptor->release();
@ -452,26 +464,24 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
if (pipeline)
return pipeline;
auto mtlObjectShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
RendererShaderMtl* mtlMeshShader;
auto objectShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
RendererShaderMtl* meshShaderMtl;
if (geometryShader)
{
mtlMeshShader = static_cast<RendererShaderMtl*>(geometryShader->shader);
meshShaderMtl = static_cast<RendererShaderMtl*>(geometryShader->shader);
}
else
{
// If there is no geometry shader, it means that we are emulating rects
mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
meshShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
}
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
// Render pipeline state
MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
desc->setObjectFunction(mtlObjectShader->GetFunction());
desc->setMeshFunction(mtlMeshShader->GetFunction());
desc->setFragmentFunction(mtlPixelShader->GetFunction());
desc->setObjectFunction(objectShaderMtl->GetFunction());
desc->setMeshFunction(meshShaderMtl->GetFunction());
SetFragmentState(desc, lastUsedFBO, activeFBO, lcr);
SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr);
TryLoadBinaryArchive();
@ -486,7 +496,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
desc->release();
if (error)
{
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String());
error->release();
}
@ -594,7 +604,7 @@ void MetalPipelineCache::TryLoadBinaryArchive()
m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error);
if (error)
{
debug_printf("failed to create binary archive: %s\n", error->localizedDescription()->utf8String());
cemuLog_log(LogType::Force, "failed to create binary archive: {}", error->localizedDescription()->utf8String());
error->release();
}
}

View File

@ -879,7 +879,7 @@ void MetalRenderer::draw_beginSequence()
LatteRenderTarget_updateScissorBox();
// check for conditions which would turn the drawcalls into no-ops
bool rasterizerEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL() == false;
bool rasterizerEnable = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// GX2SetSpecialState(0, true) enables DX_RASTERIZATION_KILL, but still expects depth writes to happen? -> Research which stages are disabled by DX_RASTERIZATION_KILL exactly
// for now we use a workaround:
@ -888,18 +888,6 @@ void MetalRenderer::draw_beginSequence()
if (!rasterizerEnable && !streamoutEnable)
m_state.m_skipDrawSequence = true;
// Both faces are culled
// TODO: can we really skip the draw?
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
m_state.m_skipDrawSequence = true;
// TODO: is this even needed?
if (!m_state.m_activeFBO)
m_state.m_skipDrawSequence = true;
}
void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst)
@ -1065,10 +1053,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Cull mode
// Handled in draw_beginSequence
if (cullFront && cullBack)
cemu_assert_suspicious();
// Culling both front and back faces is handled by disabling rasterization
if (!(cullFront && cullBack))
{
MTL::CullMode cullMode;
if (cullFront)
cullMode = MTL::CullModeFront;
@ -1082,6 +1069,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
renderCommandEncoder->setCullMode(cullMode);
encoderState.m_cullMode = cullMode;
}
}
// Front face
MTL::Winding frontFaceWinding;
@ -1164,12 +1152,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
else
renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew);
// HACK
if (!renderPipelineState)
{
printf("invalid render pipeline state, skipping draw\n");
return;
}
if (renderPipelineState != encoderState.m_renderPipelineState)
{