mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-11-29 20:44:18 +01:00
support rasterization kill
This commit is contained in:
parent
5e50592937
commit
3dc233fb56
@ -501,9 +501,28 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
|
||||
uint64 vsHash2 = 0;
|
||||
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
|
||||
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
|
||||
if (g_renderer->GetType() == RendererAPI::Metal && usesGeometryShader)
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
if (usesGeometryShader)
|
||||
vsHash += _activeFetchShader->mtlShaderHashObject;
|
||||
|
||||
// Rasterization
|
||||
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
|
||||
|
||||
// HACK
|
||||
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
|
||||
rasterizationEnabled = true;
|
||||
|
||||
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
|
||||
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
|
||||
uint32 cullBack = polygonControlReg.get_CULL_BACK();
|
||||
if (cullFront && cullBack)
|
||||
rasterizationEnabled = false;
|
||||
|
||||
if (rasterizationEnabled)
|
||||
vsHash += 51ULL;
|
||||
}
|
||||
|
||||
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
|
||||
vsHash += tmp;
|
||||
|
||||
|
@ -20,6 +20,8 @@
|
||||
|
||||
#define _CRLF "\r\n"
|
||||
|
||||
static bool rasterizationEnabled;
|
||||
|
||||
void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext, StringBuf* src, LatteParsedFetchShaderAttribute_t* attrib);
|
||||
|
||||
/*
|
||||
@ -3108,6 +3110,9 @@ static void _emitExportGPRReadCode(LatteDecompilerShaderContext* shaderContext,
|
||||
|
||||
static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
|
||||
{
|
||||
if (!rasterizationEnabled)
|
||||
return;
|
||||
|
||||
StringBuf* src = shaderContext->shaderSource;
|
||||
src->add("// export" _CRLF);
|
||||
if(shaderContext->shaderType == LatteConst::ShaderType::Vertex )
|
||||
@ -3332,6 +3337,9 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La
|
||||
return;
|
||||
}
|
||||
|
||||
if (!rasterizationEnabled)
|
||||
return;
|
||||
|
||||
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex)
|
||||
{
|
||||
if (cfInstruction->memWriteElemSize != 3)
|
||||
@ -3861,6 +3869,23 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
{
|
||||
bool isRectVertexShader = (static_cast<LattePrimitiveMode>(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS);
|
||||
|
||||
// Rasterization
|
||||
rasterizationEnabled = true;
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex)
|
||||
{
|
||||
rasterizationEnabled = !shaderContext->contextRegistersNew->PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
|
||||
|
||||
// HACK
|
||||
if (!shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
|
||||
rasterizationEnabled = true;
|
||||
|
||||
const auto& polygonControlReg = shaderContext->contextRegistersNew->PA_SU_SC_MODE_CNTL;
|
||||
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
|
||||
uint32 cullBack = polygonControlReg.get_CULL_BACK();
|
||||
if (cullFront && cullBack)
|
||||
rasterizationEnabled = false;
|
||||
}
|
||||
|
||||
StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end)
|
||||
shaderContext->shaderSource = src;
|
||||
|
||||
@ -3874,7 +3899,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
src->add("#include <metal_stdlib>" _CRLF);
|
||||
src->add("using namespace metal;" _CRLF);
|
||||
// header part (definitions for inputs and outputs)
|
||||
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader);
|
||||
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, rasterizationEnabled);
|
||||
// helper functions
|
||||
LatteDecompiler_emitHelperFunctions(shaderContext, src);
|
||||
const char* functionType = "";
|
||||
@ -4010,7 +4035,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
else
|
||||
{
|
||||
functionType = "vertex";
|
||||
if (rasterizationEnabled)
|
||||
outputTypeName = "VertexOut";
|
||||
else
|
||||
outputTypeName = "void";
|
||||
}
|
||||
break;
|
||||
case LatteConst::ShaderType::Geometry:
|
||||
@ -4048,6 +4076,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rasterizationEnabled)
|
||||
src->addFmt("{} out;" _CRLF, outputTypeName);
|
||||
}
|
||||
// variable definition
|
||||
@ -4285,9 +4314,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
//if(shader->shaderType == LatteConst::ShaderType::Geometry)
|
||||
// src->add("EndPrimitive();" _CRLF);
|
||||
// vertex shader should write renderstate point size at the end if required but not modified by shader
|
||||
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
|
||||
if (shaderContext->analyzer.outputPointSize && !shaderContext->analyzer.writesPointSize)
|
||||
{
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader)
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader && rasterizationEnabled)
|
||||
src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
|
||||
}
|
||||
|
||||
@ -4325,13 +4354,15 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?)
|
||||
if ((shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) || shader->shaderType == LatteConst::ShaderType::Geometry)
|
||||
if (rasterizationEnabled)
|
||||
{
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader)
|
||||
src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF);
|
||||
|
||||
// Return
|
||||
if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel)
|
||||
src->add("return out;" _CRLF);
|
||||
}
|
||||
|
||||
// end of shader main
|
||||
src->add("}" _CRLF);
|
||||
|
@ -262,7 +262,7 @@ namespace LatteDecompiler
|
||||
src->add("};" _CRLF _CRLF);
|
||||
}
|
||||
|
||||
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
|
||||
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled)
|
||||
{
|
||||
auto src = decompilerContext->shaderSource;
|
||||
|
||||
@ -300,7 +300,7 @@ namespace LatteDecompiler
|
||||
|
||||
if (!decompilerContext->options->usesGeometryShader)
|
||||
{
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && rasterizationEnabled)
|
||||
_emitVSOutputs(decompilerContext, isRectVertexShader);
|
||||
}
|
||||
else
|
||||
@ -351,7 +351,7 @@ namespace LatteDecompiler
|
||||
}
|
||||
}
|
||||
|
||||
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
|
||||
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled)
|
||||
{
|
||||
auto src = decompilerContext->shaderSource;
|
||||
|
||||
@ -410,7 +410,7 @@ namespace LatteDecompiler
|
||||
// uniform buffers
|
||||
_emitUniformBuffers(decompilerContext);
|
||||
// inputs and outputs
|
||||
_emitInputsAndOutputs(decompilerContext, isRectVertexShader);
|
||||
_emitInputsAndOutputs(decompilerContext, isRectVertexShader, rasterizationEnabled);
|
||||
|
||||
if (dump_shaders_enabled)
|
||||
decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF);
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||
#include "Cemu/Logging/CemuLogging.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
|
||||
static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
|
||||
@ -188,8 +189,31 @@ extern std::atomic_int g_compiled_shaders_total;
|
||||
extern std::atomic_int g_compiled_shaders_async;
|
||||
|
||||
template<typename T>
|
||||
void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr)
|
||||
void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr)
|
||||
{
|
||||
// Rasterization
|
||||
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
|
||||
|
||||
// HACK
|
||||
// TODO: include this in the hash?
|
||||
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
|
||||
rasterizationEnabled = true;
|
||||
|
||||
// Culling both front and back faces effectively disables rasterization
|
||||
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
|
||||
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
|
||||
uint32 cullBack = polygonControlReg.get_CULL_BACK();
|
||||
if (cullFront && cullBack)
|
||||
rasterizationEnabled = false;
|
||||
|
||||
desc->setRasterizationEnabled(rasterizationEnabled);
|
||||
|
||||
if (rasterizationEnabled)
|
||||
{
|
||||
auto pixelShaderMtl = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||
desc->setFragmentFunction(pixelShaderMtl->GetFunction());
|
||||
}
|
||||
|
||||
// Color attachments
|
||||
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
|
||||
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
|
||||
@ -284,7 +308,7 @@ MetalPipelineCache::~MetalPipelineCache()
|
||||
m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error);
|
||||
if (error)
|
||||
{
|
||||
debug_printf("failed to serialize binary archive: %s\n", error->localizedDescription()->utf8String());
|
||||
cemuLog_log(LogType::Force, "failed to serialize binary archive: {}", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
}
|
||||
m_binaryArchive->release();
|
||||
@ -362,17 +386,15 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
|
||||
layout->setStride(bufferStride);
|
||||
}
|
||||
|
||||
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||
auto vertexShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||
|
||||
// Render pipeline state
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
desc->setVertexFunction(mtlVertexShader->GetFunction());
|
||||
desc->setFragmentFunction(mtlPixelShader->GetFunction());
|
||||
desc->setVertexFunction(vertexShaderMtl->GetFunction());
|
||||
// TODO: don't always set the vertex descriptor?
|
||||
desc->setVertexDescriptor(vertexDescriptor);
|
||||
|
||||
SetFragmentState(desc, lastUsedFBO, activeFBO, lcr);
|
||||
SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr);
|
||||
|
||||
TryLoadBinaryArchive();
|
||||
|
||||
@ -391,9 +413,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
|
||||
#endif
|
||||
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error);
|
||||
|
||||
//static uint32 oldPipelineCount = 0;
|
||||
//static uint32 newPipelineCount = 0;
|
||||
|
||||
// Pipeline wasn't found in the binary archive, we need to compile it
|
||||
if (error)
|
||||
{
|
||||
@ -407,7 +426,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
|
||||
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error);
|
||||
if (error)
|
||||
{
|
||||
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
|
||||
cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
}
|
||||
else
|
||||
@ -419,19 +438,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
|
||||
m_binaryArchive->addRenderPipelineFunctions(desc, &error);
|
||||
if (error)
|
||||
{
|
||||
debug_printf("error saving render pipeline functions: %s\n", error->localizedDescription()->utf8String());
|
||||
cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//newPipelineCount++;
|
||||
}
|
||||
//else
|
||||
//{
|
||||
// oldPipelineCount++;
|
||||
//}
|
||||
//debug_printf("%u pipelines were found in the binary archive, %u new were created\n", oldPipelineCount, newPipelineCount);
|
||||
desc->release();
|
||||
vertexDescriptor->release();
|
||||
|
||||
@ -452,26 +464,24 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
|
||||
if (pipeline)
|
||||
return pipeline;
|
||||
|
||||
auto mtlObjectShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||
RendererShaderMtl* mtlMeshShader;
|
||||
auto objectShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||
RendererShaderMtl* meshShaderMtl;
|
||||
if (geometryShader)
|
||||
{
|
||||
mtlMeshShader = static_cast<RendererShaderMtl*>(geometryShader->shader);
|
||||
meshShaderMtl = static_cast<RendererShaderMtl*>(geometryShader->shader);
|
||||
}
|
||||
else
|
||||
{
|
||||
// If there is no geometry shader, it means that we are emulating rects
|
||||
mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
|
||||
meshShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
|
||||
}
|
||||
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||
|
||||
// Render pipeline state
|
||||
MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
|
||||
desc->setObjectFunction(mtlObjectShader->GetFunction());
|
||||
desc->setMeshFunction(mtlMeshShader->GetFunction());
|
||||
desc->setFragmentFunction(mtlPixelShader->GetFunction());
|
||||
desc->setObjectFunction(objectShaderMtl->GetFunction());
|
||||
desc->setMeshFunction(meshShaderMtl->GetFunction());
|
||||
|
||||
SetFragmentState(desc, lastUsedFBO, activeFBO, lcr);
|
||||
SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr);
|
||||
|
||||
TryLoadBinaryArchive();
|
||||
|
||||
@ -486,7 +496,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
|
||||
desc->release();
|
||||
if (error)
|
||||
{
|
||||
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
|
||||
cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
}
|
||||
|
||||
@ -594,7 +604,7 @@ void MetalPipelineCache::TryLoadBinaryArchive()
|
||||
m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error);
|
||||
if (error)
|
||||
{
|
||||
debug_printf("failed to create binary archive: %s\n", error->localizedDescription()->utf8String());
|
||||
cemuLog_log(LogType::Force, "failed to create binary archive: {}", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
}
|
||||
}
|
||||
|
@ -879,7 +879,7 @@ void MetalRenderer::draw_beginSequence()
|
||||
LatteRenderTarget_updateScissorBox();
|
||||
|
||||
// check for conditions which would turn the drawcalls into no-ops
|
||||
bool rasterizerEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL() == false;
|
||||
bool rasterizerEnable = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
|
||||
|
||||
// GX2SetSpecialState(0, true) enables DX_RASTERIZATION_KILL, but still expects depth writes to happen? -> Research which stages are disabled by DX_RASTERIZATION_KILL exactly
|
||||
// for now we use a workaround:
|
||||
@ -888,18 +888,6 @@ void MetalRenderer::draw_beginSequence()
|
||||
|
||||
if (!rasterizerEnable && !streamoutEnable)
|
||||
m_state.m_skipDrawSequence = true;
|
||||
|
||||
// Both faces are culled
|
||||
// TODO: can we really skip the draw?
|
||||
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
|
||||
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
|
||||
uint32 cullBack = polygonControlReg.get_CULL_BACK();
|
||||
if (cullFront && cullBack)
|
||||
m_state.m_skipDrawSequence = true;
|
||||
|
||||
// TODO: is this even needed?
|
||||
if (!m_state.m_activeFBO)
|
||||
m_state.m_skipDrawSequence = true;
|
||||
}
|
||||
|
||||
void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst)
|
||||
@ -1065,10 +1053,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
|
||||
// Cull mode
|
||||
|
||||
// Handled in draw_beginSequence
|
||||
if (cullFront && cullBack)
|
||||
cemu_assert_suspicious();
|
||||
|
||||
// Cull front and back is handled by disabling rasterization
|
||||
if (!(cullFront && cullBack))
|
||||
{
|
||||
MTL::CullMode cullMode;
|
||||
if (cullFront)
|
||||
cullMode = MTL::CullModeFront;
|
||||
@ -1082,6 +1069,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
renderCommandEncoder->setCullMode(cullMode);
|
||||
encoderState.m_cullMode = cullMode;
|
||||
}
|
||||
}
|
||||
|
||||
// Front face
|
||||
MTL::Winding frontFaceWinding;
|
||||
@ -1164,12 +1152,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
else
|
||||
renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew);
|
||||
|
||||
// HACK
|
||||
if (!renderPipelineState)
|
||||
{
|
||||
printf("invalid render pipeline state, skipping draw\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (renderPipelineState != encoderState.m_renderPipelineState)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user