fix: shadows

This commit is contained in:
Samuliak 2024-08-08 19:25:12 +02:00
parent 5c246d55bd
commit a38ddb5fc2
8 changed files with 91 additions and 74 deletions

View File

@ -2187,6 +2187,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
}
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
bool isCompare = shaderContext->shader->textureUsesDepthCompare[texInstruction->textureFetch.textureIndex];
char tempBuffer0[32];
char tempBuffer1[32];
@ -2212,6 +2213,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
}
// texture sampler opcode
uint32 texOpcode = texInstruction->opcode;
// TODO: is this needed?
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex)
{
// vertex shader forces LOD to zero, but certain sampler types don't support textureLod(...) API
@ -2275,7 +2277,10 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
}
else
{
src->addFmt("sample(samplr{}, ", texInstruction->textureFetch.textureIndex);
src->add("sample");
if (isCompare)
src->add("_compare");
src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex);
}
// for textureGather() add shift (todo: depends on rounding mode set in sampler registers?)
@ -2493,61 +2498,68 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
}
}
// lod bias
if( texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ )
{
src->add(").");
if (numWrittenElements > 1)
{
// result is copied into multiple channels
for (sint32 f = 0; f < numWrittenElements; f++)
{
cemu_assert_debug(texInstruction->dstSel[f] == 0); // only x component is defined
src->add("x");
}
}
else
{
src->add("x");
}
}
else
{
src->add(").");
for (sint32 f = 0; f < 4; f++)
{
if( texInstruction->dstSel[f] < 4 )
{
uint8 elemIndex = texInstruction->dstSel[f];
if (texOpcode == GPU7_TEX_INST_FETCH4)
{
// 's textureGather() and GPU7's FETCH4 instruction have a different order of elements
// xyzw: top-left, top-right, bottom-right, bottom-left
// textureGather xyzw
// fetch4 yzxw
// translate index from fetch4 to textureGather order
static uint8 fetchToGather[4] =
{
2, // x -> z
0, // y -> x
1, // z -> y
3, // w -> w
};
elemIndex = fetchToGather[elemIndex];
}
src->add(resultElemTable[elemIndex]);
numWrittenElements++;
}
else if( texInstruction->dstSel[f] == 7 )
{
// masked and not written
}
else
{
cemu_assert_unimplemented();
}
}
// lod bias (TODO: what?)
src->add(")");
// sample_compare doesn't return a float
if (!isCompare)
{
if( texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ )
{
src->add(".");
if (numWrittenElements > 1)
{
// result is copied into multiple channels
for (sint32 f = 0; f < numWrittenElements; f++)
{
cemu_assert_debug(texInstruction->dstSel[f] == 0); // only x component is defined
src->add("x");
}
}
else
{
src->add("x");
}
}
else
{
src->add(".");
for (sint32 f = 0; f < 4; f++)
{
if( texInstruction->dstSel[f] < 4 )
{
uint8 elemIndex = texInstruction->dstSel[f];
if (texOpcode == GPU7_TEX_INST_FETCH4)
{
// GLSL's textureGather() and GPU7's FETCH4 instruction have a different order of elements
// xyzw: top-left, top-right, bottom-right, bottom-left
// textureGather xyzw
// fetch4 yzxw
// translate index from fetch4 to textureGather order
static uint8 fetchToGather[4] =
{
2, // x -> z
0, // y -> x
1, // z -> y
3, // w -> w
};
elemIndex = fetchToGather[elemIndex];
}
src->add(resultElemTable[elemIndex]);
numWrittenElements++;
}
else if( texInstruction->dstSel[f] == 7 )
{
// masked and not written
}
else
{
cemu_assert_unimplemented();
}
}
}
}
src->add(");");

View File

@ -224,6 +224,7 @@ namespace LatteDecompiler
src->add("#define GET_FRAGCOORD() vec4(in.position.xy * supportBuffer.fragCoordScale.xy, in.position.z, 1.0 / in.position.w)" _CRLF);
src->add("struct FragmentIn {" _CRLF);
src->add("float4 position [[position]];" _CRLF);
LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
for (sint32 i = 0; i < psInputTable->count; i++)
@ -271,7 +272,7 @@ namespace LatteDecompiler
// generate depth output for pixel shader
if (decompilerContext->shader->depthWritten)
{
src->add("float passDepth [[depth]];" _CRLF);
src->add("float passDepth [[depth(any)]];" _CRLF);
}
src->add("};" _CRLF _CRLF);
@ -323,26 +324,31 @@ namespace LatteDecompiler
src->add(", ");
if (shaderContext->shader->textureUsesDepthCompare[i])
src->add("depth");
else
src->add("texture");
if (shaderContext->shader->textureIsIntegerFormat[i])
{
// integer samplers
if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("texture1d<uint>");
src->add("1d<uint>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("texture2d<uint>");
src->add("2d<uint>");
else
cemu_assert_unimplemented();
}
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("texture2d<float>");
src->add("2d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("texture1d<float>");
src->add("1d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY)
src->add("texture2d_array<float>");
src->add("2d_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP)
src->add("texturecube_array<float>");
src->add("cube_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D)
src->add("texture3d<float>");
src->add("3d<float>");
else
{
cemu_assert_unimplemented();

View File

@ -39,11 +39,12 @@ MTL::Texture* LatteTextureViewMtl::GetSwizzledView(uint32 gpuSamplerSwizzle)
sint32 freeIndex = -1;
for (sint32 i = 0; i < std::size(m_viewCache); i++)
{
if (m_viewCache[i].key == gpuSamplerSwizzle)
const auto& entry = m_viewCache[i];
if (entry.key == gpuSamplerSwizzle)
{
return m_viewCache[i].texture;
return entry.texture;
}
else if (m_viewCache[i].key == INVALID_SWIZZLE && freeIndex == -1)
else if (entry.key == INVALID_SWIZZLE && freeIndex == -1)
{
freeIndex = i;
}

View File

@ -29,7 +29,7 @@ private:
struct {
uint32 key;
MTL::Texture* texture;
} m_viewCache[4] = {{INVALID_SWIZZLE, nullptr}};
} m_viewCache[4] = {{INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}};
std::unordered_map<uint32, MTL::Texture*> m_fallbackViewCache;
MTL::Texture* CreateSwizzledView(uint32 gpuSamplerSwizzle);

View File

@ -30,12 +30,10 @@ MTL::DepthStencilState* MetalDepthStencilCache::GetDepthStencilState(const Latte
MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init();
desc->setDepthWriteEnabled(depthWriteEnable);
auto depthCompareFunc = GetMtlCompareFunc(depthFunc);
if (!depthEnable)
if (depthEnable)
{
depthCompareFunc = MTL::CompareFunctionAlways;
desc->setDepthCompareFunction(GetMtlCompareFunc(depthFunc));
}
desc->setDepthCompareFunction(depthCompareFunc);
// Stencil state
bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE();

View File

@ -105,6 +105,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
{
memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
}
debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange->size, newSize);
restrideInfo.memoryInvalidated = false;
restrideInfo.lastStride = newStride;

View File

@ -43,7 +43,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
auto attribute = vertexDescriptor->attributes()->object(semanticId);
attribute->setOffset(attr.offset);
// Bind from the end to not conflict with uniform buffers
attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex));
attribute->setFormat(GetMtlVertexFormat(attr.format));

View File

@ -19,6 +19,7 @@
#include "Metal/MTLResource.hpp"
#include "Metal/MTLTypes.hpp"
#include "gui/guiWrapper.h"
#include <stdexcept>
extern bool hasValidFramebufferAttached;
@ -596,8 +597,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
debug_printf("no vertex function, skipping draw\n");
return;
}
auto fetchShader = vertexShader->compatibleFetchShader;
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
// Render pipeline state
MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.activeFBO, LatteGPUState.contextNew);