VideoCommon: Implement primitive breaking for primitive lists

This commit is contained in:
TellowKrinkle 2024-10-20 15:16:59 -05:00
parent 24e9fc120c
commit 5ef4fcb005

View File

@ -376,6 +376,22 @@ static void CheckCPConfiguration(int vtx_attr_group)
} }
} }
// Reports whether a draw call using `primitive` is eligible to be broken up into several
// smaller draws. Only the primitive types enumerated below (both quad variants, triangles,
// lines and points) are accepted; any other primitive value yields false.
static bool CanSplit(OpcodeDecoder::Primitive primitive)
{
  using Primitive = OpcodeDecoder::Primitive;

  // Either of the two quad opcodes.
  const bool is_quads =
      primitive == Primitive::GX_DRAW_QUADS || primitive == Primitive::GX_DRAW_QUADS_2;
  if (is_quads)
    return true;

  // The remaining supported types: triangles, lines and points.
  return primitive == Primitive::GX_DRAW_TRIANGLES || primitive == Primitive::GX_DRAW_LINES ||
         primitive == Primitive::GX_DRAW_POINTS;
}
template <bool IsPreprocess> template <bool IsPreprocess>
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, const u8* src) int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, const u8* src)
{ {
@ -414,9 +430,9 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data // CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
// Therefore it's only useful to check if culling could remove a flush // Therefore it's only useful to check if culling could remove a flush
const bool can_cpu_cull = g_ActiveConfig.bCPUCull && bool can_cpu_cull = g_ActiveConfig.bCPUCull &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES && primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES &&
!g_vertex_manager->HasSendableVertices(); !g_vertex_manager->HasSendableVertices();
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze // They still need to go through vertex loading, because we need to calculate a zfreeze
@ -425,24 +441,35 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES); primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
const int stride = loader->m_native_vtx_decl.stride; const int stride = loader->m_native_vtx_decl.stride;
DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, count, stride, do
cullall || can_cpu_cull);
count = loader->RunVertices(src, dst.GetPointer(), count);
if (can_cpu_cull && !cullall)
{ {
if (!g_vertex_manager->AreAllVerticesCulled(loader, primitive, dst.GetPointer(), count)) const int max_vertices = 16380; // Max is 16383, but 16380 is divisible by both 4 and 3
const int run = CanSplit(primitive) && count > max_vertices ? max_vertices : count;
count -= run;
DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, run, stride,
cullall || can_cpu_cull);
const int num_loaded = loader->RunVertices(src, dst.GetPointer(), run);
src += loader->m_vertex_size * max_vertices;
if (can_cpu_cull && !cullall)
{ {
DataReader new_dst = g_vertex_manager->DisableCullAll(stride); const bool all_culled =
memmove(new_dst.GetPointer(), dst.GetPointer(), count * stride); g_vertex_manager->AreAllVerticesCulled(loader, primitive, dst.GetPointer(), num_loaded);
if (!all_culled)
{
DataReader new_dst = g_vertex_manager->DisableCullAll(stride);
memmove(new_dst.GetPointer(), dst.GetPointer(), num_loaded * stride);
can_cpu_cull = false;
}
} }
}
g_vertex_manager->AddIndices(primitive, count); g_vertex_manager->AddIndices(primitive, num_loaded);
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride); g_vertex_manager->FlushData(num_loaded, stride);
ADDSTAT(g_stats.this_frame.num_prims, num_loaded);
} while (count);
ADDSTAT(g_stats.this_frame.num_prims, count);
INCSTAT(g_stats.this_frame.num_primitive_joins); INCSTAT(g_stats.this_frame.num_primitive_joins);
} }
return size; return size;