VideoCommon: Cull vertices on the CPU

This commit is contained in:
TellowKrinkle 2022-07-26 03:57:30 -05:00
parent b170ef9651
commit 1be0149146
7 changed files with 57 additions and 7 deletions

View File

@ -93,6 +93,7 @@ const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
{System::GFX, "Settings", "SaveTextureCacheToState"}, true}; {System::GFX, "Settings", "SaveTextureCacheToState"}, true};
const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{ const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{
{System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false}; {System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false};
const Info<bool> GFX_CPU_CULL{{System::GFX, "Settings", "CPUCull"}, false};
const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{ const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
{System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto}; {System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};

View File

@ -82,6 +82,7 @@ extern const Info<int> GFX_SHADER_COMPILER_THREADS;
extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS; extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE; extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION; extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION;
extern const Info<bool> GFX_CPU_CULL;
extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS; extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE; extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE;

View File

@ -30,6 +30,7 @@
#include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h" #include "VideoCommon/XFMemory.h"
namespace VertexLoaderManager namespace VertexLoaderManager
@ -366,17 +367,33 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
vertex_shader_manager.SetVertexFormat(loader->m_native_components, vertex_shader_manager.SetVertexFormat(loader->m_native_components,
loader->m_native_vertex_format->GetVertexDeclaration()); loader->m_native_vertex_format->GetVertexDeclaration());
// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
// Therefore it's only useful to check if culling could remove a flush
const bool can_cpu_cull = g_ActiveConfig.bCPUCull &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES &&
!g_vertex_manager->HasSendableVertices();
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze refrence // They still need to go through vertex loading, because we need to calculate a zfreeze
// slope. // reference slope.
bool cullall = (bpmem.genMode.cullmode == CullMode::All && const bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES); primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
DataReader dst = g_vertex_manager->PrepareForAdditionalData( const int stride = loader->m_native_vtx_decl.stride;
primitive, count, loader->m_native_vtx_decl.stride, cullall); DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, count, stride,
cullall || can_cpu_cull);
count = loader->RunVertices(src, dst.GetPointer(), count); count = loader->RunVertices(src, dst.GetPointer(), count);
if (can_cpu_cull && !cullall)
{
if (!g_vertex_manager->AreAllVerticesCulled(loader, primitive, dst.GetPointer(), count))
{
DataReader new_dst = g_vertex_manager->DisableCullAll(stride);
memmove(new_dst.GetPointer(), dst.GetPointer(), count * stride);
}
}
g_vertex_manager->AddIndices(primitive, count); g_vertex_manager->AddIndices(primitive, count);
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride); g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);

View File

@ -104,6 +104,7 @@ VertexManagerBase::~VertexManagerBase() = default;
bool VertexManagerBase::Initialize() bool VertexManagerBase::Initialize()
{ {
m_index_generator.Init(); m_index_generator.Init();
m_cpu_cull.Init();
return true; return true;
} }
@ -117,6 +118,13 @@ void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_v
m_index_generator.AddIndices(primitive, num_vertices); m_index_generator.AddIndices(primitive, num_vertices);
} }
// Forwards the batch to the CPU culler (m_cpu_cull) and returns its verdict unchanged.
bool VertexManagerBase::AreAllVerticesCulled(VertexLoaderBase* loader,
                                             OpcodeDecoder::Primitive primitive, const u8* src,
                                             u32 count)
{
  const bool all_culled = m_cpu_cull.AreAllVerticesCulled(loader, primitive, src, count);
  return all_culled;
}
DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
u32 count, u32 stride, bool cullall) u32 count, u32 stride, bool cullall)
{ {
@ -185,6 +193,16 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer); return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
} }
// Leaves cull-all mode if it is currently active, then hands back a reader over the buffer.
// When m_cull_all is set it is cleared and ResetBuffer(stride) is called before the reader is
// built; when it is already clear, the buffer pointers are returned untouched.
DataReader VertexManagerBase::DisableCullAll(u32 stride)
{
  // Nothing to undo: report the existing buffer range as-is.
  if (!m_cull_all)
    return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);

  m_cull_all = false;
  ResetBuffer(stride);
  // NOTE(review): ResetBuffer presumably re-points m_cur_buffer_pointer/m_end_buffer_pointer,
  // which is why the reader is constructed only after the call - confirm against the backends.
  return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
}
void VertexManagerBase::FlushData(u32 count, u32 stride) void VertexManagerBase::FlushData(u32 count, u32 stride)
{ {
m_cur_buffer_pointer += count * stride; m_cur_buffer_pointer += count * stride;
@ -546,6 +564,8 @@ void VertexManagerBase::Flush()
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call // Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
// must be careful to not upload any utility vertices, as the binding will be lost otherwise. // must be careful to not upload any utility vertices, as the binding will be lost otherwise.
const u32 num_indices = m_index_generator.GetIndexLen(); const u32 num_indices = m_index_generator.GetIndexLen();
if (num_indices == 0)
return;
u32 base_vertex, base_index; u32 base_vertex, base_index;
CommitBuffer(m_index_generator.GetNumVerts(), CommitBuffer(m_index_generator.GetNumVerts(),
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices, VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,

View File

@ -9,6 +9,7 @@
#include "Common/BitSet.h" #include "Common/BitSet.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
#include "VideoCommon/CPUCull.h"
#include "VideoCommon/IndexGenerator.h" #include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/RenderState.h" #include "VideoCommon/RenderState.h"
#include "VideoCommon/ShaderCache.h" #include "VideoCommon/ShaderCache.h"
@ -100,11 +101,18 @@ public:
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
/// Returns the result of m_cpu_cull.AreAllVerticesCulled for the given loader, primitive and
/// vertex data (RunVertices passes the freshly loaded vertices and their count).
bool AreAllVerticesCulled(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive,
const u8* src, u32 count);
virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count,
u32 stride, bool cullall); u32 stride, bool cullall);
/// Switch cullall off after a call to PrepareForAdditionalData with cullall true
/// Expects that you will add a nonzero number of primitives before the next flush
/// Returns a DataReader spanning m_cur_buffer_pointer to m_end_buffer_pointer
/// (the buffer is reset first only if cullall was on; otherwise it is left untouched)
DataReader DisableCullAll(u32 stride);
void FlushData(u32 count, u32 stride); void FlushData(u32 count, u32 stride);
void Flush(); void Flush();
/// True only when neither m_is_flushed nor m_cull_all is set.
bool HasSendableVertices() const { return !(m_is_flushed || m_cull_all); }
void DoState(PointerWrap& p); void DoState(PointerWrap& p);
@ -201,6 +209,7 @@ protected:
bool m_cull_all = false; bool m_cull_all = false;
IndexGenerator m_index_generator; IndexGenerator m_index_generator;
CPUCull m_cpu_cull;
private: private:
// Minimum number of draws per command buffer when attempting to preempt a readback operation. // Minimum number of draws per command buffer when attempting to preempt a readback operation.

View File

@ -113,6 +113,7 @@ void VideoConfig::Refresh()
iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE); iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE);
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS); iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS); iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
bCPUCull = Config::Get(Config::GFX_CPU_CULL);
texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING); texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING);
iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY); iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);

View File

@ -138,6 +138,7 @@ struct VideoConfig final
bool bPerfQueriesEnable = false; bool bPerfQueriesEnable = false;
bool bBBoxEnable = false; bool bBBoxEnable = false;
bool bForceProgressive = false; bool bForceProgressive = false;
bool bCPUCull = false;
bool bEFBEmulateFormatChanges = false; bool bEFBEmulateFormatChanges = false;
bool bSkipEFBCopyToRam = false; bool bSkipEFBCopyToRam = false;