From ec28a80e00ae6893b1a71c1e0538561df78665e5 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Dec 2014 22:39:58 +0100 Subject: [PATCH 1/9] VideoLoader: remove VAT_*_FRACBITS They were used to reduce the number of flushes, but as we no longer flush on vertex loader changes (only on native vertex format changes right now), this optimization is now unneeded. This will allow us to hard-code the frac factors within the vertex loaders. --- .../VideoBackends/Software/SWVertexLoader.cpp | 2 +- Source/Core/VideoCommon/CPMemory.h | 7 --- Source/Core/VideoCommon/VertexLoader.cpp | 20 ++------ Source/Core/VideoCommon/VertexLoader.h | 11 +++-- .../Core/VideoCommon/VertexLoaderManager.cpp | 2 +- .../VideoCommon/VertexLoaderTest.cpp | 48 +++++++++++-------- 6 files changed, 40 insertions(+), 50 deletions(-) diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index ec53bc6c3b..c00e43e2fd 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -176,7 +176,7 @@ void SWVertexLoader::LoadVertex() // convert the vertex from the gc format to the videocommon (hardware optimized) format u8* old = g_video_buffer_read_ptr; m_CurrentLoader->RunVertices( - g_main_cp_state.vtx_attr[m_attributeIndex], m_primitiveType, 1, + m_primitiveType, 1, DataReader(g_video_buffer_read_ptr, nullptr), // src DataReader(m_LoadedVertices.data(), m_LoadedVertices.data() + m_LoadedVertices.size()) // dst ); diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index a256fc6849..d61fbb02c4 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -45,13 +45,6 @@ enum FORMAT_32B_8888 = 5, }; -enum -{ - VAT_0_FRACBITS = 0x3e0001f0, - VAT_1_FRACBITS = 0x07c3e1f0, - VAT_2_FRACBITS = 0xf87c3e1f, -}; - #pragma pack(4) union TVtxDesc { diff --git a/Source/Core/VideoCommon/VertexLoader.cpp
b/Source/Core/VideoCommon/VertexLoader.cpp index f293564e85..2dea025d96 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -119,6 +119,7 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) VertexLoader_TextCoord::Init(); m_VtxDesc = vtx_desc; + m_vat = vtx_attr; SetVAT(vtx_attr); #ifdef USE_VERTEX_LOADER_JIT @@ -444,21 +445,10 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value) } #endif -void VertexLoader::SetupRunVertices(const VAT& vat, int primitive, int const count) +void VertexLoader::SetupRunVertices(int primitive, int const count) { m_numLoadedVertices += count; - // Load position and texcoord scale factors. - m_VtxAttr.PosFrac = vat.g0.PosFrac; - m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac; - m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac; - m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac; - m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac; - m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac; - m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac; - m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac; - m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac; - posScale[0] = posScale[1] = posScale[2] = posScale[3] = fractionTable[m_VtxAttr.PosFrac]; if (m_native_components & VB_HAS_UVALL) for (int i = 0; i < 8; i++) @@ -468,7 +458,7 @@ void VertexLoader::SetupRunVertices(const VAT& vat, int primitive, int const cou // Prepare bounding box if (!g_ActiveConfig.backend_info.bSupportsBBox) - BoundingBox::Prepare(vat, primitive, m_VtxDesc, m_native_vtx_decl); + BoundingBox::Prepare(m_vat, primitive, m_VtxDesc, m_native_vtx_decl); } void VertexLoader::ConvertVertices ( int count ) @@ -491,11 +481,11 @@ void VertexLoader::ConvertVertices ( int count ) #endif } -int VertexLoader::RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst) +int VertexLoader::RunVertices(int primitive, int count, DataReader src, DataReader dst) { dst.WritePointer(&g_vertex_manager_write_ptr); 
src.WritePointer(&g_video_buffer_read_ptr); - SetupRunVertices(vat, primitive, count); + SetupRunVertices(primitive, count); ConvertVertices(count); return count; } diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index df1bb445a5..801978231b 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -51,9 +51,9 @@ public: { vid[0] = vtx_desc.Hex & 0xFFFFFFFF; vid[1] = vtx_desc.Hex >> 32; - vid[2] = vat.g0.Hex & ~VAT_0_FRACBITS; - vid[3] = vat.g1.Hex & ~VAT_1_FRACBITS; - vid[4] = vat.g2.Hex & ~VAT_2_FRACBITS; + vid[2] = vat.g0.Hex; + vid[3] = vat.g1.Hex; + vid[4] = vat.g2.Hex; hash = CalculateHash(); } @@ -117,8 +117,8 @@ public: const PortableVertexDeclaration& GetNativeVertexDeclaration() const { return m_native_vtx_decl; } - void SetupRunVertices(const VAT& vat, int primitive, int const count); - int RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst); + void SetupRunVertices(int primitive, int const count); + int RunVertices(int primitive, int count, DataReader src, DataReader dst); // For debugging / profiling void AppendToString(std::string *dest) const; @@ -133,6 +133,7 @@ private: // GC vertex format TVtxAttr m_VtxAttr; // VAT decoded into easy format TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided. 
+ VAT m_vat; // PC vertex format u32 m_native_components; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index cf638c2495..e1d0b194ef 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -159,7 +159,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count, loader->GetNativeVertexDeclaration().stride); - count = loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count, src, dst); + count = loader->RunVertices(primitive, count, src, dst); IndexGenerator::AddIndices(primitive, count); diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index 45d8a23ab1..44fb6dc314 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -93,10 +93,10 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) m_vtx_attr.g0.PosElements = 1; // XYZ m_vtx_attr.g0.PosFormat = 4; // Float - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoader* loader = new VertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetNativeVertexDeclaration().stride); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader->GetNativeVertexDeclaration().stride); + ASSERT_EQ(3 * sizeof (float), (u32)loader->GetVertexSize()); // Write some vertices. Input(0.0f); Input(0.0f); Input(0.0f); @@ -105,9 +105,10 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) Input(0.0f); Input(0.0f); Input(1.0f); // Convert 4 points. "7" -> primitive are points. 
- int count = loader.RunVertices(m_vtx_attr, 7, 4, src, dst); - src.Skip(4 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + int count = loader->RunVertices(7, 4, src, dst); + src.Skip(4 * loader->GetVertexSize()); + dst.Skip(count * loader->GetNativeVertexDeclaration().stride); + delete loader; ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(1.0f); ExpectOut(0.0f); ExpectOut(0.0f); @@ -117,10 +118,12 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) // Test that scale does nothing for floating point inputs. Input(1.0f); Input(2.0f); Input(4.0f); m_vtx_attr.g0.PosFrac = 1; - count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst); - src.Skip(1 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + loader = new VertexLoader(m_vtx_desc, m_vtx_attr); + count = loader->RunVertices(7, 1, src, dst); + src.Skip(1 * loader->GetVertexSize()); + dst.Skip(count * loader->GetNativeVertexDeclaration().stride); ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(4.0f); + delete loader; } TEST_F(VertexLoaderTest, PositionDirectU16XY) @@ -129,10 +132,10 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) m_vtx_attr.g0.PosElements = 0; // XY m_vtx_attr.g0.PosFormat = 2; // U16 - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoader* loader = new VertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetNativeVertexDeclaration().stride); - ASSERT_EQ(2 * sizeof (u16), (u32)loader.GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader->GetNativeVertexDeclaration().stride); + ASSERT_EQ(2 * sizeof (u16), (u32)loader->GetVertexSize()); // Write some vertices. Input(0); Input(0); @@ -142,9 +145,10 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) Input(12345); Input(54321); // Convert 5 points. "7" -> primitive are points. 
- int count = loader.RunVertices(m_vtx_attr, 7, 5, src, dst); - src.Skip(5 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + int count = loader->RunVertices(7, 5, src, dst); + src.Skip(5 * loader->GetVertexSize()); + dst.Skip(count * loader->GetNativeVertexDeclaration().stride); + delete loader; ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(0.0f); @@ -155,10 +159,12 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) // Test that scale works on U16 inputs. Input(42); Input(24); m_vtx_attr.g0.PosFrac = 1; - count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst); - src.Skip(1 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + loader = new VertexLoader(m_vtx_desc, m_vtx_attr); + count = loader->RunVertices(7, 1, src, dst); + src.Skip(1 * loader->GetVertexSize()); + dst.Skip(count * loader->GetNativeVertexDeclaration().stride); ExpectOut(21.0f); ExpectOut(12.0f); ExpectOut(0.0f); + delete loader; } TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed) @@ -175,7 +181,7 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed) for (int i = 0; i < 1000; ++i) { ResetPointers(); - int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); + int count = loader.RunVertices(7, 100000, src, dst); src.Skip(100000 * loader.GetVertexSize()); dst.Skip(count * loader.GetNativeVertexDeclaration().stride); } @@ -195,7 +201,7 @@ TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed) for (int i = 0; i < 1000; ++i) { ResetPointers(); - int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); + int count = loader.RunVertices(7, 100000, src, dst); src.Skip(100000 * loader.GetVertexSize()); dst.Skip(count * loader.GetNativeVertexDeclaration().stride); } @@ -258,7 +264,7 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) for (int i = 0; i < 100; ++i) { ResetPointers(); - int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); + int count = 
loader.RunVertices(7, 100000, src, dst); src.Skip(100000 * loader.GetVertexSize()); dst.Skip(count * loader.GetNativeVertexDeclaration().stride); } From 12817989926f8c4e7efe2da3880d1b182fa1a602 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 11 Dec 2014 22:47:56 +0100 Subject: [PATCH 2/9] VertexLoaderUid: remove operator< Not needed for unordered map. --- .../VideoBackends/Software/SWVertexLoader.h | 2 +- Source/Core/VideoCommon/VertexLoader.h | 30 +++++++------------ .../Core/VideoCommon/VertexLoaderManager.cpp | 14 --------- 3 files changed, 12 insertions(+), 34 deletions(-) diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index 4a5d38d52a..fe4e5347ba 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -28,7 +28,7 @@ class SWVertexLoader bool m_TexGenSpecialCase; - std::map<VertexLoaderUID, std::unique_ptr<VertexLoader>> m_VertexLoaderMap; + std::unordered_map<VertexLoaderUID, std::unique_ptr<VertexLoader>> m_VertexLoaderMap; std::vector<u8> m_LoadedVertices; VertexLoader* m_CurrentLoader; diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 801978231b..4594e43d47 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -57,25 +57,6 @@ public: hash = CalculateHash(); } - bool operator < (const VertexLoaderUID &other) const - { - // This is complex because of speed.
- if (vid[0] < other.vid[0]) - return true; - else if (vid[0] > other.vid[0]) - return false; - - for (int i = 1; i < 5; ++i) - { - if (vid[i] < other.vid[i]) - return true; - else if (vid[i] > other.vid[i]) - return false; - } - - return false; - } - bool operator == (const VertexLoaderUID& rh) const { return hash == rh.hash && std::equal(vid, vid + sizeof(vid) / sizeof(vid[0]), rh.vid); @@ -101,6 +82,17 @@ private: } }; +namespace std +{ +template <> struct hash<VertexLoaderUID> +{ + size_t operator()(const VertexLoaderUID& uid) const + { + return uid.GetHash(); + } +}; +} + // ARMTODO: This should be done in a better way #ifndef _M_GENERIC class VertexLoader : public Gen::X64CodeBlock diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index e1d0b194ef..1bfd05cace 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -23,20 +23,6 @@ static NativeVertexFormat* s_current_vtx_fmt; -namespace std -{ - -template <> -struct hash<VertexLoaderUID> -{ - size_t operator()(const VertexLoaderUID& uid) const - { - return uid.GetHash(); - } -}; - -} - typedef std::unordered_map<VertexLoaderUID, std::unique_ptr<VertexLoader>> VertexLoaderMap; namespace VertexLoaderManager From 6e3b2712d2fafcd8fae7ba3653207980685708e6 Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 12 Dec 2014 08:53:48 +0100 Subject: [PATCH 3/9] VideoCommon: Move NativeVertexFormat cache to VertexLoaderManager --- Source/Core/VideoCommon/VertexLoader.cpp | 19 ------------- Source/Core/VideoCommon/VertexLoader.h | 7 +---- .../Core/VideoCommon/VertexLoaderManager.cpp | 28 ++++++++++++++----- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index 2dea025d96..ddf5097986 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -591,22 +591,3 @@ void VertexLoader::AppendToString(std::string *dest) const }
dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices)); } - -NativeVertexFormat* VertexLoader::GetNativeVertexFormat() -{ - if (m_native_vertex_format) - return m_native_vertex_format; - auto& native = s_native_vertex_map[m_native_vtx_decl]; - if (!native) - { - auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat(); - native = std::unique_ptr(raw_pointer); - native->Initialize(m_native_vtx_decl); - native->m_components = m_native_components; - } - m_native_vertex_format = native.get(); - return native.get(); - -} - -std::unordered_map> VertexLoader::s_native_vertex_map; diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 4594e43d47..92adfbeb54 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -8,9 +8,7 @@ // Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt #include -#include #include -#include #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" @@ -116,8 +114,7 @@ public: void AppendToString(std::string *dest) const; int GetNumLoadedVerts() const { return m_numLoadedVertices; } - NativeVertexFormat* GetNativeVertexFormat(); - static void ClearNativeVertexFormatCache() { s_native_vertex_map.clear(); } + NativeVertexFormat* m_native_vertex_format; // used by VertexLoaderManager to cache the NativeVertexFormat objects private: int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator. 
@@ -141,8 +138,6 @@ private: int m_numLoadedVertices; - NativeVertexFormat* m_native_vertex_format; - static std::unordered_map> s_native_vertex_map; void SetVAT(const VAT& vat); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 1bfd05cace..48f924d9d4 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -21,13 +21,16 @@ #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" -static NativeVertexFormat* s_current_vtx_fmt; -typedef std::unordered_map> VertexLoaderMap; namespace VertexLoaderManager { +typedef std::unordered_map> NativeVertexFormatMap; +static NativeVertexFormatMap s_native_vertex_map; +static NativeVertexFormat* s_current_vtx_fmt; + +typedef std::unordered_map> VertexLoaderMap; static std::mutex s_vertex_loader_map_lock; static VertexLoaderMap s_vertex_loader_map; // TODO - change into array of pointers. Keep a map of all seen so far. 
@@ -46,7 +49,7 @@ void Shutdown() { std::lock_guard lk(s_vertex_loader_map_lock); s_vertex_loader_map.clear(); - VertexLoader::ClearNativeVertexFormatCache(); + s_native_vertex_map.clear(); } namespace @@ -106,6 +109,19 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state) { loader = new VertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]); s_vertex_loader_map[uid] = std::unique_ptr(loader); + + // search for a cached native vertex format + const PortableVertexDeclaration& format = loader->GetNativeVertexDeclaration(); + auto& native = s_native_vertex_map[format]; + if (!native) + { + auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat(); + native = std::unique_ptr(raw_pointer); + native->Initialize(format); + native->m_components = loader->GetNativeComponents(); + } + loader->m_native_vertex_format = native.get(); + INCSTAT(stats.numVertexLoaders); } state->vertex_loaders[vtx_attr_group] = loader; @@ -135,12 +151,10 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo return size; } - NativeVertexFormat* native = loader->GetNativeVertexFormat(); - // If the native vertex format changed, force a flush. - if (native != s_current_vtx_fmt) + if (loader->m_native_vertex_format != s_current_vtx_fmt) VertexManager::Flush(); - s_current_vtx_fmt = native; + s_current_vtx_fmt = loader->m_native_vertex_format; DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count, loader->GetNativeVertexDeclaration().stride); From a71c8158d91c7ee0976b9b16eeba5ea3ac18ff99 Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 13 Dec 2014 00:23:54 +0100 Subject: [PATCH 4/9] VertexLoader: remove inlined getters They just blow up the code. 
--- .../VideoBackends/Software/SWVertexLoader.cpp | 6 +-- Source/Core/VideoCommon/VertexLoader.h | 24 ++++------ .../Core/VideoCommon/VertexLoaderManager.cpp | 14 +++--- .../VideoCommon/VertexLoaderTest.cpp | 48 +++++++++---------- 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index c00e43e2fd..fecfb94abe 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -46,7 +46,7 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType) m_VertexLoaderMap[uid] = std::unique_ptr(m_CurrentLoader); } - m_VertexSize = m_CurrentLoader->GetVertexSize(); + m_VertexSize = m_CurrentLoader->m_VertexSize; m_CurrentVat = &g_main_cp_state.vtx_attr[m_attributeIndex]; @@ -168,7 +168,7 @@ void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec) void SWVertexLoader::LoadVertex() { - const PortableVertexDeclaration& vdec = m_CurrentLoader->GetNativeVertexDeclaration(); + const PortableVertexDeclaration& vdec = m_CurrentLoader->m_native_vtx_decl; // reserve memory for the destination of the vertex loader m_LoadedVertices.resize(vdec.stride + 4); @@ -180,7 +180,7 @@ void SWVertexLoader::LoadVertex() DataReader(g_video_buffer_read_ptr, nullptr), // src DataReader(m_LoadedVertices.data(), m_LoadedVertices.data() + m_LoadedVertices.size()) // dst ); - g_video_buffer_read_ptr = old + m_CurrentLoader->GetVertexSize(); + g_video_buffer_read_ptr = old + m_CurrentLoader->m_VertexSize; // parse the videocommon format to our own struct format (m_Vertex) ParseVertex(vdec); diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 92adfbeb54..0e1f60792a 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -102,32 +102,27 @@ public: VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); 
~VertexLoader(); - int GetVertexSize() const {return m_VertexSize;} - u32 GetNativeComponents() const { return m_native_components; } - const PortableVertexDeclaration& GetNativeVertexDeclaration() const - { return m_native_vtx_decl; } - void SetupRunVertices(int primitive, int const count); int RunVertices(int primitive, int count, DataReader src, DataReader dst); // For debugging / profiling void AppendToString(std::string *dest) const; - int GetNumLoadedVerts() const { return m_numLoadedVertices; } - NativeVertexFormat* m_native_vertex_format; // used by VertexLoaderManager to cache the NativeVertexFormat objects + // per loader public state + int m_VertexSize; // number of bytes of a raw GC vertex + PortableVertexDeclaration m_native_vtx_decl; + u32 m_native_components; + + // used by VertexLoaderManager + NativeVertexFormat* m_native_vertex_format; + int m_numLoadedVertices; private: - int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator. - // GC vertex format TVtxAttr m_VtxAttr; // VAT decoded into easy format TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided. VAT m_vat; - // PC vertex format - u32 m_native_components; - PortableVertexDeclaration m_native_vtx_decl; - #ifndef USE_VERTEX_LOADER_JIT // Pipeline. TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower. 
@@ -136,9 +131,6 @@ private: const u8 *m_compiledCode; - int m_numLoadedVertices; - - void SetVAT(const VAT& vat); void CompileVertexTranslator(); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 48f924d9d4..98f92e3d10 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -75,7 +75,7 @@ void AppendListToString(std::string *dest) { entry e; map_entry.second->AppendToString(&e.text); - e.num_verts = map_entry.second->GetNumLoadedVerts(); + e.num_verts = map_entry.second->m_numLoadedVertices; entries.push_back(e); total_size += e.text.size() + 1; } @@ -111,14 +111,14 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state) s_vertex_loader_map[uid] = std::unique_ptr(loader); // search for a cached native vertex format - const PortableVertexDeclaration& format = loader->GetNativeVertexDeclaration(); + const PortableVertexDeclaration& format = loader->m_native_vtx_decl; auto& native = s_native_vertex_map[format]; if (!native) { auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat(); native = std::unique_ptr(raw_pointer); native->Initialize(format); - native->m_components = loader->GetNativeComponents(); + native->m_components = loader->m_native_components; } loader->m_native_vertex_format = native.get(); @@ -141,7 +141,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo VertexLoader* loader = RefreshLoader(vtx_attr_group, state); - int size = count * loader->GetVertexSize(); + int size = count * loader->m_VertexSize; if ((int)src.size() < size) return -1; @@ -157,13 +157,13 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo s_current_vtx_fmt = loader->m_native_vertex_format; DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count, - loader->GetNativeVertexDeclaration().stride); + loader->m_native_vtx_decl.stride); count = 
loader->RunVertices(primitive, count, src, dst); IndexGenerator::AddIndices(primitive, count); - VertexManager::FlushData(count, loader->GetNativeVertexDeclaration().stride); + VertexManager::FlushData(count, loader->m_native_vtx_decl.stride); ADDSTAT(stats.thisFrame.numPrims, count); INCSTAT(stats.thisFrame.numPrimitiveJoins); @@ -172,7 +172,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo int GetVertexSize(int vtx_attr_group, bool preprocess) { - return RefreshLoader(vtx_attr_group, preprocess ? &g_preprocess_cp_state : &g_main_cp_state)->GetVertexSize(); + return RefreshLoader(vtx_attr_group, preprocess ? &g_preprocess_cp_state : &g_main_cp_state)->m_VertexSize; } NativeVertexFormat* GetCurrentVertexFormat() diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index 44fb6dc314..d1032ae6f9 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -1,4 +1,4 @@ -#include +#include #include "Common/Common.h" #include "VideoCommon/DataReader.h" @@ -10,7 +10,7 @@ TEST(VertexLoaderUID, UniqueEnough) { - std::set uids; + std::unordered_set uids; TVtxDesc vtx_desc; memset(&vtx_desc, 0, sizeof (vtx_desc)); @@ -95,8 +95,8 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) VertexLoader* loader = new VertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader->GetNativeVertexDeclaration().stride); - ASSERT_EQ(3 * sizeof (float), (u32)loader->GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_VertexSize); // Write some vertices. Input(0.0f); Input(0.0f); Input(0.0f); @@ -106,8 +106,8 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) // Convert 4 points. "7" -> primitive are points. 
int count = loader->RunVertices(7, 4, src, dst); - src.Skip(4 * loader->GetVertexSize()); - dst.Skip(count * loader->GetNativeVertexDeclaration().stride); + src.Skip(4 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); delete loader; ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f); @@ -120,8 +120,8 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) m_vtx_attr.g0.PosFrac = 1; loader = new VertexLoader(m_vtx_desc, m_vtx_attr); count = loader->RunVertices(7, 1, src, dst); - src.Skip(1 * loader->GetVertexSize()); - dst.Skip(count * loader->GetNativeVertexDeclaration().stride); + src.Skip(1 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(4.0f); delete loader; } @@ -134,8 +134,8 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) VertexLoader* loader = new VertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader->GetNativeVertexDeclaration().stride); - ASSERT_EQ(2 * sizeof (u16), (u32)loader->GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); + ASSERT_EQ(2 * sizeof (u16), (u32)loader->m_VertexSize); // Write some vertices. Input(0); Input(0); @@ -146,8 +146,8 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) // Convert 5 points. "7" -> primitive are points. 
int count = loader->RunVertices(7, 5, src, dst); - src.Skip(5 * loader->GetVertexSize()); - dst.Skip(count * loader->GetNativeVertexDeclaration().stride); + src.Skip(5 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); delete loader; ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f); @@ -161,8 +161,8 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) m_vtx_attr.g0.PosFrac = 1; loader = new VertexLoader(m_vtx_desc, m_vtx_attr); count = loader->RunVertices(7, 1, src, dst); - src.Skip(1 * loader->GetVertexSize()); - dst.Skip(count * loader->GetNativeVertexDeclaration().stride); + src.Skip(1 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); ExpectOut(21.0f); ExpectOut(12.0f); ExpectOut(0.0f); delete loader; } @@ -175,15 +175,15 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed) VertexLoader loader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetNativeVertexDeclaration().stride); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader.m_native_vtx_decl.stride); + ASSERT_EQ(3 * sizeof (float), (u32)loader.m_VertexSize); for (int i = 0; i < 1000; ++i) { ResetPointers(); int count = loader.RunVertices(7, 100000, src, dst); - src.Skip(100000 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + src.Skip(100000 * loader.m_VertexSize); + dst.Skip(count * loader.m_native_vtx_decl.stride); } } @@ -195,15 +195,15 @@ TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed) VertexLoader loader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetNativeVertexDeclaration().stride); - ASSERT_EQ(2 * sizeof (u16), (u32)loader.GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader.m_native_vtx_decl.stride); + ASSERT_EQ(2 * sizeof (u16), (u32)loader.m_VertexSize); for (int i = 0; i < 1000; ++i) { ResetPointers(); int count = loader.RunVertices(7, 100000, src, dst); - src.Skip(100000 * 
loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + src.Skip(100000 * loader.m_VertexSize); + dst.Skip(count * loader.m_native_vtx_decl.stride); } } @@ -265,7 +265,7 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) { ResetPointers(); int count = loader.RunVertices(7, 100000, src, dst); - src.Skip(100000 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + src.Skip(100000 * loader.m_VertexSize); + dst.Skip(count * loader.m_native_vtx_decl.stride); } } From 809117102e1c8007345c48aba5eabfec9cff26ee Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 13 Dec 2014 01:51:14 +0100 Subject: [PATCH 5/9] VideoCommon: split VertexLoaderBase from VertexLoader --- Source/Core/VideoBackends/OGL/Render.cpp | 1 - Source/Core/VideoBackends/OGL/main.cpp | 1 - .../Software/NativeVertexFormat.h | 8 - .../VideoBackends/Software/SWVertexLoader.cpp | 12 +- .../VideoBackends/Software/SWVertexLoader.h | 6 +- Source/Core/VideoCommon/BPStructs.cpp | 1 - Source/Core/VideoCommon/CMakeLists.txt | 1 + Source/Core/VideoCommon/CPMemory.h | 4 +- Source/Core/VideoCommon/NativeVertexFormat.h | 8 - Source/Core/VideoCommon/VertexLoader.cpp | 112 +------------- Source/Core/VideoCommon/VertexLoader.h | 97 +++--------- Source/Core/VideoCommon/VertexLoaderBase.cpp | 139 ++++++++++++++++++ Source/Core/VideoCommon/VertexLoaderBase.h | 103 +++++++++++++ .../Core/VideoCommon/VertexLoaderManager.cpp | 14 +- Source/Core/VideoCommon/VideoCommon.vcxproj | 2 + .../VideoCommon/VideoCommon.vcxproj.filters | 6 + .../VideoCommon/VertexLoaderTest.cpp | 45 +++--- 17 files changed, 309 insertions(+), 251 deletions(-) create mode 100644 Source/Core/VideoCommon/VertexLoaderBase.cpp create mode 100644 Source/Core/VideoCommon/VertexLoaderBase.h diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index af344c344c..a03f8cd61d 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ 
b/Source/Core/VideoBackends/OGL/Render.cpp @@ -43,7 +43,6 @@ #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/PixelEngine.h" #include "VideoCommon/Statistics.h" -#include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderGen.h" #include "VideoCommon/VertexShaderManager.h" diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 8642d78cc1..6969b97ba4 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -74,7 +74,6 @@ Make AA apply instantly during gameplay if possible #include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelShaderManager.h" -#include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" diff --git a/Source/Core/VideoBackends/Software/NativeVertexFormat.h b/Source/Core/VideoBackends/Software/NativeVertexFormat.h index ad71e2caf9..c4f140cac4 100644 --- a/Source/Core/VideoBackends/Software/NativeVertexFormat.h +++ b/Source/Core/VideoBackends/Software/NativeVertexFormat.h @@ -7,14 +7,6 @@ #include "Common/ChunkFile.h" #include "VideoBackends/Software/Vec3.h" -#ifdef WIN32 -#define LOADERDECL __cdecl -#else -#define LOADERDECL -#endif - -typedef void (LOADERDECL *TPipelineFunction)(); - struct Vec4 { float x; diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index fecfb94abe..2523b5b01a 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -13,12 +13,8 @@ #include "VideoBackends/Software/TransformUnit.h" #include "VideoBackends/Software/XFMemLoader.h" -#include "VideoCommon/VertexLoader.h" -#include "VideoCommon/VertexLoader_Color.h" -#include "VideoCommon/VertexLoader_Normal.h" -#include 
"VideoCommon/VertexLoader_Position.h" -#include "VideoCommon/VertexLoader_TextCoord.h" -#include "VideoCommon/VertexManagerBase.h" +#include "VideoCommon/VertexLoaderBase.h" +#include "VideoCommon/VertexLoaderUtils.h" SWVertexLoader::SWVertexLoader() : m_VertexSize(0) @@ -42,8 +38,8 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType) if (!m_CurrentLoader) { - m_CurrentLoader = new VertexLoader(g_main_cp_state.vtx_desc, g_main_cp_state.vtx_attr[m_attributeIndex]); - m_VertexLoaderMap[uid] = std::unique_ptr(m_CurrentLoader); + m_CurrentLoader = VertexLoaderBase::CreateVertexLoader(g_main_cp_state.vtx_desc, g_main_cp_state.vtx_attr[m_attributeIndex]); + m_VertexLoaderMap[uid] = std::unique_ptr(m_CurrentLoader); } m_VertexSize = m_CurrentLoader->m_VertexSize; diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index fe4e5347ba..890d1fa52d 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -9,7 +9,7 @@ #include "VideoBackends/Software/CPMemLoader.h" #include "VideoBackends/Software/NativeVertexFormat.h" -#include "VideoCommon/VertexLoader.h" +#include "VideoCommon/VertexLoaderBase.h" class PointerWrap; class SetupUnit; @@ -28,9 +28,9 @@ class SWVertexLoader bool m_TexGenSpecialCase; - std::unordered_map> m_VertexLoaderMap; + std::unordered_map> m_VertexLoaderMap; std::vector m_LoadedVertices; - VertexLoader* m_CurrentLoader; + VertexLoaderBase* m_CurrentLoader; u8 m_attributeIndex; u8 m_primitiveType; diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 6c4ad70331..44f6964cfb 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -21,7 +21,6 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TextureDecoder.h" -#include "VideoCommon/VertexLoader.h" #include 
"VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index 95ebaf7433..a450a8e379 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -28,6 +28,7 @@ set(SRCS BoundingBox.cpp TextureConversionShader.cpp TextureDecoder_Common.cpp VertexLoader.cpp + VertexLoaderBase.cpp VertexLoaderManager.cpp VertexLoader_Color.cpp VertexLoader_Normal.cpp diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index d61fbb02c4..e348f7f38d 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -232,7 +232,7 @@ struct VAT UVAT_group2 g2; }; -class VertexLoader; +class VertexLoaderBase; // STATE_TO_SAVE struct CPState final @@ -247,7 +247,7 @@ struct CPState final // Attributes that actually belong to VertexLoaderManager: BitSet32 attr_dirty; - VertexLoader* vertex_loaders[8]; + VertexLoaderBase* vertex_loaders[8]; }; class PointerWrap; diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index 612f671f5d..024f4f070d 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -44,14 +44,6 @@ enum VB_HAS_UVTEXMTXSHIFT=13, }; -#ifdef WIN32 -#define LOADERDECL __cdecl -#else -#define LOADERDECL -#endif - -typedef void (LOADERDECL *TPipelineFunction)(); - enum VarType { VAR_UNSIGNED_BYTE, // GX_U8 = 0 diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index ddf5097986..32cb95607b 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -4,7 +4,6 @@ #include "Common/CommonTypes.h" #include "Common/MemoryUtil.h" -#include "Common/StringUtil.h" #include "Common/x64ABI.h" #include "Common/x64Emitter.h" @@ -109,19 +108,13 @@ static void 
LOADERDECL TexMtx_Write_Float4() } VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) +: VertexLoaderBase(vtx_desc, vtx_attr) { m_compiledCode = nullptr; - m_numLoadedVertices = 0; - m_VertexSize = 0; - m_native_vertex_format = nullptr; VertexLoader_Normal::Init(); VertexLoader_Position::Init(); VertexLoader_TextCoord::Init(); - m_VtxDesc = vtx_desc; - m_vat = vtx_attr; - SetVAT(vtx_attr); - #ifdef USE_VERTEX_LOADER_JIT AllocCodeSpace(COMPILED_CODE_SIZE); CompileVertexTranslator(); @@ -130,7 +123,6 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) m_numPipelineStages = 0; CompileVertexTranslator(); #endif - } VertexLoader::~VertexLoader() @@ -489,105 +481,3 @@ int VertexLoader::RunVertices(int primitive, int count, DataReader src, DataRead ConvertVertices(count); return count; } - -void VertexLoader::SetVAT(const VAT& vat) -{ - m_VtxAttr.PosElements = vat.g0.PosElements; - m_VtxAttr.PosFormat = vat.g0.PosFormat; - m_VtxAttr.PosFrac = vat.g0.PosFrac; - m_VtxAttr.NormalElements = vat.g0.NormalElements; - m_VtxAttr.NormalFormat = vat.g0.NormalFormat; - m_VtxAttr.color[0].Elements = vat.g0.Color0Elements; - m_VtxAttr.color[0].Comp = vat.g0.Color0Comp; - m_VtxAttr.color[1].Elements = vat.g0.Color1Elements; - m_VtxAttr.color[1].Comp = vat.g0.Color1Comp; - m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements; - m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat; - m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac; - m_VtxAttr.ByteDequant = vat.g0.ByteDequant; - m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3; - - m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements; - m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat; - m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac; - m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements; - m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat; - m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac; - m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements; - 
m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat; - m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac; - m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements; - m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat; - - m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac; - m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements; - m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat; - m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac; - m_VtxAttr.texCoord[6].Elements = vat.g2.Tex6CoordElements; - m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat; - m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac; - m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements; - m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat; - m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac; - - if (!m_VtxAttr.ByteDequant) - { - ERROR_LOG(VIDEO, "ByteDequant is set to zero"); - } -}; - -void VertexLoader::AppendToString(std::string *dest) const -{ - dest->reserve(250); - static const char *posMode[4] = { - "Inv", - "Dir", - "I8", - "I16", - }; - static const char *posFormats[5] = { - "u8", "s8", "u16", "s16", "flt", - }; - static const char *colorFormat[8] = { - "565", - "888", - "888x", - "4444", - "6666", - "8888", - "Inv", - "Inv", - }; - - dest->append(StringFromFormat("%ib skin: %i P: %i %s-%s ", - m_VertexSize, (u32)m_VtxDesc.PosMatIdx, - m_VtxAttr.PosElements ? 
3 : 2, posMode[m_VtxDesc.Position], posFormats[m_VtxAttr.PosFormat])); - - if (m_VtxDesc.Normal) - { - dest->append(StringFromFormat("Nrm: %i %s-%s ", - m_VtxAttr.NormalElements, posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat])); - } - - u64 color_mode[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1}; - for (int i = 0; i < 2; i++) - { - if (color_mode[i]) - { - dest->append(StringFromFormat("C%i: %i %s-%s ", i, m_VtxAttr.color[i].Elements, posMode[color_mode[i]], colorFormat[m_VtxAttr.color[i].Comp])); - } - } - u64 tex_mode[8] = { - m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord, - m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord - }; - for (int i = 0; i < 8; i++) - { - if (tex_mode[i]) - { - dest->append(StringFromFormat("T%i: %i %s-%s ", - i, m_VtxAttr.texCoord[i].Elements, posMode[tex_mode[i]], posFormats[m_VtxAttr.texCoord[i].Format])); - } - } - dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices)); -} diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 0e1f60792a..0d886735ca 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -16,6 +16,7 @@ #include "VideoCommon/CPMemory.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderUtils.h" #if _M_SSE >= 0x401 @@ -29,6 +30,14 @@ #define USE_VERTEX_LOADER_JIT #endif +#ifdef WIN32 +#define LOADERDECL __cdecl +#else +#define LOADERDECL +#endif + +typedef void (LOADERDECL *TPipelineFunction)(); + // They are used for the communication with the loader functions extern int tcIndex; extern int colIndex; @@ -36,105 +45,31 @@ extern int colElements[2]; GC_ALIGNED128(extern float posScale[4]); GC_ALIGNED64(extern float tcScale[8][2]); -class VertexLoaderUID -{ - u32 vid[5]; - size_t hash; -public: - VertexLoaderUID() - { - } - - 
VertexLoaderUID(const TVtxDesc& vtx_desc, const VAT& vat) - { - vid[0] = vtx_desc.Hex & 0xFFFFFFFF; - vid[1] = vtx_desc.Hex >> 32; - vid[2] = vat.g0.Hex; - vid[3] = vat.g1.Hex; - vid[4] = vat.g2.Hex; - hash = CalculateHash(); - } - - bool operator == (const VertexLoaderUID& rh) const - { - return hash == rh.hash && std::equal(vid, vid + sizeof(vid) / sizeof(vid[0]), rh.vid); - } - - size_t GetHash() const - { - return hash; - } - -private: - - size_t CalculateHash() - { - size_t h = -1; - - for (auto word : vid) - { - h = h * 137 + word; - } - - return h; - } -}; - -namespace std -{ -template <> struct hash -{ - size_t operator()(const VertexLoaderUID& uid) const - { - return uid.GetHash(); - } -}; -} - // ARMTODO: This should be done in a better way #ifndef _M_GENERIC -class VertexLoader : public Gen::X64CodeBlock +class VertexLoader : public Gen::X64CodeBlock, public VertexLoaderBase #else -class VertexLoader +class VertexLoader : public VertexLoaderBase #endif { public: VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); ~VertexLoader(); - void SetupRunVertices(int primitive, int const count); - int RunVertices(int primitive, int count, DataReader src, DataReader dst); - - // For debugging / profiling - void AppendToString(std::string *dest) const; - - // per loader public state - int m_VertexSize; // number of bytes of a raw GC vertex - PortableVertexDeclaration m_native_vtx_decl; - u32 m_native_components; - - // used by VertexLoaderManager - NativeVertexFormat* m_native_vertex_format; - int m_numLoadedVertices; + int RunVertices(int primitive, int count, DataReader src, DataReader dst) override; + std::string GetName() const override { return "OldLoader"; } + bool IsInitialized() override { return true; } // This vertex loader supports all formats private: - // GC vertex format - TVtxAttr m_VtxAttr; // VAT decoded into easy format - TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided. 
- VAT m_vat; - #ifndef USE_VERTEX_LOADER_JIT // Pipeline. TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower. int m_numPipelineStages; #endif - const u8 *m_compiledCode; - - void SetVAT(const VAT& vat); - void CompileVertexTranslator(); void ConvertVertices(int count); + void SetupRunVertices(int primitive, int const count); void WriteCall(TPipelineFunction); @@ -142,6 +77,8 @@ private: void WriteGetVariable(int bits, Gen::OpArg dest, void *address); void WriteSetVariable(int bits, void *address, Gen::OpArg dest); #endif + + const u8 *m_compiledCode; }; #if _M_SSE >= 0x301 diff --git a/Source/Core/VideoCommon/VertexLoaderBase.cpp b/Source/Core/VideoCommon/VertexLoaderBase.cpp new file mode 100644 index 0000000000..4372729506 --- /dev/null +++ b/Source/Core/VideoCommon/VertexLoaderBase.cpp @@ -0,0 +1,139 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/StringUtil.h" + +#include "VideoCommon/VertexLoader.h" +#include "VideoCommon/VertexLoaderBase.h" + +VertexLoaderBase::VertexLoaderBase(const TVtxDesc &vtx_desc, const VAT &vtx_attr) +{ + m_numLoadedVertices = 0; + m_VertexSize = 0; + m_native_vertex_format = nullptr; + + SetVAT(vtx_attr); + m_VtxDesc = vtx_desc; + m_vat = vtx_attr; +} + +void VertexLoaderBase::SetVAT(const VAT& vat) +{ + m_VtxAttr.PosElements = vat.g0.PosElements; + m_VtxAttr.PosFormat = vat.g0.PosFormat; + m_VtxAttr.PosFrac = vat.g0.PosFrac; + m_VtxAttr.NormalElements = vat.g0.NormalElements; + m_VtxAttr.NormalFormat = vat.g0.NormalFormat; + m_VtxAttr.color[0].Elements = vat.g0.Color0Elements; + m_VtxAttr.color[0].Comp = vat.g0.Color0Comp; + m_VtxAttr.color[1].Elements = vat.g0.Color1Elements; + m_VtxAttr.color[1].Comp = vat.g0.Color1Comp; + m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements; + m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat; + m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac; + m_VtxAttr.ByteDequant = 
vat.g0.ByteDequant; + m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3; + + m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements; + m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat; + m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac; + m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements; + m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat; + m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac; + m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements; + m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat; + m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac; + m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements; + m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat; + + m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac; + m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements; + m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat; + m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac; + m_VtxAttr.texCoord[6].Elements = vat.g2.Tex6CoordElements; + m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat; + m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac; + m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements; + m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat; + m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac; + + if (!m_VtxAttr.ByteDequant) + { + ERROR_LOG(VIDEO, "ByteDequant is set to zero"); + } +}; + +void VertexLoaderBase::AppendToString(std::string *dest) const +{ + dest->reserve(250); + + dest->append(GetName()); + dest->append(": "); + + static const char *posMode[4] = { + "Inv", + "Dir", + "I8", + "I16", + }; + static const char *posFormats[5] = { + "u8", "s8", "u16", "s16", "flt", + }; + static const char *colorFormat[8] = { + "565", + "888", + "888x", + "4444", + "6666", + "8888", + "Inv", + "Inv", + }; + + dest->append(StringFromFormat("%ib skin: %i P: %i %s-%s ", + m_VertexSize, (u32)m_VtxDesc.PosMatIdx, + m_VtxAttr.PosElements ? 
3 : 2, posMode[m_VtxDesc.Position], posFormats[m_VtxAttr.PosFormat])); + + if (m_VtxDesc.Normal) + { + dest->append(StringFromFormat("Nrm: %i %s-%s ", + m_VtxAttr.NormalElements, posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat])); + } + + u64 color_mode[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1}; + for (int i = 0; i < 2; i++) + { + if (color_mode[i]) + { + dest->append(StringFromFormat("C%i: %i %s-%s ", i, m_VtxAttr.color[i].Elements, posMode[color_mode[i]], colorFormat[m_VtxAttr.color[i].Comp])); + } + } + u64 tex_mode[8] = { + m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord, + m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord + }; + for (int i = 0; i < 8; i++) + { + if (tex_mode[i]) + { + dest->append(StringFromFormat("T%i: %i %s-%s ", + i, m_VtxAttr.texCoord[i].Elements, posMode[tex_mode[i]], posFormats[m_VtxAttr.texCoord[i].Format])); + } + } + dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices)); +} + +VertexLoaderBase* VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc, const VAT& vtx_attr) +{ + VertexLoaderBase* loader; + + // last try: The old VertexLoader + loader = new VertexLoader(vtx_desc, vtx_attr); + if (loader->IsInitialized()) + return loader; + delete loader; + + PanicAlert("No Vertex Loader found."); + return nullptr; +} diff --git a/Source/Core/VideoCommon/VertexLoaderBase.h b/Source/Core/VideoCommon/VertexLoaderBase.h new file mode 100644 index 0000000000..7da2d866f9 --- /dev/null +++ b/Source/Core/VideoCommon/VertexLoaderBase.h @@ -0,0 +1,103 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "Common/CommonTypes.h" + +#include "VideoCommon/CPMemory.h" +#include "VideoCommon/DataReader.h" +#include "VideoCommon/NativeVertexFormat.h" + +class VertexLoaderUID +{ + std::array vid; + size_t hash; +public: + VertexLoaderUID() + { + } + + VertexLoaderUID(const TVtxDesc& vtx_desc, const VAT& vat) + { + vid[0] = vtx_desc.Hex & 0xFFFFFFFF; + vid[1] = vtx_desc.Hex >> 32; + vid[2] = vat.g0.Hex; + vid[3] = vat.g1.Hex; + vid[4] = vat.g2.Hex; + hash = CalculateHash(); + } + + bool operator == (const VertexLoaderUID& rh) const + { + return vid == rh.vid; + } + + size_t GetHash() const + { + return hash; + } + +private: + + size_t CalculateHash() const + { + size_t h = -1; + + for (auto word : vid) + { + h = h * 137 + word; + } + + return h; + } +}; + +namespace std +{ +template <> struct hash +{ + size_t operator()(const VertexLoaderUID& uid) const + { + return uid.GetHash(); + } +}; +} + +class VertexLoaderBase +{ +public: + static VertexLoaderBase* CreateVertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); + virtual ~VertexLoaderBase() {}; + + virtual int RunVertices(int primitive, int count, DataReader src, DataReader dst) = 0; + + virtual bool IsInitialized() = 0; + + // For debugging / profiling + void AppendToString(std::string *dest) const; + + virtual std::string GetName() const = 0; + + // per loader public state + int m_VertexSize; // number of bytes of a raw GC vertex + PortableVertexDeclaration m_native_vtx_decl; + u32 m_native_components; + + // used by VertexLoaderManager + NativeVertexFormat* m_native_vertex_format; + int m_numLoadedVertices; + +protected: + VertexLoaderBase(const TVtxDesc &vtx_desc, const VAT &vtx_attr); + void SetVAT(const VAT& vat); + + // GC vertex format + TVtxAttr m_VtxAttr; // VAT decoded into easy format + TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided. 
+ VAT m_vat; +}; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 98f92e3d10..0ab87b323d 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -15,7 +15,7 @@ #include "VideoCommon/BPMemory.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/Statistics.h" -#include "VideoCommon/VertexLoader.h" +#include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" @@ -30,7 +30,7 @@ typedef std::unordered_map> VertexLoaderMap; +typedef std::unordered_map> VertexLoaderMap; static std::mutex s_vertex_loader_map_lock; static VertexLoaderMap s_vertex_loader_map; // TODO - change into array of pointers. Keep a map of all seen so far. @@ -93,9 +93,9 @@ void MarkAllDirty() g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8); } -static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state) +static VertexLoaderBase* RefreshLoader(int vtx_attr_group, CPState* state) { - VertexLoader* loader; + VertexLoaderBase* loader; if (state->attr_dirty[vtx_attr_group]) { VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]); @@ -107,8 +107,8 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state) } else { - loader = new VertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]); - s_vertex_loader_map[uid] = std::unique_ptr(loader); + loader = VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]); + s_vertex_loader_map[uid] = std::unique_ptr(loader); // search for a cached native vertex format const PortableVertexDeclaration& format = loader->m_native_vtx_decl; @@ -139,7 +139,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo CPState* state = &g_main_cp_state; - VertexLoader* loader = RefreshLoader(vtx_attr_group, state); + 
VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, state); int size = count * loader->m_VertexSize; if ((int)src.size() < size) diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index 65488aa4fa..b4db374e28 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -65,6 +65,7 @@ + @@ -118,6 +119,7 @@ + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index cd4901f303..d00ee5518c 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -119,6 +119,9 @@ Vertex Loading + + Vertex Loading + Vertex Loading @@ -263,6 +266,9 @@ Vertex Loading + + Vertex Loading + Vertex Loading diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index d1032ae6f9..601e90dcc2 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -2,7 +2,7 @@ #include "Common/Common.h" #include "VideoCommon/DataReader.h" -#include "VideoCommon/VertexLoader.h" +#include "VideoCommon/VertexLoaderBase.h" // Needs to be included later because it defines a TEST macro that conflicts // with a TEST method definition in x64Emitter.h. @@ -93,7 +93,7 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) m_vtx_attr.g0.PosElements = 1; // XYZ m_vtx_attr.g0.PosFormat = 4; // Float - VertexLoader* loader = new VertexLoader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); ASSERT_EQ(3 * sizeof (float), (u32)loader->m_VertexSize); @@ -118,7 +118,7 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) // Test that scale does nothing for floating point inputs. 
Input(1.0f); Input(2.0f); Input(4.0f); m_vtx_attr.g0.PosFrac = 1; - loader = new VertexLoader(m_vtx_desc, m_vtx_attr); + loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); count = loader->RunVertices(7, 1, src, dst); src.Skip(1 * loader->m_VertexSize); dst.Skip(count * loader->m_native_vtx_decl.stride); @@ -132,7 +132,7 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) m_vtx_attr.g0.PosElements = 0; // XY m_vtx_attr.g0.PosFormat = 2; // U16 - VertexLoader* loader = new VertexLoader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); ASSERT_EQ(2 * sizeof (u16), (u32)loader->m_VertexSize); @@ -159,7 +159,7 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) // Test that scale works on U16 inputs. Input(42); Input(24); m_vtx_attr.g0.PosFrac = 1; - loader = new VertexLoader(m_vtx_desc, m_vtx_attr); + loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); count = loader->RunVertices(7, 1, src, dst); src.Skip(1 * loader->m_VertexSize); dst.Skip(count * loader->m_native_vtx_decl.stride); @@ -173,18 +173,19 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed) m_vtx_attr.g0.PosElements = 1; // XYZ m_vtx_attr.g0.PosFormat = 4; // Float - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.m_native_vtx_decl.stride); - ASSERT_EQ(3 * sizeof (float), (u32)loader.m_VertexSize); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_VertexSize); for (int i = 0; i < 1000; ++i) { ResetPointers(); - int count = loader.RunVertices(7, 100000, src, dst); - src.Skip(100000 * loader.m_VertexSize); - dst.Skip(count * loader.m_native_vtx_decl.stride); + int count = loader->RunVertices(7, 100000, src, dst); + src.Skip(100000 * 
loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); } + delete loader; } TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed) @@ -193,18 +194,19 @@ TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed) m_vtx_attr.g0.PosElements = 0; // XY m_vtx_attr.g0.PosFormat = 2; // U16 - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.m_native_vtx_decl.stride); - ASSERT_EQ(2 * sizeof (u16), (u32)loader.m_VertexSize); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); + ASSERT_EQ(2 * sizeof (u16), (u32)loader->m_VertexSize); for (int i = 0; i < 1000; ++i) { ResetPointers(); - int count = loader.RunVertices(7, 100000, src, dst); - src.Skip(100000 * loader.m_VertexSize); - dst.Skip(count * loader.m_native_vtx_decl.stride); + int count = loader->RunVertices(7, 100000, src, dst); + src.Skip(100000 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); } + delete loader; } TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) @@ -257,15 +259,16 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) m_vtx_attr.g2.Tex7CoordElements = 1; // ST m_vtx_attr.g2.Tex7CoordFormat = 4; // Float - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); // This test is only done 100x in a row since it's ~20x slower using the // current vertex loader implementation. 
for (int i = 0; i < 100; ++i) { ResetPointers(); - int count = loader.RunVertices(7, 100000, src, dst); - src.Skip(100000 * loader.m_VertexSize); - dst.Skip(count * loader.m_native_vtx_decl.stride); + int count = loader->RunVertices(7, 100000, src, dst); + src.Skip(100000 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); } + delete loader; } From 7edf6ec4e4506be5bf575eee5f33287be19ae372 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 18 Dec 2014 23:27:10 +0100 Subject: [PATCH 6/9] VertexLoader: Add a test loader which compares two vertex loaders --- Source/Core/VideoCommon/VertexLoaderBase.cpp | 60 ++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/Source/Core/VideoCommon/VertexLoaderBase.cpp b/Source/Core/VideoCommon/VertexLoaderBase.cpp index 4372729506..ed56db0655 100644 --- a/Source/Core/VideoCommon/VertexLoaderBase.cpp +++ b/Source/Core/VideoCommon/VertexLoaderBase.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 // Refer to the license.txt file included. 
+#include + #include "Common/StringUtil.h" #include "VideoCommon/VertexLoader.h" @@ -124,10 +126,68 @@ void VertexLoaderBase::AppendToString(std::string *dest) const dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices)); } +// a hacky implementation to compare two vertex loaders +class VertexLoaderTester : public VertexLoaderBase +{ +public: + VertexLoaderTester(VertexLoaderBase* _a, VertexLoaderBase* _b, const TVtxDesc& vtx_desc, const VAT& vtx_attr) + : VertexLoaderBase(vtx_desc, vtx_attr) + { + a = _a; + b = _b; + m_initialized = a && b && a->IsInitialized() && b->IsInitialized(); + m_initialized = m_initialized && (a->m_VertexSize == b->m_VertexSize); + m_initialized = m_initialized && (a->m_native_vtx_decl.stride == b->m_native_vtx_decl.stride); + } + ~VertexLoaderTester() + { + delete a; + delete b; + } + + int RunVertices(int primitive, int count, DataReader src, DataReader dst) override + { + buffer_a.resize(count * a->m_native_vtx_decl.stride); + buffer_b.resize(count * b->m_native_vtx_decl.stride); + + int count_a = a->RunVertices(primitive, count, src, DataReader(buffer_a.data(), buffer_a.data()+buffer_a.size())); + int count_b = b->RunVertices(primitive, count, src, DataReader(buffer_b.data(), buffer_b.data()+buffer_b.size())); + + if (count_a != count_b) + ERROR_LOG(VIDEO, "Both vertexloaders have loaded a different amount of vertices."); + + if (memcmp(buffer_a.data(), buffer_b.data(), std::min(count_a, count_b))) + ERROR_LOG(VIDEO, "Both vertexloaders have loaded different data."); + + u8* dstptr; + dst.WritePointer(&dstptr); + memcpy(dstptr, buffer_a.data(), count_a); + return count_a; + } + std::string GetName() const override { return "CompareLoader"; } + bool IsInitialized() override { return m_initialized; } + +private: + VertexLoaderBase *a, *b; + bool m_initialized; + std::vector buffer_a, buffer_b; +}; + VertexLoaderBase* VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc, const VAT& vtx_attr) { VertexLoaderBase* loader; 
+#if 0 + // first try: Any new VertexLoader vs the old one + loader = new VertexLoaderTester( + new VertexLoader(vtx_desc, vtx_attr), // the software one + new VertexLoader(vtx_desc, vtx_attr), // the new one to compare + vtx_desc, vtx_attr); + if (loader->IsInitialized()) + return loader; + delete loader; +#endif + // last try: The old VertexLoader loader = new VertexLoader(vtx_desc, vtx_attr); if (loader->IsInitialized()) From 7c486a8c243c3bb223b1d31aab34bf039c6a02a1 Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 13 Dec 2014 10:57:46 +0100 Subject: [PATCH 7/9] VertexLoader: Add a VertexLoader pointer to each function call --- Source/Core/VideoCommon/BoundingBox.cpp | 4 +- Source/Core/VideoCommon/BoundingBox.h | 4 +- Source/Core/VideoCommon/VertexLoader.cpp | 110 ++++++++---------- Source/Core/VideoCommon/VertexLoader.h | 30 +++-- .../Core/VideoCommon/VertexLoader_Color.cpp | 102 ++++++++-------- Source/Core/VideoCommon/VertexLoader_Color.h | 38 +++--- .../Core/VideoCommon/VertexLoader_Normal.cpp | 12 +- .../VideoCommon/VertexLoader_Position.cpp | 16 +-- .../VideoCommon/VertexLoader_TextCoord.cpp | 34 +++--- 9 files changed, 178 insertions(+), 172 deletions(-) diff --git a/Source/Core/VideoCommon/BoundingBox.cpp b/Source/Core/VideoCommon/BoundingBox.cpp index 4b4400f2e7..9ea0fa5689 100644 --- a/Source/Core/VideoCommon/BoundingBox.cpp +++ b/Source/Core/VideoCommon/BoundingBox.cpp @@ -30,7 +30,7 @@ static TVtxDesc vertexDesc; static PortableVertexDeclaration vertexDecl; // Gets the pointer to the current buffer position -void LOADERDECL SetVertexBufferPosition() +void LOADERDECL SetVertexBufferPosition(VertexLoader* loader) { bufferPos = g_vertex_manager_write_ptr; } @@ -76,7 +76,7 @@ void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const Por } // Updates the bounding box -void LOADERDECL Update() +void LOADERDECL Update(VertexLoader* loader) { if (!active) return; diff --git a/Source/Core/VideoCommon/BoundingBox.h 
b/Source/Core/VideoCommon/BoundingBox.h index d6952b184e..afff0e3e72 100644 --- a/Source/Core/VideoCommon/BoundingBox.h +++ b/Source/Core/VideoCommon/BoundingBox.h @@ -31,8 +31,8 @@ extern u8 posMtxIdx; // Texture matrix indexes extern u8 texMtxIdx[8]; -void LOADERDECL SetVertexBufferPosition(); -void LOADERDECL Update(); +void LOADERDECL SetVertexBufferPosition(VertexLoader* loader); +void LOADERDECL Update(VertexLoader* loader); void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const PortableVertexDeclaration & vtxDecl); // Save state diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index 32cb95607b..d7c8fe3cfa 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -29,79 +29,64 @@ #define inline #endif -// Matrix components are first in GC format but later in PC format - we need to store it temporarily -// when decoding each vertex. -static u8 s_curposmtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx; -static u8 s_curtexmtx[8]; -static int s_texmtxwrite = 0; -static int s_texmtxread = 0; - -// Vertex loaders read these. Although the scale ones should be baked into the shader. 
-int tcIndex; -int colIndex; -int colElements[2]; -// Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT -GC_ALIGNED128(float posScale[4]); -GC_ALIGNED64(float tcScale[8][2]); - // This pointer is used as the source/dst for all fixed function loader calls u8* g_video_buffer_read_ptr; u8* g_vertex_manager_write_ptr; -static const float fractionTable[32] = { - 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), - 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), - 1.0f / (1U << 8), 1.0f / (1U << 9), 1.0f / (1U << 10), 1.0f / (1U << 11), - 1.0f / (1U << 12), 1.0f / (1U << 13), 1.0f / (1U << 14), 1.0f / (1U << 15), - 1.0f / (1U << 16), 1.0f / (1U << 17), 1.0f / (1U << 18), 1.0f / (1U << 19), - 1.0f / (1U << 20), 1.0f / (1U << 21), 1.0f / (1U << 22), 1.0f / (1U << 23), - 1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27), - 1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31), -}; - using namespace Gen; -static void LOADERDECL PosMtx_ReadDirect_UByte() + +void* VertexLoader::operator new (size_t size) { - BoundingBox::posMtxIdx = s_curposmtx = DataReadU8() & 0x3f; - PRIM_LOG("posmtx: %d, ", s_curposmtx); + return AllocateAlignedMemory(size, 16); } -static void LOADERDECL PosMtx_Write() +void VertexLoader::operator delete (void *p) +{ + FreeAlignedMemory(p); +} + +static void LOADERDECL PosMtx_ReadDirect_UByte(VertexLoader* loader) +{ + BoundingBox::posMtxIdx = loader->m_curposmtx = DataReadU8() & 0x3f; + PRIM_LOG("posmtx: %d, ", loader->m_curposmtx); +} + +static void LOADERDECL PosMtx_Write(VertexLoader* loader) { // u8, 0, 0, 0 - DataWrite(s_curposmtx); + DataWrite(loader->m_curposmtx); } -static void LOADERDECL TexMtx_ReadDirect_UByte() +static void LOADERDECL TexMtx_ReadDirect_UByte(VertexLoader* loader) { - BoundingBox::texMtxIdx[s_texmtxread] = s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f; + BoundingBox::texMtxIdx[loader->m_texmtxread] = 
loader->m_curtexmtx[loader->m_texmtxread] = DataReadU8() & 0x3f; - PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]); - s_texmtxread++; + PRIM_LOG("texmtx%d: %d, ", loader->m_texmtxread, loader->m_curtexmtx[loader->m_texmtxread]); + loader->m_texmtxread++; } -static void LOADERDECL TexMtx_Write_Float() +static void LOADERDECL TexMtx_Write_Float(VertexLoader* loader) { - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); } -static void LOADERDECL TexMtx_Write_Float2() +static void LOADERDECL TexMtx_Write_Float2(VertexLoader* loader) { DataWrite(0.f); - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); } -static void LOADERDECL TexMtx_Write_Float4() +static void LOADERDECL TexMtx_Write_Float4(VertexLoader* loader) { #if _M_SSE >= 0x200 - __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), s_curtexmtx[s_texmtxwrite++]); + __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), loader->m_curtexmtx[loader->m_texmtxwrite++]); _mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */)); g_vertex_manager_write_ptr += sizeof(float) * 4; #else DataWrite(0.f); DataWrite(0.f); - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); // Just to fill out with 0. 
DataWrite(0.f); #endif @@ -123,6 +108,14 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) m_numPipelineStages = 0; CompileVertexTranslator(); #endif + + // generate frac factors + m_posScale[0] = m_posScale[1] = m_posScale[2] = m_posScale[3] = 1.0f / (1U << m_VtxAttr.PosFrac); + for (int i = 0; i < 8; i++) + m_tcScale[i][0] = m_tcScale[i][1] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac); + + for (int i = 0; i < 2; i++) + m_colElements[i] = m_VtxAttr.color[i].Elements; } VertexLoader::~VertexLoader() @@ -143,11 +136,14 @@ void VertexLoader::CompileVertexTranslator() m_compiledCode = GetCodePtr(); // We only use RAX (caller saved) and RBX (callee saved). - ABI_PushRegistersAndAdjustStack({RBX}, 8); + ABI_PushRegistersAndAdjustStack({RBX, RBP}, 8); // save count MOV(64, R(RBX), R(ABI_PARAM1)); + // save loader + MOV(64, R(RBP), R(ABI_PARAM2)); + // Start loop here const u8 *loop_start = GetCodePtr(); @@ -155,17 +151,17 @@ void VertexLoader::CompileVertexTranslator() if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord || m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord) { - WriteSetVariable(32, &tcIndex, Imm32(0)); + WriteSetVariable(32, &m_tcIndex, Imm32(0)); } if (m_VtxDesc.Color0 || m_VtxDesc.Color1) { - WriteSetVariable(32, &colIndex, Imm32(0)); + WriteSetVariable(32, &m_colIndex, Imm32(0)); } if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx || m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx) { - WriteSetVariable(32, &s_texmtxwrite, Imm32(0)); - WriteSetVariable(32, &s_texmtxread, Imm32(0)); + WriteSetVariable(32, &m_texmtxwrite, Imm32(0)); + WriteSetVariable(32, &m_texmtxread, Imm32(0)); } #else // Reset pipeline @@ -405,7 +401,7 @@ void VertexLoader::CompileVertexTranslator() SUB(64, R(RBX), Imm8(1)); J_CC(CC_NZ, loop_start); - ABI_PopRegistersAndAdjustStack({RBX}, 
8); + ABI_PopRegistersAndAdjustStack({RBX, RBP}, 8); RET(); #endif } @@ -413,6 +409,7 @@ void VertexLoader::CompileVertexTranslator() void VertexLoader::WriteCall(TPipelineFunction func) { #ifdef USE_VERTEX_LOADER_JIT + MOV(64, R(ABI_PARAM1), R(RBP)); ABI_CallFunction((const void*)func); #else m_PipelineStages[m_numPipelineStages++] = func; @@ -441,13 +438,6 @@ void VertexLoader::SetupRunVertices(int primitive, int const count) { m_numLoadedVertices += count; - posScale[0] = posScale[1] = posScale[2] = posScale[3] = fractionTable[m_VtxAttr.PosFrac]; - if (m_native_components & VB_HAS_UVALL) - for (int i = 0; i < 8; i++) - tcScale[i][0] = tcScale[i][1] = fractionTable[m_VtxAttr.texCoord[i].Frac]; - for (int i = 0; i < 2; i++) - colElements[i] = m_VtxAttr.color[i].Elements; - // Prepare bounding box if (!g_ActiveConfig.backend_info.bSupportsBBox) BoundingBox::Prepare(m_vat, primitive, m_VtxDesc, m_native_vtx_decl); @@ -458,16 +448,16 @@ void VertexLoader::ConvertVertices ( int count ) #ifdef USE_VERTEX_LOADER_JIT if (count > 0) { - ((void (*)(int))(void*)m_compiledCode)(count); + ((void (*)(int, VertexLoader* loader))(void*)m_compiledCode)(count, this); } #else for (int s = 0; s < count; s++) { - tcIndex = 0; - colIndex = 0; - s_texmtxwrite = s_texmtxread = 0; + m_tcIndex = 0; + m_colIndex = 0; + m_texmtxwrite = m_texmtxread = 0; for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](); + m_PipelineStages[i](this); PRIM_LOG("\n"); } #endif diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 0d886735ca..749f738b25 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -36,14 +36,8 @@ #define LOADERDECL #endif -typedef void (LOADERDECL *TPipelineFunction)(); - -// They are used for the communication with the loader functions -extern int tcIndex; -extern int colIndex; -extern int colElements[2]; -GC_ALIGNED128(extern float posScale[4]); -GC_ALIGNED64(extern float 
tcScale[8][2]); +class VertexLoader; +typedef void (LOADERDECL *TPipelineFunction)(VertexLoader* loader); // ARMTODO: This should be done in a better way #ifndef _M_GENERIC @@ -53,6 +47,11 @@ class VertexLoader : public VertexLoaderBase #endif { public: + // This class needs 16-byte alignment. As this is broken on + // MSVC right now (Dec 2014), we use custom allocation. + void* operator new (size_t size); + void operator delete (void *p); + VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); ~VertexLoader(); @@ -60,6 +59,21 @@ public: std::string GetName() const override { return "OldLoader"; } bool IsInitialized() override { return true; } // This vertex loader supports all formats + // They are used for the communication with the loader functions + // Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT + GC_ALIGNED128(float m_posScale[4]); + GC_ALIGNED64(float m_tcScale[8][2]); + int m_tcIndex; + int m_colIndex; + int m_colElements[2]; + + // Matrix components are first in GC format but later in PC format - we need to store it temporarily + // when decoding each vertex. + u8 m_curposmtx; + u8 m_curtexmtx[8]; + int m_texmtxwrite; + int m_texmtxread; + private: #ifndef USE_VERTEX_LOADER_JIT // Pipeline. 
diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index 0120e3bea5..58b1b96f31 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -16,46 +16,46 @@ #define ASHIFT 24 #define AMASK 0xFF000000 -__forceinline void _SetCol(u32 val) +__forceinline void _SetCol(VertexLoader* loader, u32 val) { DataWrite(val); - colIndex++; + loader->m_colIndex++; } //color comes in format BARG in 16 bits //BARG -> AABBGGRR -__forceinline void _SetCol4444(u16 val) +__forceinline void _SetCol4444(VertexLoader* loader, u16 val) { u32 col = (val & 0xF0); // col = 000000R0; col |= (val & 0xF ) << 12; // col |= 0000G000; col |= (((u32)val) & 0xF000) << 8; // col |= 00B00000; col |= (((u32)val) & 0x0F00) << 20; // col |= A0000000; col |= col >> 4; // col = A0B0G0R0 | 0A0B0G0R; - _SetCol(col); + _SetCol(loader, col); } //color comes in format RGBA //RRRRRRGG GGGGBBBB BBAAAAAA -__forceinline void _SetCol6666(u32 val) +__forceinline void _SetCol6666(VertexLoader* loader, u32 val) { u32 col = (val >> 16) & 0xFC; col |= (val >> 2) & 0xFC00; col |= (val << 12) & 0xFC0000; col |= (val << 26) & 0xFC000000; col |= (col >> 6) & 0x03030303; - _SetCol(col); + _SetCol(loader, col); } //color comes in RGB //RRRRRGGG GGGBBBBB -__forceinline void _SetCol565(u16 val) +__forceinline void _SetCol565(VertexLoader* loader, u16 val) { u32 col = (val >> 8) & 0xF8; col |= (val << 5) & 0xFC00; col |=(((u32)val) << 19) & 0xF80000; col |= (col >> 5) & 0x070007; col |= (col >> 6) & 0x000300; - _SetCol(col | AMASK); + _SetCol(loader, col | AMASK); } __forceinline u32 _Read24(const u8 *addr) @@ -69,29 +69,29 @@ __forceinline u32 _Read32(const u8 *addr) } -void LOADERDECL Color_ReadDirect_24b_888() +void LOADERDECL Color_ReadDirect_24b_888(VertexLoader* loader) { - _SetCol(_Read24(DataGetPosition())); + _SetCol(loader, _Read24(DataGetPosition())); DataSkip(3); } -void LOADERDECL 
Color_ReadDirect_32b_888x() +void LOADERDECL Color_ReadDirect_32b_888x(VertexLoader* loader) { - _SetCol(_Read24(DataGetPosition())); + _SetCol(loader, _Read24(DataGetPosition())); DataSkip(4); } -void LOADERDECL Color_ReadDirect_16b_565() +void LOADERDECL Color_ReadDirect_16b_565(VertexLoader* loader) { - _SetCol565(DataReadU16()); + _SetCol565(loader, DataReadU16()); } -void LOADERDECL Color_ReadDirect_16b_4444() +void LOADERDECL Color_ReadDirect_16b_4444(VertexLoader* loader) { - _SetCol4444(*(u16*)DataGetPosition()); + _SetCol4444(loader, *(u16*)DataGetPosition()); DataSkip(2); } -void LOADERDECL Color_ReadDirect_24b_6666() +void LOADERDECL Color_ReadDirect_24b_6666(VertexLoader* loader) { - _SetCol6666(Common::swap32(DataGetPosition() - 1)); + _SetCol6666(loader, Common::swap32(DataGetPosition() - 1)); DataSkip(3); } // F|RES: i am not 100 percent sure, but the colElements seems to be important for rendering only @@ -101,77 +101,77 @@ void LOADERDECL Color_ReadDirect_24b_6666() // else // col |= 0xFF<m_colElements[loader->m_colIndex]) col |= 0xFF << ASHIFT; - _SetCol(col); + _SetCol(loader, col); } template -void Color_ReadIndex_16b_565() +void Color_ReadIndex_16b_565(VertexLoader* loader) { auto const Index = DataRead(); - u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]))); - _SetCol565(val); + u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]))); + _SetCol565(loader, val); } template -void Color_ReadIndex_24b_888() +void Color_ReadIndex_24b_888(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); + const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * 
g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, _Read24(iAddress)); } template -void Color_ReadIndex_32b_888x() +void Color_ReadIndex_32b_888x(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); + const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, _Read24(iAddress)); } template -void Color_ReadIndex_16b_4444() +void Color_ReadIndex_16b_4444(VertexLoader* loader) { auto const Index = DataRead(); - u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex])); - _SetCol4444(val); + u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex])); + _SetCol4444(loader, val); } template -void Color_ReadIndex_24b_6666() +void Color_ReadIndex_24b_6666(VertexLoader* loader) { auto const Index = DataRead(); - const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]) - 1; + const u8* pData = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]) - 1; u32 val = Common::swap32(pData); - _SetCol6666(val); + _SetCol6666(loader, val); } template -void Color_ReadIndex_32b_8888() +void Color_ReadIndex_32b_8888(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read32(iAddress)); + const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, 
_Read32(iAddress)); } -void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565(); } -void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888(); } -void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x(); } -void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444(); } -void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666(); } -void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888(); } +void LOADERDECL Color_ReadIndex8_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565(loader); } +void LOADERDECL Color_ReadIndex8_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888(loader); } +void LOADERDECL Color_ReadIndex8_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x(loader); } +void LOADERDECL Color_ReadIndex8_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444(loader); } +void LOADERDECL Color_ReadIndex8_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666(loader); } +void LOADERDECL Color_ReadIndex8_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888(loader); } -void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565(); } -void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888(); } -void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x(); } -void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444(); } -void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666(); } -void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888(); } +void LOADERDECL Color_ReadIndex16_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565(loader); } +void LOADERDECL Color_ReadIndex16_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888(loader); } +void LOADERDECL Color_ReadIndex16_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x(loader); } +void LOADERDECL Color_ReadIndex16_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444(loader); } +void LOADERDECL 
Color_ReadIndex16_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666(loader); } +void LOADERDECL Color_ReadIndex16_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888(loader); } diff --git a/Source/Core/VideoCommon/VertexLoader_Color.h b/Source/Core/VideoCommon/VertexLoader_Color.h index 90b267064e..b3bee00c13 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.h +++ b/Source/Core/VideoCommon/VertexLoader_Color.h @@ -6,23 +6,25 @@ #include "VideoCommon/NativeVertexFormat.h" -void LOADERDECL Color_ReadDirect_24b_888(); -void LOADERDECL Color_ReadDirect_32b_888x(); -void LOADERDECL Color_ReadDirect_16b_565(); -void LOADERDECL Color_ReadDirect_16b_4444(); -void LOADERDECL Color_ReadDirect_24b_6666(); -void LOADERDECL Color_ReadDirect_32b_8888(); +class VertexLoader; -void LOADERDECL Color_ReadIndex8_16b_565(); -void LOADERDECL Color_ReadIndex8_24b_888(); -void LOADERDECL Color_ReadIndex8_32b_888x(); -void LOADERDECL Color_ReadIndex8_16b_4444(); -void LOADERDECL Color_ReadIndex8_24b_6666(); -void LOADERDECL Color_ReadIndex8_32b_8888(); +void LOADERDECL Color_ReadDirect_24b_888(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_16b_4444(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_32b_8888(VertexLoader* loader); -void LOADERDECL Color_ReadIndex16_16b_565(); -void LOADERDECL Color_ReadIndex16_24b_888(); -void LOADERDECL Color_ReadIndex16_32b_888x(); -void LOADERDECL Color_ReadIndex16_16b_4444(); -void LOADERDECL Color_ReadIndex16_24b_6666(); -void LOADERDECL Color_ReadIndex16_32b_8888(); +void LOADERDECL Color_ReadIndex8_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_24b_888(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_16b_4444(VertexLoader* 
loader); +void LOADERDECL Color_ReadIndex8_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_32b_8888(VertexLoader* loader); + +void LOADERDECL Color_ReadIndex16_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_24b_888(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_16b_4444(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_32b_8888(VertexLoader* loader); diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index 1e428b66a2..70081ead11 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -57,7 +57,7 @@ __forceinline void ReadIndirect(const T* data) template struct Normal_Direct { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { auto const source = reinterpret_cast(DataGetPosition()); ReadIndirect(source); @@ -81,7 +81,7 @@ __forceinline void Normal_Index_Offset() template struct Normal_Index { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset(); } @@ -92,7 +92,7 @@ struct Normal_Index template struct Normal_Index_Indices3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset(); Normal_Index_Offset(); @@ -106,7 +106,7 @@ struct Normal_Index_Indices3 template struct Normal_Direct_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { const T* pData = reinterpret_cast(DataGetPosition()); const float frac = 1. 
/ float(1u << (sizeof(T) * 8 - std::is_signed::value - 1)); @@ -136,7 +136,7 @@ __forceinline void Normal_Index_Offset_SSSE3() template struct Normal_Index_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset_SSSE3(); } @@ -147,7 +147,7 @@ struct Normal_Index_SSSE3 template struct Normal_Index_Indices3_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset_SSSE3(); Normal_Index_Offset_SSSE3(); diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 60a6115ee0..c0fac7ef93 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -71,10 +71,10 @@ float PosScale(float val, float scale) } template -void LOADERDECL Pos_ReadDirect() +void LOADERDECL Pos_ReadDirect(VertexLoader* loader) { static_assert(N <= 3, "N > 3 is not sane!"); - auto const scale = posScale[0]; + auto const scale = loader->m_posScale[0];; DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr); @@ -87,14 +87,14 @@ void LOADERDECL Pos_ReadDirect() } template -void LOADERDECL Pos_ReadIndex() +void LOADERDECL Pos_ReadIndex(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); static_assert(N <= 3, "N > 3 is not sane!"); auto const index = DataRead(); auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); - auto const scale = posScale[0]; + auto const scale = loader->m_posScale[0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i < 3; ++i) @@ -106,21 +106,21 @@ void LOADERDECL Pos_ReadIndex() #if _M_SSE >= 0x301 template -void LOADERDECL Pos_ReadDirect_SSSE3() +void LOADERDECL Pos_ReadDirect_SSSE3(VertexLoader* loader) { const T* pData = 
reinterpret_cast(DataGetPosition()); - Vertex_Read_SSSE3(pData, *(__m128*)posScale); + Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); DataSkip<(2 + three) * sizeof(T)>(); LOG_VTX(); } template -void LOADERDECL Pos_ReadIndex_SSSE3() +void LOADERDECL Pos_ReadIndex_SSSE3(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); const T* pData = (const T*)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); - Vertex_Read_SSSE3(pData, *(__m128*)posScale); + Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); LOG_VTX(); } #endif diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp index 002ec13e55..4a858c84f9 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp @@ -29,9 +29,9 @@ __forceinline void LOG_TEX<2>() // PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]); } -static void LOADERDECL TexCoord_Read_Dummy() +static void LOADERDECL TexCoord_Read_Dummy(VertexLoader* loader) { - tcIndex++; + loader->m_tcIndex++; } template @@ -47,9 +47,9 @@ float TCScale(float val, float scale) } template -void LOADERDECL TexCoord_ReadDirect() +void LOADERDECL TexCoord_ReadDirect(VertexLoader* loader) { - auto const scale = tcScale[tcIndex][0]; + auto const scale = loader->m_tcScale[loader->m_tcIndex][0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr); @@ -60,18 +60,18 @@ void LOADERDECL TexCoord_ReadDirect() src.WritePointer(&g_video_buffer_read_ptr); LOG_TEX(); - ++tcIndex; + ++loader->m_tcIndex; } template -void LOADERDECL TexCoord_ReadIndex() +void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); - auto const data = 
reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] - + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); - auto const scale = tcScale[tcIndex][0]; + auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); + auto const scale = loader->m_tcScale[loader->m_tcIndex][0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i != N; ++i) @@ -79,32 +79,32 @@ void LOADERDECL TexCoord_ReadIndex() dst.WritePointer(&g_vertex_manager_write_ptr); LOG_TEX(); - ++tcIndex; + ++loader->m_tcIndex; } #if _M_SSE >= 0x301 template -void LOADERDECL TexCoord_ReadDirect2_SSSE3() +void LOADERDECL TexCoord_ReadDirect2_SSSE3(VertexLoader* loader) { const T* pData = reinterpret_cast(DataGetPosition()); - __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)tcScale[tcIndex])); + __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex])); Vertex_Read_SSSE3(pData, scale); DataSkip<2 * sizeof(T)>(); LOG_TEX<2>(); - tcIndex++; + loader->m_tcIndex++; } template -void LOADERDECL TexCoord_ReadIndex2_SSSE3() +void LOADERDECL TexCoord_ReadIndex2_SSSE3(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); - const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); - __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)tcScale[tcIndex])); + const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); + __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex])); Vertex_Read_SSSE3(pData, scale); LOG_TEX<2>(); - tcIndex++; + loader->m_tcIndex++; } #endif From 325e8e370eddb236e29fd851a848f16ce7cdfd3d Mon Sep 17 
00:00:00 2001 From: degasus Date: Fri, 19 Dec 2014 00:32:46 +0100 Subject: [PATCH 8/9] VertexLoader: Merge dummy functions --- Source/Core/VideoCommon/VertexLoader.cpp | 15 ++++----------- Source/Core/VideoCommon/VertexLoader.h | 2 -- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index d7c8fe3cfa..61303ab6f7 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -434,17 +434,17 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value) } #endif -void VertexLoader::SetupRunVertices(int primitive, int const count) +int VertexLoader::RunVertices(int primitive, int count, DataReader src, DataReader dst) { + dst.WritePointer(&g_vertex_manager_write_ptr); + src.WritePointer(&g_video_buffer_read_ptr); + m_numLoadedVertices += count; // Prepare bounding box if (!g_ActiveConfig.backend_info.bSupportsBBox) BoundingBox::Prepare(m_vat, primitive, m_VtxDesc, m_native_vtx_decl); -} -void VertexLoader::ConvertVertices ( int count ) -{ #ifdef USE_VERTEX_LOADER_JIT if (count > 0) { @@ -461,13 +461,6 @@ void VertexLoader::ConvertVertices ( int count ) PRIM_LOG("\n"); } #endif -} -int VertexLoader::RunVertices(int primitive, int count, DataReader src, DataReader dst) -{ - dst.WritePointer(&g_vertex_manager_write_ptr); - src.WritePointer(&g_video_buffer_read_ptr); - SetupRunVertices(primitive, count); - ConvertVertices(count); return count; } diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 749f738b25..2b61e683bc 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -82,8 +82,6 @@ private: #endif void CompileVertexTranslator(); - void ConvertVertices(int count); - void SetupRunVertices(int primitive, int const count); void WriteCall(TPipelineFunction); From 1efd00227d3c38458d44193c4aa554f70a3b9c55 Mon Sep 17 00:00:00 2001 
From: degasus Date: Sun, 21 Dec 2014 14:29:44 +0100 Subject: [PATCH 9/9] VertexLoader: Skip vertices with position index = -1 --- Source/Core/VideoCommon/VertexLoader.cpp | 20 ++++++++++++++++++- Source/Core/VideoCommon/VertexLoader.h | 2 ++ .../VideoCommon/VertexLoader_Position.cpp | 2 ++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index 61303ab6f7..e2a2073ccf 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -92,6 +92,17 @@ static void LOADERDECL TexMtx_Write_Float4(VertexLoader* loader) #endif } +static void LOADERDECL SkipVertex(VertexLoader* loader) +{ + if (loader->m_vertexSkip) + { + // reset the output buffer + g_vertex_manager_write_ptr -= loader->m_native_vtx_decl.stride; + + loader->m_skippedVertices++; + } +} + VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) : VertexLoaderBase(vtx_desc, vtx_attr) { @@ -393,6 +404,12 @@ void VertexLoader::CompileVertexTranslator() nat_offset += 4; } + // indexed position formats may skip the vertex + if (m_VtxDesc.Position & 2) + { + WriteCall(SkipVertex); + } + m_native_components = components; m_native_vtx_decl.stride = nat_offset; @@ -440,6 +457,7 @@ int VertexLoader::RunVertices(int primitive, int count, DataReader src, DataRead src.WritePointer(&g_video_buffer_read_ptr); m_numLoadedVertices += count; + m_skippedVertices = 0; // Prepare bounding box if (!g_ActiveConfig.backend_info.bSupportsBBox) @@ -462,5 +480,5 @@ int VertexLoader::RunVertices(int primitive, int count, DataReader src, DataRead } #endif - return count; + return count - m_skippedVertices; } diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 2b61e683bc..4c883a3242 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -73,6 +73,8 @@ public: u8 m_curtexmtx[8]; int m_texmtxwrite; 
int m_texmtxread; + bool m_vertexSkip; + int m_skippedVertices; private: #ifndef USE_VERTEX_LOADER_JIT diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index c0fac7ef93..419c041b5b 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -93,6 +93,7 @@ void LOADERDECL Pos_ReadIndex(VertexLoader* loader) static_assert(N <= 3, "N > 3 is not sane!"); auto const index = DataRead(); + loader->m_vertexSkip = index == std::numeric_limits::max(); auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); auto const scale = loader->m_posScale[0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); @@ -119,6 +120,7 @@ void LOADERDECL Pos_ReadIndex_SSSE3(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); + loader->m_vertexSkip = index == std::numeric_limits::max(); const T* pData = (const T*)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); LOG_VTX();