From 7a89faf13f1d02f9f5d2b01ffa33fe6e8b2c6d99 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 11:36:10 +0100 Subject: [PATCH 1/3] converting the last vertices again instead of copying from buffer on buffer split for mapping, this buffer must be write only, so we cannot copy anthing. converting again needs more cpu, but should happen rarely --- Source/Core/VideoCommon/Src/VertexLoader.cpp | 63 +++++++++++--------- Source/Core/VideoCommon/Src/VertexLoader.h | 1 + 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 5bffab8ee7..7eb410fc43 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -571,7 +571,6 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) if (remainingVerts < granularity) { INCSTAT(stats.thisFrame.numBufferSplits); // This buffer full - break current primitive and flush, to switch to the next buffer. - u8* plastptr = VertexManager::s_pCurBufferPointer; if (v - startv > 0) VertexManager::AddVertices(primitive, v - startv + extraverts); VertexManager::Flush(); @@ -581,27 +580,28 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) case 3: // triangle strip, copy last two vertices // a little trick since we have to keep track of signs if (v & 1) { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride); - memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride); - VertexManager::s_pCurBufferPointer += native_stride*3; + g_pVideoData -= m_VertexSize*2; + ConvertVertices(1); + g_pVideoData -= m_VertexSize; + ConvertVertices(2); extraverts = 3; } else { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2); - VertexManager::s_pCurBufferPointer += native_stride*2; + g_pVideoData -= m_VertexSize*2; + ConvertVertices(2); extraverts = 2; } break; case 4: // tri fan, copy first and last vert - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride); - VertexManager::s_pCurBufferPointer += native_stride; - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; + g_pVideoData -= m_VertexSize*(v-startv+extraverts); + ConvertVertices(1); + g_pVideoData += m_VertexSize*(v-startv+extraverts-2); + ConvertVertices(1); extraverts = 2; break; case 6: // line strip - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; + g_pVideoData -= m_VertexSize*1; + ConvertVertices(1); extraverts = 1; break; default: @@ -615,22 +615,8 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) if (count - v < remainingVerts) remainingVerts = count - v; - #ifdef USE_JIT - if (remainingVerts > 0) { - loop_counter = remainingVerts; - ((void (*)())(void*)m_compiledCode)(); - } - #else - for (int s = 0; s < remainingVerts; s++) - { - tcIndex = 0; - colIndex = 0; - s_texmtxwrite = s_texmtxread = 0; - for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](); - PRIM_LOG("\n"); - } - #endif + ConvertVertices(remainingVerts); + v += remainingVerts; } @@ -639,6 +625,27 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) } +void VertexLoader::ConvertVertices ( int count ) +{ +#ifdef USE_JIT + if (count > 0) { + loop_counter = count; + ((void (*)())(void*)m_compiledCode)(); + } +#else + for (int s = 0; s < count; s++) + { + tcIndex = 0; + colIndex = 0; + s_texmtxwrite = s_texmtxread = 0; + for (int i = 0; i < m_numPipelineStages; i++) + m_PipelineStages[i](); + PRIM_LOG("\n"); + } +#endif +} + + void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.h b/Source/Core/VideoCommon/Src/VertexLoader.h index 98a57cb9ff..4f4fc19e99 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.h +++ b/Source/Core/VideoCommon/Src/VertexLoader.h @@ -119,6 +119,7 @@ private: void SetVAT(u32 _group0, u32 _group1, u32 _group2); void CompileVertexTranslator(); + void ConvertVertices(int count); void WriteCall(TPipelineFunction); From 0d33e200260978ebeb159e84fbf6a792363b2574 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 11:41:14 +0100 Subject: [PATCH 2/3] bbox: replace s_pCurBufferPointer with locale buffer, so it can be read without read from the writeonly buffer --- Source/Core/VideoCommon/Src/VertexLoader.cpp | 40 +++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 7eb410fc43..23064dec9a 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -72,6 +72,10 @@ int colElements[2]; float posScale; float tcScale[8]; +// bbox must read vertex position, so convert it to this buffer +static float s_bbox_vertex_buffer[3]; +static u8 *s_bbox_pCurBufferPointer_orig; + static const float fractionTable[32] = { 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), @@ -99,17 +103,32 @@ void LOADERDECL PosMtx_Write() *VertexManager::s_pCurBufferPointer++ = 0; } +void LOADERDECL UpdateBoundingBoxPrepare() +{ + if (!PixelEngine::bbox_active) + return; + + // set our buffer as videodata buffer, so we will get a copy of the vertex positions + // this is a big hack, but so we can use the same converting function then without bbox + s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer; + VertexManager::s_pCurBufferPointer = (u8*)s_bbox_vertex_buffer; +} + void LOADERDECL UpdateBoundingBox() { if (!PixelEngine::bbox_active) return; + + // reset videodata pointer + VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig; + + // copy vertex pointers + memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12); + VertexManager::s_pCurBufferPointer += 12; - // Truly evil hack, reading backwards from the write pointer. If we were writing to write-only - // memory like we might have been with a D3D vertex buffer, this would have been a bad idea. - float *data = (float *)(VertexManager::s_pCurBufferPointer - 12); // We must transform the just loaded point by the current world and projection matrix - in software. // Then convert to screen space and update the bounding box. - float p[3] = {data[0], data[1], data[2]}; + float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]}; const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4; const float *proj_matrix = &g_fProjectionMatrix[0]; @@ -267,15 +286,16 @@ void VertexLoader::CompileVertexTranslator() if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); } // Write vertex position loader - WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + if(g_ActiveConfig.bUseBBox) { + WriteCall(UpdateBoundingBoxPrepare); + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + WriteCall(UpdateBoundingBox); + } else { + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + } m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements); nat_offset += 12; - // OK, so we just got a point. Let's go back and read it for the bounding box. - - if(g_ActiveConfig.bUseBBox) - WriteCall(UpdateBoundingBox); - // Normals vtx_decl.num_normals = 0; if (m_VtxDesc.Normal != NOT_PRESENT) From 66d0c1c301c1a40414a0135d94525dab5198a8eb Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 11:45:29 +0100 Subject: [PATCH 3/3] small cleanups of s_pCurBufferPointer --- Source/Core/VideoCommon/Src/DLCache.cpp | 3 +-- Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp | 1 + Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp | 7 +++---- Source/Core/VideoCommon/Src/VideoCommon.h | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Source/Core/VideoCommon/Src/DLCache.cpp b/Source/Core/VideoCommon/Src/DLCache.cpp index c828ea3a03..86d321114c 100644 --- a/Source/Core/VideoCommon/Src/DLCache.cpp +++ b/Source/Core/VideoCommon/Src/DLCache.cpp @@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl) cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices); - u8* EndAddress = VertexManager::s_pCurBufferPointer; - u32 Vdatasize = (u32)(EndAddress - StartAddress); + u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress); if (Vdatasize > 0) { // Compile diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index 830bd3de13..82d04f5ea2 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -30,6 +30,7 @@ #include #endif +// warning: mapping buffer should be disabled to use this #define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; diff --git a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index ba3bb73f43..fd6f191ae3 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -28,6 +28,7 @@ #include #endif +// warning: mapping buffer should be disabled to use this #define LOG_TEX1() // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); #define LOG_TEX2() // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); @@ -355,11 +356,9 @@ void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); - u8* p = VertexManager::s_pCurBufferPointer; - _mm_storel_epi64((__m128i*)p, b); + _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b); LOG_TEX2(); - p += 8; - VertexManager::s_pCurBufferPointer = p; + VertexManager::s_pCurBufferPointer += 8; tcIndex++; } #endif diff --git a/Source/Core/VideoCommon/Src/VideoCommon.h b/Source/Core/VideoCommon/Src/VideoCommon.h index 1eeed20b82..7681d8ba82 100644 --- a/Source/Core/VideoCommon/Src/VideoCommon.h +++ b/Source/Core/VideoCommon/Src/VideoCommon.h @@ -90,7 +90,7 @@ struct TargetRectangle : public MathUtil::Rectangle #define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__) #endif - +// warning: mapping buffer should be disabled to use this // #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]); #define LOG_VTX()