diff --git a/Source/Core/VideoCommon/Src/DLCache.cpp b/Source/Core/VideoCommon/Src/DLCache.cpp index c828ea3a03..86d321114c 100644 --- a/Source/Core/VideoCommon/Src/DLCache.cpp +++ b/Source/Core/VideoCommon/Src/DLCache.cpp @@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl) cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices); - u8* EndAddress = VertexManager::s_pCurBufferPointer; - u32 Vdatasize = (u32)(EndAddress - StartAddress); + u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress); if (Vdatasize > 0) { // Compile diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 5bffab8ee7..23064dec9a 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -72,6 +72,10 @@ int colElements[2]; float posScale; float tcScale[8]; +// bbox must read vertex position, so convert it to this buffer +static float s_bbox_vertex_buffer[3]; +static u8 *s_bbox_pCurBufferPointer_orig; + static const float fractionTable[32] = { 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), @@ -99,17 +103,32 @@ void LOADERDECL PosMtx_Write() *VertexManager::s_pCurBufferPointer++ = 0; } +void LOADERDECL UpdateBoundingBoxPrepare() +{ + if (!PixelEngine::bbox_active) + return; + + // set our buffer as videodata buffer, so we will get a copy of the vertex positions + // this is a big hack, but so we can use the same converting function then without bbox + s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer; + VertexManager::s_pCurBufferPointer = (u8*)s_bbox_vertex_buffer; +} + void LOADERDECL UpdateBoundingBox() { if (!PixelEngine::bbox_active) return; + + // reset videodata pointer + VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig; + + // copy vertex pointers + memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12); + VertexManager::s_pCurBufferPointer += 12; - // Truly evil hack, reading backwards from the write pointer. If we were writing to write-only - // memory like we might have been with a D3D vertex buffer, this would have been a bad idea. - float *data = (float *)(VertexManager::s_pCurBufferPointer - 12); // We must transform the just loaded point by the current world and projection matrix - in software. // Then convert to screen space and update the bounding box. - float p[3] = {data[0], data[1], data[2]}; + float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]}; const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4; const float *proj_matrix = &g_fProjectionMatrix[0]; @@ -267,15 +286,16 @@ void VertexLoader::CompileVertexTranslator() if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); } // Write vertex position loader - WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + if(g_ActiveConfig.bUseBBox) { + WriteCall(UpdateBoundingBoxPrepare); + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + WriteCall(UpdateBoundingBox); + } else { + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + } m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements); nat_offset += 12; - // OK, so we just got a point. Let's go back and read it for the bounding box. - - if(g_ActiveConfig.bUseBBox) - WriteCall(UpdateBoundingBox); - // Normals vtx_decl.num_normals = 0; if (m_VtxDesc.Normal != NOT_PRESENT) @@ -571,7 +591,6 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) if (remainingVerts < granularity) { INCSTAT(stats.thisFrame.numBufferSplits); // This buffer full - break current primitive and flush, to switch to the next buffer. - u8* plastptr = VertexManager::s_pCurBufferPointer; if (v - startv > 0) VertexManager::AddVertices(primitive, v - startv + extraverts); VertexManager::Flush(); @@ -581,27 +600,28 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) case 3: // triangle strip, copy last two vertices // a little trick since we have to keep track of signs if (v & 1) { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride); - memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride); - VertexManager::s_pCurBufferPointer += native_stride*3; + g_pVideoData -= m_VertexSize*2; + ConvertVertices(1); + g_pVideoData -= m_VertexSize; + ConvertVertices(2); extraverts = 3; } else { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2); - VertexManager::s_pCurBufferPointer += native_stride*2; + g_pVideoData -= m_VertexSize*2; + ConvertVertices(2); extraverts = 2; } break; case 4: // tri fan, copy first and last vert - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride); - VertexManager::s_pCurBufferPointer += native_stride; - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; + g_pVideoData -= m_VertexSize*(v-startv+extraverts); + ConvertVertices(1); + g_pVideoData += m_VertexSize*(v-startv+extraverts-2); + ConvertVertices(1); extraverts = 2; break; case 6: // line strip - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; + g_pVideoData -= m_VertexSize*1; + ConvertVertices(1); extraverts = 1; break; default: @@ -615,22 +635,8 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) if (count - v < remainingVerts) remainingVerts = count - v; - #ifdef USE_JIT - if (remainingVerts > 0) { - loop_counter = remainingVerts; - ((void (*)())(void*)m_compiledCode)(); - } - #else - for (int s = 0; s < remainingVerts; s++) - { - tcIndex = 0; - colIndex = 0; - s_texmtxwrite = s_texmtxread = 0; - for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](); - PRIM_LOG("\n"); - } - #endif + ConvertVertices(remainingVerts); + v += remainingVerts; } @@ -639,6 +645,27 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) } +void VertexLoader::ConvertVertices ( int count ) +{ +#ifdef USE_JIT + if (count > 0) { + loop_counter = count; + ((void (*)())(void*)m_compiledCode)(); + } +#else + for (int s = 0; s < count; s++) + { + tcIndex = 0; + colIndex = 0; + s_texmtxwrite = s_texmtxread = 0; + for (int i = 0; i < m_numPipelineStages; i++) + m_PipelineStages[i](); + PRIM_LOG("\n"); + } +#endif +} + + void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.h b/Source/Core/VideoCommon/Src/VertexLoader.h index 98a57cb9ff..4f4fc19e99 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.h +++ b/Source/Core/VideoCommon/Src/VertexLoader.h @@ -119,6 +119,7 @@ private: void SetVAT(u32 _group0, u32 _group1, u32 _group2); void CompileVertexTranslator(); + void ConvertVertices(int count); void WriteCall(TPipelineFunction); diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index 34a3190eff..4102fdff69 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -31,6 +31,7 @@ #include #endif +// warning: mapping buffer should be disabled to use this #define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; diff --git a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index b5e7d890ff..33efe33bdf 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -34,12 +34,14 @@ void LOG_TEX(); template <> __forceinline void LOG_TEX<1>() { + // warning: mapping buffer should be disabled to use this // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); } template <> __forceinline void LOG_TEX<2>() { + // warning: mapping buffer should be disabled to use this // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); } @@ -125,11 +127,9 @@ void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); - u8* p = VertexManager::s_pCurBufferPointer; - _mm_storel_epi64((__m128i*)p, b); + _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b); LOG_TEX2(); - p += 8; - VertexManager::s_pCurBufferPointer = p; + VertexManager::s_pCurBufferPointer += 8; tcIndex++; } #endif diff --git a/Source/Core/VideoCommon/Src/VideoCommon.h b/Source/Core/VideoCommon/Src/VideoCommon.h index 1eeed20b82..7681d8ba82 100644 --- a/Source/Core/VideoCommon/Src/VideoCommon.h +++ b/Source/Core/VideoCommon/Src/VideoCommon.h @@ -90,7 +90,7 @@ struct TargetRectangle : public MathUtil::Rectangle #define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__) #endif - +// warning: mapping buffer should be disabled to use this // #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]); #define LOG_VTX()