From 59072adc32bf779dee4879a456fcb2bdc71036fc Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sat, 8 Oct 2011 17:33:21 +0200 Subject: [PATCH] Align stack variables on a 16-bytes boundary in SSSE3 functions Fixes issue 4450. Thanks to pholklore1 for his patch. --- Source/Core/VideoCommon/Src/VertexLoader_Position.cpp | 4 ++-- Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp index b8483b119c..06481f9ddf 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp @@ -179,8 +179,8 @@ void Pos_ReadIndex_Float_SSSE3(int Index) if(Index < MaxSize) { const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); - const __m128i a = _mm_loadu_si128((__m128i*)pData); - __m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2); + GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData)); + GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2)); _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b); LOG_VTX(); VertexManager::s_pCurBufferPointer += 12; diff --git a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index de853081ed..ba3bb73f43 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -353,8 +353,8 @@ void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() { u16 Index = DataReadU16(); const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - const __m128i a = _mm_loadl_epi64((__m128i*)pData); - const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32); + GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); + GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); u8* p = VertexManager::s_pCurBufferPointer; _mm_storel_epi64((__m128i*)p, b); LOG_TEX2();