diff --git a/Source/Core/VideoCommon/DataReader.h b/Source/Core/VideoCommon/DataReader.h index 17c927a42b..85beec3a11 100644 --- a/Source/Core/VideoCommon/DataReader.h +++ b/Source/Core/VideoCommon/DataReader.h @@ -92,46 +92,6 @@ __forceinline u32 DataReadU32() return DataRead(); } -#if _M_SSE >= 0x301 -const __m128i bs_mask = _mm_set_epi32(0x0C0D0E0FL, 0x08090A0BL, 0x04050607L, 0x00010203L); - -template -void DataReadU32xN_SSSE3(u32 *bufx16) -{ - memcpy(bufx16, g_pVideoData, sizeof(u32) * N); - __m128i* buf = (__m128i *)bufx16; - if (N>12) { _mm_store_si128(buf, _mm_shuffle_epi8(_mm_load_si128(buf), bs_mask)); buf++; } - if (N>8) { _mm_store_si128(buf, _mm_shuffle_epi8(_mm_load_si128(buf), bs_mask)); buf++; } - if (N>4) { _mm_store_si128(buf, _mm_shuffle_epi8(_mm_load_si128(buf), bs_mask)); buf++; } - _mm_store_si128(buf, _mm_shuffle_epi8(_mm_load_si128(buf), bs_mask)); - g_pVideoData += (sizeof(u32) * N); -} - -#endif - -template -void DataReadU32xN(u32 *bufx16) -{ - memcpy(bufx16, g_pVideoData, sizeof(u32) * N); - if (N >= 1) bufx16[0] = Common::swap32(bufx16[0]); - if (N >= 2) bufx16[1] = Common::swap32(bufx16[1]); - if (N >= 3) bufx16[2] = Common::swap32(bufx16[2]); - if (N >= 4) bufx16[3] = Common::swap32(bufx16[3]); - if (N >= 5) bufx16[4] = Common::swap32(bufx16[4]); - if (N >= 6) bufx16[5] = Common::swap32(bufx16[5]); - if (N >= 7) bufx16[6] = Common::swap32(bufx16[6]); - if (N >= 8) bufx16[7] = Common::swap32(bufx16[7]); - if (N >= 9) bufx16[8] = Common::swap32(bufx16[8]); - if (N >= 10) bufx16[9] = Common::swap32(bufx16[9]); - if (N >= 11) bufx16[10] = Common::swap32(bufx16[10]); - if (N >= 12) bufx16[11] = Common::swap32(bufx16[11]); - if (N >= 13) bufx16[12] = Common::swap32(bufx16[12]); - if (N >= 14) bufx16[13] = Common::swap32(bufx16[13]); - if (N >= 15) bufx16[14] = Common::swap32(bufx16[14]); - if (N >= 16) bufx16[15] = Common::swap32(bufx16[15]); - g_pVideoData += (sizeof(u32) * N); -} - __forceinline u32 DataReadU32Unswapped() { u32 tmp = 
*(u32*)g_pVideoData; diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index fe644db21e..a5bd8a62ee 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -34,47 +34,6 @@ u8* g_pVideoData = nullptr; bool g_bRecordFifoData = false; -typedef void (*DataReadU32xNfunc)(u32 *buf); -#if _M_SSE >= 0x301 -static DataReadU32xNfunc DataReadU32xFuncs_SSSE3[16] = { - DataReadU32xN_SSSE3<1>, - DataReadU32xN_SSSE3<2>, - DataReadU32xN_SSSE3<3>, - DataReadU32xN_SSSE3<4>, - DataReadU32xN_SSSE3<5>, - DataReadU32xN_SSSE3<6>, - DataReadU32xN_SSSE3<7>, - DataReadU32xN_SSSE3<8>, - DataReadU32xN_SSSE3<9>, - DataReadU32xN_SSSE3<10>, - DataReadU32xN_SSSE3<11>, - DataReadU32xN_SSSE3<12>, - DataReadU32xN_SSSE3<13>, - DataReadU32xN_SSSE3<14>, - DataReadU32xN_SSSE3<15>, - DataReadU32xN_SSSE3<16> -}; -#endif - -static DataReadU32xNfunc DataReadU32xFuncs[16] = { - DataReadU32xN<1>, - DataReadU32xN<2>, - DataReadU32xN<3>, - DataReadU32xN<4>, - DataReadU32xN<5>, - DataReadU32xN<6>, - DataReadU32xN<7>, - DataReadU32xN<8>, - DataReadU32xN<9>, - DataReadU32xN<10>, - DataReadU32xN<11>, - DataReadU32xN<12>, - DataReadU32xN<13>, - DataReadU32xN<14>, - DataReadU32xN<15>, - DataReadU32xN<16> -}; - static u32 InterpretDisplayList(u32 address, u32 size) { u8* old_pVideoData = g_pVideoData; @@ -182,9 +141,7 @@ static u32 Decode(u8* end, bool skipped_frame) return 0; cycles = 18 + 6 * transfer_size; u32 xf_address = Cmd2 & 0xFFFF; - GC_ALIGNED128(u32 data_buffer[16]); - DataReadU32xFuncs[transfer_size-1](data_buffer); - LoadXFReg(transfer_size, xf_address, data_buffer); + LoadXFReg(transfer_size, xf_address); INCSTAT(stats.thisFrame.numXFLoads); } @@ -296,14 +253,6 @@ static u32 Decode(u8* end, bool skipped_frame) void OpcodeDecoder_Init() { g_pVideoData = GetVideoBufferStartPtr(); - -#if _M_SSE >= 0x301 - if (cpu_info.bSSSE3) - { - for (int i = 0; i < 16; ++i) - DataReadU32xFuncs[i] = 
DataReadU32xFuncs_SSSE3[i]; - } -#endif } diff --git a/Source/Core/VideoCommon/XFMemory.h b/Source/Core/VideoCommon/XFMemory.h index f36378b1e9..3646a70264 100644 --- a/Source/Core/VideoCommon/XFMemory.h +++ b/Source/Core/VideoCommon/XFMemory.h @@ -273,5 +273,5 @@ struct XFMemory extern XFMemory xfmem; -void LoadXFReg(u32 transferSize, u32 address, u32 *pData); +void LoadXFReg(u32 transferSize, u32 address); void LoadIndexedXF(u32 val, int array); diff --git a/Source/Core/VideoCommon/XFStructs.cpp b/Source/Core/VideoCommon/XFStructs.cpp index 235c76bea6..650a4a913b 100644 --- a/Source/Core/VideoCommon/XFStructs.cpp +++ b/Source/Core/VideoCommon/XFStructs.cpp @@ -5,6 +5,7 @@ #include "Common/Common.h" #include "Core/HW/Memmap.h" #include "VideoCommon/CPMemory.h" +#include "VideoCommon/DataReader.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" @@ -17,14 +18,14 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress) VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize); } -static void XFRegWritten(int transferSize, u32 baseAddress, u32 *pData) +static void XFRegWritten(int transferSize, u32 baseAddress) { u32 address = baseAddress; u32 dataIndex = 0; while (transferSize > 0 && address < 0x1058) { - u32 newValue = pData[dataIndex]; + u32 newValue = DataPeek<u32>(dataIndex * sizeof(u32)); u32 nextAddress = address + 1; switch (address) @@ -191,7 +192,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, u32 *pData) } } -void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) +void LoadXFReg(u32 transferSize, u32 baseAddress) { // do not allow writes past registers if (baseAddress + transferSize > 0x1058) @@ -225,16 +226,20 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) } XFMemWritten(xfMemTransferSize, xfMemBase); - memcpy((u32*)(&xfmem) + xfMemBase, pData, xfMemTransferSize * 4); - - pData += xfMemTransferSize; + for (u32 
i = 0; i < xfMemTransferSize; i++) + { + ((u32*)&xfmem)[xfMemBase + i] = DataRead<u32>(); + } } // write to XF regs if (transferSize > 0) { - XFRegWritten(transferSize, baseAddress, pData); - memcpy((u32*)(&xfmem) + baseAddress, pData, transferSize * 4); + XFRegWritten(transferSize, baseAddress); + for (u32 i = 0; i < transferSize; i++) + { + ((u32*)&xfmem)[baseAddress + i] = DataRead<u32>(); + } } }