From 683ba7ecaec08785042fe207ecd4b941b8d07ff2 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Wed, 20 Aug 2008 18:31:48 +0000 Subject: [PATCH] Minor speedup in texdecoder. More informative fifo error messages. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@249 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Core/VideoCommon/Src/TextureDecoder.cpp | 25 ++++++++----------- Source/Plugins/Plugin_VideoDX9/Src/Fifo.cpp | 10 ++++---- Source/Plugins/Plugin_VideoOGL/Src/Fifo.cpp | 2 +- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index 5a9bdad0cd..86d20101a5 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -256,23 +256,20 @@ inline void decodebytesRGB5A3(u32 *dst, const u16 *src, int numpixels) dst[x] = decode5A3(Common::swap16(src[x])); } -inline void decodebytesARGB8pass1(u32 *dst, const u16 *src, const u16 *src2, int numpixels) +// This one is used by many video formats. It'd therefore be good if it was fast. +inline void decodebytesARGB8_4(u32 *dst, const u16 *src, const u16 *src2) { - // This can probably be done in a few SSE pack/unpack instructions. - for (int x = 0; x < numpixels; x++) { + for (int x = 0; x < 4; x++) { dst[x] = Common::swap32((src2[x] << 16) | src[x]); } -} -inline void decodebytesARGB8pass2(u32 *dst, const u16 *src, int numpixels) -{ - for (int x = 0; x < numpixels; x++) - { - int val = Common::swap16(src[x]); - int a = val & 0xFF; - val >>= 8; - *dst++ |= (val<<8) | (a<<0); - } + // This can probably be done in a few SSE pack/unpack instructions + pshufb + // some unpack instruction x2: + // ABABABABABABABAB 1212121212121212 -> + // AB12AB12AB12AB12 AB12AB12AB12AB12 + // 2x pshufb-> + // 21BA21BA21BA21BA 21BA21BA21BA21BA + // and we are done. } inline u32 makecol(int r, int g, int b, int a) @@ -411,7 +408,7 @@ PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, in for (int x = 0; x < width; x += 4) { for (int iy = 0; iy < 4; iy++) { - decodebytesARGB8pass1((u32*)dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16, 4); + decodebytesARGB8_4((u32*)dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16); } src += 64; } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Fifo.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Fifo.cpp index 1e95450c85..c7ebf90e4d 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Fifo.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Fifo.cpp @@ -46,11 +46,11 @@ void Fifo_Shutdown() int FAKE_GetFifoSize() { - if (size < readptr) - { - PanicAlert("GFX Fifo underrun encountered."); - } - return (size - readptr); + if (size < readptr) + { + PanicAlert("GFX Fifo underrun encountered (size = %i, readptr = %i)", size, readptr); + } + return (size - readptr); } u8 FAKE_PeekFifo8(u32 _uOffset) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Fifo.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Fifo.cpp index 77f6b60f34..867a8f52aa 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Fifo.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Fifo.cpp @@ -46,7 +46,7 @@ int FAKE_GetFifoSize() { if (size < readptr) { - PanicAlert("GFX Fifo underrun encountered."); + PanicAlert("GFX Fifo underrun encountered (size = %i, readptr = %i)", size, readptr); } return (size - readptr); }