TextureDecoder: Pass the TLUT address straight into the texture decoder

This removes the requirement for the TextureDecoder to have access to
global texture memory.
This commit is contained in:
Jasper St. Pierre 2014-08-10 15:28:42 -04:00
parent fcd4ecc942
commit ea1245d191
6 changed files with 76 additions and 74 deletions

View File

@ -106,6 +106,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
TexMode0& tm0 = texUnit.texMode0[subTexmap];
TexImage0& ti0 = texUnit.texImage0[subTexmap];
TexTLUT& texTlut = texUnit.texTlut[subTexmap];
TlutFormat tlutfmt = (TlutFormat) texTlut.tlut_format;
u8 *imageSrc, *imageSrcOdd = nullptr;
if (texUnit.texImage1[subTexmap].image_type)
@ -124,6 +125,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
int imageHeight = ti0.height;
int tlutAddress = texTlut.tmem_offset << 9;
const u8* tlut = &texMem[tlutAddress];
// reduce sample location and texture size to mip level
// move texture pointer to mip location
@ -179,20 +181,18 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth);
WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight);
TlutFormat tlutfmt = (TlutFormat) texTlut.tlut_format;
if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type))
{
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, tlutfmt);
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut, tlutfmt);
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, tlutfmt);
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlut, tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, tlutfmt);
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlut, tlutfmt);
AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, tlutfmt);
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlut, tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (fractT));
}
else
@ -226,7 +226,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
WrapCoord(imageT, tm0.wrap_t, imageHeight);
if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type))
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, (TlutFormat) texTlut.tlut_format);
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut, tlutfmt);
else
TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT, imageWidth);
}

View File

@ -490,7 +490,8 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
{
if (!(texformat == GX_TF_RGBA8 && from_tmem))
{
pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlutaddr, (TlutFormat) tlutfmt);
const u8* tlut = &texMem[tlutaddr];
pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, (TlutFormat) tlutfmt);
}
else
{
@ -566,7 +567,8 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
const u8*& mip_src_data = from_tmem
? ((level % 2) ? ptr_odd : ptr_even)
: src_data;
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlutaddr, (TlutFormat) tlutfmt);
const u8* tlut = &texMem[tlutaddr];
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlut, (TlutFormat) tlutfmt);
mip_src_data += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
entry->Load(mip_width, mip_height, expanded_mip_width, level);

View File

@ -78,12 +78,12 @@ enum PC_TexFormat
PC_TEX_FMT_DXT1,
};
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt);
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt);
PC_TexFormat TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height);
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, TlutFormat tlutfmt);
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut, TlutFormat tlutfmt);
void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth);
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);
/* Internal method, implemented by TextureDecoder_Generic and TextureDecoder_x64. */
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt);
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt);

View File

@ -242,9 +242,9 @@ static void TexDecoder_DrawOverlay(u8 *dst, int width, int height, int texformat
}
}
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt)
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt)
{
PC_TexFormat pc_texformat = _TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlutaddr, tlutfmt);
PC_TexFormat pc_texformat = _TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlut, tlutfmt);
if (TexFmt_Overlay_Enable && pc_texformat != PC_TEX_FMT_NONE)
TexDecoder_DrawOverlay(dst, width, height, texformat, pc_texformat);
@ -301,7 +301,7 @@ struct DXTBlock
u8 lines[4];
};
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, TlutFormat tlutfmt)
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut_, TlutFormat tlutfmt)
{
/* General formula for computing texture offset
//
@ -330,7 +330,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth
u32 offset = base + (blkOff >> 1);
u8 val = (*(src + offset) >> rs) & 0xF;
u16 *tlut = (u16*)(texMem + tlutaddr);
u16 *tlut = (u16*) tlut_;
switch (tlutfmt)
{
@ -395,7 +395,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth
u32 blkOff = (blkT << 3) + blkS;
u8 val = *(src + base + blkOff);
u16 *tlut = (u16*)(texMem + tlutaddr);
u16 *tlut = (u16*) tlut_;
switch (tlutfmt)
{
@ -460,7 +460,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth
const u16* valAddr = (u16*)(src + offset);
u16 val = Common::swap16(*valAddr) & 0x3FFF;
u16 *tlut = (u16*)(texMem + tlutaddr);
u16 *tlut = (u16*) tlut_;
switch (tlutfmt)
{

View File

@ -60,9 +60,9 @@ struct DXTBlock
u8 lines[4];
};
static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlutaddr)
static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, const u8* tlut_)
{
u16 *tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u8 val = src[x];
@ -71,9 +71,9 @@ static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlut
}
}
static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr)
static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem+tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u8 val = src[x];
@ -82,9 +82,9 @@ static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutadd
}
}
static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr)
static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem+tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u8 val = src[x];
@ -93,9 +93,9 @@ static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlut
}
}
static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlutaddr)
static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, const u8* tlut_)
{
u16 *tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 8; x++)
{
u8 val = src[x];
@ -103,27 +103,27 @@ static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlut
}
}
static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr)
static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 8; x++)
{
*dst++ = decodeIA8Swapped(tlut[src[x]]);
}
}
static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr)
static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 8; x++)
{
*dst++ = decode565RGBA(Common::swap16(tlut[src[x]]));
}
}
static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tlutaddr)
static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, const u8* tlut_)
{
u16 *tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u16 val = Common::swap16(src[x]);
@ -131,9 +131,9 @@ static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tl
}
}
static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlutaddr)
static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u16 val = Common::swap16(src[x]);
@ -141,9 +141,9 @@ static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlu
}
}
static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, int tlutaddr)
static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u16 val = Common::swap16(src[x]);
@ -246,7 +246,7 @@ static void decodeDXTBlockRGBA(u32 *dst, const DXTBlock *src, int pitch)
// TODO: complete SSE2 optimization of less often used texture formats.
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt)
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt)
{
const int Wsteps4 = (width + 3) / 4;
@ -261,21 +261,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
}
else if (tlutfmt == GX_TL_IA8)
{
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
}
else if (tlutfmt == GX_TL_RGB565)
{
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
}
break;
case GX_TF_I4:
@ -323,21 +323,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut);
}
else if (tlutfmt == GX_TL_IA8)
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut);
}
else if (tlutfmt == GX_TL_RGB565)
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut);
}
break;
@ -372,21 +372,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
}
else if (tlutfmt == GX_TL_IA8)
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
}
else if (tlutfmt == GX_TL_RGB565)
{
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
}
break;
case GX_TF_RGB565:

View File

@ -83,9 +83,9 @@ struct DXTBlock
u8 lines[4];
};
static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlutaddr)
static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, const u8* tlut_)
{
u16 *tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u8 val = src[x];
@ -94,9 +94,9 @@ static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlut
}
}
static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr)
static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem+tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u8 val = src[x];
@ -105,9 +105,9 @@ static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutadd
}
}
static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr)
static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem+tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u8 val = src[x];
@ -116,9 +116,9 @@ static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlut
}
}
static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlutaddr)
static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, const u8* tlut_)
{
u16 *tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 8; x++)
{
u8 val = src[x];
@ -126,27 +126,27 @@ static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlut
}
}
static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr)
static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 8; x++)
{
*dst++ = decodeIA8Swapped(tlut[src[x]]);
}
}
static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr)
static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 8; x++)
{
*dst++ = decode565RGBA(Common::swap16(tlut[src[x]]));
}
}
static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tlutaddr)
static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, const u8* tlut_)
{
u16 *tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u16 val = Common::swap16(src[x]);
@ -154,9 +154,9 @@ static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tl
}
}
static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlutaddr)
static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u16 val = Common::swap16(src[x]);
@ -164,9 +164,9 @@ static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlu
}
}
static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, int tlutaddr)
static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, const u8* tlut_)
{
u16* tlut = (u16*)(texMem + tlutaddr);
const u16* tlut = (u16*) tlut_;
for (int x = 0; x < 4; x++)
{
u16 val = Common::swap16(src[x]);
@ -287,7 +287,7 @@ static inline void SetOpenMPThreadCount(int width, int height)
// TODO: complete SSE2 optimization of less often used texture formats.
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt)
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt)
{
SetOpenMPThreadCount(width, height);
@ -304,7 +304,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
}
else if (tlutfmt == GX_TL_IA8)
{
@ -312,7 +312,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
}
else if (tlutfmt == GX_TL_RGB565)
@ -321,7 +321,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
}
break;
case GX_TF_I4:
@ -562,7 +562,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut);
}
else if (tlutfmt == GX_TL_IA8)
{
@ -570,7 +570,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut);
}
else if (tlutfmt == GX_TL_RGB565)
@ -579,7 +579,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut);
}
break;
@ -675,7 +675,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
}
else if (tlutfmt == GX_TL_IA8)
{
@ -683,7 +683,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
}
else if (tlutfmt == GX_TL_RGB565)
{
@ -691,7 +691,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
}
break;
case GX_TF_RGB565: