mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
TextureDecoder: Pass the TLUT address straight into the texture decoder
This removes the requirement for the TextureDecoder to have access to global texture memory.
This commit is contained in:
parent
fcd4ecc942
commit
ea1245d191
@ -106,6 +106,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
|
||||
TexMode0& tm0 = texUnit.texMode0[subTexmap];
|
||||
TexImage0& ti0 = texUnit.texImage0[subTexmap];
|
||||
TexTLUT& texTlut = texUnit.texTlut[subTexmap];
|
||||
TlutFormat tlutfmt = (TlutFormat) texTlut.tlut_format;
|
||||
|
||||
u8 *imageSrc, *imageSrcOdd = nullptr;
|
||||
if (texUnit.texImage1[subTexmap].image_type)
|
||||
@ -124,6 +125,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
|
||||
int imageHeight = ti0.height;
|
||||
|
||||
int tlutAddress = texTlut.tmem_offset << 9;
|
||||
const u8* tlut = &texMem[tlutAddress];
|
||||
|
||||
// reduce sample location and texture size to mip level
|
||||
// move texture pointer to mip location
|
||||
@ -179,20 +181,18 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
|
||||
WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth);
|
||||
WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight);
|
||||
|
||||
TlutFormat tlutfmt = (TlutFormat) texTlut.tlut_format;
|
||||
|
||||
if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type))
|
||||
{
|
||||
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, tlutfmt);
|
||||
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut, tlutfmt);
|
||||
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
|
||||
|
||||
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, tlutfmt);
|
||||
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlut, tlutfmt);
|
||||
AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
|
||||
|
||||
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, tlutfmt);
|
||||
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlut, tlutfmt);
|
||||
AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
|
||||
|
||||
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, tlutfmt);
|
||||
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlut, tlutfmt);
|
||||
AddTexel(sampledTex, texel, (fractS) * (fractT));
|
||||
}
|
||||
else
|
||||
@ -226,7 +226,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample)
|
||||
WrapCoord(imageT, tm0.wrap_t, imageHeight);
|
||||
|
||||
if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type))
|
||||
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, (TlutFormat) texTlut.tlut_format);
|
||||
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut, tlutfmt);
|
||||
else
|
||||
TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT, imageWidth);
|
||||
}
|
||||
|
@ -490,7 +490,8 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
{
|
||||
if (!(texformat == GX_TF_RGBA8 && from_tmem))
|
||||
{
|
||||
pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlutaddr, (TlutFormat) tlutfmt);
|
||||
const u8* tlut = &texMem[tlutaddr];
|
||||
pcfmt = TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, (TlutFormat) tlutfmt);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -566,7 +567,8 @@ TextureCache::TCacheEntryBase* TextureCache::Load(unsigned int const stage,
|
||||
const u8*& mip_src_data = from_tmem
|
||||
? ((level % 2) ? ptr_odd : ptr_even)
|
||||
: src_data;
|
||||
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlutaddr, (TlutFormat) tlutfmt);
|
||||
const u8* tlut = &texMem[tlutaddr];
|
||||
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlut, (TlutFormat) tlutfmt);
|
||||
mip_src_data += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
|
||||
|
||||
entry->Load(mip_width, mip_height, expanded_mip_width, level);
|
||||
|
@ -78,12 +78,12 @@ enum PC_TexFormat
|
||||
PC_TEX_FMT_DXT1,
|
||||
};
|
||||
|
||||
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt);
|
||||
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt);
|
||||
PC_TexFormat TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8 *src_ar, const u8 *src_gb, int width, int height);
|
||||
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, TlutFormat tlutfmt);
|
||||
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut, TlutFormat tlutfmt);
|
||||
void TexDecoder_DecodeTexelRGBA8FromTmem(u8 *dst, const u8 *src_ar, const u8* src_gb, int s, int t, int imageWidth);
|
||||
|
||||
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);
|
||||
|
||||
/* Internal method, implemented by TextureDecoder_Generic and TextureDecoder_x64. */
|
||||
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt);
|
||||
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt);
|
||||
|
@ -242,9 +242,9 @@ static void TexDecoder_DrawOverlay(u8 *dst, int width, int height, int texformat
|
||||
}
|
||||
}
|
||||
|
||||
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt)
|
||||
PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt)
|
||||
{
|
||||
PC_TexFormat pc_texformat = _TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlutaddr, tlutfmt);
|
||||
PC_TexFormat pc_texformat = _TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlut, tlutfmt);
|
||||
|
||||
if (TexFmt_Overlay_Enable && pc_texformat != PC_TEX_FMT_NONE)
|
||||
TexDecoder_DrawOverlay(dst, width, height, texformat, pc_texformat);
|
||||
@ -301,7 +301,7 @@ struct DXTBlock
|
||||
u8 lines[4];
|
||||
};
|
||||
|
||||
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, TlutFormat tlutfmt)
|
||||
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, const u8* tlut_, TlutFormat tlutfmt)
|
||||
{
|
||||
/* General formula for computing texture offset
|
||||
//
|
||||
@ -330,7 +330,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth
|
||||
u32 offset = base + (blkOff >> 1);
|
||||
|
||||
u8 val = (*(src + offset) >> rs) & 0xF;
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
u16 *tlut = (u16*) tlut_;
|
||||
|
||||
switch (tlutfmt)
|
||||
{
|
||||
@ -395,7 +395,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth
|
||||
u32 blkOff = (blkT << 3) + blkS;
|
||||
|
||||
u8 val = *(src + base + blkOff);
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
u16 *tlut = (u16*) tlut_;
|
||||
|
||||
switch (tlutfmt)
|
||||
{
|
||||
@ -460,7 +460,7 @@ void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth
|
||||
const u16* valAddr = (u16*)(src + offset);
|
||||
|
||||
u16 val = Common::swap16(*valAddr) & 0x3FFF;
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
u16 *tlut = (u16*) tlut_;
|
||||
|
||||
switch (tlutfmt)
|
||||
{
|
||||
|
@ -60,9 +60,9 @@ struct DXTBlock
|
||||
u8 lines[4];
|
||||
};
|
||||
|
||||
static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlutaddr)
|
||||
static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, const u8* tlut_)
|
||||
{
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u8 val = src[x];
|
||||
@ -71,9 +71,9 @@ static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlut
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr)
|
||||
static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem+tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u8 val = src[x];
|
||||
@ -82,9 +82,9 @@ static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutadd
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr)
|
||||
static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem+tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u8 val = src[x];
|
||||
@ -93,9 +93,9 @@ static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlut
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlutaddr)
|
||||
static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, const u8* tlut_)
|
||||
{
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 8; x++)
|
||||
{
|
||||
u8 val = src[x];
|
||||
@ -103,27 +103,27 @@ static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlut
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr)
|
||||
static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 8; x++)
|
||||
{
|
||||
*dst++ = decodeIA8Swapped(tlut[src[x]]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr)
|
||||
static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 8; x++)
|
||||
{
|
||||
*dst++ = decode565RGBA(Common::swap16(tlut[src[x]]));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tlutaddr)
|
||||
static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, const u8* tlut_)
|
||||
{
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u16 val = Common::swap16(src[x]);
|
||||
@ -131,9 +131,9 @@ static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tl
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlutaddr)
|
||||
static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u16 val = Common::swap16(src[x]);
|
||||
@ -141,9 +141,9 @@ static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlu
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, int tlutaddr)
|
||||
static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u16 val = Common::swap16(src[x]);
|
||||
@ -246,7 +246,7 @@ static void decodeDXTBlockRGBA(u32 *dst, const DXTBlock *src, int pitch)
|
||||
// TODO: complete SSE2 optimization of less often used texture formats.
|
||||
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
|
||||
|
||||
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt)
|
||||
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt)
|
||||
{
|
||||
|
||||
const int Wsteps4 = (width + 3) / 4;
|
||||
@ -261,21 +261,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
|
||||
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
|
||||
decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
|
||||
decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_IA8)
|
||||
{
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
|
||||
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
|
||||
decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
|
||||
decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_RGB565)
|
||||
{
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
|
||||
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
|
||||
decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
|
||||
decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
|
||||
}
|
||||
break;
|
||||
case GX_TF_I4:
|
||||
@ -323,21 +323,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
|
||||
decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_IA8)
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
|
||||
decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_RGB565)
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
|
||||
decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut);
|
||||
|
||||
}
|
||||
break;
|
||||
@ -372,21 +372,21 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
|
||||
decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_IA8)
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
|
||||
decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_RGB565)
|
||||
{
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
|
||||
decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
|
||||
}
|
||||
break;
|
||||
case GX_TF_RGB565:
|
||||
|
@ -83,9 +83,9 @@ struct DXTBlock
|
||||
u8 lines[4];
|
||||
};
|
||||
|
||||
static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlutaddr)
|
||||
static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, const u8* tlut_)
|
||||
{
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u8 val = src[x];
|
||||
@ -94,9 +94,9 @@ static inline void decodebytesC4_5A3_To_rgba32(u32 *dst, const u8 *src, int tlut
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr)
|
||||
static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem+tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u8 val = src[x];
|
||||
@ -105,9 +105,9 @@ static inline void decodebytesC4IA8_To_RGBA(u32* dst, const u8* src, int tlutadd
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr)
|
||||
static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem+tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u8 val = src[x];
|
||||
@ -116,9 +116,9 @@ static inline void decodebytesC4RGB565_To_RGBA(u32* dst, const u8* src, int tlut
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlutaddr)
|
||||
static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, const u8* tlut_)
|
||||
{
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 8; x++)
|
||||
{
|
||||
u8 val = src[x];
|
||||
@ -126,27 +126,27 @@ static inline void decodebytesC8_5A3_To_RGBA32(u32 *dst, const u8 *src, int tlut
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, int tlutaddr)
|
||||
static inline void decodebytesC8IA8_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 8; x++)
|
||||
{
|
||||
*dst++ = decodeIA8Swapped(tlut[src[x]]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, int tlutaddr)
|
||||
static inline void decodebytesC8RGB565_To_RGBA(u32* dst, const u8* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 8; x++)
|
||||
{
|
||||
*dst++ = decode565RGBA(Common::swap16(tlut[src[x]]));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tlutaddr)
|
||||
static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, const u8* tlut_)
|
||||
{
|
||||
u16 *tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u16 val = Common::swap16(src[x]);
|
||||
@ -154,9 +154,9 @@ static inline void decodebytesC14X2_5A3_To_RGBA(u32 *dst, const u16 *src, int tl
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlutaddr)
|
||||
static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u16 val = Common::swap16(src[x]);
|
||||
@ -164,9 +164,9 @@ static inline void decodebytesC14X2IA8_To_RGBA(u32* dst, const u16* src, int tlu
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, int tlutaddr)
|
||||
static inline void decodebytesC14X2rgb565_To_RGBA(u32* dst, const u16* src, const u8* tlut_)
|
||||
{
|
||||
u16* tlut = (u16*)(texMem + tlutaddr);
|
||||
const u16* tlut = (u16*) tlut_;
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
u16 val = Common::swap16(src[x]);
|
||||
@ -287,7 +287,7 @@ static inline void SetOpenMPThreadCount(int width, int height)
|
||||
// TODO: complete SSE2 optimization of less often used texture formats.
|
||||
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
|
||||
|
||||
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, int tlutaddr, TlutFormat tlutfmt)
|
||||
PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int height, int texformat, const u8* tlut, TlutFormat tlutfmt)
|
||||
{
|
||||
SetOpenMPThreadCount(width, height);
|
||||
|
||||
@ -304,7 +304,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
|
||||
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
|
||||
decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
|
||||
decodebytesC4_5A3_To_rgba32(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_IA8)
|
||||
{
|
||||
@ -312,7 +312,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
|
||||
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
|
||||
decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
|
||||
decodebytesC4IA8_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
|
||||
|
||||
}
|
||||
else if (tlutfmt == GX_TL_RGB565)
|
||||
@ -321,7 +321,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 8)
|
||||
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8,yStep++)
|
||||
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++,xStep++)
|
||||
decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlutaddr);
|
||||
decodebytesC4RGB565_To_RGBA(dst + (y + iy) * width + x, src + 4 * xStep, tlut);
|
||||
}
|
||||
break;
|
||||
case GX_TF_I4:
|
||||
@ -562,7 +562,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
|
||||
decodebytesC8_5A3_To_RGBA32((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_IA8)
|
||||
{
|
||||
@ -570,7 +570,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
|
||||
decodebytesC8IA8_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut);
|
||||
|
||||
}
|
||||
else if (tlutfmt == GX_TL_RGB565)
|
||||
@ -579,7 +579,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlutaddr);
|
||||
decodebytesC8RGB565_To_RGBA(dst + (y + iy) * width + x, src + 8 * xStep, tlut);
|
||||
|
||||
}
|
||||
break;
|
||||
@ -675,7 +675,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
|
||||
decodebytesC14X2_5A3_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_IA8)
|
||||
{
|
||||
@ -683,7 +683,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
|
||||
decodebytesC14X2IA8_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
|
||||
}
|
||||
else if (tlutfmt == GX_TL_RGB565)
|
||||
{
|
||||
@ -691,7 +691,7 @@ PC_TexFormat _TexDecoder_DecodeImpl(u32 * dst, const u8 * src, int width, int he
|
||||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
|
||||
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
|
||||
decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlutaddr);
|
||||
decodebytesC14X2rgb565_To_RGBA(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut);
|
||||
}
|
||||
break;
|
||||
case GX_TF_RGB565:
|
||||
|
Loading…
x
Reference in New Issue
Block a user