mirror of
synced 2025-03-12 14:46:49 +01:00
Second attempt at implementing a more correct safe texture cache: implemented full hashing of TLUT textures, as they are the most problematic. This should solve virtually all the problems with characters in all the games that have them.
Sorry to say, but this will bring a speed drop, so I will let you decide whether this change stays or not (I used the fastest open-source hash algorithm I know). Full hashing is not applied to the other formats because it kills performance. By popular request, added 9x SSAA; believe me, it will kill your graphics card even if it is the best one, but the image quality is exceptional. As always, please test and let me know the results. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5034 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
@ -51,11 +51,21 @@ inline u32 _rotl(u32 x, int shift) {
return (x << shift) | (x >> (32 - shift));
// Rotates the 64-bit value x left by `shift` bits. The count is reduced
// modulo 64, so any unsigned shift value is accepted.
inline u64 _rotl64(u64 x, unsigned int shift){
	const unsigned int n = shift % 64;
	// The complementary count is reduced modulo 64 as well: for n == 0 the
	// plain (64 - n) would shift a 64-bit value by 64 bits, which is
	// undefined behaviour. With the reduction, n == 0 simply yields x.
	return (x << n) | (x >> ((64 - n) % 64));
}
// Rotates the 32-bit value x right by `shift` bits.
inline u32 _rotr(u32 x, int shift) {
// Only the low five bits of the count are used, so it is always in 0..31.
shift &= 31;
// A zero count returns x unchanged, so (32 - shift) below never reaches 32.
if (!shift) return x;
return (x >> shift) | (x << (32 - shift));
// Rotates the 64-bit value x right by `shift` bits. The count is reduced
// modulo 64, so any unsigned shift value is accepted.
inline u64 _rotr64(u64 x, unsigned int shift){
	const unsigned int n = shift % 64;
	// The complementary count is reduced modulo 64 as well: for n == 0 the
	// plain (64 - n) would shift a 64-bit value by 64 bits, which is
	// undefined behaviour. With the reduction, n == 0 simply yields x.
	return (x >> n) | (x << ((64 - n) % 64));
}
#define SLEEP(x) usleep(x*1000)
#else // WIN32
// Function Cross-Compatibility
@ -88,60 +88,61 @@ int TexDecoder_GetTextureSizeInBytes(int width, int height, int format)
return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2;
/*u64 TexDecoder_GetTlutHash(const u8* src, int len)
u64 TexDecoder_GetFullHash(const u8 *key, int len, u64 seed)
//char str[40000], st[20]; str[0]='\0';for (int i=0;i<len;i++){sprintf(st,"%02x ",src[i]);strcat(str,st);}
u64 hash = 0xbeefbabe1337c0de;
int step = len / 29 / 8;
if (!step) step = 1;
for (int i = 0; i < len/8; i += step) {
hash = _rotl64(hash, 17) ^ ((u64 *)(src + i))[0];
return hash;
const u64 m = 0xc6a4a7935bd1e995;
const int r = 47;
u64 h = seed ^ (len * m);
const u64 * data = (const u64 *)key;
const u64 * end = data + (len/8);
while(data != end)
u64 k = *data++;
k *= m;
k ^= k >> r;
k *= m;
h ^= k;
h *= m;
u64 TexDecoder_GetSafeTextureHash(const u8 *src, int width, int height, int texformat, u32 seed)
const u8 * data2 = (const u8*)data;
switch(len & 7)
int len = TexDecoder_GetTextureSizeInBytes(width, height, texformat);
u64 hash = seed ? seed : 0xbeefbabe1337c0de;
int step = len / 29 / 8;
case 7: h ^= u64(data2[6]) << 48;
case 6: h ^= u64(data2[5]) << 40;
case 5: h ^= u64(data2[4]) << 32;
case 4: h ^= u64(data2[3]) << 24;
case 3: h ^= u64(data2[2]) << 16;
case 2: h ^= u64(data2[1]) << 8;
case 1: h ^= u64(data2[0]);
h *= m;
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
// Sampled hash of the buffer at src: reads 64-bit words at a stride of
// (len / 8) / 37 words (at least 1) and folds each one into a running value.
// Returns the final running value; with fewer than 8 bytes this is the start value.
u64 TexDecoder_GetFastHash(const u8 *src, int len, u64 seed)
// A seed of 0 selects the built-in starting constant.
u64 hash = seed ? seed : 0x1337c0debeefbabe;
int step = (len / 8) / 37;
// Fewer than 37 whole words: visit every word.
if (!step) step = 1;
// Only whole 8-byte words are visited; the trailing len % 8 bytes are not hashed.
for (int i = 0; i < len / 8; i += step) {
// NOTE(review): the _rotl shown earlier in this listing takes and returns u32, so this
// appears to rotate only the low 32 bits of the 64-bit hash -- confirm whether _rotl64 was intended.
// NOTE(review): src is read through a u64 pointer -- presumably callers pass suitably aligned memory; verify.
hash = _rotl(hash, 17) ^ ((u64 *)src)[i];
return hash;
u32 TexDecoder_GetTlutHash(const u8* src, int len)
//char str[40000], st[20]; str[0]='\0';for (int i=0;i<len;i++){sprintf(st,"%02x ",src[i]);strcat(str,st);}
u32 hash = 0xbeefbabe;
for (int i = 0; i < len / 4; i ++) {
hash = _rotl(hash, 7) ^ ((u32 *)src)[i];
hash = _rotl64(hash, 19) ^ ((u64 *)src)[i];
hash += 7; // to add a bit more entropy/mess in here
return hash;
u32 TexDecoder_GetSafeTextureHash(const u8 *src, int width, int height, int texformat, u32 seed)
int sz = TexDecoder_GetTextureSizeInBytes(width, height, texformat);
u32 hash = seed ? seed : 0x1337c0de;
if (sz < 2048) {
for (int i = 0; i < sz / 4; i += 13) {
hash = _rotl(hash, 19) ^ ((u32 *)src)[i];
return hash;
} else {
int step = sz / 23 / 4;
for (int i = 0; i < sz / 4; i += step) {
hash = _rotl(hash, 19) ^ ((u32 *)src)[i];
return hash;
int TexDecoder_GetBlockWidthInTexels(u32 format)
switch (format)
@ -89,8 +89,8 @@ PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt);
void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt);
u32 TexDecoder_GetSafeTextureHash(const u8 *src, int width, int height, int texformat, u32 seed=0);
u32 TexDecoder_GetTlutHash(const u8* src, int len);
u64 TexDecoder_GetFullHash(const u8 *src, int len, u64 seed = 0);
u64 TexDecoder_GetFastHash(const u8 *src, int len, u64 seed = 0);
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);
@ -165,7 +165,10 @@ void Enumerate()
a.aa_levels.push_back(AALevel("None", D3DMULTISAMPLE_NONE, 0));
a.aa_levels.push_back(AALevel("2.25x SSAA", D3DMULTISAMPLE_NONE, 0));
a.aa_levels.push_back(AALevel("4x SSAA", D3DMULTISAMPLE_NONE, 0));
a.aa_levels.push_back(AALevel("9x SSAA", D3DMULTISAMPLE_NONE, 0));
//Add multisample modes
//disable them while they are not implemented
DWORD qlevels = 0;
if (D3DERR_NOTAVAILABLE != D3D::D3D->CheckDeviceMultiSampleType(
@ -208,7 +211,7 @@ void Enumerate()
// Determine if INTZ is supported. Code from ATI's doc.
// http://developer.amd.com/gpu_assets/Advanced%20DX9%20Capabilities%20for%20ATI%20Radeon%20Cards.pdf
a.supports_intz = D3D_OK == D3D->CheckDeviceFormat(
@ -42,27 +42,26 @@ LinearDiskCache g_ps_disk_cache;
static float lastPSconstants[C_COLORMATRIX+16][4];
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[4];
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[4];
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[4];
static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_ClearZProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_FSAAColorMatrixProgram = 0;
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram()
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode)
return s_ColorMatrixProgram;
return s_ColorMatrixProgram[SSAAMode];
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram()
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode)
return s_DepthMatrixProgram;
return s_DepthMatrixProgram[SSAAMode];
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram()
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode)
return s_ColorCopyProgram;
return s_ColorCopyProgram[SSAAMode];
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
@ -70,16 +69,6 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
return s_ClearProgram;
return s_FSAAProgram;
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetFSAAColorMatrixProgram()
return s_FSAAColorMatrixProgram;
void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4)
if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 ||
@ -123,7 +112,8 @@ public:
void PixelShaderCache::Init()
char pprog[2048];
//program used for clear screen
char pprog[3072];
sprintf(pprog, "void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float4 incol0 : COLOR0){\n"
@ -131,61 +121,180 @@ void PixelShaderCache::Init()
s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//Used for Copy/resolve the color buffer
//1 Sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0){\n"
"ocol0 = tex2D(samp0,uv0.xy);\n"
"in float2 uv0 : TEXCOORD0){\n"
"ocol0 = tex2D(samp0,uv0);\n"
s_ColorCopyProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//2 samples
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2){\n"
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy))*0.5f;\n"
s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 Samples
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4){\n"
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv4.xy))*0.25f;\n"
s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//9 Samples
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4){\n"
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz) + tex2D(samp0,uv4.xy) + tex2D(samp0,uv4.wz) + tex2D(samp0,uv0.xy))/9.0f;\n"
s_ColorCopyProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//Color conversion Programs
//1 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float4 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0.xy);\n"
" in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
s_ColorMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//2 samples
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float4 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0.xy);\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4,\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w)))) * 0.5f;\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 samples
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4,\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//9 samples
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4,\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
s_ColorMatrixProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//Depth copy programs
//1 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
" in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = tex2D(samp0,uv0);\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
s_DepthMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float2 uv0 : TEXCOORD0,\n"
"in float2 uv1 : TEXCOORD1,\n"
"in float2 uv2 : TEXCOORD2,\n"
"in float2 uv3 : TEXCOORD3){\n"
"ocol0 = (tex2D(samp0,uv0) + tex2D(samp0,uv1) + tex2D(samp0,uv2) + tex2D(samp0,uv3))*0.25f;\n"
s_FSAAProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//2 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float2 uv0 : TEXCOORD0,\n"
"in float2 uv1 : TEXCOORD1,\n"
"in float2 uv2 : TEXCOORD2,\n"
"in float2 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4){\n"
"float4 texcol = float4(0.0f,0.0f,0.0f,0.0f);\n"
"texcol = (tex2D(samp0,float2(clamp(uv0.x,uv4.x,uv4.z),clamp(uv0.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv1.x,uv4.x,uv4.z),clamp(uv1.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv2.x,uv4.x,uv4.z),clamp(uv2.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv3.x,uv4.x,uv4.z),clamp(uv3.y,uv4.y,uv4.w))))*0.25f;\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4,\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w)))) * 0.5f;\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
s_FSAAColorMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4,\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
s_DepthMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//9 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n"
"out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4,\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
s_DepthMatrixProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
@ -213,18 +322,18 @@ void PixelShaderCache::Clear()
void PixelShaderCache::Shutdown()
if (s_ColorMatrixProgram) s_ColorMatrixProgram->Release();
s_ColorMatrixProgram = NULL;
if (s_ColorCopyProgram) s_ColorCopyProgram->Release();
s_ColorCopyProgram = NULL;
if (s_DepthMatrixProgram) s_DepthMatrixProgram->Release();
s_DepthMatrixProgram = NULL;
for(int i = 0;i<4;i++)
if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release();
s_ColorMatrixProgram[i] = NULL;
if (s_ColorCopyProgram[i]) s_ColorCopyProgram[i]->Release();
s_ColorCopyProgram[i] = NULL;
if (s_DepthMatrixProgram[i]) s_DepthMatrixProgram[i]->Release();
s_DepthMatrixProgram[i] = NULL;
if (s_ClearProgram) s_ClearProgram->Release();
s_ClearProgram = NULL;
if (s_FSAAProgram) s_FSAAProgram->Release();
s_FSAAProgram = NULL;
if (s_FSAAColorMatrixProgram) s_FSAAColorMatrixProgram->Release();
s_FSAAColorMatrixProgram = NULL;
@ -62,12 +62,10 @@ public:
static void Shutdown();
static bool SetShader(bool dstAlpha);
static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate);
static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram();
static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram();
static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram();
static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetClearProgram();
static LPDIRECT3DPIXELSHADER9 GetFSAAColorMatrixProgram();
#if defined(_DEBUG) || defined(DEBUGFAST)
static std::string GetCurrentShaderCode();
@ -259,7 +259,7 @@ bool Renderer::Init()
int fullScreenRes, w_temp, h_temp;
s_blendMode = 0;
// Anti-aliasing hasn't been implemented yet
// Multisample Anti-aliasing hasn't been implemented yet
int backbuffer_ms_mode = 0; // g_ActiveConfig.iMultisampleMode;
sscanf(g_Config.cFSResolution, "%dx%d", &w_temp, &h_temp);
@ -283,7 +283,7 @@ bool Renderer::Init()
// TODO: Grab target width from configured resolution?
s_target_width = s_backbuffer_width;
s_target_height = s_backbuffer_height * ((float)EFB_HEIGHT / 480.0f);
s_LastAA = g_ActiveConfig.iMultisampleMode;
s_LastAA = (g_ActiveConfig.iMultisampleMode > 3)?0:g_ActiveConfig.iMultisampleMode;
switch (s_LastAA)
@ -295,6 +295,10 @@ bool Renderer::Init()
s_target_width *= 2;
s_target_height *= 2;
case 3:
s_target_width *= 3;
s_target_height *= 3;
@ -580,14 +584,10 @@ static void EFBTextureToD3DBackBuffer(const EFBRectangle& sourceRc)
if(g_ActiveConfig.iMultisampleMode > 0 )
int SSAAMode = ( g_ActiveConfig.iMultisampleMode > 3 )? 0 : g_ActiveConfig.iMultisampleMode;
D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),PixelShaderCache::GetColorCopyProgram(SSAAMode),(SSAAMode != 0)?VertexShaderCache::GetFSAAVertexShader():VertexShaderCache::GetSimpleVertexShader());
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER);
@ -878,7 +878,13 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y)
D3D::drawShadedTexQuad(read_texture,&RectToLock, Renderer::GetFullTargetWidth() , Renderer::GetFullTargetHeight(),(BufferFormat == FOURCC_RAWZ)?PixelShaderCache::GetColorMatrixProgram():PixelShaderCache::GetDepthMatrixProgram(),VertexShaderCache::GetSimpleVertexShader());
Renderer::GetFullTargetWidth() ,
(BufferFormat == FOURCC_RAWZ)?PixelShaderCache::GetColorMatrixProgram(0):PixelShaderCache::GetDepthMatrixProgram(0),
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
@ -1159,6 +1165,10 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
s_target_width *= 2;
s_target_height *= 2;
case 3:
s_target_width *= 3;
s_target_height *= 3;
@ -156,7 +156,7 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width,
if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures)
texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0);
if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2))
// WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up)
@ -166,10 +166,15 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width,
// each other stored in a single texture, and uses the palette to make different characters
// visible or invisible. Thus, unless we want to recreate the textures for every drawn character,
// we must make sure that texture with different tluts get different IDs.
u64 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128);
texHash = TexDecoder_GetFullHash(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format));
u64 tlutHash = TexDecoder_GetFullHash(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format));
texHash ^= tlutHash;
if (g_ActiveConfig.bSafeTextureCache)
texID ^= tlutHash;
texID = texID ^ ((u32)(tlutHash & 0xFFFFFFFF)) ^ ((u32)((tlutHash >> 32) & 0xFFFFFFFF));
texHash = TexDecoder_GetFastHash(ptr, TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format));
if (g_ActiveConfig.bSafeTextureCache)
hash_value = texHash;
@ -328,9 +333,28 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo
int tex_w = (abs(source_rect.GetWidth()) >> bScaleByHalf);
int tex_h = (abs(source_rect.GetHeight()) >> bScaleByHalf);
//compensate the texture grow if multisample is enabled to conserve memory usage
float MultiSampleCompensation = 1.0f;
if(g_ActiveConfig.iMultisampleMode > 0 && g_ActiveConfig.iMultisampleMode < 4)
switch (g_ActiveConfig.iMultisampleMode)
case 1:
MultiSampleCompensation = 2.0f/3.0f;
case 2:
MultiSampleCompensation = 0.5f;
case 3:
MultiSampleCompensation = 1.0f/3.0f;
int Scaledtex_w = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleX() * MultiSampleCompensation * tex_w)):tex_w;
int Scaledtex_h = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleY() * MultiSampleCompensation * tex_h)):tex_h;
int Scaledtex_w = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleX() * tex_w)):tex_w;
int Scaledtex_h = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleY() * tex_h)):tex_h;
TexCache::iterator iter;
iter = textures.find(address);
@ -525,23 +549,15 @@ have_texture:
D3DFORMAT bformat = FBManager::GetEFBDepthRTSurfaceFormat();
if(!bFromZBuffer && g_ActiveConfig.iMultisampleMode > 0)
int SSAAMode = ( g_ActiveConfig.iMultisampleMode > 3 )? 0 : g_ActiveConfig.iMultisampleMode;
Renderer::GetFullTargetWidth() ,
Renderer::GetFullTargetWidth() ,
((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(): PixelShaderCache::GetColorMatrixProgram(),
((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(SSAAMode): PixelShaderCache::GetColorMatrixProgram(SSAAMode),
(SSAAMode != 0)? VertexShaderCache::GetFSAAVertexShader() : VertexShaderCache::GetSimpleVertexShader());
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER);
@ -162,11 +162,13 @@ void VertexShaderCache::Init()
"float4 vColor0 : COLOR0;\n"
"float2 vTexCoord : TEXCOORD0;\n"
"void main(out VSOUTPUT OUT,in float4 inPosition : POSITION,in float2 inTEX0 : TEXCOORD0,in float4 inColor0: COLOR0)\n"
"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float4 inColor0: COLOR0)\n"
"OUT.vPosition = inPosition;\n"
"OUT.vColor0 = inColor0;\n"
"OUT.vTexCoord = inTEX0;\n"
"return OUT;\n"
SimpleVertexShader = D3D::CompileAndCreateVertexShader(vSimpleProg, (int)strlen(vSimpleProg));
@ -175,20 +177,24 @@ void VertexShaderCache::Init()
sprintf(vFSAAProg, "struct VSOUTPUT\n"
"float4 vPosition : POSITION;\n"
"float2 vTexCoord : TEXCOORD0;\n"
"float2 vTexCoord1 : TEXCOORD1;\n"
"float2 vTexCoord2 : TEXCOORD2;\n"
"float2 vTexCoord3 : TEXCOORD3;\n"
"float4 vTexCoord : TEXCOORD0;\n"
"float4 vTexCoord1 : TEXCOORD1;\n"
"float4 vTexCoord2 : TEXCOORD2;\n"
"float4 vTexCoord3 : TEXCOORD3;\n"
"float4 vTexCoord4 : TEXCOORD4;\n"
"float4 vTexCoord5 : TEXCOORD5;\n"
"void main( out VSOUTPUT OUT,in float4 inPosition : POSITION,in float2 inTEX0 : TEXCOORD0,in float2 inTEX1 : TEXCOORD1,in float4 inTEX2 : TEXCOORD2)\n"
"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n"
"OUT.vPosition = inPosition;\n"
"OUT.vTexCoord = inTEX0 + (float2(-0.1830127f,-0.6830127f) * inTEX1);\n"
"OUT.vTexCoord1 = inTEX0 + (float2(-0.6830127f, 0.1830127f) * inTEX1);\n"
"OUT.vTexCoord2 = inTEX0 + (float2( 0.6830127f,-0.1830127f) * inTEX1);\n"
"OUT.vTexCoord3 = inTEX0 + (float2( 0.1830127f, 0.6830127f) * inTEX1);\n"
"OUT.vTexCoord4 = inTEX2;\n"
"OUT.vTexCoord = inTEX0.xyyx;\n"
"OUT.vTexCoord1 = inTEX0.xyyx + (float4(-0.1830127f,-0.6830127f,-0.7071068f,-1.2247449f) * inTEX1.xyyx);\n"
"OUT.vTexCoord2 = inTEX0.xyyx + (float4(-0.6830127f, 0.1830127f, 0.7071068f, 1.2247449f) * inTEX1.xyyx);\n"
"OUT.vTexCoord3 = inTEX0.xyyx + (float4( 0.6830127f,-0.1830127f,-1.2247449f, 0.7071068f) * inTEX1.xyyx);\n"
"OUT.vTexCoord4 = inTEX0.xyyx + (float4( 0.1830127f, 0.6830127f, 1.2247449f,-0.7071068f) * inTEX1.xyyx);\n"
"OUT.vTexCoord5 = inTEX2;\n"
"return OUT;\n"
FSAAVertexShader = D3D::CompileAndCreateVertexShader(vFSAAProg, (int)strlen(vFSAAProg));
@ -1081,9 +1081,9 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
g_Config.iSaveTargetId = 0;
bool last_copy_efb_to_ram = !g_ActiveConfig.bCopyEFBToTexture;
bool last_copy_efb_to_Texture = g_ActiveConfig.bCopyEFBToTexture;
if (last_copy_efb_to_ram != (!g_ActiveConfig.bCopyEFBToTexture))
if (last_copy_efb_to_Texture != g_ActiveConfig.bCopyEFBToTexture)
// For testing zbuffer targets.
@ -265,7 +265,6 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width
u32 FullFormat = (tex_format | (tlutfmt << 16));
if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures)
texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0); // remove last arg
if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2))
// WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up)
@ -275,12 +274,17 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width
// each other stored in a single texture, and uses the palette to make different characters
// visible or invisible. Thus, unless we want to recreate the textures for every drawn character,
// we must make sure that texture with different tluts get different IDs.
u64 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128);
texHash = TexDecoder_GetFullHash(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format));
u64 tlutHash = TexDecoder_GetFullHash(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format));
texHash ^= tlutHash;
if (g_ActiveConfig.bSafeTextureCache)
texID ^= tlutHash;
texID = texID ^ ((u32)(tlutHash & 0xFFFFFFFF)) ^ ((u32)((tlutHash >> 32) & 0xFFFFFFFF));
//DebugLog("addr: %08x | texID: %08x | texHash: %08x", address, texID, hash_value);
texHash = TexDecoder_GetFastHash(ptr, TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format));
if (g_ActiveConfig.bSafeTextureCache)
hash_value = texHash;
Reference in New Issue
Block a user