From e93e777ffb941af612805ee438db5b8c0e5b5c73 Mon Sep 17 00:00:00 2001 From: Rodolfo Osvaldo Bogado Date: Mon, 8 Feb 2010 23:23:04 +0000 Subject: [PATCH] Second try at implementing a more correct safe texture cache: implemented full hashing of TLUT textures, as they are the most problematic. This should solve virtually all the problems with characters in all the games that have them. Sorry to say that this will bring a speed drop, so I will let you decide whether this change stays or not (I used the fastest open-source hash algorithm I know). Full hashing is not applied to other formats because it kills performance. By popular request, added 9x SSAA; believe me, it will kill your graphics card even if it is the best one, but the image quality is exceptional. As always, please test and let me know the results. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5034 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/CommonFuncs.h | 10 + .../Core/VideoCommon/Src/TextureDecoder.cpp | 91 +++---- Source/Core/VideoCommon/Src/TextureDecoder.h | 4 +- .../Plugins/Plugin_VideoDX9/Src/D3DBase.cpp | 5 +- .../Plugin_VideoDX9/Src/PixelShaderCache.cpp | 253 +++++++++++++----- .../Plugin_VideoDX9/Src/PixelShaderCache.h | 10 +- Source/Plugins/Plugin_VideoDX9/Src/Render.cpp | 32 ++- .../Plugin_VideoDX9/Src/TextureCache.cpp | 58 ++-- .../Plugin_VideoDX9/Src/VertexShaderCache.cpp | 30 ++- Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 4 +- .../Plugin_VideoOGL/Src/TextureMngr.cpp | 10 +- 11 files changed, 332 insertions(+), 175 deletions(-) diff --git a/Source/Core/Common/Src/CommonFuncs.h b/Source/Core/Common/Src/CommonFuncs.h index a11169d27a..953da5bb61 100644 --- a/Source/Core/Common/Src/CommonFuncs.h +++ b/Source/Core/Common/Src/CommonFuncs.h @@ -51,11 +51,21 @@ inline u32 _rotl(u32 x, int shift) { return (x << shift) | (x >> (32 - shift)); } +inline u64 _rotl64(u64 x, unsigned int shift){ + unsigned int n = shift % 64; + return (x << n) | (x >> (64 - n)); +} + inline u32 _rotr(u32 x, int 
shift) { shift &= 31; if (!shift) return x; return (x >> shift) | (x << (32 - shift)); } + +inline u64 _rotr64(u64 x, unsigned int shift){ + unsigned int n = shift % 64; + return (x >> n) | (x << (64 - n)); +} #define SLEEP(x) usleep(x*1000) #else // WIN32 // Function Cross-Compatibility diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index c6db5582bb..1cf39c17ea 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -88,57 +88,58 @@ int TexDecoder_GetTextureSizeInBytes(int width, int height, int format) return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2; } -/*u64 TexDecoder_GetTlutHash(const u8* src, int len) +u64 TexDecoder_GetFullHash(const u8 *key, int len, u64 seed) { - //char str[40000], st[20]; str[0]='\0';for (int i=0;i> r; + k *= m; + + h ^= k; + h *= m; + } + + const u8 * data2 = (const u8*)data; + + switch(len & 7) + { + case 7: h ^= u64(data2[6]) << 48; + case 6: h ^= u64(data2[5]) << 40; + case 5: h ^= u64(data2[4]) << 32; + case 4: h ^= u64(data2[3]) << 24; + case 3: h ^= u64(data2[2]) << 16; + case 2: h ^= u64(data2[1]) << 8; + case 1: h ^= u64(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + +u64 TexDecoder_GetFastHash(const u8 *src, int len, u64 seed) { - int len = TexDecoder_GetTextureSizeInBytes(width, height, texformat); - u64 hash = seed ? seed : 0xbeefbabe1337c0de; - int step = len / 29 / 8; + u64 hash = seed ? 
seed : 0x1337c0debeefbabe; + int step = (len / 8) / 37; if (!step) step = 1; for (int i = 0; i < len / 8; i += step) { - hash = _rotl(hash, 17) ^ ((u64 *)src)[i]; - } - return hash; -} -*/ - -u32 TexDecoder_GetTlutHash(const u8* src, int len) -{ - //char str[40000], st[20]; str[0]='\0';for (int i=0;iCheckDeviceMultiSampleType( i, D3DDEVTYPE_HAL, D3DFMT_X8R8G8B8, TRUE, D3DMULTISAMPLE_2_SAMPLES, &qlevels)) @@ -208,7 +211,7 @@ void Enumerate() } } } - + */ // Determine if INTZ is supported. Code from ATI's doc. // http://developer.amd.com/gpu_assets/Advanced%20DX9%20Capabilities%20for%20ATI%20Radeon%20Cards.pdf a.supports_intz = D3D_OK == D3D->CheckDeviceFormat( diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index a3420d4e3e..897ea27f48 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -42,27 +42,26 @@ LinearDiskCache g_ps_disk_cache; static float lastPSconstants[C_COLORMATRIX+16][4]; -static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram = 0; +static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[4]; +static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[4]; +static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[4]; static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_ClearZProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_FSAAProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_FSAAColorMatrixProgram = 0; -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram() + + +LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode) { - return s_ColorMatrixProgram; + return s_ColorMatrixProgram[SSAAMode]; } -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram() +LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode) { - return 
s_DepthMatrixProgram; + return s_DepthMatrixProgram[SSAAMode]; } -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram() +LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode) { - return s_ColorCopyProgram; + return s_ColorCopyProgram[SSAAMode]; } LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() @@ -70,16 +69,6 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() return s_ClearProgram; } -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetFSAAProgram() -{ - return s_FSAAProgram; -} - -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetFSAAColorMatrixProgram() -{ - return s_FSAAColorMatrixProgram; -} - void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) { if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 || @@ -123,7 +112,8 @@ public: void PixelShaderCache::Init() { - char pprog[2048]; + //program used for clear screen + char pprog[3072]; sprintf(pprog, "void main(\n" "out float4 ocol0 : COLOR0,\n" " in float4 incol0 : COLOR0){\n" @@ -131,61 +121,180 @@ void PixelShaderCache::Init() "}\n"); s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + //Used for Copy/resolve the color buffer + //1 Sample sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" - "in float4 uv0 : TEXCOORD0){\n" - "ocol0 = tex2D(samp0,uv0.xy);\n" + "in float2 uv0 : TEXCOORD0){\n" + "ocol0 = tex2D(samp0,uv0);\n" "}\n"); - s_ColorCopyProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - " in float4 uv0 : TEXCOORD0){\n" - "float4 texcol = tex2D(samp0,uv0.xy);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_ColorMatrixProgram = 
D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - " in float4 uv0 : TEXCOORD0){\n" - "float4 texcol = tex2D(samp0,uv0.xy);\n" - "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" - "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_DepthMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + //2 samples sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" - "in float2 uv0 : TEXCOORD0,\n" - "in float2 uv1 : TEXCOORD1,\n" - "in float2 uv2 : TEXCOORD2,\n" - "in float2 uv3 : TEXCOORD3){\n" - "ocol0 = (tex2D(samp0,uv0) + tex2D(samp0,uv1) + tex2D(samp0,uv2) + tex2D(samp0,uv3))*0.25f;\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2){\n" + "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy))*0.5f;\n" "}\n"); - s_FSAAProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + //4 Samples + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4){\n" + "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv4.xy))*0.25f;\n" + "}\n"); + s_ColorCopyProgram[2] = 
D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //9 Samples + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4){\n" + "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz) + tex2D(samp0,uv4.xy) + tex2D(samp0,uv4.wz) + tex2D(samp0,uv0.xy))/9.0f;\n" + "}\n"); + s_ColorCopyProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //Color conversion Programs + //1 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + " in float2 uv0 : TEXCOORD0){\n" + "float4 texcol = tex2D(samp0,uv0);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //2 samples sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "uniform float4 cColMatrix[5] : register(c%d);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" - "in float2 uv0 : TEXCOORD0,\n" - "in float2 uv1 : TEXCOORD1,\n" - "in float2 uv2 : TEXCOORD2,\n" - "in float2 uv3 : TEXCOORD3,\n" - "in float4 uv4 : TEXCOORD4){\n" - "float4 texcol = float4(0.0f,0.0f,0.0f,0.0f);\n" - "texcol = (tex2D(samp0,float2(clamp(uv0.x,uv4.x,uv4.z),clamp(uv0.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv1.x,uv4.x,uv4.z),clamp(uv1.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv2.x,uv4.x,uv4.z),clamp(uv2.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv3.x,uv4.x,uv4.z),clamp(uv3.y,uv4.y,uv4.w))))*0.25f;\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : 
TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w)))) * 0.5f;\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); - s_FSAAColorMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //4 samples + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //9 samples + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = 
(tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_ColorMatrixProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //Depth copy programs + //1 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + " in float2 uv0 : TEXCOORD0){\n" + "float4 texcol = tex2D(samp0,uv0);\n" + "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //2 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + 
"in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w)))) * 0.5f;\n" + "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //4 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n" + "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_DepthMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //9 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : 
COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n" + "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_DepthMatrixProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); Clear(); @@ -213,18 +322,18 @@ void PixelShaderCache::Clear() void PixelShaderCache::Shutdown() { - if (s_ColorMatrixProgram) s_ColorMatrixProgram->Release(); - s_ColorMatrixProgram = NULL; - if (s_ColorCopyProgram) s_ColorCopyProgram->Release(); - s_ColorCopyProgram = NULL; - if (s_DepthMatrixProgram) s_DepthMatrixProgram->Release(); - s_DepthMatrixProgram = NULL; + for(int i = 0;i<4;i++) + { + if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release(); + s_ColorMatrixProgram[i] = NULL; + if (s_ColorCopyProgram[i]) s_ColorCopyProgram[i]->Release(); + s_ColorCopyProgram[i] = NULL; + if (s_DepthMatrixProgram[i]) 
s_DepthMatrixProgram[i]->Release(); + s_DepthMatrixProgram[i] = NULL; + } if (s_ClearProgram) s_ClearProgram->Release(); s_ClearProgram = NULL; - if (s_FSAAProgram) s_FSAAProgram->Release(); - s_FSAAProgram = NULL; - if (s_FSAAColorMatrixProgram) s_FSAAColorMatrixProgram->Release(); - s_FSAAColorMatrixProgram = NULL; + Clear(); g_ps_disk_cache.Sync(); g_ps_disk_cache.Close(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h index 5cf64e1a3a..40ace0b67c 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h @@ -62,12 +62,10 @@ public: static void Shutdown(); static bool SetShader(bool dstAlpha); static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); - static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(); - static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(); - static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(); - static LPDIRECT3DPIXELSHADER9 GetClearProgram(); - static LPDIRECT3DPIXELSHADER9 GetFSAAProgram(); - static LPDIRECT3DPIXELSHADER9 GetFSAAColorMatrixProgram(); + static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode); + static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode); + static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode); + static LPDIRECT3DPIXELSHADER9 GetClearProgram(); #if defined(_DEBUG) || defined(DEBUGFAST) static std::string GetCurrentShaderCode(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index e6f86b95f6..43a7ec7f4b 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -259,7 +259,7 @@ bool Renderer::Init() UpdateActiveConfig(); int fullScreenRes, w_temp, h_temp; s_blendMode = 0; - // Anti-aliasing hasn't been implemented yet + // Multisample Anti-aliasing hasn't been implemented yet int 
backbuffer_ms_mode = 0; // g_ActiveConfig.iMultisampleMode; sscanf(g_Config.cFSResolution, "%dx%d", &w_temp, &h_temp); @@ -283,7 +283,7 @@ bool Renderer::Init() // TODO: Grab target width from configured resolution? s_target_width = s_backbuffer_width; s_target_height = s_backbuffer_height * ((float)EFB_HEIGHT / 480.0f); - s_LastAA = g_ActiveConfig.iMultisampleMode; + s_LastAA = (g_ActiveConfig.iMultisampleMode > 3)?0:g_ActiveConfig.iMultisampleMode; switch (s_LastAA) { @@ -295,6 +295,10 @@ bool Renderer::Init() s_target_width *= 2; s_target_height *= 2; break; + case 3: + s_target_width *= 3; + s_target_height *= 3; + break; default: break; }; @@ -580,14 +584,10 @@ static void EFBTextureToD3DBackBuffer(const EFBRectangle& sourceRc) D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); - if(g_ActiveConfig.iMultisampleMode > 0 ) - { - D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),PixelShaderCache::GetFSAAProgram(),VertexShaderCache::GetFSAAVertexShader()); - } - else - { - D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader()); - } + int SSAAMode = ( g_ActiveConfig.iMultisampleMode > 3 )? 
0 : g_ActiveConfig.iMultisampleMode; + + D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),PixelShaderCache::GetColorCopyProgram(SSAAMode),(SSAAMode != 0)?VertexShaderCache::GetFSAAVertexShader():VertexShaderCache::GetSimpleVertexShader()); + D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); @@ -878,7 +878,13 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); - D3D::drawShadedTexQuad(read_texture,&RectToLock, Renderer::GetFullTargetWidth() , Renderer::GetFullTargetHeight(),(BufferFormat == FOURCC_RAWZ)?PixelShaderCache::GetColorMatrixProgram():PixelShaderCache::GetDepthMatrixProgram(),VertexShaderCache::GetSimpleVertexShader()); + D3D::drawShadedTexQuad( + read_texture, + &RectToLock, + Renderer::GetFullTargetWidth() , + Renderer::GetFullTargetHeight(), + (BufferFormat == FOURCC_RAWZ)?PixelShaderCache::GetColorMatrixProgram(0):PixelShaderCache::GetDepthMatrixProgram(0), + VertexShaderCache::GetSimpleVertexShader()); D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); @@ -1159,6 +1165,10 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) s_target_width *= 2; s_target_height *= 2; break; + case 3: + s_target_width *= 3; + s_target_height *= 3; + break; default: break; }; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index 660edd3b75..1cc4d0832a 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -156,7 +156,7 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures) { - texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0); + if ((tex_format == GX_TF_C4) || 
(tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) @@ -166,10 +166,15 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, // each other stored in a single texture, and uses the palette to make different characters // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, // we must make sure that texture with different tluts get different IDs. - u64 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128); + texHash = TexDecoder_GetFullHash(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format)); + u64 tlutHash = TexDecoder_GetFullHash(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format)); texHash ^= tlutHash; if (g_ActiveConfig.bSafeTextureCache) - texID ^= tlutHash; + texID = texID ^ ((u32)(tlutHash & 0xFFFFFFFF)) ^ ((u32)((tlutHash >> 32) & 0xFFFFFFFF)); + } + else + { + texHash = TexDecoder_GetFastHash(ptr, TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format)); } if (g_ActiveConfig.bSafeTextureCache) hash_value = texHash; @@ -328,9 +333,28 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo int tex_w = (abs(source_rect.GetWidth()) >> bScaleByHalf); int tex_h = (abs(source_rect.GetHeight()) >> bScaleByHalf); - - int Scaledtex_w = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleX() * tex_w)):tex_w; - int Scaledtex_h = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleY() * tex_h)):tex_h; + //compensate the texture grow if multisample is enabled to conserve memory usage + float MultiSampleCompensation = 1.0f; + if(g_ActiveConfig.iMultisampleMode > 0 && g_ActiveConfig.iMultisampleMode < 4) + { + switch (g_ActiveConfig.iMultisampleMode) + { + case 1: + MultiSampleCompensation = 2.0f/3.0f; + break; + case 2: + MultiSampleCompensation = 0.5f; + break; + case 3: + 
MultiSampleCompensation = 1.0f/3.0f; + break; + default: + break; + }; + } + int Scaledtex_w = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleX() * MultiSampleCompensation * tex_w)):tex_w; + int Scaledtex_h = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleY() * MultiSampleCompensation * tex_h)):tex_h; + TexCache::iterator iter; LPDIRECT3DTEXTURE9 tex; iter = textures.find(address); @@ -525,23 +549,15 @@ have_texture: D3DFORMAT bformat = FBManager::GetEFBDepthRTSurfaceFormat(); - if(!bFromZBuffer && g_ActiveConfig.iMultisampleMode > 0) - { - D3D::drawShadedTexQuad(read_texture, - &sourcerect, - Renderer::GetFullTargetWidth() , - Renderer::GetFullTargetHeight(), - PixelShaderCache::GetFSAAColorMatrixProgram(), - VertexShaderCache::GetFSAAVertexShader()); - } - else - { - D3D::drawShadedTexQuad(read_texture,&sourcerect, + int SSAAMode = ( g_ActiveConfig.iMultisampleMode > 3 )? 0 : g_ActiveConfig.iMultisampleMode; + D3D::drawShadedTexQuad( + read_texture, + &sourcerect, Renderer::GetFullTargetWidth() , Renderer::GetFullTargetHeight(), - ((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(): PixelShaderCache::GetColorMatrixProgram(), - VertexShaderCache::GetSimpleVertexShader()); - } + ((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(SSAAMode): PixelShaderCache::GetColorMatrixProgram(SSAAMode), + (SSAAMode != 0)? 
VertexShaderCache::GetFSAAVertexShader() : VertexShaderCache::GetSimpleVertexShader()); + D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index b888e5c8ff..925ef8d790 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -162,11 +162,13 @@ void VertexShaderCache::Init() "float4 vColor0 : COLOR0;\n" "float2 vTexCoord : TEXCOORD0;\n" "};\n" - "void main(out VSOUTPUT OUT,in float4 inPosition : POSITION,in float2 inTEX0 : TEXCOORD0,in float4 inColor0: COLOR0)\n" + "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float4 inColor0: COLOR0)\n" "{\n" + "VSOUTPUT OUT;\n" "OUT.vPosition = inPosition;\n" "OUT.vColor0 = inColor0;\n" - "OUT.vTexCoord = inTEX0;\n" + "OUT.vTexCoord = inTEX0;\n" + "return OUT;\n" "}\n"); SimpleVertexShader = D3D::CompileAndCreateVertexShader(vSimpleProg, (int)strlen(vSimpleProg)); @@ -175,20 +177,24 @@ void VertexShaderCache::Init() sprintf(vFSAAProg, "struct VSOUTPUT\n" "{\n" "float4 vPosition : POSITION;\n" - "float2 vTexCoord : TEXCOORD0;\n" - "float2 vTexCoord1 : TEXCOORD1;\n" - "float2 vTexCoord2 : TEXCOORD2;\n" - "float2 vTexCoord3 : TEXCOORD3;\n" + "float4 vTexCoord : TEXCOORD0;\n" + "float4 vTexCoord1 : TEXCOORD1;\n" + "float4 vTexCoord2 : TEXCOORD2;\n" + "float4 vTexCoord3 : TEXCOORD3;\n" "float4 vTexCoord4 : TEXCOORD4;\n" + "float4 vTexCoord5 : TEXCOORD5;\n" "};\n" - "void main( out VSOUTPUT OUT,in float4 inPosition : POSITION,in float2 inTEX0 : TEXCOORD0,in float2 inTEX1 : TEXCOORD1,in float4 inTEX2 : TEXCOORD2)\n" + "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n" "{\n" + "VSOUTPUT OUT;" "OUT.vPosition = inPosition;\n" - "OUT.vTexCoord = inTEX0 + (float2(-0.1830127f,-0.6830127f) * 
inTEX1);\n" - "OUT.vTexCoord1 = inTEX0 + (float2(-0.6830127f, 0.1830127f) * inTEX1);\n" - "OUT.vTexCoord2 = inTEX0 + (float2( 0.6830127f,-0.1830127f) * inTEX1);\n" - "OUT.vTexCoord3 = inTEX0 + (float2( 0.1830127f, 0.6830127f) * inTEX1);\n" - "OUT.vTexCoord4 = inTEX2;\n" + "OUT.vTexCoord = inTEX0.xyyx;\n" + "OUT.vTexCoord1 = inTEX0.xyyx + (float4(-0.1830127f,-0.6830127f,-0.7071068f,-1.2247449f) * inTEX1.xyyx);\n" + "OUT.vTexCoord2 = inTEX0.xyyx + (float4(-0.6830127f, 0.1830127f, 0.7071068f, 1.2247449f) * inTEX1.xyyx);\n" + "OUT.vTexCoord3 = inTEX0.xyyx + (float4( 0.6830127f,-0.1830127f,-1.2247449f, 0.7071068f) * inTEX1.xyyx);\n" + "OUT.vTexCoord4 = inTEX0.xyyx + (float4( 0.1830127f, 0.6830127f, 1.2247449f,-0.7071068f) * inTEX1.xyyx);\n" + "OUT.vTexCoord5 = inTEX2;\n" + "return OUT;\n" "}\n"); FSAAVertexShader = D3D::CompileAndCreateVertexShader(vFSAAProg, (int)strlen(vFSAAProg)); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 550c0373e1..5a1e28efe3 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -1081,9 +1081,9 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) GL_REPORT_ERRORD(); g_Config.iSaveTargetId = 0; - bool last_copy_efb_to_ram = !g_ActiveConfig.bCopyEFBToTexture; + bool last_copy_efb_to_Texture = g_ActiveConfig.bCopyEFBToTexture; UpdateActiveConfig(); - if (last_copy_efb_to_ram != (!g_ActiveConfig.bCopyEFBToTexture)) + if (last_copy_efb_to_Texture != g_ActiveConfig.bCopyEFBToTexture) TextureMngr::ClearRenderTargets(); // For testing zbuffer targets. 
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp index 3460d69b0b..4aac7e1d6d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp @@ -265,7 +265,6 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width u32 FullFormat = (tex_format | (tlutfmt << 16)); if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures) { - texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0); // remove last arg if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) @@ -275,12 +274,17 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width // each other stored in a single texture, and uses the palette to make different characters // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, // we must make sure that texture with different tluts get different IDs. - u64 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128); + texHash = TexDecoder_GetFullHash(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format)); + u64 tlutHash = TexDecoder_GetFullHash(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format)); texHash ^= tlutHash; if (g_ActiveConfig.bSafeTextureCache) - texID ^= tlutHash; + texID = texID ^ ((u32)(tlutHash & 0xFFFFFFFF)) ^ ((u32)((tlutHash >> 32) & 0xFFFFFFFF)); //DebugLog("addr: %08x | texID: %08x | texHash: %08x", address, texID, hash_value); } + else + { + texHash = TexDecoder_GetFastHash(ptr, TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format)); + } if (g_ActiveConfig.bSafeTextureCache) hash_value = texHash; }