dolphin/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp
Rodolfo Osvaldo Bogado 9e2bbec47f a lot of modifications here :)
first fixed scaling when updating backbuffer to make it friendly with encoders, now frame dumping must work without errors in any codec.
clean screenshot and frame dumping code now is more correct, faster and stable.
improve safe texture cache, improving the distribution of the hash algorithm, including tlut hash in the final hash of the texture, and making use of a 64 bit hash to make it more accurate.
cleaned up a lot of code and corrected some misused vertex formats when drawing full screen quads.
and biggest change last:
implemented pseudo antialiasing: an image post-process algorithm that mimics antialiasing and is far easier to implement in this scenario.
you can change the intensity of the effect changing the values of the antialiasing combo. the right value depends on the game.
for example mkwii looks awesome with 8x.
please try all the changes and let me know the results.
if something is broken, please let me know and will fix it asap.


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5000 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-02-03 03:52:50 +00:00

369 lines
12 KiB
C++

// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "FileUtil.h"
#include "LinearDiskCache.h"
#include "Globals.h"
#include "D3DBase.h"
#include "D3DShader.h"
#include "Statistics.h"
#include "Utils.h"
#include "VideoConfig.h"
#include "PixelShaderGen.h"
#include "PixelShaderManager.h"
#include "PixelShaderCache.h"
#include "VertexLoader.h"
#include "BPMemory.h"
#include "XFMemory.h"
#include "ImageWrite.h"
#include "Debugger/Debugger.h"
// In-memory table of pixel shader cache entries, looked up by PIXELSHADERUID.
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
// Entry most recently selected by SetShader() or stored by InsertByteCode();
// read by GetCurrentShaderCode() in debug builds.
const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry;
// On-disk shader bytecode cache: opened in Init(), appended to in SetShader(),
// synced and closed in Shutdown().
LinearDiskCache g_ps_disk_cache;
// Shadow copy of the values last passed to SetPSConstant4f/SetPSConstant4fv,
// used to skip device calls when a constant has not changed.
static float lastPSconstants[C_COLORMATRIX+16][4];
// Utility pixel shaders. Those assigned in Init() are released in Shutdown().
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0;
// NOTE(review): s_ClearZProgram is never assigned, returned or released in the
// code visible in this file — confirm whether it is still needed.
static LPDIRECT3DPIXELSHADER9 s_ClearZProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_FSAAProgram = 0;
static LPDIRECT3DPIXELSHADER9 s_FSAAColorMatrixProgram = 0;
// Accessors for the utility pixel shaders compiled in Init().
// Each returns the stored pointer as-is; it is NULL (0) before Init() and
// after Shutdown().

// Shader that samples samp0 and applies the cColMatrix color transform.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram()
{
return s_ColorMatrixProgram;
}
// Shader that decodes the sampled red channel into depth components and then
// applies the cColMatrix transform.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram()
{
return s_DepthMatrixProgram;
}
// Shader that outputs the samp0 texel unchanged.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram()
{
return s_ColorCopyProgram;
}
// Shader that outputs the interpolated vertex color unchanged.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
{
return s_ClearProgram;
}
// Post-process shader that averages four samp0 taps where the samp1-based
// edge term exceeds incol0.x, and passes the uv4.wz tap through otherwise.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetFSAAProgram()
{
return s_FSAAProgram;
}
// Same as the FSAA shader, followed by the cColMatrix color transform.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetFSAAColorMatrixProgram()
{
return s_FSAAColorMatrixProgram;
}
// Uploads one float4 pixel shader constant to the device.
// The device call is skipped when the shadow copy in lastPSconstants already
// holds exactly these four values; otherwise the shadow copy is refreshed
// after the upload.
void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4)
{
	float *cached = lastPSconstants[const_number];

	const bool unchanged = cached[0] == f1 && cached[1] == f2 &&
	                       cached[2] == f3 && cached[3] == f4;
	if (unchanged)
		return;

	const float values[4] = {f1, f2, f3, f4};
	D3D::dev->SetPixelShaderConstantF(const_number, values, 1);

	for (int component = 0; component < 4; ++component)
		cached[component] = values[component];
}
// Array flavour of SetPSConstant4f: f points at four floats.
// The constant is sent to the device only when at least one component differs
// from the shadow copy kept in lastPSconstants, which is then updated.
void SetPSConstant4fv(int const_number, const float *f)
{
	float *cached = lastPSconstants[const_number];

	bool dirty = false;
	for (int component = 0; component < 4; ++component)
	{
		if (cached[component] != f[component])
		{
			dirty = true;
			break;
		}
	}
	if (!dirty)
		return;

	D3D::dev->SetPixelShaderConstantF(const_number, f, 1);

	for (int component = 0; component < 4; ++component)
		cached[component] = f[component];
}
class PixelShaderCacheInserter : public LinearDiskCacheReader {
public:
void Read(const u8 *key, int key_size, const u8 *value, int value_size)
{
PIXELSHADERUID uid;
if (key_size != sizeof(uid)) {
ERROR_LOG(VIDEO, "Wrong key size in pixel shader cache");
return;
}
memcpy(&uid, key, key_size);
PixelShaderCache::InsertByteCode(uid, value, value_size, false);
}
};
// Compiles the built-in utility pixel shaders, resets the in-memory cache and
// constant shadow state, makes sure the shader cache directory exists and
// loads previously compiled shaders from the on-disk cache.
void PixelShaderCache::Init()
{
	// Scratch buffer for the generated shader sources. The largest program
	// below (FSAA + color matrix) is well over 1024 characters, so the former
	// 1 KB buffer was overrun by sprintf; 3 KB leaves ample headroom.
	char pprog[3072];

	// Pass-through of the interpolated vertex color.
	sprintf(pprog, "void main(\n"
						"out float4 ocol0 : COLOR0,\n"
						" in float4 incol0 : COLOR0){\n"
						"ocol0 = incol0;\n"
						"}\n");
	s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));

	// Plain texture copy.
	sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
						"void main(\n"
						"out float4 ocol0 : COLOR0,\n"
						"in float4 uv0 : TEXCOORD0){\n"
						"ocol0 = tex2D(samp0,uv0.xy);\n"
						"}\n");
	s_ColorCopyProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));

	// Texture copy followed by the cColMatrix color transform.
	sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
						"uniform float4 cColMatrix[5] : register(c%d);\n"
						"void main(\n"
						"out float4 ocol0 : COLOR0,\n"
						" in float4 uv0 : TEXCOORD0){\n"
						"float4 texcol = tex2D(samp0,uv0.xy);\n"
						"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
						"}\n",C_COLORMATRIX);
	s_ColorMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));

	// Depth decode of the red channel followed by the cColMatrix transform.
	sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
						"uniform float4 cColMatrix[5] : register(c%d);\n"
						"void main(\n"
						"out float4 ocol0 : COLOR0,\n"
						" in float4 uv0 : TEXCOORD0){\n"
						"float4 texcol = tex2D(samp0,uv0.xy);\n"
						"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
						"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
						"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
						"}\n",C_COLORMATRIX);
	s_DepthMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));

	// Pseudo anti-aliasing: an edge term is built from eight samp1 taps; where
	// it exceeds incol0.x four samp0 taps are averaged, otherwise the uv4.wz
	// tap is passed through.
	sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
						"uniform sampler samp1 : register(s1);\n"
						"void main(\n"
						"out float4 ocol0 : COLOR0,\n"
						" in float4 incol0 : COLOR0,\n"
						"in float4 uv0 : TEXCOORD0,\n"
						"in float4 uv1 : TEXCOORD1,\n"
						"in float4 uv2 : TEXCOORD2,\n"
						"in float4 uv3 : TEXCOORD3,\n"
						"in float4 uv4 : TEXCOORD4,\n"
						"in float2 uv5 : TEXCOORD5,\n"
						"in float2 uv6 : TEXCOORD6,\n"
						"in float2 uv7 : TEXCOORD7){\n"
						"float3 P1 = float3(tex2D(samp1,uv0.xy).x,tex2D(samp1,uv1.xy).x,tex2D(samp1,uv2.xy).x);\n"
						"float3 P2 = float3(tex2D(samp1,uv3.xy).x,tex2D(samp1,uv4.xy).x,tex2D(samp1,uv5).x);\n"
						"float3 P3 = float3(P1.z,tex2D(samp1,uv6).x,P2.z);\n"
						"float3 P4 = float3(P1.x,tex2D(samp1,uv7).r,P2.x);\n"
						"float3 P5 = float3(1.0f,2.0f,1.0f);\n"
						"float3 T = float3(dot(P3,P5) - dot(P4,P5),dot(P1,P5) - dot(P2,P5),0.0f);\n"
						"if (dot(T,T) > incol0.x)\n"
						"{\n"
						"ocol0 = (tex2D(samp0,uv0.wz) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.wz))*0.25f;// + tex2D(samp0,uv4.xy) + tex2D(samp0,uv5) + tex2D(samp0,uv6) + tex2D(samp0,uv7) + tex2D(samp0,uv4.wz)) / 9.0f;\n"
						"} else {\n"
						"ocol0 = tex2D(samp0,uv4.wz);\n"
						"}\n"
						"}\n");
	s_FSAAProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));

	// Same pseudo anti-aliasing filter, followed by the cColMatrix transform.
	sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
						"uniform sampler samp1 : register(s1);\n"
						"uniform float4 cColMatrix[5] : register(c%d);\n"
						"void main(\n"
						"out float4 ocol0 : COLOR0,\n"
						" in float4 incol0 : COLOR0,\n"
						"in float4 uv0 : TEXCOORD0,\n"
						"in float4 uv1 : TEXCOORD1,\n"
						"in float4 uv2 : TEXCOORD2,\n"
						"in float4 uv3 : TEXCOORD3,\n"
						"in float4 uv4 : TEXCOORD4,\n"
						"in float2 uv5 : TEXCOORD5,\n"
						"in float2 uv6 : TEXCOORD6,\n"
						"in float2 uv7 : TEXCOORD7){\n"
						"float3 P1 = float3(tex2D(samp1,uv0.xy).x,tex2D(samp1,uv1.xy).x,tex2D(samp1,uv2.xy).x);\n"
						"float3 P2 = float3(tex2D(samp1,uv3.xy).x,tex2D(samp1,uv4.xy).x,tex2D(samp1,uv5).x);\n"
						"float3 P3 = float3(P1.z,tex2D(samp1,uv6).x,P2.z);\n"
						"float3 P4 = float3(P1.x,tex2D(samp1,uv7).r,P2.x);\n"
						"float3 P5 = float3(1.0f,2.0f,1.0f);\n"
						"float3 T = float3(dot(P3,P5) - dot(P4,P5),dot(P1,P5) - dot(P2,P5),0.0f);\n"
						"float4 texcol = float4(0.0f,0.0f,0.0f,0.0f);\n"
						"if (dot(T,T) > incol0.x)\n"
						"{\n"
						"texcol = (tex2D(samp0,uv0.wz) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.wz))*0.25f;// + tex2D(samp0,uv4.xy) + tex2D(samp0,uv5) + tex2D(samp0,uv6) + tex2D(samp0,uv7) + tex2D(samp0,uv4.wz)) / 9.0f;\n"
						"} else {\n"
						"texcol = tex2D(samp0,uv4.wz);\n"
						"}\n"
						"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
						"}\n",C_COLORMATRIX);
	s_FSAAColorMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));

	// Start from an empty shader table and invalidated constant shadow state.
	Clear();

	if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
		File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

	// The cache file name is built from the shader cache directory and
	// globals->unique_id.
	char cache_filename[MAX_PATH];
	sprintf(cache_filename, "%s%s-ps.cache", File::GetUserPath(D_SHADERCACHE_IDX), globals->unique_id);

	// Every record found on disk is fed to InsertByteCode() by the inserter.
	PixelShaderCacheInserter inserter;
	g_ps_disk_cache.OpenAndRead(cache_filename, &inserter);
}
// ONLY to be used during shutdown.
void PixelShaderCache::Clear()
{
PSCache::iterator iter = PixelShaders.begin();
for (; iter != PixelShaders.end(); iter++)
iter->second.Destroy();
PixelShaders.clear();
for (int i = 0; i < (C_COLORMATRIX + 16) * 4; i++)
lastPSconstants[i / 4][i % 4] = -100000000.0f;
memset(&last_pixel_shader_uid, 0xFF, sizeof(last_pixel_shader_uid));
}
void PixelShaderCache::Shutdown()
{
if (s_ColorMatrixProgram) s_ColorMatrixProgram->Release();
s_ColorMatrixProgram = NULL;
if (s_ColorCopyProgram) s_ColorCopyProgram->Release();
s_ColorCopyProgram = NULL;
if (s_DepthMatrixProgram) s_DepthMatrixProgram->Release();
s_DepthMatrixProgram = NULL;
if (s_ClearProgram) s_ClearProgram->Release();
s_ClearProgram = NULL;
if (s_FSAAProgram) s_FSAAProgram->Release();
s_FSAAProgram = NULL;
if (s_FSAAColorMatrixProgram) s_FSAAColorMatrixProgram->Release();
s_FSAAColorMatrixProgram = NULL;
Clear();
g_ps_disk_cache.Sync();
g_ps_disk_cache.Close();
}
// Makes the pixel shader for the current state active, compiling and caching
// it on first use.
// dstAlpha is forwarded to both the UID generator and the code generator.
// Returns true when a non-null shader exists for the generated UID, false when
// the cached entry has no shader or compilation/creation failed.
bool PixelShaderCache::SetShader(bool dstAlpha)
{
PIXELSHADERUID uid;
GetPixelShaderId(&uid, PixelShaders.end() == PixelShaders.end() ? PixelShaderManager::GetTextureMask() : PixelShaderManager::GetTextureMask(), dstAlpha);
// Is the shader already set?
if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount)
{
PSCache::const_iterator iter = PixelShaders.find(uid);
if (iter != PixelShaders.end() && iter->second.shader)
return true; // Sure, we're done.
else
return false; // ?? something is wrong.
}
memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID));
// Is the shader already in the cache?
PSCache::iterator iter;
iter = PixelShaders.find(uid);
if (iter != PixelShaders.end())
{
iter->second.frameCount = frameCount;
const PSCacheEntry &entry = iter->second;
last_entry = &entry;
DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
if (entry.shader)
{
D3D::SetPixelShader(entry.shader);
return true;
}
else
return false;
}
// OK, need to generate and compile it.
const char *code = GeneratePixelShaderCode(PixelShaderManager::GetTextureMask(), dstAlpha, 2);
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
static int counter = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX), counter++);
SaveData(szTemp, code);
}
#endif
u8 *bytecode = 0;
int bytecodelen = 0;
if (!D3D::CompilePixelShader(code, (int)strlen(code), &bytecode, &bytecodelen)) {
if (g_ActiveConfig.bShowShaderErrors)
{
PanicAlert("Failed to compile Pixel Shader:\n\n%s", code);
}
return false;
}
// Here we have the UID and the byte code. Insert it into the disk cache.
g_ps_disk_cache.Append((u8 *)&uid, sizeof(uid), bytecode, bytecodelen);
g_ps_disk_cache.Sync();
// And insert it into the shader cache.
bool result = InsertByteCode(uid, bytecode, bytecodelen, true);
delete [] bytecode;
return result;
}
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) {
LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
// Make an entry in the table
PSCacheEntry newentry;
newentry.shader = shader;
newentry.frameCount = frameCount;
PixelShaders[uid] = newentry;
last_entry = &PixelShaders[uid];
if (!shader) {
// INCSTAT(stats.numPixelShadersFailed);
return false;
}
INCSTAT(stats.numPixelShadersCreated);
SETSTAT(stats.numPixelShadersAlive, (int)PixelShaders.size());
if (activate)
{
D3D::SetPixelShader(shader);
}
return true;
}
#if defined(_DEBUG) || defined(DEBUGFAST)
// Debug builds only: returns the code string stored in the entry last selected
// or inserted, or a placeholder string when no entry has been recorded.
std::string PixelShaderCache::GetCurrentShaderCode()
{
	if (!last_entry)
		return "(no shader)\n";

	return last_entry->code;
}
#endif