diff --git a/Source/Core/Common/Common.vcproj b/Source/Core/Common/Common.vcproj index 4608f8340c..e4e7eece2f 100644 --- a/Source/Core/Common/Common.vcproj +++ b/Source/Core/Common/Common.vcproj @@ -720,6 +720,14 @@ RelativePath=".\Src\IniFile.h" > + + + + diff --git a/Source/Core/Common/Src/CommonPaths.h b/Source/Core/Common/Src/CommonPaths.h index 3250c4fe7f..45f729ea1d 100644 --- a/Source/Core/Common/Src/CommonPaths.h +++ b/Source/Core/Common/Src/CommonPaths.h @@ -66,6 +66,7 @@ #define GAMECONFIG_DIR "GameConfig" #define MAPS_DIR "Maps" #define CACHE_DIR "Cache" +#define SHADERCACHE_DIR "ShaderCache" #define STATESAVES_DIR "StateSaves" #define SCREENSHOTS_DIR "ScreenShots" #define DUMP_DIR "Dump" @@ -128,6 +129,7 @@ #define FULL_CONFIG_DIR FULL_USERDATA_DIR CONFIG_DIR DIR_SEP #define FULL_CACHE_DIR FULL_USERDATA_DIR CACHE_DIR DIR_SEP +#define FULL_SHADERCACHE_DIR FULL_USERDATA_DIR SHADERCACHE_DIR DIR_SEP #define FULL_STATESAVES_DIR FULL_USERDATA_DIR STATESAVES_DIR DIR_SEP #define FULL_SCREENSHOTS_DIR FULL_USERDATA_DIR SCREENSHOTS_DIR DIR_SEP #define FULL_FRAMES_DIR FULL_USERDATA_DIR DUMP_DIR DIR_SEP DUMP_FRAMES_DIR diff --git a/Source/Core/Common/Src/LinearDiskCache.cpp b/Source/Core/Common/Src/LinearDiskCache.cpp new file mode 100644 index 0000000000..820450bef4 --- /dev/null +++ b/Source/Core/Common/Src/LinearDiskCache.cpp @@ -0,0 +1,149 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#include "LinearDiskCache.h"
+
+static const char ID[4] = {'D', 'C', 'A', 'C'};
+const int version = 1;  // TODO: Get from SVN_REV
+
+LinearDiskCache::LinearDiskCache()
+	: file_(NULL), num_entries_(0) {
+}
+
+// Writes the 4-byte ID followed by the 4-byte version at the current
+// position of file_. Does nothing when no file is open, which happens when
+// the cache file could not be created.
+void LinearDiskCache::WriteHeader() {
+	if (!file_)
+		return;
+	fwrite(ID, 4, 1, file_);
+	fwrite(&version, 4, 1, file_);
+}
+
+// Reads the header at the current position of file_ (which must be open).
+// Returns true only if both fields were read completely and match the
+// expected ID and version.
+bool LinearDiskCache::ValidateHeader() {
+	char header_id[4];
+	int header_version;
+	// A file shorter than the header must be rejected rather than compared
+	// against uninitialized stack contents.
+	if (fread(&header_id, 4, 1, file_) != 1)
+		return false;
+	if (fread(&header_version, 4, 1, file_) != 1)
+		return false;
+	if (memcmp(header_id, ID, 4) != 0)
+		return false;
+	if (header_version != version)
+		return false;
+	return true;
+}
+
+// Opens the cache stored at filename, passes every stored key/value pair to
+// reader->Read(), and leaves the file open for appending.
+// A missing or empty file, a file with a bad header, or a file with a
+// corrupted/truncated record is replaced by a fresh file holding only the header.
+// Returns the number of pairs handed to the reader.
+// If the file cannot be opened for writing, file_ stays NULL and
+// Append(), Sync() and Close() do nothing.
+int LinearDiskCache::OpenAndRead(const char *filename, LinearDiskCacheReader *reader) {
+	int items_read_count = 0;
+	file_ = fopen(filename, "rb");
+	int file_size = 0;
+	if (file_) {
+		fseek(file_, 0, SEEK_END);
+		file_size = (int)ftell(file_);
+	}
+
+	bool file_corrupt = false;
+	if (file_size == 0) {
+		if (file_)
+			fclose(file_);
+		// Reopen for writing.
+		file_ = fopen(filename, "wb");
+		// Cache empty, let's initialize a header.
+		WriteHeader();
+		num_entries_ = 0;
+	} else {
+		// file_ must be != 0 here.
+		// Back to the start we go.
+		fseek(file_, 0, SEEK_SET);
+		// Check that the file is valid
+		if (!ValidateHeader()) {
+			// Not valid - delete the file and start over.
+			fclose(file_);
+			unlink(filename);
+
+			PanicAlert("LinearDiskCache file header broken.");
+
+			file_ = fopen(filename, "wb");
+			WriteHeader();
+			num_entries_ = 0;
+		} else {
+			// Valid - blow through it.
+			// We're past the header already thanks to ValidateHeader.
+			while (!feof(file_)) {
+				// Zero-initialized so that a short read never leaves them indeterminate.
+				int key_size = 0, value_size = 0;
+				int key_size_size = (int)fread(&key_size, 1, sizeof(key_size), file_);
+				int value_size_size = (int)fread(&value_size, 1, sizeof(value_size), file_);
+				if (key_size_size == 0 && value_size_size == 0) {
+					// I guess feof isn't doing its job - we're at the end.
+					break;
+				}
+				if (key_size <= 0 || value_size < 0 || key_size_size != 4 || value_size_size != 4) {
+					PanicAlert("Disk cache file %s corrupted/truncated! ks: %i vs %i kss %i vss %i", filename,
+						key_size, value_size, key_size_size, value_size_size);
+					file_corrupt = true;
+					break;
+				}
+				// Reject sizes that cannot fit in the rest of the file before
+				// allocating, so a damaged length field cannot request a huge buffer.
+				long remaining = (long)file_size - ftell(file_);
+				if (key_size > remaining || value_size > remaining - key_size) {
+					PanicAlert("Disk cache file %s corrupted/truncated! ks: %i vs: %i", filename,
+						key_size, value_size);
+					file_corrupt = true;
+					break;
+				}
+				u8 *key = new u8[key_size];
+				u8 *value = new u8[value_size];
+				int actual_key_size = (int)fread(key, 1, key_size, file_);
+				int actual_value_size = (int)fread(value, 1, value_size, file_);
+				if (actual_key_size != key_size || actual_value_size != value_size) {
+					PanicAlert("Disk cache file %s corrupted/truncated! ks: %i actual ks: %i vs: %i actual vs: %i", filename,
+						key_size, actual_key_size, value_size, actual_value_size);
+					file_corrupt = true;
+				} else {
+					reader->Read(key, key_size, value, value_size);
+					items_read_count++;
+				}
+				delete [] key;
+				delete [] value;
+			}
+			fclose(file_);
+			// Done reading.
+
+			// Reopen file for append.
+			// At this point, ftell() will be at the end of the file,
+			// which happens to be exactly what we want.
+			file_ = fopen(filename, "ab");
+			if (file_)
+				fseek(file_, 0, SEEK_END);
+		}
+	}
+
+	if (file_corrupt) {
+		// Restore sanity, start over.
+		if (file_)
+			fclose(file_);
+		unlink(filename);
+
+		file_ = fopen(filename, "wb+");
+		WriteHeader();
+	}
+
+	return items_read_count;
+}
+
+// Appends one record: key size, value size, key bytes, value bytes.
+// Does nothing when no cache file is open.
+void LinearDiskCache::Append(
+	const u8 *key, int key_size, const u8 *value, int value_size) {
+	// Should do a check that we don't already have "key"?
+	if (!file_)
+		return;
+	fwrite(&key_size, 1, sizeof(key_size), file_);
+	fwrite(&value_size, 1, sizeof(value_size), file_);
+	fwrite(key, 1, key_size, file_);
+	fwrite(value, 1, value_size, file_);
+}
+
+// Flushes buffered writes. Does nothing when no cache file is open.
+void LinearDiskCache::Sync() {
+	if (file_)
+		fflush(file_);
+}
+
+// Closes the cache file if one is open and resets the object.
+void LinearDiskCache::Close() {
+	if (file_)
+		fclose(file_);
+	file_ = 0;
+	num_entries_ = 0;
+}
diff --git a/Source/Core/Common/Src/LinearDiskCache.h b/Source/Core/Common/Src/LinearDiskCache.h
new file mode 100644
index 0000000000..c2265b26a4
--- /dev/null
+++ b/Source/Core/Common/Src/LinearDiskCache.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#ifndef _LINEAR_DISKCACHE
+#define _LINEAR_DISKCACHE
+
+#include "Common.h"
+
+#include <stdio.h>
+#include <string.h>
+
+// On disk format:
+// uint32 'DCAC'
+// uint32 version; // svn_rev
+// uint32 key_length;
+// uint32 value_length;
+// .... key;
+// .... value;
+
+class LinearDiskCacheReader {
+public:
+	virtual void Read(const u8 *key, int key_size, const u8 *value, int value_size) = 0;
+};
+
+// Dead simple unsorted key-value store with append functionality.
+// No random read functionality, all reading is done in OpenAndRead.
+// Keys and values can contain any characters, including \0.
+//
+// Suitable for caching generated shader bytecode between executions.
+// Not tuned for extreme performance but should be reasonably fast.
+// Does not support keys or values larger than 2GB, which should be reasonable.
+// Keys must have non-zero length; values can have zero length.
+class LinearDiskCache {
+public:
+	LinearDiskCache();  // Starts with no file open and zero entries.
+
+	// Returns the number of items read from the cache.
+	int OpenAndRead(const char *filename, LinearDiskCacheReader *reader);
+	void Close();  // Closes the file and resets file_ and num_entries_.
+	void Sync();  // Flushes buffered writes with fflush().
+
+	// Appends a key-value pair to the store.
+	void Append(const u8 *key, int key_size, const u8 *value, int value_size);
+
+private:
+	void WriteHeader();  // Writes the 'DCAC' ID and the version to file_.
+	bool ValidateHeader();  // True if the ID and version read from file_ match.
+
+	FILE *file_;  // Cache file handle; NULL after construction and after Close().
+	int num_entries_;  // Only ever reset to 0 by the implementation in this patch.
+};
+
+#endif // _LINEAR_DISKCACHE
\ No newline at end of file diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h index 0f54bd9060..e4fd5f90db 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -232,8 +232,8 @@ inline u32 ConvertToSingle(u64 x) } else { - // this is said to be undefined - // based on hardware tests + // This is said to be undefined. + // The code is based on hardware tests. return ((x >> 32) & 0xc0000000) | ((x >> 29) & 0x3fffffff); } } diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index faf0c97eff..54a433d71c 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -31,7 +31,7 @@ PIXELSHADERUID last_pixel_shader_uid; // a unique identifier, basically containing all the bits. Yup, it's a lot .... // It would likely be a lot more efficient to build this incrementally as the attributes // are set... 
-void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable) +void GetPixelShaderId(PIXELSHADERUID *uid, u32 texturemask, u32 dstAlphaEnable) { u32 projtexcoords = 0; for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; i++) @@ -43,7 +43,7 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable) projtexcoords |= 1 << texcoord; } } - uid.values[0] = (u32)bpmem.genMode.numtevstages | + uid->values[0] = (u32)bpmem.genMode.numtevstages | ((u32)bpmem.genMode.numindstages << 4) | ((u32)bpmem.genMode.numtexgens << 7) | ((u32)dstAlphaEnable << 11) | @@ -51,21 +51,21 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable) (projtexcoords << 20) | ((u32)bpmem.ztex2.op << 28); - uid.values[0] = (uid.values[0] & ~0x0ff00000) | (projtexcoords << 20); + uid->values[0] = (uid->values[0] & ~0x0ff00000) | (projtexcoords << 20); // swap table for (int i = 0; i < 8; i += 2) - ((u8*)&uid.values[1])[i / 2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4); + ((u8*)&uid->values[1])[i / 2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4); - uid.values[2] = texturemask; + uid->values[2] = texturemask; u32 enableZTexture = (!bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable)?1:0; - uid.values[3] = (u32)bpmem.fog.c_proj_fsel.fsel | + uid->values[3] = (u32)bpmem.fog.c_proj_fsel.fsel | ((u32)bpmem.fog.c_proj_fsel.proj << 3) | ((u32)enableZTexture << 4); int hdr = 4; - u32* pcurvalue = &uid.values[hdr]; + u32 *pcurvalue = &uid->values[hdr]; for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i) { TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC; @@ -119,7 +119,7 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable) if ((bpmem.genMode.numtevstages % 3) != 2) ++pcurvalue; - uid.tevstages = (u32)(pcurvalue - &uid.values[0] - hdr); + uid->tevstages = (u32)(pcurvalue - &uid->values[0] - hdr); for 
(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { @@ -134,7 +134,7 @@ void GetPixelShaderId(PIXELSHADERUID &uid, u32 texturemask, u32 dstAlphaEnable) } // yeah, well .... - uid.indstages = (u32)(pcurvalue - &uid.values[0] - (hdr - 1) - uid.tevstages); + uid->indstages = (u32)(pcurvalue - &uid->values[0] - (hdr - 1) - uid->tevstages); } // old tev->pixelshader notes @@ -385,7 +385,7 @@ static void BuildSwapModeTable() } } -const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL) +const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, u32 HLSL) { setlocale(LC_NUMERIC, "C"); // Reset locale for compilation text[sizeof(text) - 1] = 0x7C; // canary diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index 995ea55b00..37bcdc0d9e 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -42,6 +42,7 @@ #define C_COLORMATRIX (C_FOG + 2) #define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11) +// DO NOT make anything in this class virtual. 
class PIXELSHADERUID { public: @@ -100,8 +101,9 @@ public: } }; -const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL = 0); -void GetPixelShaderId(PIXELSHADERUID &, u32 texturemask, u32 dstAlphaEnable); +const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, u32 HLSL = 0); +void GetPixelShaderId(PIXELSHADERUID *uid, u32 texturemask, u32 dstAlphaEnable); + extern PIXELSHADERUID last_pixel_shader_uid; #endif // GCOGL_PIXELSHADER_H diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.h b/Source/Core/VideoCommon/Src/TextureConversionShader.h index 7e81c4cc4f..e49f2a24ef 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.h +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.h @@ -25,7 +25,7 @@ namespace TextureConversionShader { u16 GetEncodedSampleCount(u32 format); -const char *GenerateEncodingShader(u32 format,bool HLSL = false); +const char *GenerateEncodingShader(u32 format, bool HLSL = false); void SetShaderParameters(float width, float height, float offsetX, float offsetY, float widthStride, float heightStride,float buffW = 0.0f,float buffH = 0.0f); diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index ad8d227813..934990a9f6 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -29,27 +29,27 @@ VERTEXSHADERUID last_vertex_shader_uid; // Mash together all the inputs that contribute to the code of a generated vertex shader into // a unique identifier, basically containing all the bits. Yup, it's a lot .... 
-void GetVertexShaderId(VERTEXSHADERUID& vid, u32 components) +void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components) { - vid.values[0] = components | + uid->values[0] = components | (xfregs.numTexGens << 23) | (xfregs.nNumChans << 27) | ((u32)xfregs.bEnableDualTexTransform << 29); for (int i = 0; i < 2; ++i) { - vid.values[1+i] = xfregs.colChans[i].color.enablelighting ? + uid->values[1+i] = xfregs.colChans[i].color.enablelighting ? (u32)xfregs.colChans[i].color.hex : (u32)xfregs.colChans[i].color.matsource; - vid.values[1+i] |= (xfregs.colChans[i].alpha.enablelighting ? + uid->values[1+i] |= (xfregs.colChans[i].alpha.enablelighting ? (u32)xfregs.colChans[i].alpha.hex : (u32)xfregs.colChans[i].alpha.matsource) << 15; } // fog - vid.values[1] |= (((u32)bpmem.fog.c_proj_fsel.fsel & 3) << 30); - vid.values[2] |= (((u32)bpmem.fog.c_proj_fsel.fsel >> 2) << 30); + uid->values[1] |= (((u32)bpmem.fog.c_proj_fsel.fsel & 3) << 30); + uid->values[2] |= (((u32)bpmem.fog.c_proj_fsel.fsel >> 2) << 30); - u32* pcurvalue = &vid.values[3]; + u32 *pcurvalue = &uid->values[3]; for (int i = 0; i < xfregs.numTexGens; ++i) { TexMtxInfo tinfo = xfregs.texcoords[i].texmtxinfo; if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) @@ -78,16 +78,16 @@ static char text[16384]; #define LIGHTS_POS "" -char *GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha); +char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char *dest, int coloralpha); -const char *GenerateVertexShader(u32 components, bool D3D) +const char *GenerateVertexShaderCode(u32 components, bool D3D) { setlocale(LC_NUMERIC, "C"); // Reset locale for compilation text[sizeof(text) - 1] = 0x7C; // canary DVSTARTPROFILE(); - _assert_( bpmem.genMode.numtexgens == xfregs.numTexGens); - _assert_( bpmem.genMode.numcolchans == xfregs.nNumChans); + _assert_(bpmem.genMode.numtexgens == xfregs.numTexGens); + _assert_(bpmem.genMode.numcolchans == xfregs.nNumChans); u32 
lightMask = 0; if (xfregs.nNumChans > 0) @@ -125,9 +125,8 @@ const char *GenerateVertexShader(u32 components, bool D3D) WRITE(p, "};\n"); // uniforms - // bool bTexMtx = ((components & VB_HAS_TEXMTXIDXALL)<shader. +LPDIRECT3DVERTEXSHADER9 CreateVertexShaderFromByteCode(const u8 *bytecode, int len) +{ + LPDIRECT3DVERTEXSHADER9 v_shader; + HRESULT hr = D3D::dev->CreateVertexShader((DWORD *)bytecode, &v_shader); + if (FAILED(hr)) + v_shader = 0; + return v_shader; +} + +// Code->bytecode. +bool CompileVertexShader(const char *code, int len, u8 **bytecode, int *bytecodelen) { //try to compile LPD3DXBUFFER shaderBuffer = 0; LPD3DXBUFFER errorBuffer = 0; - LPDIRECT3DVERTEXSHADER9 vShader = 0; HRESULT hr = D3DXCompileShader(code, len, 0, 0, "main", D3D::VertexShaderVersionString(), 0, &shaderBuffer, &errorBuffer, 0); if (FAILED(hr)) @@ -39,20 +49,16 @@ LPDIRECT3DVERTEXSHADER9 CompileVertexShader(const char *code, int len) std::string hello = (char*)errorBuffer->GetBufferPointer(); hello += "\n\n"; hello += code; - MessageBoxA(0, hello.c_str(), "Error assembling vertex shader", MB_ICONERROR); + MessageBoxA(0, hello.c_str(), "Error compiling vertex shader", MB_ICONERROR); } - vShader = 0; + *bytecode = 0; + *bytecodelen = 0; } else if (SUCCEEDED(hr)) { - //create it - HRESULT hr = E_FAIL; - if (shaderBuffer) - hr = D3D::dev->CreateVertexShader((DWORD *)shaderBuffer->GetBufferPointer(), &vShader); - if ((FAILED(hr) || vShader == 0) && g_ActiveConfig.bShowShaderErrors) - { - MessageBoxA(0, code, (char*)errorBuffer->GetBufferPointer(), MB_ICONERROR); - } + *bytecodelen = shaderBuffer->GetBufferSize(); + *bytecode = new u8[*bytecodelen]; + memcpy(*bytecode, shaderBuffer->GetBufferPointer(), *bytecodelen); } //cleanup @@ -60,14 +66,25 @@ LPDIRECT3DVERTEXSHADER9 CompileVertexShader(const char *code, int len) shaderBuffer->Release(); if (errorBuffer) errorBuffer->Release(); - return vShader; + return SUCCEEDED(hr) ? 
true : false; } -LPDIRECT3DPIXELSHADER9 CompilePixelShader(const char *code, int len) + +// Bytecode->shader. +LPDIRECT3DPIXELSHADER9 CreatePixelShaderFromByteCode(const u8 *bytecode, int len) +{ + LPDIRECT3DPIXELSHADER9 p_shader; + HRESULT hr = D3D::dev->CreatePixelShader((DWORD *)bytecode, &p_shader); + if (FAILED(hr)) + p_shader = 0; + return p_shader; +} + + +bool CompilePixelShader(const char *code, int len, u8 **bytecode, int *bytecodelen) { LPD3DXBUFFER shaderBuffer = 0; LPD3DXBUFFER errorBuffer = 0; - LPDIRECT3DPIXELSHADER9 pShader = 0; // Someone: // For some reason, I had this kind of errors : "Shader uses texture addressing operations @@ -81,18 +98,16 @@ LPDIRECT3DPIXELSHADER9 CompilePixelShader(const char *code, int len) std::string hello = (char*)errorBuffer->GetBufferPointer(); hello += "\n\n"; hello += code; - MessageBoxA(0, hello.c_str(), "Error assembling pixel shader", MB_ICONERROR); + MessageBoxA(0, hello.c_str(), "Error compiling pixel shader", MB_ICONERROR); } - pShader = 0; + *bytecode = 0; + *bytecodelen = 0; } - else + else if (SUCCEEDED(hr)) { - //create it - HRESULT hr = D3D::dev->CreatePixelShader((DWORD *)shaderBuffer->GetBufferPointer(), &pShader); - if ((FAILED(hr) || pShader == 0) && g_ActiveConfig.bShowShaderErrors) - { - MessageBoxA(0, "damn", "error creating pixelshader", MB_ICONERROR); - } + *bytecodelen = shaderBuffer->GetBufferSize(); + *bytecode = new u8[*bytecodelen]; + memcpy(*bytecode, shaderBuffer->GetBufferPointer(), *bytecodelen); } //cleanup @@ -100,7 +115,31 @@ LPDIRECT3DPIXELSHADER9 CompilePixelShader(const char *code, int len) shaderBuffer->Release(); if (errorBuffer) errorBuffer->Release(); - return pShader; + return SUCCEEDED(hr) ? 
true : false;
+}
+
+// Compiles shader source text and creates a vertex shader from the resulting
+// bytecode. Returns 0 if compilation or shader creation fails.
+LPDIRECT3DVERTEXSHADER9 CompileAndCreateVertexShader(const char *code, int len) {
+	u8 *bytecode = 0;
+	int bytecodelen = 0;
+	if (CompileVertexShader(code, len, &bytecode, &bytecodelen)) {
+		// The creation call takes the bytecode length, not the source text length.
+		LPDIRECT3DVERTEXSHADER9 v_shader = CreateVertexShaderFromByteCode(bytecode, bytecodelen);
+		// CompileVertexShader hands back a new[]-allocated buffer that we own.
+		delete [] bytecode;
+		return v_shader;
+	} else {
+		return 0;
+	}
+}
+
+// Compiles shader source text and creates a pixel shader from the resulting
+// bytecode. Returns 0 if compilation or shader creation fails.
+LPDIRECT3DPIXELSHADER9 CompileAndCreatePixelShader(const char *code, int len) {
+	u8 *bytecode = 0;
+	int bytecodelen = 0;
+	if (CompilePixelShader(code, len, &bytecode, &bytecodelen)) {
+		// The creation call takes the bytecode length, not the source text length.
+		LPDIRECT3DPIXELSHADER9 p_shader = CreatePixelShaderFromByteCode(bytecode, bytecodelen);
+		// CompilePixelShader hands back a new[]-allocated buffer that we own.
+		delete [] bytecode;
+		return p_shader;
+	} else {
+		return 0;
+	}
 } } // namespace diff --git a/Source/Plugins/Plugin_VideoDX9/Src/D3DShader.h b/Source/Plugins/Plugin_VideoDX9/Src/D3DShader.h index 21cfb55cc1..4354f0965c 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/D3DShader.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/D3DShader.h @@ -21,6 +21,14 @@ namespace D3D { - LPDIRECT3DVERTEXSHADER9 CompileVertexShader(const char *code, int len); - LPDIRECT3DPIXELSHADER9 CompilePixelShader(const char *code, int len); + LPDIRECT3DVERTEXSHADER9 CreateVertexShaderFromByteCode(const u8 *bytecode, int len); + LPDIRECT3DPIXELSHADER9 CreatePixelShaderFromByteCode(const u8 *bytecode, int len); + + // The returned bytecode buffers should be delete[]-d. 
+ bool CompileVertexShader(const char *code, int len, u8 **bytecode, int *bytecodelen); + bool CompilePixelShader(const char *code, int len, u8 **bytecode, int *bytecodelen); + + // Utility functions + LPDIRECT3DVERTEXSHADER9 CompileAndCreateVertexShader(const char *code, int len); + LPDIRECT3DPIXELSHADER9 CompileAndCreatePixelShader(const char *code, int len); } \ No newline at end of file diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index 145eb6e11e..d696033e33 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -15,11 +15,15 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ +#include "Common.h" +#include "FileUtil.h" +#include "LinearDiskCache.h" + +#include "Globals.h" #include "D3DBase.h" #include "D3DShader.h" #include "Statistics.h" #include "Utils.h" -#include "Profiler.h" #include "VideoConfig.h" #include "PixelShaderGen.h" #include "PixelShaderManager.h" @@ -29,10 +33,13 @@ #include "XFMemory.h" #include "ImageWrite.h" -#include "debugger/debugger.h" +#include "Debugger/Debugger.h" PixelShaderCache::PSCache PixelShaderCache::PixelShaders; const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry; + +LinearDiskCache g_ps_disk_cache; + static float lastPSconstants[C_COLORMATRIX+16][4]; static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram = 0; @@ -41,7 +48,6 @@ static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0; static LPDIRECT3DPIXELSHADER9 s_ClearZProgram = 0; static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram = 0; - LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram() { return s_ColorMatrixProgram; @@ -64,7 +70,7 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) { - if( lastPSconstants[const_number][0] != f1 || 
lastPSconstants[const_number][1] != f2 || + if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 || lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4 ) { const float f[4] = {f1, f2, f3, f4}; @@ -78,7 +84,7 @@ void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) void SetPSConstant4fv(int const_number, const float *f) { - if( lastPSconstants[const_number][0] != f[0] || lastPSconstants[const_number][1] != f[1] || + if (lastPSconstants[const_number][0] != f[0] || lastPSconstants[const_number][1] != f[1] || lastPSconstants[const_number][2] != f[2] || lastPSconstants[const_number][3] != f[3] ) { D3D::dev->SetPixelShaderConstantF(const_number, f, 1); @@ -89,25 +95,39 @@ void SetPSConstant4fv(int const_number, const float *f) } } +class PixelShaderCacheInserter : public LinearDiskCacheReader { +public: + void Read(const u8 *key, int key_size, const u8 *value, int value_size) + { + PIXELSHADERUID uid; + if (key_size != sizeof(uid)) { + ERROR_LOG(VIDEO, "Wrong key size in pixel shader cache"); + return; + } + memcpy(&uid, key, key_size); + PixelShaderCache::InsertByteCode(uid, value, value_size, false); + } +}; + void PixelShaderCache::Init() { char pprog[1024]; - sprintf(pprog,"void main(\n" + sprintf(pprog, "void main(\n" "out float4 ocol0 : COLOR0,\n" " in float4 incol0 : COLOR0){\n" "ocol0 = incol0;\n" "}\n"); - s_ClearProgram = D3D::CompilePixelShader(pprog, (int)strlen(pprog)); + s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - sprintf(pprog,"uniform sampler samp0 : register(s0);\n" + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" " in float3 uv0 : TEXCOORD0){\n" "ocol0 = tex2D(samp0,uv0.xy);\n" "}\n"); - s_ColorCopyProgram = D3D::CompilePixelShader(pprog, (int)strlen(pprog)); + s_ColorCopyProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - sprintf(pprog,"uniform sampler 
samp0 : register(s0);\n" + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "uniform float4 cColMatrix[5] : register(c%d);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" @@ -115,9 +135,9 @@ void PixelShaderCache::Init() "float4 texcol = tex2D(samp0,uv0.xy);\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); - s_ColorMatrixProgram = D3D::CompilePixelShader(pprog, (int)strlen(pprog)); + s_ColorMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - sprintf(pprog,"uniform sampler samp0 : register(s0);\n" + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "uniform float4 cColMatrix[5] : register(c%d);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" @@ -127,10 +147,19 @@ void PixelShaderCache::Init() "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); - s_DepthMatrixProgram = D3D::CompilePixelShader(pprog, (int)strlen(pprog)); + s_DepthMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); Clear(); + + if (!File::Exists(FULL_SHADERCACHE_DIR)) + File::CreateDir(FULL_SHADERCACHE_DIR); + + char cache_filename[MAX_PATH]; + sprintf(cache_filename, "%s%s-ps.cache", FULL_SHADERCACHE_DIR, globals->unique_id); + PixelShaderCacheInserter inserter; + int read_items = g_ps_disk_cache.OpenAndRead(cache_filename, &inserter); } +// ONLY to be used during shutdown. 
void PixelShaderCache::Clear() { PSCache::iterator iter = PixelShaders.begin(); @@ -139,45 +168,44 @@ void PixelShaderCache::Clear() PixelShaders.clear(); for (int i = 0; i < (C_COLORMATRIX + 16) * 4; i++) - lastPSconstants[i/4][i%4] = -100000000.0f; + lastPSconstants[i / 4][i % 4] = -100000000.0f; memset(&last_pixel_shader_uid, 0xFF, sizeof(last_pixel_shader_uid)); } void PixelShaderCache::Shutdown() { - if(s_ColorMatrixProgram) - s_ColorMatrixProgram->Release(); + if (s_ColorMatrixProgram) s_ColorMatrixProgram->Release(); s_ColorMatrixProgram = NULL; - if(s_ColorCopyProgram) - s_ColorCopyProgram->Release(); - s_ColorCopyProgram=NULL; - if(s_DepthMatrixProgram) - s_DepthMatrixProgram->Release(); + if (s_ColorCopyProgram) s_ColorCopyProgram->Release(); + s_ColorCopyProgram = NULL; + if (s_DepthMatrixProgram) s_DepthMatrixProgram->Release(); s_DepthMatrixProgram = NULL; - if(s_ClearProgram) - s_ClearProgram->Release(); - s_ClearProgram=NULL; + if (s_ClearProgram) s_ClearProgram->Release(); + s_ClearProgram = NULL; Clear(); + g_ps_disk_cache.Sync(); + g_ps_disk_cache.Close(); } bool PixelShaderCache::SetShader(bool dstAlpha) { - DVSTARTPROFILE(); - PIXELSHADERUID uid; - GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), dstAlpha); + GetPixelShaderId(&uid, PixelShaderManager::GetTextureMask(), dstAlpha); + + // Is the shader already set? if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount) { PSCache::const_iterator iter = PixelShaders.find(uid); if (iter != PixelShaders.end() && iter->second.shader) - return true; + return true; // Sure, we're done. else - return false; + return false; // ?? something is wrong. } memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID)); + // Is the shader already in the cache? 
PSCache::iterator iter; iter = PixelShaders.find(uid); if (iter != PixelShaders.end()) @@ -186,7 +214,6 @@ bool PixelShaderCache::SetShader(bool dstAlpha) const PSCacheEntry &entry = iter->second; last_entry = &entry; - DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); if (entry.shader) @@ -198,7 +225,8 @@ bool PixelShaderCache::SetShader(bool dstAlpha) return false; } - const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), dstAlpha, 2); + // OK, need to generate and compile it. + const char *code = GeneratePixelShaderCode(PixelShaderManager::GetTextureMask(), dstAlpha, 2); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { static int counter = 0; @@ -208,7 +236,29 @@ bool PixelShaderCache::SetShader(bool dstAlpha) SaveData(szTemp, code); } #endif - LPDIRECT3DPIXELSHADER9 shader = D3D::CompilePixelShader(code, (int)strlen(code)); + + u8 *bytecode = 0; + int bytecodelen = 0; + if (!D3D::CompilePixelShader(code, (int)strlen(code), &bytecode, &bytecodelen)) { + if (g_ActiveConfig.bShowShaderErrors) + { + PanicAlert("Failed to compile Pixel Shader:\n\n%s", code); + } + return false; + } + + // Here we have the UID and the byte code. Insert it into the disk cache. + g_ps_disk_cache.Append((u8 *)&uid, sizeof(uid), bytecode, bytecodelen); + g_ps_disk_cache.Sync(); + + // And insert it into the shader cache. 
+ bool result = InsertByteCode(uid, bytecode, bytecodelen, true); + delete [] bytecode; + return result; +} + +bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) { + LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen); // Make an entry in the table PSCacheEntry newentry; @@ -220,44 +270,21 @@ bool PixelShaderCache::SetShader(bool dstAlpha) PixelShaders[uid] = newentry; last_entry = &PixelShaders[uid]; + if (!shader) { + // INCSTAT(stats.numPixelShadersFailed); + return false; + } + INCSTAT(stats.numPixelShadersCreated); SETSTAT(stats.numPixelShadersAlive, (int)PixelShaders.size()); - if (shader) + if (activate) { D3D::SetPixelShader(shader); - return true; } - - if (g_ActiveConfig.bShowShaderErrors) - { - PanicAlert("Failed to compile Pixel Shader:\n\n%s", code); - } - return false; + return true; } -void PixelShaderCache::Cleanup() -{ - /* - PSCache::iterator iter; - iter = PixelShaders.begin(); - while (iter != PixelShaders.end()) - { - PSCacheEntry &entry = iter->second; - if (entry.frameCount < frameCount - 1400) - { - entry.Destroy(); - iter = PixelShaders.erase(iter); - } - else - { - iter++; - } - } - SETSTAT(stats.numPixelShadersAlive, (int)PixelShaders.size()); - */ -} - #if defined(_DEBUG) || defined(DEBUGFAST) std::string PixelShaderCache::GetCurrentShaderCode() { diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h index 2deeeacc64..8b4ab8d8fb 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h @@ -18,6 +18,8 @@ #ifndef _PIXELSHADERCACHE_H #define _PIXELSHADERCACHE_H +#include "Common.h" +#include "LinearDiskCache.h" #include "D3DBase.h" #include @@ -53,12 +55,13 @@ private: static PSCache PixelShaders; static const PSCacheEntry *last_entry; + public: static void Init(); - static void Cleanup(); static void 
Clear(); static void Shutdown(); static bool SetShader(bool dstAlpha); + static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(); static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(); static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index a30a37f76d..d2863ce2df 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -272,7 +272,7 @@ bool Renderer::Init() xScale = (float)s_target_width / (float)EFB_WIDTH; yScale = (float)s_target_height / (float)EFB_HEIGHT; - if(!D3D::IsATIDevice()) + if (!D3D::IsATIDevice()) { FULL_EFB_WIDTH = 2 * EFB_WIDTH; FULL_EFB_HEIGHT = 2 * EFB_HEIGHT; @@ -301,6 +301,7 @@ bool Renderer::Init() for (int stage = 0; stage < 8; stage++) D3D::SetSamplerState(stage, D3DSAMP_MAXANISOTROPY, g_ActiveConfig.iMaxAnisotropy); + D3DVIEWPORT9 vp; vp.X = 0; vp.Y = 0; @@ -354,7 +355,7 @@ void dumpMatrix(D3DXMATRIX &mtx) for (int y = 0; y < 4; y++) { char temp[256]; - sprintf(temp,"%4.4f %4.4f %4.4f %4.4f",mtx.m[y][0],mtx.m[y][1],mtx.m[y][2],mtx.m[y][3]); + sprintf(temp,"%4.4f %4.4f %4.4f %4.4f", mtx.m[y][0], mtx.m[y][1], mtx.m[y][2], mtx.m[y][3]); g_VideoInitialize.pLog(temp, FALSE); } } @@ -364,9 +365,9 @@ TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) int Xstride = (s_Fulltarget_width - s_target_width) / 2; int Ystride = (s_Fulltarget_height - s_target_height) / 2; TargetRectangle result; - result.left = (int)(rc.left * xScale) + Xstride ; + result.left = (int)(rc.left * xScale) + Xstride; result.top = (int)(rc.top * yScale) + Ystride; - result.right = (int)(rc.right * xScale) + Xstride ; + result.right = (int)(rc.right * xScale) + Xstride; result.bottom = (int)(rc.bottom * yScale) + Ystride; return result; } @@ -433,14 +434,14 @@ static void 
EFBTextureToD3DBackBuffer(const EFBRectangle& sourceRc) int Width = dst_rect.right - dst_rect.left; int Height = dst_rect.bottom - dst_rect.top; - if(X < 0) X = 0; - if(Y < 0) Y = 0; - if(X > s_backbuffer_width) X = s_backbuffer_width; - if(Y > s_backbuffer_height) Y = s_backbuffer_height; - if(Width < 0) Width = 0; - if(Height < 0) Height = 0; - if(Width > (s_backbuffer_width - X)) Width = s_backbuffer_width - X; - if(Height > (s_backbuffer_height - Y)) Height = s_backbuffer_height - Y; + if (X < 0) X = 0; + if (Y < 0) Y = 0; + if (X > s_backbuffer_width) X = s_backbuffer_width; + if (Y > s_backbuffer_height) Y = s_backbuffer_height; + if (Width < 0) Width = 0; + if (Height < 0) Height = 0; + if (Width > (s_backbuffer_width - X)) Width = s_backbuffer_width - X; + if (Height > (s_backbuffer_height - Y)) Height = s_backbuffer_height - Y; vp.X = X; vp.Y = Y; vp.Width = Width; @@ -617,13 +618,13 @@ bool Renderer::SetScissorRect() if (rc.bottom < 0) rc.bottom = 0; if (rc.top > s_Fulltarget_height) rc.top = s_Fulltarget_height; if (rc.bottom > s_Fulltarget_height) rc.bottom = s_Fulltarget_height; - if(rc.left > rc.right) + if (rc.left > rc.right) { int temp = rc.right; rc.right = rc.left; rc.left = temp; } - if(rc.top > rc.bottom) + if (rc.top > rc.bottom) { int temp = rc.bottom; rc.bottom = rc.top; @@ -660,7 +661,7 @@ void Renderer::SetColorMask() u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) { - if(!g_ActiveConfig.bEFBAccessEnable) + if (!g_ActiveConfig.bEFBAccessEnable) return 0; //Get the working buffer @@ -678,13 +679,13 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) D3DFORMAT ReadBufferFormat = (type == PEEK_Z || type == POKE_Z) ? FBManager::GetEFBDepthReadSurfaceFormat() : BufferFormat; - if(BufferFormat == D3DFMT_D24X8) + if (BufferFormat == D3DFMT_D24X8) return 0; D3DLOCKED_RECT drect; //Buffer not found alert - if(!pBuffer) { + if (!pBuffer) { PanicAlert("No %s!", (type == PEEK_Z || type == POKE_Z) ? 
"Z-Buffer" : "Color EFB"); return 0; } @@ -706,7 +707,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) RectToLock.left = targetPixelRc.left; RectToLock.right = targetPixelRc.right; RectToLock.top = targetPixelRc.top; - if(type == PEEK_Z) + if (type == PEEK_Z) { RECT PixelRect; PixelRect.bottom = 4; @@ -717,14 +718,14 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) RectToLock.right+=1; RectToLock.top-=1; RectToLock.left-=2; - if((RectToLock.bottom - RectToLock.top) > 4) + if ((RectToLock.bottom - RectToLock.top) > 4) RectToLock.bottom--; - if((RectToLock.right - RectToLock.left) > 4) + if ((RectToLock.right - RectToLock.left) > 4) RectToLock.left++; ResetAPIState(); // reset any game specific settings hr =D3D::dev->SetDepthStencilSurface(NULL); hr = D3D::dev->SetRenderTarget(0, RBuffer); - if(FAILED(hr)) + if (FAILED(hr)) { PanicAlert("unable to set pixel render buffer"); return 0; @@ -738,7 +739,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) vp.MinZ = 0.0f; vp.MaxZ = 1.0f; hr = D3D::dev->SetViewport(&vp); - if(FAILED(hr)) + if (FAILED(hr)) { PanicAlert("unable to set pixel viewport"); return 0; @@ -774,14 +775,14 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) RectToLock.right = 1; RectToLock.top = 0; } - if(FAILED(hr)) + if (FAILED(hr)) { PanicAlert("Unable to stretch data to buffer"); return 0; } //retriebe the pixel data to the local memory buffer D3D::dev->GetRenderTargetData(RBuffer,pOffScreenBuffer); - if(FAILED(hr)) + if (FAILED(hr)) { PanicAlert("Unable to copy data to mem buffer"); return 0; @@ -791,7 +792,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) //the surface is good.. lock it - if((hr = pOffScreenBuffer->LockRect(&drect, &RectToLock, D3DLOCK_READONLY)) != D3D_OK) + if ((hr = pOffScreenBuffer->LockRect(&drect, &RectToLock, D3DLOCK_READONLY)) != D3D_OK) { PanicAlert("ERROR: %s", hr == D3DERR_WASSTILLDRAWING ? "Still drawing" : hr == D3DERR_INVALIDCALL ? 
"Invalid call" : "w00t"); @@ -872,12 +873,12 @@ void UpdateViewport() int Y = (int)(ceil(xfregs.rawViewport[4] + xfregs.rawViewport[1] - (scissorYOff)) * MValueY) + Ystride; int Width = (int)ceil((int)(2 * xfregs.rawViewport[0]) * MValueX); int Height = (int)ceil((int)(-2 * xfregs.rawViewport[1]) * MValueY); - if(Width < 0) + if (Width < 0) { X += Width; Width*=-1; } - if(Height < 0) + if (Height < 0) { Y += Height; Height *= -1; @@ -916,10 +917,10 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaE sirc.right = targetRc.right; sirc.bottom = targetRc.bottom; D3D::dev->SetScissorRect(&sirc); - if(zEnable) + if (zEnable) D3D::ChangeRenderState(D3DRS_ZFUNC, D3DCMP_ALWAYS); D3D::drawClearQuad(&sirc,color ,(z & 0xFFFFFF) / float(0xFFFFFF),PixelShaderCache::GetClearProgram(),VertexShaderCache::GetSimpleVertexShader()); - if(zEnable) + if (zEnable) D3D::RefreshRenderState(D3DRS_ZFUNC); //D3D::dev->Clear(0, NULL, (colorEnable ? D3DCLEAR_TARGET : 0)| ( zEnable ? D3DCLEAR_ZBUFFER : 0), color | ((alphaEnable)?0:0xFF000000),(z & 0xFFFFFF) / float(0xFFFFFF), 0); UpdateViewport(); @@ -933,7 +934,7 @@ void Renderer::SetBlendMode(bool forceUpdate) // 2 - reverse subtract enable (else add) // 3-5 - srcRGB function // 6-8 - dstRGB function - if(bpmem.blendmode.logicopenable && bpmem.blendmode.logicmode != 3) + if (bpmem.blendmode.logicopenable && bpmem.blendmode.logicmode != 3) return; u32 newval = bpmem.blendmode.subtract << 2; @@ -978,8 +979,6 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) // D3D frame is now over // Clean out old stuff from caches. frameCount++; - PixelShaderCache::Cleanup(); - VertexShaderCache::Cleanup(); TextureCache::Cleanup(); // Make any new configuration settings active. 
diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp index 5277c6ad84..1770912b12 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp @@ -75,7 +75,7 @@ void CreateRgbToYuyvProgram() " float3 c01 = (c0 + c1) * 0.5f;\n" " ocol0 = float4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n" "}\n"; - s_rgbToYuyvProgram = D3D::CompilePixelShader(FProgram, (int)strlen(FProgram)); + s_rgbToYuyvProgram = D3D::CompileAndCreatePixelShader(FProgram, (int)strlen(FProgram)); if (!s_rgbToYuyvProgram) { ERROR_LOG(VIDEO, "Failed to create RGB to YUYV fragment program"); } @@ -102,7 +102,7 @@ void CreateYuyvToRgbProgram() " yComp + (2.018f * uComp),\n" " 1.0f);\n" "}\n"; - s_yuyvToRgbProgram = D3D::CompilePixelShader(FProgram, (int)strlen(FProgram)); + s_yuyvToRgbProgram = D3D::CompileAndCreatePixelShader(FProgram, (int)strlen(FProgram)); if (!s_yuyvToRgbProgram) { ERROR_LOG(VIDEO, "Failed to create YUYV to RGB fragment program"); } @@ -129,7 +129,7 @@ LPDIRECT3DPIXELSHADER9 GetOrCreateEncodingShader(u32 format) SaveData(szTemp, shader); } #endif - s_encodingPrograms[format] = D3D::CompilePixelShader(shader, (int)strlen(shader)); + s_encodingPrograms[format] = D3D::CompileAndCreatePixelShader(shader, (int)strlen(shader)); if (!s_encodingPrograms[format]) { ERROR_LOG(VIDEO, "Failed to create encoding fragment program"); } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index 24d6492863..a0f2895e76 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -17,6 +17,11 @@ #include +#include "Common.h" +#include "FileUtil.h" +#include "LinearDiskCache.h" + +#include "Globals.h" #include "D3DBase.h" #include "D3DShader.h" #include "Statistics.h" @@ -33,9 
+38,10 @@ VertexShaderCache::VSCache VertexShaderCache::vshaders; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; -static float GC_ALIGNED16(lastVSconstants[C_FOGPARAMS+8][4]); +static float GC_ALIGNED16(lastVSconstants[C_FOGPARAMS + 8][4]); static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader; +LinearDiskCache g_vs_disk_cache; LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader() { @@ -126,9 +132,23 @@ void SetMultiVSConstant4fv(int const_number, int count, const float *f) } } +class VertexShaderCacheInserter : public LinearDiskCacheReader { +public: + void Read(const u8 *key, int key_size, const u8 *value, int value_size) + { + VERTEXSHADERUID uid; + if (key_size != sizeof(uid)) { + ERROR_LOG(VIDEO, "Wrong key size in vertex shader cache"); + return; + } + memcpy(&uid, key, key_size); + VertexShaderCache::InsertByteCode(uid, value, value_size, false); + } +}; + void VertexShaderCache::Init() { - char vSimpleProg[1024]; + char *vSimpleProg = new char[2048]; sprintf(vSimpleProg,"struct VSOUTPUT\n" "{\n" "float4 vPosition : POSITION;\n" @@ -146,8 +166,17 @@ void VertexShaderCache::Init() "return OUT;\n" "}\n"); - SimpleVertexShader = D3D::CompileVertexShader(vSimpleProg, (int)strlen(vSimpleProg)); + SimpleVertexShader = D3D::CompileAndCreateVertexShader(vSimpleProg, (int)strlen(vSimpleProg)); Clear(); + delete [] vSimpleProg; + + if (!File::Exists(FULL_SHADERCACHE_DIR)) + File::CreateDir(FULL_SHADERCACHE_DIR); + + char cache_filename[MAX_PATH]; + sprintf(cache_filename, "%s%s-vs.cache", FULL_SHADERCACHE_DIR, globals->unique_id); + VertexShaderCacheInserter inserter; + int read_items = g_vs_disk_cache.OpenAndRead(cache_filename, &inserter); } void VertexShaderCache::Clear() @@ -164,9 +193,11 @@ void VertexShaderCache::Clear() void VertexShaderCache::Shutdown() { - if(SimpleVertexShader) + if (SimpleVertexShader) SimpleVertexShader->Release(); Clear(); + g_vs_disk_cache.Sync(); + g_vs_disk_cache.Close(); } bool 
VertexShaderCache::SetShader(u32 components) @@ -174,7 +205,7 @@ bool VertexShaderCache::SetShader(u32 components) DVSTARTPROFILE(); VERTEXSHADERUID uid; - GetVertexShaderId(uid, components); + GetVertexShaderId(&uid, components); if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount) { if (vshaders[uid].shader) @@ -202,8 +233,27 @@ bool VertexShaderCache::SetShader(u32 components) return false; } - const char *code = GenerateVertexShader(components, true); - LPDIRECT3DVERTEXSHADER9 shader = D3D::CompileVertexShader(code, (int)strlen(code)); + const char *code = GenerateVertexShaderCode(components, true); + u8 *bytecode; + int bytecodelen; + if (!D3D::CompileVertexShader(code, (int)strlen(code), &bytecode, &bytecodelen)) + { + if (g_ActiveConfig.bShowShaderErrors) + { + PanicAlert("Failed to compile Vertex Shader:\n\n%s", code); + } + return false; + } + g_vs_disk_cache.Append((u8 *)&uid, sizeof(uid), bytecode, bytecodelen); + g_vs_disk_cache.Sync(); + + bool result = InsertByteCode(uid, bytecode, bytecodelen, true); + delete [] bytecode; + return result; +} + +bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) { + LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen); // Make an entry in the table VSCacheEntry entry; @@ -214,41 +264,19 @@ bool VertexShaderCache::SetShader(u32 components) #endif vshaders[uid] = entry; last_entry = &vshaders[uid]; + if (!shader) + return false; + INCSTAT(stats.numVertexShadersCreated); SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size()); - if (shader) + if (activate) { D3D::SetVertexShader(shader); return true; } - - if (g_ActiveConfig.bShowShaderErrors) - { - PanicAlert("Failed to compile Vertex Shader:\n\n%s", code); - } return false; } -void VertexShaderCache::Cleanup() -{ - /* - for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end();) - { - VSCacheEntry &entry = iter->second; - if 
(entry.frameCount < frameCount - 1400) - { - entry.Destroy(); - iter = vshaders.erase(iter); - } - else - { - ++iter; - } - } - SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size());*/ - -} - #if defined(_DEBUG) || defined(DEBUGFAST) std::string VertexShaderCache::GetCurrentShaderCode() { diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h index e339a0249c..d7487a091f 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.h @@ -53,10 +53,10 @@ private: public: static void Init(); static void Clear(); - static void Cleanup(); static void Shutdown(); static bool SetShader(u32 components); static LPDIRECT3DVERTEXSHADER9 GetSimpleVertexShader(); + static bool InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); #if defined(_DEBUG) || defined(DEBUGFAST) static std::string GetCurrentShaderCode(); #endif diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index 0539d148be..c5da32428c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -78,9 +78,9 @@ void PixelShaderCache::Init() { GL_REPORT_ERRORD(); - for( int i=0;i<(C_COLORMATRIX+16)*4;i++) + for (int i = 0; i < (C_COLORMATRIX+16) * 4; i++) lastPSconstants[i/4][i%4] = -100000000.0f; - memset(&last_pixel_shader_uid,0xFF,sizeof(last_pixel_shader_uid)); + memset(&last_pixel_shader_uid, 0xFF, sizeof(last_pixel_shader_uid)); s_displayCompileAlert = true; @@ -172,8 +172,7 @@ FRAGMENTSHADER* PixelShaderCache::GetShader(bool dstAlphaEnable) { DVSTARTPROFILE(); PIXELSHADERUID uid; - u32 dstAlpha = dstAlphaEnable ? 1 : 0; - GetPixelShaderId(uid, PixelShaderManager::GetTextureMask(), dstAlpha); + GetPixelShaderId(&uid, PixelShaderManager::GetTextureMask(), dstAlphaEnable ? 
1 : 0); if (uid == last_pixel_shader_uid && pshaders[uid].frameCount == frameCount) { return pShaderLast; @@ -194,14 +193,12 @@ FRAGMENTSHADER* PixelShaderCache::GetShader(bool dstAlphaEnable) return pShaderLast; } - //Make an entry in the table PSCacheEntry& newentry = pshaders[uid]; newentry.frameCount = frameCount; pShaderLast = &newentry.shader; - - const char *code = GeneratePixelShader(PixelShaderManager::GetTextureMask(), - dstAlphaEnable); + const char *code = GeneratePixelShaderCode(PixelShaderManager::GetTextureMask(), + dstAlphaEnable); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { @@ -218,34 +215,12 @@ FRAGMENTSHADER* PixelShaderCache::GetShader(bool dstAlphaEnable) ERROR_LOG(VIDEO, "failed to create pixel shader"); return NULL; } - INCSTAT(stats.numPixelShadersCreated); SETSTAT(stats.numPixelShadersAlive, pshaders.size()); return pShaderLast; } -void PixelShaderCache::ProgressiveCleanup() -{ - /* - PSCache::iterator iter = pshaders.begin(); - while (iter != pshaders.end()) { - PSCacheEntry &entry = iter->second; - if (entry.frameCount < frameCount - 400) { - entry.Destroy(); -#ifdef _WIN32 - iter = pshaders.erase(iter); -#else - pshaders.erase(iter++); // (this is gcc standard!) 
-#endif - } - else - iter++; - } - SETSTAT(stats.numPixelShadersAlive, (int)pshaders.size()); - */ -} - bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram) { GLenum err = GL_REPORT_ERROR(); @@ -268,7 +243,7 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr } // handle warnings - if(cgGetError() != CG_NO_ERROR) + if (cgGetError() != CG_NO_ERROR) { WARN_LOG(VIDEO, "Warnings on compile ps %s:", cgGetLastListing(g_cgcontext)); WARN_LOG(VIDEO, pstrprogram); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h index 241e7167b2..0fc1082c9b 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h @@ -69,7 +69,6 @@ class PixelShaderCache public: static void Init(); - static void ProgressiveCleanup(); static void Shutdown(); static FRAGMENTSHADER* GetShader(bool dstAlphaEnable); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 1ec6dcbf75..c3a830a738 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -1054,10 +1054,8 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) GL_REPORT_ERRORD(); - // Clean out old stuff from caches + // Clean out old stuff from caches. It's not worth it to clean out the shader caches. 
DLCache::ProgressiveCleanup(); - VertexShaderCache::ProgressiveCleanup(); - PixelShaderCache::ProgressiveCleanup(); TextureMngr::ProgressiveCleanup(); frameCount++; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp index 10a80ee75b..85508b8148 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.cpp @@ -47,7 +47,7 @@ static float GC_ALIGNED16(lastVSconstants[C_FOGPARAMS+8][4]); void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4) { - if( lastVSconstants[const_number][0] != f1 || + if ( lastVSconstants[const_number][0] != f1 || lastVSconstants[const_number][1] != f2 || lastVSconstants[const_number][2] != f3 || lastVSconstants[const_number][3] != f4) @@ -114,9 +114,9 @@ void SetMultiVSConstant3fv(int const_number, int count, const float *f) void VertexShaderCache::Init() { - for( int i=0;i<(C_FOGPARAMS+8)*4;i++) - lastVSconstants[i/4][i%4] = -100000000.0f; - memset(&last_vertex_shader_uid,0xFF,sizeof(last_vertex_shader_uid)); + for (int i = 0; i < (C_FOGPARAMS + 8) * 4; i++) + lastVSconstants[i / 4][i % 4] = -100000000.0f; + memset(&last_vertex_shader_uid, 0xFF, sizeof(last_vertex_shader_uid)); s_displayCompileAlert = true; @@ -138,7 +138,7 @@ VERTEXSHADER* VertexShaderCache::GetShader(u32 components) { DVSTARTPROFILE(); VERTEXSHADERUID uid; - GetVertexShaderId(uid, components); + GetVertexShaderId(&uid, components); if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount) { @@ -163,7 +163,7 @@ VERTEXSHADER* VertexShaderCache::GetShader(u32 components) VSCacheEntry& entry = vshaders[uid]; entry.frameCount = frameCount; pShaderLast = &entry.shader; - const char *code = GenerateVertexShader(components, false); + const char *code = GenerateVertexShaderCode(components, false); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { @@ 
-185,29 +185,6 @@ VERTEXSHADER* VertexShaderCache::GetShader(u32 components) return pShaderLast; } -void VertexShaderCache::ProgressiveCleanup() -{ - /* - VSCache::iterator iter = vshaders.begin(); - while (iter != vshaders.end()) { - VSCacheEntry &entry = iter->second; - if (entry.frameCount < frameCount - 200) { - entry.Destroy(); -#ifdef _WIN32 - iter = vshaders.erase(iter); -#else - vshaders.erase(iter++); -#endif - } - else { - ++iter; - } - } - - SETSTAT(stats.numVertexShadersAlive, vshaders.size()); - */ -} - bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrprogram) { // Reset GL error before compiling shaders. Yeah, we need to investigate the causes of these. @@ -232,7 +209,7 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr return false; } - if(cgGetError() != CG_NO_ERROR) + if (cgGetError() != CG_NO_ERROR) { WARN_LOG(VIDEO, "Failed to load vs %s:", cgGetLastListing(g_cgcontext)); WARN_LOG(VIDEO, pstrprogram); @@ -249,8 +226,7 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr } glGenProgramsARB(1, &vs.glprogid); EnableShader(vs.glprogid); - //glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vs.glprogid); - //CurrentShader = vs.glprogid; + glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog); err = GL_REPORT_ERROR(); if (err != GL_NO_ERROR) { @@ -263,13 +239,13 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr #if defined(_DEBUG) || defined(DEBUGFAST) vs.strprog = pstrprogram; #endif - + return true; } void VertexShaderCache::DisableShader() { - //if(ShaderEnabled) + //if (ShaderEnabled) { CurrentShader = 0; glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader); @@ -278,9 +254,11 @@ void VertexShaderCache::DisableShader() } } +// TODO: Why are these if statements commented out? 
+ void VertexShaderCache::SetCurrentShader(GLuint Shader) { - //if(ShaderEnabled && CurrentShader != Shader) + //if (ShaderEnabled && CurrentShader != Shader) { CurrentShader = Shader; glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader); @@ -289,13 +267,13 @@ void VertexShaderCache::SetCurrentShader(GLuint Shader) void VertexShaderCache::EnableShader(GLuint Shader) { - //if(!ShaderEnabled) + //if (!ShaderEnabled) { glEnable(GL_VERTEX_PROGRAM_ARB); ShaderEnabled= true; CurrentShader = 0; } - //if(CurrentShader != Shader) + //if (CurrentShader != Shader) { CurrentShader = Shader; glBindProgramARB(GL_VERTEX_PROGRAM_ARB, CurrentShader); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h index 704b3a5787..5ba34bc31e 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderCache.h @@ -59,7 +59,6 @@ class VertexShaderCache public: static void Init(); - static void ProgressiveCleanup(); static void Shutdown(); static VERTEXSHADER* GetShader(u32 components);