Merge branch 'new-shadercache-uids'

This branch vastly reduces stuttering caused by redundant shader compilations.
With this code Red Steel 2 has much less stuttering and is actually playable on my hardware now. Other games probably benefit in other ways as well, but don't expect all kinds of stuttering to be magically fixed now.

For reference: set EnableShaderDebugging to True if you experience any regressions; it should then automatically tell you what's wrong.
This commit is contained in:
NeoBrainX 2011-09-30 17:46:34 +02:00
commit adef86c1ef
22 changed files with 634 additions and 421 deletions

View File

@ -21,12 +21,10 @@
#include "Common.h" #include "Common.h"
#include <fstream> #include <fstream>
// Update this to the current SVN revision every time you change shader generation code. // Increment this every time you change shader generation code.
// We don't automatically get this from SVN_REV because that would mean regenerating the
// shader cache for every revision, graphics-related or not, which is simply annoying.
enum enum
{ {
LINEAR_DISKCACHE_VER = 6964 LINEAR_DISKCACHE_VER = 6967
}; };
// On disk format: // On disk format:

View File

@ -86,7 +86,7 @@ bool MsgAlert(bool yes_no, int Style, const char* format, ...)
va_list args; va_list args;
va_start(args, format); va_start(args, format);
CharArrayFromFormatV(buffer, 2047, str_translator(format).c_str(), args); CharArrayFromFormatV(buffer, sizeof(buffer)-1, str_translator(format).c_str(), args);
va_end(args); va_end(args);
ERROR_LOG(MASTER_LOG, "%s: %s", caption.c_str(), buffer); ERROR_LOG(MASTER_LOG, "%s: %s", caption.c_str(), buffer);

View File

@ -21,6 +21,21 @@
#define WRITE p+=sprintf #define WRITE p+=sprintf
// Writes the lighting-dependent part of a shader UID into out[] and returns the
// number of u32 words written, which equals xfregs.numChan.numColorChans.
//
// For every channel i below that count, out[i] is assigned (not OR'ed):
//   - xfregs.color[i].hex if lighting is enabled for the color channel,
//     otherwise only its matsource field;
//   - the same selection for xfregs.alpha[i], shifted left by 15 bits and OR'ed in.
//
// At most two words are written (asserted below), so out must have room for two.
int GetLightingShaderId(u32* out)
{
	// Validate the channel count before it is used to index xfregs.color[],
	// xfregs.alpha[] and out[]; checking after the loop would be too late.
	_assert_(xfregs.numChan.numColorChans <= 2);

	const int numChannels = xfregs.numChan.numColorChans;
	for (int i = 0; i < numChannels; ++i)
	{
		out[i] = xfregs.color[i].enablelighting ?
			(u32)xfregs.color[i].hex :
			(u32)xfregs.color[i].matsource;
		out[i] |= (xfregs.alpha[i].enablelighting ?
			(u32)xfregs.alpha[i].hex :
			(u32)xfregs.alpha[i].matsource) << 15;
	}
	return numChannels;
}
// coloralpha - 1 if color, 2 if alpha // coloralpha - 1 if color, 2 if alpha
char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char* lightsName, int coloralpha) char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char* lightsName, int coloralpha)
{ {

View File

@ -18,6 +18,9 @@
#ifndef _LIGHTINGSHADERGEN_H_ #ifndef _LIGHTINGSHADERGEN_H_
#define _LIGHTINGSHADERGEN_H_ #define _LIGHTINGSHADERGEN_H_
#include "CommonTypes.h"
int GetLightingShaderId(u32* out);
char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest); char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest);
#endif // _LIGHTINGSHADERGEN_H_ #endif // _LIGHTINGSHADERGEN_H_

View File

@ -27,128 +27,264 @@
#include "VideoConfig.h" #include "VideoConfig.h"
#include "NativeVertexFormat.h" #include "NativeVertexFormat.h"
PIXELSHADERUID last_pixel_shader_uid; static int AlphaPreTest();
// Packs the state of TEV stage `stage` that this function considers relevant
// into four u32 words, out[0] .. out[3].
//
// Every write is an OR, so the caller must hand in zero-initialized words.
//
// Layout as written by the code below:
//   out[0]: bits 0-23 colorC.hex; bits 28-31 alphaC bits 4-7;
//           rswap / tswap are additionally OR'ed in unshifted (see notes).
//   out[1]: bits 0-15 alphaC bits 8-23; texmap at bit 16; texcoord at bit 19;
//           texture enable at bit 22; low bit of the color channel at bit 23.
//   out[2]: upper two bits of the color channel at bits 0-1; indirect-stage flag at bit 2;
//           masked tevind at bits 3-23; ras swap table entries at bits 24-31.
//   out[3]: texture enable at bit 0; tex swap table entries at bits 1-8;
//           konst color select at bit 9; konst alpha select at bit 14.
static void StageHash(int stage, u32* out)
{
out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24
u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now
// NOTE(review): the mask 0xF0 moves only 4 bits (alphaC bits 4-7 -> out[0] bits 28-31),
// although the trailing comment says 8; out[0] bits 24-27 stay unused here.
out[0] |= (alphaC&0xF0) << 24; // 8
out[1] |= alphaC >> 8; // 16
// reserve 3 bits for bpmem.tevorders[stage/2].getTexMap
out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3
out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1
// reserve 3 bits for bpmem.tevorders[stage/2].getColorChan
// An indirect stage only counts if it is active and refers to an enabled indirect stage index.
bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages;
out[2] |= bHasIndStage << 2; // 1
bool needstexcoord = false;
if (bHasIndStage)
{
// The mask 0x17FFFF keeps tevind bits 0-18 and bit 20 and drops bit 19.
out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation
needstexcoord = true;
}
TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC;
TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC;
// Rasterizer swap and color channel are only recorded when some combiner input reads the rasterized color/alpha.
if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC
|| cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC
|| cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC
|| cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC
|| ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA
|| ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{
// NOTE(review): rswap is OR'ed in unshifted, i.e. into the low bits of out[0] that already
// hold colorC.hex, so the two values are merged rather than stored separately — confirm intent.
out[0] |= bpmem.combiners[stage].alphaC.rswap;
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2
out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2
// The color channel is split: its lowest bit goes to out[1] bit 23, the next two bits to out[2] bits 0-1.
out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23;
out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1;
}
out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1);
// Texture swap and texture map are only recorded when texturing is enabled for this stage.
if (bpmem.tevorders[stage/2].getEnable(stage&1))
{
// This repeats the assignment already made in the bHasIndStage branch above.
if (bHasIndStage) needstexcoord = true;
// NOTE(review): as with rswap, tswap lands unshifted in the low bits of out[0] on top of colorC.hex — confirm intent.
out[0] |= bpmem.combiners[stage].alphaC.tswap;
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2
out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2
out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16;
}
// Konst selectors are only recorded when some combiner input reads the konst register.
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST
|| ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
{
out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5
out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5
}
if (needstexcoord)
{
// NOTE(review): the texcoord was already stored unconditionally at bit 19 above; this second
// write at bit 16 shares its position with the texmap written in the texture-enabled branch.
out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16;
}
}
// Mash together all the inputs that contribute to the code of a generated pixel shader into // Mash together all the inputs that contribute to the code of a generated pixel shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot .... // a unique identifier, basically containing all the bits. Yup, it's a lot ....
// It would likely be a lot more efficient to build this incrementally as the attributes // It would likely be a lot more efficient to build this incrementally as the attributes
// are set... // are set...
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode) void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{ {
u32 numstages = bpmem.genMode.numtevstages + 1; memset(uid->values, 0, sizeof(uid->values));
u32 projtexcoords = 0; uid->values[0] |= bpmem.genMode.numtevstages; // 4
for (u32 i = 0; i < numstages; i++) uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4
uid->values[0] |= dstAlphaMode << 8; // 2
bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth;
uid->values[0] |= DepthTextureEnable << 10; // 1
bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
uid->values[0] |= enablePL << 11; // 1
if (!enablePL) uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4
u32 alphaPreTest = AlphaPreTest()+1;
uid->values[0] |= alphaPreTest << 16; // 2
if (alphaPreTest == 1 || (alphaPreTest && !DepthTextureEnable && dstAlphaMode == DSTALPHA_ALPHA_PASS))
{ {
if (bpmem.tevorders[i/2].getEnable(i & 1)) // Courtesy of PreAlphaTest, we're done already ;)
{ // NOTE: The comment header of generated shaders depends on the value of bpmem.genmode.numindstages.. shouldnt really bother about that though.
int texcoord = bpmem.tevorders[i / 2].getTexCoord(i & 1); uid->num_values = 1;
if (xfregs.texMtxInfo[i].projection) return;
projtexcoords |= 1 << texcoord;
}
}
uid->values[0] = (u32)bpmem.genMode.numtevstages |
((u32)bpmem.genMode.numindstages << 4) |
((u32)bpmem.genMode.numtexgens << 7) |
((u32)dstAlphaMode << 11) |
((u32)((bpmem.alphaFunc.hex >> 16) & 0xff) << 13) |
(projtexcoords << 21) |
((u32)bpmem.ztex2.op << 29);
// swap table
for (int i = 0; i < 8; i += 2)
((u8*)&uid->values[1])[i / 2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4);
u32 enableZTexture = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth ? 1 : 0;
uid->values[2] = (u32)bpmem.fog.c_proj_fsel.fsel |
((u32)bpmem.fog.c_proj_fsel.proj << 3) |
((u32)enableZTexture << 4) | ((u32)bpmem.fogRange.Base.Enabled << 5);
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 2; ++i) {
uid->values[3 + i] = xfregs.color[i].enablelighting ?
(u32)xfregs.color[i].hex :
(u32)xfregs.color[i].matsource;
uid->values[3 + i] |= (xfregs.alpha[i].enablelighting ?
(u32)xfregs.alpha[i].hex :
(u32)xfregs.alpha[i].matsource) << 15;
}
}
uid->values[4] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
int hdr = 5;
u32 *pcurvalue = &uid->values[hdr];
for (u32 i = 0; i < numstages; ++i)
{
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC;
TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[i].alphaC;
u32 val0 = cc.hex & 0xffffff;
u32 val1 = ac.hex & 0xffffff;
val0 |= bpmem.tevksel[i / 2].getKC(i & 1) << 24;
val1 |= bpmem.tevksel[i / 2].getKA(i & 1) << 24;
pcurvalue[0] = val0;
pcurvalue[1] = val1;
pcurvalue += 2;
} }
for (u32 i = 0; i < numstages / 2; ++i) for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i)
{ {
u32 val0, val1; if (18+i < 32)
if (bpmem.tevorders[i].hex & 0x40) uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1
val0 = bpmem.tevorders[i].hex & 0x3ff;
else else
val0 = bpmem.tevorders[i].hex & 0x380; uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 14); // 1
if (bpmem.tevorders[i].hex & 0x40000)
val1 = (bpmem.tevorders[i].hex & 0x3ff000) >> 12;
else
val1 = (bpmem.tevorders[i].hex & 0x380000) >> 12;
switch (i % 3) {
case 0: pcurvalue[0] = val0|(val1<<10); break;
case 1: pcurvalue[0] |= val0<<20; pcurvalue[1] = val1; pcurvalue++; break;
case 2: pcurvalue[1] |= (val0<<10)|(val1<<20); pcurvalue++; break;
default: PanicAlert("Unknown case for Tev Stages / 2: %08x", (i % 3));
}
} }
if (numstages & 1) { // odd uid->values[1] = bpmem.genMode.numindstages << 2; // 3
u32 val0; u32 indirectStagesUsed = 0;
if (bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x40) for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x3ff; if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
else indirectStagesUsed |= (1 << bpmem.tevind[i].bt);
val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x380;
switch (bpmem.genMode.numtevstages % 3) assert(indirectStagesUsed == (indirectStagesUsed & 0xF));
{
case 0: pcurvalue[0] = val0; break;
case 1: pcurvalue[0] |= val0 << 20; break;
case 2: pcurvalue[1] |= val0 << 10; pcurvalue++; break;
default: PanicAlert("Unknown case for Tev Stages: %08x", bpmem.genMode.numtevstages % 3);
}
}
if ((bpmem.genMode.numtevstages % 3) != 2) uid->values[1] |= indirectStagesUsed << 5; // 4;
++pcurvalue;
uid->tevstages = (u32)(pcurvalue - &uid->values[0] - hdr); for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
{ {
u32 val = bpmem.tevind[i].hex & 0x1fffff; // 21 bits if (indirectStagesUsed & (1 << i))
switch (i % 3)
{ {
case 0: pcurvalue[0] = val; break; uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (9 + 3*i); // 1
case 1: pcurvalue[0] |= val << 21; pcurvalue[1] = val >> 11; ++pcurvalue; break; if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens)
case 2: pcurvalue[0] |= val << 10; ++pcurvalue; break; uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (10 + 3*i); // 2
default: PanicAlert("Unknown case for Ind Stages: %08x", (i % 3));
} }
} }
// yeah, well .... u32* ptr = &uid->values[2];
uid->indstages = (u32)(pcurvalue - &uid->values[0] - (hdr - 1) - uid->tevstages); for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i)
{
StageHash(i, ptr);
ptr += 4; // max: ptr = &uid->values[66]
}
ptr[0] |= bpmem.alphaFunc.comp0; // 3
ptr[0] |= bpmem.alphaFunc.comp1 << 3; // 3
ptr[0] |= bpmem.alphaFunc.logic << 6; // 2
if (alphaPreTest == 0 || alphaPreTest == 2)
{
ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 8; // 3
if (DepthTextureEnable)
{
ptr[0] |= bpmem.ztex2.op << 11; // 2
ptr[0] |= bpmem.zcontrol.zcomploc << 13; // 1
ptr[0] |= bpmem.zmode.testenable << 14; // 1
ptr[0] |= bpmem.zmode.updateenable << 15; // 1
}
}
if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
{
if (bpmem.fog.c_proj_fsel.fsel != 0)
{
ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1
ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1
}
}
++ptr;
if (enablePL)
{
ptr += GetLightingShaderId(ptr);
*ptr++ = components;
}
uid->num_values = ptr - uid->values;
}
// Builds the "safe" pixel shader UID: rather than packing selected bits, it
// stores whole register words, so it is large but straightforward to compare.
//
// uid->values is zeroed first and then filled front to back:
//   header (8 words), optional pixel-lighting words (5), texMtxInfo[0..7],
//   tevind[0..15], tevindref, then 5 words per active TEV stage.
// The footer (alphaFunc, fog select, fog range) is always written at the fixed
// indices 113-115, and the final cursor position is asserted to equal
// uid->GetNumValues().
//
// The slot numbers in the comments below hold only while pixel lighting is
// disabled; with it enabled everything after the header moves up by five.
// NOTE(review): with pixel lighting enabled and 16 TEV stages the per-stage data
// spans 8+5+8+16+1+80 = 118 words, i.e. up to index 117, so the footer written
// at 113-115 overwrites part of the last stage — confirm that values[] is large
// enough and whether this overlap is acceptable.
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
	memset(uid->values, 0, sizeof(uid->values));

	const bool pixelLighting = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
	u32* cursor = uid->values;

	// Header, slots 0-7.
	*cursor++ = dstAlphaMode;
	*cursor++ = bpmem.genMode.hex;
	*cursor++ = bpmem.ztex2.hex;
	*cursor++ = bpmem.zcontrol.hex;
	*cursor++ = bpmem.zmode.hex;
	*cursor++ = g_ActiveConfig.bEnablePerPixelDepth;
	*cursor++ = pixelLighting;
	*cursor++ = xfregs.numTexGen.hex;

	// Lighting registers and vertex components only matter for per-pixel lighting.
	if (pixelLighting)
	{
		for (unsigned int chan = 0; chan < 2; ++chan)
		{
			*cursor++ = xfregs.color[chan].hex;
			*cursor++ = xfregs.alpha[chan].hex;
		}
		*cursor++ = components;
	}

	// Texture matrix info, slots 8-15.
	for (unsigned int tex = 0; tex < 8; ++tex)
		*cursor++ = xfregs.texMtxInfo[tex].hex;

	// All indirect stage registers, slots 16-31, followed by tevindref in slot 32.
	for (unsigned int ind = 0; ind < 16; ++ind)
		*cursor++ = bpmem.tevind[ind].hex;
	*cursor++ = bpmem.tevindref.hex;

	// Five words per active TEV stage, starting at slot 33 (up to 16 stages).
	const int stageCount = bpmem.genMode.numtevstages + 1;
	for (int stage = 0; stage < stageCount; ++stage)
	{
		*cursor++ = bpmem.combiners[stage].colorC.hex;
		*cursor++ = bpmem.combiners[stage].alphaC.hex;
		*cursor++ = bpmem.tevind[stage].hex;
		*cursor++ = bpmem.tevksel[stage / 2].hex;
		*cursor++ = bpmem.tevorders[stage / 2].hex;
	}

	// Footer at fixed positions, regardless of how many stages were written.
	cursor = &uid->values[113];
	*cursor++ = bpmem.alphaFunc.hex;        // 113
	*cursor++ = bpmem.fog.c_proj_fsel.hex;  // 114
	*cursor++ = bpmem.fogRange.Base.hex;    // 115

	_assert_((cursor - uid->values) == uid->GetNumValues());
}
// Debug-only consistency check between a previously stored safe UID / shader
// source pair and the current register state.
//
// Does nothing unless g_ActiveConfig.bEnableShaderDebugging is set. Otherwise it
// recomputes the safe UID for the current state; if that differs from old_id,
// the shader source is regenerated and compared with old_code. Only when the
// source differs as well is a mismatch reported: both UIDs and both sources are
// written to a "psuid_mismatch_NNNN.txt" file under the D_DUMP_IDX user path and
// a PanicAlert naming that file is raised.
//
// api, dstAlphaMode and components are forwarded to the UID / code generators.
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components)
{
if (!g_ActiveConfig.bEnableShaderDebugging)
return;
PIXELSHADERUIDSAFE new_id;
GetSafePixelShaderId(&new_id, dstAlphaMode, components);
// Differing safe UIDs alone are not an error; the generated code must differ too.
if (!(old_id == new_id))
{
std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components));
if (old_code != new_code)
{
_assert_(old_id.GetNumValues() == new_id.GetNumValues());
// Format the two UIDs side by side, two values per line.
// NOTE(review): sprintf into the fixed 8192-byte buffer is unbounded; it relies on GetNumValues() staying small.
char msg[8192];
char* ptr = msg;
ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
const int N = new_id.GetNumValues();
for (int i = 0; i < N/2; ++i)
ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
new_id.values[2*i], new_id.values[2*i+1]);
// An odd value count leaves one trailing value to print on its own line.
if (N % 2)
ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);
// The counter persists across calls so every failure gets its own dump file.
static int num_failures = 0;
// NOTE(review): sprintf into szTemp[MAX_PATH] does not guard against a long user path.
char szTemp[MAX_PATH];
sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file(szTemp);
file << msg;
file << "\n\nOld shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code;
file.close();
PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp);
}
}
} }
// old tev->pixelshader notes // old tev->pixelshader notes
@ -165,7 +301,6 @@ static void SampleTexture(char *&p, const char *destination, const char *texcoor
// static void WriteAlphaCompare(char *&p, int num, int comp); // static void WriteAlphaCompare(char *&p, int num, int comp);
static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode); static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode);
static void WriteFog(char *&p); static void WriteFog(char *&p);
static int AlphaPreTest();
static const char *tevKSelTableC[] = // KCSEL static const char *tevKSelTableC[] = // KCSEL
{ {
@ -333,12 +468,6 @@ static const char *tevRasTable[] =
"float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero "float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero
}; };
static const char *alphaRef[2] =
{
I_ALPHA"[0].r",
I_ALPHA"[0].g"
};
//static const char *tevTexFunc[] = { "tex2D", "texRECT" }; //static const char *tevTexFunc[] = { "tex2D", "texRECT" };
static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
@ -353,23 +482,14 @@ static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" };
#define WRITE p+=sprintf #define WRITE p+=sprintf
static const char *swapColors = "rgba";
static char swapModeTable[4][5]; static char swapModeTable[4][5];
static char text[16384]; static char text[16384];
static bool DepthTextureEnable; static bool DepthTextureEnable;
struct RegisterState
{
bool ColorNeedOverflowControl;
bool AlphaNeedOverflowControl;
bool AuxStored;
};
static RegisterState RegisterStates[4];
static void BuildSwapModeTable() static void BuildSwapModeTable()
{ {
static const char *swapColors = "rgba";
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1]; swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1];
@ -385,14 +505,14 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
setlocale(LC_NUMERIC, "C"); // Reset locale for compilation setlocale(LC_NUMERIC, "C"); // Reset locale for compilation
text[sizeof(text) - 1] = 0x7C; // canary text[sizeof(text) - 1] = 0x7C; // canary
BuildSwapModeTable(); BuildSwapModeTable(); // Needed for WriteStage
int numStages = bpmem.genMode.numtevstages + 1; int numStages = bpmem.genMode.numtevstages + 1;
int numTexgen = bpmem.genMode.numtexgens; int numTexgen = bpmem.genMode.numtexgens;
char *p = text; char *p = text;
WRITE(p, "//Pixel Shader for TEV stages\n"); WRITE(p, "//Pixel Shader for TEV stages\n");
WRITE(p, "//%i TEV stages, %i texgens, %i IND stages\n", WRITE(p, "//%i TEV stages, %i texgens, XXX IND stages\n",
numStages, numTexgen, bpmem.genMode.numindstages); numStages, numTexgen/*, bpmem.genMode.numindstages*/);
int nIndirectStagesUsed = 0; int nIndirectStagesUsed = 0;
if (bpmem.genMode.numindstages > 0) if (bpmem.genMode.numindstages > 0)
@ -505,10 +625,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{ {
// alpha test will always fail, so restart the shader and just make it an empty function // alpha test will always fail, so restart the shader and just make it an empty function
WRITE(p, "ocol0 = 0;\n"); WRITE(p, "ocol0 = 0;\n");
if(DepthTextureEnable)
WRITE(p, "depth = 1.f;\n");
if(dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
WRITE(p, "ocol1 = 0;\n");
WRITE(p, "discard;\n"); WRITE(p, "discard;\n");
if(ApiType != API_D3D11) if(ApiType != API_D3D11)
WRITE(p, "return;\n"); WRITE(p, "return;\n");
@ -594,16 +710,6 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
} }
} }
RegisterStates[0].AlphaNeedOverflowControl = false;
RegisterStates[0].ColorNeedOverflowControl = false;
RegisterStates[0].AuxStored = false;
for(int i = 1; i < 4; i++)
{
RegisterStates[i].AlphaNeedOverflowControl = true;
RegisterStates[i].ColorNeedOverflowControl = true;
RegisterStates[i].AuxStored = false;
}
for (int i = 0; i < numStages; i++) for (int i = 0; i < numStages; i++)
WriteStage(p, i, ApiType); //build the equation for this stage WriteStage(p, i, ApiType); //build the equation for this stage
@ -611,23 +717,13 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
{ {
// The results of the last texenv stage are put onto the screen, // The results of the last texenv stage are put onto the screen,
// regardless of the used destination register // regardless of the used destination register
if(bpmem.combiners[numStages - 1].colorC.dest != 0) WRITE(p, "prev.rgb = %s;\n",tevCOutputTable[bpmem.combiners[numStages-1].colorC.dest]);
{ WRITE(p, "prev.a = %s;\n",tevAOutputTable[bpmem.combiners[numStages-1].alphaC.dest]);
bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored;
WRITE(p, "prev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl;
}
if(bpmem.combiners[numStages - 1].alphaC.dest != 0)
{
bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored;
WRITE(p, "prev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl;
}
} }
// emulation of unisgned 8 overflow when casting if needed // emulation of unsigned 8 overflow when casting
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) WRITE(p, "prev = frac(4.0f + prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
WRITE(p, "prev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
// TODO: Why are we doing a second alpha pretest here?
if (!WriteAlphaTest(p, ApiType, dstAlphaMode)) if (!WriteAlphaTest(p, ApiType, dstAlphaMode))
{ {
// alpha test will always fail, so restart the shader and just make it an empty function // alpha test will always fail, so restart the shader and just make it an empty function
@ -742,10 +838,6 @@ static const char *TEVCMPAlphaOPTable[16] =
static void WriteStage(char *&p, int n, API_TYPE ApiType) static void WriteStage(char *&p, int n, API_TYPE ApiType)
{ {
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages; bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages;
@ -754,8 +846,11 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (!bHasTexCoord) if (!bHasTexCoord)
texcoord = 0; texcoord = 0;
WRITE(p, "// TEV stage %d\n", n);
if (bHasIndStage) if (bHasIndStage)
{ {
WRITE(p, "// indirect op\n");
// perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
if (bpmem.tevind[n].bs != ITBA_OFF) if (bpmem.tevind[n].bs != ITBA_OFF)
{ {
@ -782,11 +877,13 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
} }
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
{ // s matrix { // s matrix
_assert_(bpmem.tevind[n].mid >= 5);
int mtxidx = 2*(bpmem.tevind[n].mid-5); int mtxidx = 2*(bpmem.tevind[n].mid-5);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
} }
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
{ // t matrix { // t matrix
_assert_(bpmem.tevind[n].mid >= 9);
int mtxidx = 2*(bpmem.tevind[n].mid-9); int mtxidx = 2*(bpmem.tevind[n].mid-9);
WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); WRITE(p, "float2 indtevtrans%d = "I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);
} }
@ -825,11 +922,15 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;
bool bCRas = cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC || cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC || cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC; // blah1
bool bARas = ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA || ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA; if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC
|| cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC
if(bCRas || bARas) || cc.c == TEVCOLORARG_RASA || cc.c == TEVCOLORARG_RASC
|| cc.d == TEVCOLORARG_RASA || cc.d == TEVCOLORARG_RASC
|| ac.a == TEVALPHAARG_RASA || ac.b == TEVALPHAARG_RASA
|| ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
{ {
char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); WRITE(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
WRITE(p, "crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n"); WRITE(p, "crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n");
} }
@ -837,7 +938,6 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (bpmem.tevorders[n/2].getEnable(n&1)) if (bpmem.tevorders[n/2].getEnable(n&1))
{ {
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
if(!bHasIndStage) if(!bHasIndStage)
{ {
// calc tevcord // calc tevcord
@ -847,20 +947,20 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n"); WRITE(p, "tevcoord.xy = float2(0.0f, 0.0f);\n");
} }
char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType); SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType);
} }
else else
WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); WRITE(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
int kc = bpmem.tevksel[n / 2].getKC(n & 1);
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
// blah2
if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST
bool bCKonst = cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || cc.c == TEVCOLORARG_KONST || cc.d == TEVCOLORARG_KONST; || ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST)
bool bAKonst = ac.a == TEVALPHAARG_KONST || ac.b == TEVALPHAARG_KONST || ac.c == TEVALPHAARG_KONST || ac.d == TEVALPHAARG_KONST;
if (bCKonst || bAKonst )
{ {
int kc = bpmem.tevksel[n / 2].getKC(n & 1);
int ka = bpmem.tevksel[n / 2].getKA(n & 1);
WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); WRITE(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
if(kc > 7 || ka > 7) if(kc > 7 || ka > 7)
{ {
@ -872,100 +972,35 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
} }
} }
if(cc.a == TEVCOLORARG_CPREV if(cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV
|| cc.a == TEVCOLORARG_APREV || cc.b == TEVCOLORARG_CPREV || cc.b == TEVCOLORARG_APREV
|| cc.b == TEVCOLORARG_CPREV || cc.c == TEVCOLORARG_CPREV || cc.c == TEVCOLORARG_APREV
|| cc.b == TEVCOLORARG_APREV || ac.a == TEVALPHAARG_APREV || ac.b == TEVALPHAARG_APREV || ac.c == TEVALPHAARG_APREV)
|| cc.c == TEVCOLORARG_CPREV WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
|| cc.c == TEVCOLORARG_APREV
|| ac.a == TEVALPHAARG_APREV
|| ac.b == TEVALPHAARG_APREV
|| ac.c == TEVALPHAARG_APREV)
{
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
{
WRITE(p, "cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[0].AlphaNeedOverflowControl = false;
RegisterStates[0].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cprev = prev;\n");
}
RegisterStates[0].AuxStored = true;
}
if(cc.a == TEVCOLORARG_C0
|| cc.a == TEVCOLORARG_A0
|| cc.b == TEVCOLORARG_C0
|| cc.b == TEVCOLORARG_A0
|| cc.c == TEVCOLORARG_C0
|| cc.c == TEVCOLORARG_A0
|| ac.a == TEVALPHAARG_A0
|| ac.b == TEVALPHAARG_A0
|| ac.c == TEVALPHAARG_A0)
{
if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl)
{
WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[1].AlphaNeedOverflowControl = false;
RegisterStates[1].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc0 = c0;\n");
}
RegisterStates[1].AuxStored = true;
}
if(cc.a == TEVCOLORARG_C1 if(cc.a == TEVCOLORARG_C0 || cc.a == TEVCOLORARG_A0
|| cc.a == TEVCOLORARG_A1 || cc.b == TEVCOLORARG_C0 || cc.b == TEVCOLORARG_A0
|| cc.b == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0
|| cc.b == TEVCOLORARG_A1 || ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || ac.c == TEVALPHAARG_A0)
|| cc.c == TEVCOLORARG_C1 WRITE(p, "cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
|| cc.c == TEVCOLORARG_A1
|| ac.a == TEVALPHAARG_A1
|| ac.b == TEVALPHAARG_A1
|| ac.c == TEVALPHAARG_A1)
{
if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl)
{
WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[2].AlphaNeedOverflowControl = false;
RegisterStates[2].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc1 = c1;\n");
}
RegisterStates[2].AuxStored = true;
}
if(cc.a == TEVCOLORARG_C2
|| cc.a == TEVCOLORARG_A2 if(cc.a == TEVCOLORARG_C1 || cc.a == TEVCOLORARG_A1
|| cc.b == TEVCOLORARG_C2 || cc.b == TEVCOLORARG_C1 || cc.b == TEVCOLORARG_A1
|| cc.b == TEVCOLORARG_A2 || cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1
|| cc.c == TEVCOLORARG_C2 || ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || ac.c == TEVALPHAARG_A1)
|| cc.c == TEVCOLORARG_A2 WRITE(p, "cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
|| ac.a == TEVALPHAARG_A2
|| ac.b == TEVALPHAARG_A2
|| ac.c == TEVALPHAARG_A2) if(cc.a == TEVCOLORARG_C2 || cc.a == TEVCOLORARG_A2
{ || cc.b == TEVCOLORARG_C2 || cc.b == TEVCOLORARG_A2
if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl) || cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2
{ || ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || ac.c == TEVALPHAARG_A2)
WRITE(p, "cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n"); WRITE(p, "cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
RegisterStates[3].AlphaNeedOverflowControl = false;
RegisterStates[3].ColorNeedOverflowControl = false;
}
else
{
WRITE(p, "cc2 = c2;\n");
}
RegisterStates[3].AuxStored = true;
}
RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0);
RegisterStates[cc.dest].AuxStored = false; WRITE(p, "// color combine\n");
if (cc.clamp) if (cc.clamp)
WRITE(p, "%s = saturate(", tevCOutputTable[cc.dest]); WRITE(p, "%s = saturate(", tevCOutputTable[cc.dest]);
else else
@ -996,7 +1031,7 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, "%s", tevBiasTable[cc.bias]); WRITE(p, "%s", tevBiasTable[cc.bias]);
if (cc.shift > 0) if (cc.shift > TEVSCALE_1)
WRITE(p, ")"); WRITE(p, ")");
} }
else else
@ -1012,8 +1047,7 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
WRITE(p, ")"); WRITE(p, ")");
WRITE(p,";\n"); WRITE(p,";\n");
RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0); WRITE(p, "// alpha combine\n");
RegisterStates[ac.dest].AuxStored = false;
// combine the alpha channel // combine the alpha channel
if (ac.clamp) if (ac.clamp)
WRITE(p, "%s = saturate(", tevAOutputTable[ac.dest]); WRITE(p, "%s = saturate(", tevAOutputTable[ac.dest]);
@ -1059,6 +1093,7 @@ static void WriteStage(char *&p, int n, API_TYPE ApiType)
if (ac.clamp) if (ac.clamp)
WRITE(p, ")"); WRITE(p, ")");
WRITE(p, ";\n\n"); WRITE(p, ";\n\n");
WRITE(p, "// TEV done\n");
} }
void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
@ -1124,6 +1159,11 @@ static int AlphaPreTest()
static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode) static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode)
{ {
static const char *alphaRef[2] =
{
I_ALPHA"[0].r",
I_ALPHA"[0].g"
};
int Pretest = AlphaPreTest(); int Pretest = AlphaPreTest();
if(Pretest >= 0) if(Pretest >= 0)
@ -1141,7 +1181,10 @@ static bool WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode
compindex = bpmem.alphaFunc.comp1 % 8; compindex = bpmem.alphaFunc.comp1 % 8;
WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table
WRITE(p, ")){ocol0 = 0;%s%sdiscard;%s}\n",dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "ocol1 = 0;" : "",DepthTextureEnable ? "depth = 1.f;" : "",(ApiType != API_D3D11)? "return;" : ""); WRITE(p, ")){ocol0 = 0;%s%s discard;%s}\n",
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "ocol1 = 0;" : "",
DepthTextureEnable ? "depth = 1.f;" : "",
(ApiType != API_D3D11) ? "return;" : "");
return true; return true;
} }
@ -1199,4 +1242,4 @@ static void WriteFog(char *&p)
WRITE(p, " prev.rgb = lerp(prev.rgb,"I_FOG"[0].rgb,fog);\n"); WRITE(p, " prev.rgb = lerp(prev.rgb,"I_FOG"[0].rgb,fog);\n");
} }

View File

@ -44,44 +44,42 @@
#define C_PLIGHTS (C_FOG + 3) #define C_PLIGHTS (C_FOG + 3)
#define C_PMATERIALS (C_PLIGHTS + 40) #define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4) #define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11 + 2) #define PIXELSHADERUID_MAX_VALUES 70
#define PIXELSHADERUID_MAX_VALUES_SAFE 120
// DO NOT make anything in this class virtual. // DO NOT make anything in this class virtual.
class PIXELSHADERUID template<bool safe>
class _PIXELSHADERUID
{ {
public: public:
u32 values[PIXELSHADERUID_MAX_VALUES]; u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES];
u16 tevstages, indstages; int num_values;
PIXELSHADERUID() _PIXELSHADERUID()
{ {
memset(values, 0, PIXELSHADERUID_MAX_VALUES * 4);
tevstages = indstages = 0;
} }
PIXELSHADERUID(const PIXELSHADERUID& r) _PIXELSHADERUID(const _PIXELSHADERUID& r)
{ {
tevstages = r.tevstages; num_values = r.num_values;
indstages = r.indstages; if (safe) memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE);
int N = GetNumValues(); else memcpy(values, r.values, r.GetNumValues() * sizeof(values[0]));
_assert_(N <= PIXELSHADERUID_MAX_VALUES);
for (int i = 0; i < N; ++i)
values[i] = r.values[i];
} }
int GetNumValues() const int GetNumValues() const
{ {
return tevstages + indstages + 4; if (safe) return (sizeof(values) / sizeof(u32));
else return num_values;
} }
bool operator <(const PIXELSHADERUID& _Right) const bool operator <(const _PIXELSHADERUID& _Right) const
{ {
if (values[0] < _Right.values[0])
return true;
else if (values[0] > _Right.values[0])
return false;
int N = GetNumValues(); int N = GetNumValues();
for (int i = 1; i < N; ++i) if (N < _Right.GetNumValues())
return true;
else if (N > _Right.GetNumValues())
return false;
for (int i = 0; i < N; ++i)
{ {
if (values[i] < _Right.values[i]) if (values[i] < _Right.values[i])
return true; return true;
@ -91,12 +89,12 @@ public:
return false; return false;
} }
bool operator ==(const PIXELSHADERUID& _Right) const bool operator ==(const _PIXELSHADERUID& _Right) const
{ {
if (values[0] != _Right.values[0])
return false;
int N = GetNumValues(); int N = GetNumValues();
for (int i = 1; i < N; ++i) if (N != _Right.GetNumValues())
return false;
for (int i = 0; i < N; ++i)
{ {
if (values[i] != _Right.values[i]) if (values[i] != _Right.values[i])
return false; return false;
@ -104,6 +102,8 @@ public:
return true; return true;
} }
}; };
typedef _PIXELSHADERUID<false> PIXELSHADERUID;
typedef _PIXELSHADERUID<true> PIXELSHADERUIDSAFE;
// Different ways to achieve rendering with destination alpha // Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE enum DSTALPHA_MODE
@ -114,8 +114,11 @@ enum DSTALPHA_MODE
}; };
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components); const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode);
extern PIXELSHADERUID last_pixel_shader_uid; void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components);
#endif // GCOGL_PIXELSHADER_H #endif // GCOGL_PIXELSHADER_H

View File

@ -26,25 +26,19 @@
#include "VertexShaderGen.h" #include "VertexShaderGen.h"
#include "VideoConfig.h" #include "VideoConfig.h"
VERTEXSHADERUID last_vertex_shader_uid;
// Mash together all the inputs that contribute to the code of a generated vertex shader into // Mash together all the inputs that contribute to the code of a generated vertex shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot .... // a unique identifier, basically containing all the bits. Yup, it's a lot ....
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components) void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
{ {
memset(uid->values, 0, sizeof(uid->values));
uid->values[0] = components | uid->values[0] = components |
(xfregs.numTexGen.numTexGens << 23) | (xfregs.numTexGen.numTexGens << 23) |
(xfregs.numChan.numColorChans << 27) | (xfregs.numChan.numColorChans << 27) |
(xfregs.dualTexTrans.enabled << 29); (xfregs.dualTexTrans.enabled << 29);
for (int i = 0; i < 2; ++i) { // TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here?
uid->values[1+i] = xfregs.color[i].enablelighting ? GetLightingShaderId(&uid->values[1]);
(u32)xfregs.color[i].hex :
(u32)xfregs.color[i].matsource;
uid->values[1+i] |= (xfregs.alpha[i].enablelighting ?
(u32)xfregs.alpha[i].hex :
(u32)xfregs.alpha[i].matsource) << 15;
}
uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31; uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
u32 *pcurvalue = &uid->values[3]; u32 *pcurvalue = &uid->values[3];
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) {
@ -69,6 +63,69 @@ void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
} }
} }
// Fills `uid` with the "safe" vertex shader ID: the raw value of every
// register read below is stored in its own slot, without any bit packing.
// The number of slots written must equal uid->GetNumValues().
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components)
{
	// Start from an all-zero ID so every slot has a defined value.
	memset(uid->values, 0, sizeof(uid->values));

	u32* out = uid->values;

	// Vertex components plus the global XF state registers.
	out[0] = components;
	out[1] = xfregs.numTexGen.hex;
	out[2] = xfregs.numChan.hex;
	out[3] = xfregs.dualTexTrans.hex;
	out += 4;

	// Color/alpha channel control registers for both channels, interleaved.
	for (int chan = 0; chan < 2; ++chan, out += 2)
	{
		out[0] = xfregs.color[chan].hex;
		out[1] = xfregs.alpha[chan].hex;
	}

	// Stored as 0 or 1: pixel lighting is both requested and supported.
	const bool pixel_lighting = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
	*out++ = pixel_lighting;

	// All eight texgen slots are recorded, regardless of how many are active.
	for (unsigned int tex = 0; tex < 8; ++tex, out += 2)
	{
		out[0] = xfregs.texMtxInfo[tex].hex;
		out[1] = xfregs.postMtxInfo[tex].hex;
	}

	// Sanity check: we must have filled exactly as many slots as the ID reports.
	_assert_((out - uid->values) == uid->GetNumValues());
}
// Consistency check for vertex shader IDs; does nothing unless
// g_ActiveConfig.bEnableShaderDebugging is set.
//
// api        - API type forwarded to GenerateVertexShaderCode().
// old_id     - safe ID that was recorded together with old_code.
// old_code   - shader source that was recorded for the cached shader.
// components - vertex components used to build the current safe ID and code.
//
// A fresh safe ID is generated for the current state. If it differs from
// old_id AND the freshly generated shader source differs from old_code, both
// IDs and both sources are written to a "vsuid_mismatch_NNNN.txt" file in the
// dump directory and a panic alert naming that file is shown.
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components)
{
	if (!g_ActiveConfig.bEnableShaderDebugging)
		return;

	VERTEXSHADERUIDSAFE new_id;
	GetSafeVertexShaderId(&new_id, components);

	// Identical safe IDs: nothing to validate.
	if (old_id == new_id)
		return;

	// Differing safe IDs are only a problem if the generated code differs too.
	std::string new_code(GenerateVertexShaderCode(components, api));
	if (old_code == new_code)
		return;

	_assert_(old_id.GetNumValues() == new_id.GetNumValues());

	// Side-by-side hex dump of both IDs, two values per line.
	// Each line is at most ~45 characters, so 8192 bytes is ample for a safe ID.
	char msg[8192];
	char* ptr = msg;
	ptr += sprintf(ptr, "Vertex shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
	const int N = new_id.GetNumValues();
	for (int i = 0; i < N/2; ++i)
		ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
						new_id.values[2*i], new_id.values[2*i+1]);
	// Odd number of values: print the trailing one on its own line.
	if (N % 2)
		ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);

	// Build the dump file name in a std::string so an arbitrarily long user
	// path cannot overflow a fixed-size buffer. Only the counter is formatted
	// into a small local buffer (an int never needs more than 12 characters).
	static int num_failures = 0;
	char index[16];
	sprintf(index, "%04i", num_failures++);
	std::string filename(File::GetUserPath(D_DUMP_IDX).c_str());
	filename += "vsuid_mismatch_";
	filename += index;
	filename += ".txt";

	std::ofstream file(filename.c_str());
	file << msg;
	file << "\n\nOld shader code:\n" << old_code;
	file << "\n\nNew shader code:\n" << new_code;
	file.close();

	PanicAlert("Unique vertex shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", filename.c_str());
}
static char text[16384]; static char text[16384];
#define WRITE p+=sprintf #define WRITE p+=sprintf
@ -244,7 +301,8 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type)
else else
WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
} }
// TODO: This probably isn't necessary if pixel lighting is enabled.
p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_");
if(xfregs.numChan.numColorChans < 2) if(xfregs.numChan.numColorChans < 2)

View File

@ -48,17 +48,18 @@
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64) #define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 4) #define C_VENVCONST_END (C_DEPTHPARAMS + 4)
class VERTEXSHADERUID template<bool safe>
class _VERTEXSHADERUID
{ {
#define NUM_VSUID_VALUES_SAFE 25
public: public:
u32 values[9]; u32 values[safe ? NUM_VSUID_VALUES_SAFE : 9];
VERTEXSHADERUID() _VERTEXSHADERUID()
{ {
memset(values, 0, sizeof(values));
} }
VERTEXSHADERUID(const VERTEXSHADERUID& r) _VERTEXSHADERUID(const _VERTEXSHADERUID& r)
{ {
for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i) for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i)
values[i] = r.values[i]; values[i] = r.values[i];
@ -66,10 +67,11 @@ public:
int GetNumValues() const int GetNumValues() const
{ {
return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1 if (safe) return NUM_VSUID_VALUES_SAFE;
else return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1
} }
bool operator <(const VERTEXSHADERUID& _Right) const bool operator <(const _VERTEXSHADERUID& _Right) const
{ {
if (values[0] < _Right.values[0]) if (values[0] < _Right.values[0])
return true; return true;
@ -86,7 +88,7 @@ public:
return false; return false;
} }
bool operator ==(const VERTEXSHADERUID& _Right) const bool operator ==(const _VERTEXSHADERUID& _Right) const
{ {
if (values[0] != _Right.values[0]) if (values[0] != _Right.values[0])
return false; return false;
@ -99,14 +101,18 @@ public:
return true; return true;
} }
}; };
typedef _VERTEXSHADERUID<false> VERTEXSHADERUID;
typedef _VERTEXSHADERUID<true> VERTEXSHADERUIDSAFE;
// components is included in the uid. // components is included in the uid.
char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type); char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type);
const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type); const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type);
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
extern VERTEXSHADERUID last_vertex_shader_uid; void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components);
// Used to make sure that our optimized vertex shader IDs don't lose any possible shader code changes
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components);
#endif // GCOGL_VERTEXSHADER_H #endif // GCOGL_VERTEXSHADER_H

View File

@ -96,6 +96,8 @@ void VideoConfig::Load(const char *ini_file)
iniFile.Get("Settings", "EnableOpenCL", &bEnableOpenCL, false); iniFile.Get("Settings", "EnableOpenCL", &bEnableOpenCL, false);
iniFile.Get("Settings", "OMPDecoder", &bOMPDecoder, false); iniFile.Get("Settings", "OMPDecoder", &bOMPDecoder, false);
iniFile.Get("Settings", "EnableShaderDebugging", &bEnableShaderDebugging, false);
iniFile.Get("Enhancements", "ForceFiltering", &bForceFiltering, 0); iniFile.Get("Enhancements", "ForceFiltering", &bForceFiltering, 0);
iniFile.Get("Enhancements", "MaxAnisotropy", &iMaxAnisotropy, 0); // NOTE - this is x in (1 << x) iniFile.Get("Enhancements", "MaxAnisotropy", &iMaxAnisotropy, 0); // NOTE - this is x in (1 << x)
iniFile.Get("Enhancements", "PostProcessingShader", &sPostProcessingShader, ""); iniFile.Get("Enhancements", "PostProcessingShader", &sPostProcessingShader, "");
@ -231,6 +233,8 @@ void VideoConfig::Save(const char *ini_file)
iniFile.Set("Settings", "EnableOpenCL", bEnableOpenCL); iniFile.Set("Settings", "EnableOpenCL", bEnableOpenCL);
iniFile.Set("Settings", "OMPDecoder", bOMPDecoder); iniFile.Set("Settings", "OMPDecoder", bOMPDecoder);
iniFile.Set("Settings", "EnableShaderDebugging", bEnableShaderDebugging);
iniFile.Set("Enhancements", "ForceFiltering", bForceFiltering); iniFile.Set("Enhancements", "ForceFiltering", bForceFiltering);
iniFile.Set("Enhancements", "MaxAnisotropy", iMaxAnisotropy); iniFile.Set("Enhancements", "MaxAnisotropy", iMaxAnisotropy);
iniFile.Set("Enhancements", "PostProcessingShader", sPostProcessingShader); iniFile.Set("Enhancements", "PostProcessingShader", sPostProcessingShader);

View File

@ -147,6 +147,9 @@ struct VideoConfig
// D3D only config, mostly to be merged into the above // D3D only config, mostly to be merged into the above
int iAdapter; int iAdapter;
// Debugging
bool bEnableShaderDebugging;
// Static config per API // Static config per API
// TODO: Move this out of VideoConfig // TODO: Move this out of VideoConfig
struct struct

View File

@ -41,6 +41,7 @@ namespace DX11
PixelShaderCache::PSCache PixelShaderCache::PixelShaders; PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry; const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid;
LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache; LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
@ -412,6 +413,11 @@ void PixelShaderCache::Init()
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
PixelShaderCacheInserter inserter; PixelShaderCacheInserter inserter;
g_ps_disk_cache.OpenAndRead(cache_filename, inserter); g_ps_disk_cache.OpenAndRead(cache_filename, inserter);
if (g_Config.bEnableShaderDebugging)
Clear();
last_entry = NULL;
} }
// ONLY to be used during shutdown. // ONLY to be used during shutdown.
@ -420,6 +426,8 @@ void PixelShaderCache::Clear()
for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++) for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++)
iter->second.Destroy(); iter->second.Destroy();
PixelShaders.clear(); PixelShaders.clear();
last_entry = NULL;
} }
// Used in Swap() when AA mode has changed // Used in Swap() when AA mode has changed
@ -454,28 +462,31 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{ {
PIXELSHADERUID uid; PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode); GetPixelShaderId(&uid, dstAlphaMode, components);
// Check if the shader is already set // Check if the shader is already set
if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount) if (last_entry)
{ {
PSCache::const_iterator iter = PixelShaders.find(uid); if (uid == last_uid)
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); {
return (iter != PixelShaders.end() && iter->second.shader); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return (last_entry->shader != NULL);
}
} }
memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID)); last_uid = uid;
// Check if the shader is already in the cache // Check if the shader is already in the cache
PSCache::iterator iter; PSCache::iterator iter;
iter = PixelShaders.find(uid); iter = PixelShaders.find(uid);
if (iter != PixelShaders.end()) if (iter != PixelShaders.end())
{ {
iter->second.frameCount = frameCount;
const PSCacheEntry &entry = iter->second; const PSCacheEntry &entry = iter->second;
last_entry = &entry; last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL); return (entry.shader != NULL);
} }
@ -491,12 +502,18 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
// Insert the bytecode into the caches // Insert the bytecode into the caches
g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size()); g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
g_ps_disk_cache.Sync();
bool result = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size()); bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
pbytecode->Release(); pbytecode->Release();
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code;
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
}
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return result; return success;
} }
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen) bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen)
@ -511,7 +528,6 @@ bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* byt
// Make an entry in the table // Make an entry in the table
PSCacheEntry newentry; PSCacheEntry newentry;
newentry.shader = shader; newentry.shader = shader;
newentry.frameCount = frameCount;
PixelShaders[uid] = newentry; PixelShaders[uid] = newentry;
last_entry = &PixelShaders[uid]; last_entry = &PixelShaders[uid];

View File

@ -17,11 +17,12 @@
#pragma once #pragma once
#include <map> #include "PixelShaderGen.h"
#include <d3d11.h> #include <d3d11.h>
class PIXELSHADERUID; #include <map>
enum DSTALPHA_MODE; enum DSTALPHA_MODE;
namespace DX11 namespace DX11
@ -52,9 +53,11 @@ private:
struct PSCacheEntry struct PSCacheEntry
{ {
ID3D11PixelShader* shader; ID3D11PixelShader* shader;
int frameCount;
PSCacheEntry() : shader(NULL), frameCount(0) {} PIXELSHADERUIDSAFE safe_uid;
std::string code;
PSCacheEntry() : shader(NULL) {}
void Destroy() { SAFE_RELEASE(shader); } void Destroy() { SAFE_RELEASE(shader); }
}; };
@ -62,6 +65,7 @@ private:
static PSCache PixelShaders; static PSCache PixelShaders;
static const PSCacheEntry* last_entry; static const PSCacheEntry* last_entry;
static PIXELSHADERUID last_uid;
}; };
} // namespace DX11 } // namespace DX11

View File

@ -37,6 +37,7 @@ namespace DX11 {
VertexShaderCache::VSCache VertexShaderCache::vshaders; VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid;
static ID3D11VertexShader* SimpleVertexShader = NULL; static ID3D11VertexShader* SimpleVertexShader = NULL;
static ID3D11VertexShader* ClearVertexShader = NULL; static ID3D11VertexShader* ClearVertexShader = NULL;
@ -174,6 +175,11 @@ void VertexShaderCache::Init()
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
VertexShaderCacheInserter inserter; VertexShaderCacheInserter inserter;
g_vs_disk_cache.OpenAndRead(cache_filename, inserter); g_vs_disk_cache.OpenAndRead(cache_filename, inserter);
if (g_Config.bEnableShaderDebugging)
Clear();
last_entry = NULL;
} }
void VertexShaderCache::Clear() void VertexShaderCache::Clear()
@ -181,6 +187,8 @@ void VertexShaderCache::Clear()
for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter) for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
iter->second.Destroy(); iter->second.Destroy();
vshaders.clear(); vshaders.clear();
last_entry = NULL;
} }
void VertexShaderCache::Shutdown() void VertexShaderCache::Shutdown()
@ -202,22 +210,26 @@ bool VertexShaderCache::SetShader(u32 components)
{ {
VERTEXSHADERUID uid; VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components); GetVertexShaderId(&uid, components);
if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount) if (last_entry)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); if (uid == last_uid)
return (vshaders[uid].shader != NULL); {
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL);
}
} }
memcpy(&last_vertex_shader_uid, &uid, sizeof(VERTEXSHADERUID)); last_uid = uid;
VSCache::iterator iter = vshaders.find(uid); VSCache::iterator iter = vshaders.find(uid);
if (iter != vshaders.end()) if (iter != vshaders.end())
{ {
iter->second.frameCount = frameCount;
const VSCacheEntry &entry = iter->second; const VSCacheEntry &entry = iter->second;
last_entry = &entry; last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, entry.safe_uid, entry.code, components);
return (entry.shader != NULL); return (entry.shader != NULL);
} }
@ -232,12 +244,18 @@ bool VertexShaderCache::SetShader(u32 components)
return false; return false;
} }
g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size()); g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
g_vs_disk_cache.Sync();
bool result = InsertByteCode(uid, pbytecode); bool success = InsertByteCode(uid, pbytecode);
pbytecode->Release(); pbytecode->Release();
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
vshaders[uid].code = code;
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
}
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return result; return success;
} }
bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob) bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob)
@ -252,7 +270,6 @@ bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcod
// Make an entry in the table // Make an entry in the table
VSCacheEntry entry; VSCacheEntry entry;
entry.shader = shader; entry.shader = shader;
entry.frameCount = frameCount;
entry.SetByteCode(bcodeblob); entry.SetByteCode(bcodeblob);
vshaders[uid] = entry; vshaders[uid] = entry;

View File

@ -18,12 +18,12 @@
#ifndef _VERTEXSHADERCACHE_H #ifndef _VERTEXSHADERCACHE_H
#define _VERTEXSHADERCACHE_H #define _VERTEXSHADERCACHE_H
#include <map> #include "VertexShaderGen.h"
#include "D3DBase.h" #include "D3DBase.h"
#include "D3DBlob.h" #include "D3DBlob.h"
class VERTEXSHADERUID; #include <map>
namespace DX11 { namespace DX11 {
@ -51,9 +51,11 @@ private:
{ {
ID3D11VertexShader* shader; ID3D11VertexShader* shader;
D3DBlob* bytecode; // needed to initialize the input layout D3DBlob* bytecode; // needed to initialize the input layout
int frameCount;
VSCacheEntry() : shader(NULL), bytecode(NULL), frameCount(0) {} VERTEXSHADERUIDSAFE safe_uid;
std::string code;
VSCacheEntry() : shader(NULL), bytecode(NULL) {}
void SetByteCode(D3DBlob* blob) void SetByteCode(D3DBlob* blob)
{ {
SAFE_RELEASE(bytecode); SAFE_RELEASE(bytecode);
@ -70,6 +72,7 @@ private:
static VSCache vshaders; static VSCache vshaders;
static const VSCacheEntry* last_entry; static const VSCacheEntry* last_entry;
static VERTEXSHADERUID last_uid;
}; };
} // namespace DX11 } // namespace DX11

View File

@ -43,6 +43,7 @@ namespace DX9
PixelShaderCache::PSCache PixelShaderCache::PixelShaders; PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry; const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid;
static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache; static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
static std::set<u32> unique_shaders; static std::set<u32> unique_shaders;
@ -233,6 +234,8 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv
void PixelShaderCache::Init() void PixelShaderCache::Init()
{ {
last_entry = NULL;
//program used for clear screen //program used for clear screen
{ {
char pprog[3072]; char pprog[3072];
@ -283,6 +286,9 @@ void PixelShaderCache::Init()
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
PixelShaderCacheInserter inserter; PixelShaderCacheInserter inserter;
g_ps_disk_cache.OpenAndRead(cache_filename, inserter); g_ps_disk_cache.OpenAndRead(cache_filename, inserter);
if (g_Config.bEnableShaderDebugging)
Clear();
} }
// ONLY to be used during shutdown. // ONLY to be used during shutdown.
@ -292,7 +298,7 @@ void PixelShaderCache::Clear()
iter->second.Destroy(); iter->second.Destroy();
PixelShaders.clear(); PixelShaders.clear();
memset(&last_pixel_shader_uid, 0xFF, sizeof(last_pixel_shader_uid)); last_entry = NULL;
} }
void PixelShaderCache::Shutdown() void PixelShaderCache::Shutdown()
@ -326,41 +332,47 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{ {
const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30;
PIXELSHADERUID uid; PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode); GetPixelShaderId(&uid, dstAlphaMode, components);
// Check if the shader is already set // Check if the shader is already set
if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount) if (last_entry)
{ {
PSCache::const_iterator iter = PixelShaders.find(uid); if (uid == last_uid)
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); {
return (iter != PixelShaders.end() && iter->second.shader); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return last_entry->shader != NULL;
}
} }
memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID)); last_uid = uid;
// Check if the shader is already in the cache // Check if the shader is already in the cache
PSCache::iterator iter; PSCache::iterator iter;
iter = PixelShaders.find(uid); iter = PixelShaders.find(uid);
if (iter != PixelShaders.end()) if (iter != PixelShaders.end())
{ {
iter->second.frameCount = frameCount;
const PSCacheEntry &entry = iter->second; const PSCacheEntry &entry = iter->second;
last_entry = &entry; last_entry = &entry;
if (entry.shader) D3D::SetPixelShader(entry.shader); if (entry.shader) D3D::SetPixelShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL); return (entry.shader != NULL);
} }
// Need to compile a new shader // Need to compile a new shader
const char *code = GeneratePixelShaderCode(dstAlphaMode, ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30, components); const char *code = GeneratePixelShaderCode(dstAlphaMode, api, components);
u32 code_hash = HashAdler32((const u8 *)code, strlen(code)); if (g_ActiveConfig.bEnableShaderDebugging)
unique_shaders.insert(code_hash); {
SETSTAT(stats.numUniquePixelShaders, unique_shaders.size()); u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
unique_shaders.insert(code_hash);
SETSTAT(stats.numUniquePixelShaders, unique_shaders.size());
}
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
@ -381,14 +393,19 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
// Insert the bytecode into the caches // Insert the bytecode into the caches
g_ps_disk_cache.Append(uid, bytecode, bytecodelen); g_ps_disk_cache.Append(uid, bytecode, bytecodelen);
g_ps_disk_cache.Sync();
// And insert it into the shader cache. // And insert it into the shader cache.
bool result = InsertByteCode(uid, bytecode, bytecodelen, true); bool success = InsertByteCode(uid, bytecode, bytecodelen, true);
delete [] bytecode; delete [] bytecode;
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code;
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
}
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return result; return success;
} }
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate)
@ -398,7 +415,6 @@ bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytec
// Make an entry in the table // Make an entry in the table
PSCacheEntry newentry; PSCacheEntry newentry;
newentry.shader = shader; newentry.shader = shader;
newentry.frameCount = frameCount;
PixelShaders[uid] = newentry; PixelShaders[uid] = newentry;
last_entry = &PixelShaders[uid]; last_entry = &PixelShaders[uid];

View File

@ -40,9 +40,11 @@ private:
{ {
LPDIRECT3DPIXELSHADER9 shader; LPDIRECT3DPIXELSHADER9 shader;
bool owns_shader; bool owns_shader;
int frameCount;
PSCacheEntry() : shader(NULL), owns_shader(true), frameCount(0) {} PIXELSHADERUIDSAFE safe_uid;
std::string code;
PSCacheEntry() : shader(NULL), owns_shader(true) {}
void Destroy() void Destroy()
{ {
if (shader && owns_shader) if (shader && owns_shader)
@ -55,6 +57,7 @@ private:
static PSCache PixelShaders; static PSCache PixelShaders;
static const PSCacheEntry *last_entry; static const PSCacheEntry *last_entry;
static PIXELSHADERUID last_uid;
static void Clear(); static void Clear();
public: public:

View File

@ -38,6 +38,7 @@ namespace DX9
VertexShaderCache::VSCache VertexShaderCache::vshaders; VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid;
#define MAX_SSAA_SHADERS 3 #define MAX_SSAA_SHADERS 3
@ -151,6 +152,11 @@ void VertexShaderCache::Init()
SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str()); SConfig::GetInstance().m_LocalCoreStartupParameter.m_strUniqueID.c_str());
VertexShaderCacheInserter inserter; VertexShaderCacheInserter inserter;
g_vs_disk_cache.OpenAndRead(cache_filename, inserter); g_vs_disk_cache.OpenAndRead(cache_filename, inserter);
if (g_Config.bEnableShaderDebugging)
Clear();
last_entry = NULL;
} }
void VertexShaderCache::Clear() void VertexShaderCache::Clear()
@ -159,7 +165,7 @@ void VertexShaderCache::Clear()
iter->second.Destroy(); iter->second.Destroy();
vshaders.clear(); vshaders.clear();
memset(&last_vertex_shader_uid, 0xFF, sizeof(last_vertex_shader_uid)); last_entry = NULL;
} }
void VertexShaderCache::Shutdown() void VertexShaderCache::Shutdown()
@ -184,23 +190,27 @@ bool VertexShaderCache::SetShader(u32 components)
{ {
VERTEXSHADERUID uid; VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components); GetVertexShaderId(&uid, components);
if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount) if (last_entry)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); if (uid == last_uid)
return (vshaders[uid].shader != NULL); {
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL);
}
} }
memcpy(&last_vertex_shader_uid, &uid, sizeof(VERTEXSHADERUID)); last_uid = uid;
VSCache::iterator iter = vshaders.find(uid); VSCache::iterator iter = vshaders.find(uid);
if (iter != vshaders.end()) if (iter != vshaders.end())
{ {
iter->second.frameCount = frameCount;
const VSCacheEntry &entry = iter->second; const VSCacheEntry &entry = iter->second;
last_entry = &entry; last_entry = &entry;
if (entry.shader) D3D::SetVertexShader(entry.shader); if (entry.shader) D3D::SetVertexShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, entry.safe_uid, entry.code, components);
return (entry.shader != NULL); return (entry.shader != NULL);
} }
@ -213,12 +223,16 @@ bool VertexShaderCache::SetShader(u32 components)
return false; return false;
} }
g_vs_disk_cache.Append(uid, bytecode, bytecodelen); g_vs_disk_cache.Append(uid, bytecode, bytecodelen);
g_vs_disk_cache.Sync();
bool result = InsertByteCode(uid, bytecode, bytecodelen, true); bool success = InsertByteCode(uid, bytecode, bytecodelen, true);
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
vshaders[uid].code = code;
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
}
delete [] bytecode; delete [] bytecode;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return result; return success;
} }
bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) { bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) {
@ -227,7 +241,6 @@ bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *byt
// Make an entry in the table // Make an entry in the table
VSCacheEntry entry; VSCacheEntry entry;
entry.shader = shader; entry.shader = shader;
entry.frameCount = frameCount;
vshaders[uid] = entry; vshaders[uid] = entry;
last_entry = &vshaders[uid]; last_entry = &vshaders[uid];

View File

@ -34,11 +34,11 @@ private:
struct VSCacheEntry struct VSCacheEntry
{ {
LPDIRECT3DVERTEXSHADER9 shader; LPDIRECT3DVERTEXSHADER9 shader;
int frameCount;
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string code; std::string code;
#endif VERTEXSHADERUIDSAFE safe_uid;
VSCacheEntry() : shader(NULL), frameCount(0) {}
VSCacheEntry() : shader(NULL) {}
void Destroy() void Destroy()
{ {
if (shader) if (shader)
@ -51,6 +51,7 @@ private:
static VSCache vshaders; static VSCache vshaders;
static const VSCacheEntry *last_entry; static const VSCacheEntry *last_entry;
static VERTEXSHADERUID last_uid;
static void Clear(); static void Clear();
public: public:

View File

@ -44,7 +44,8 @@ bool PixelShaderCache::s_displayCompileAlert;
GLuint PixelShaderCache::CurrentShader; GLuint PixelShaderCache::CurrentShader;
bool PixelShaderCache::ShaderEnabled; bool PixelShaderCache::ShaderEnabled;
static FRAGMENTSHADER* pShaderLast = NULL; PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry = NULL;
PIXELSHADERUID PixelShaderCache::last_uid;
GLuint PixelShaderCache::GetDepthMatrixProgram() GLuint PixelShaderCache::GetDepthMatrixProgram()
{ {
@ -61,10 +62,9 @@ void PixelShaderCache::Init()
glEnable(GL_FRAGMENT_PROGRAM_ARB); glEnable(GL_FRAGMENT_PROGRAM_ARB);
ShaderEnabled = true; ShaderEnabled = true;
CurrentShader = 0; CurrentShader = 0;
last_entry = NULL;
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
memset(&last_pixel_shader_uid, 0xFF, sizeof(last_pixel_shader_uid));
s_displayCompileAlert = true; s_displayCompileAlert = true;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, (GLint *)&s_nMaxPixelInstructions); glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, (GLint *)&s_nMaxPixelInstructions);
@ -184,38 +184,43 @@ void PixelShaderCache::Shutdown()
FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{ {
PIXELSHADERUID uid; PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode); GetPixelShaderId(&uid, dstAlphaMode, components);
// Check if the shader is already set // Check if the shader is already set
if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount) if (last_entry)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); if (uid == last_uid)
return pShaderLast; {
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(API_OPENGL, last_entry->safe_uid, last_entry->shader.strprog, dstAlphaMode, components);
return &last_entry->shader;
}
} }
memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID)); last_uid = uid;
PSCache::iterator iter = PixelShaders.find(uid); PSCache::iterator iter = PixelShaders.find(uid);
if (iter != PixelShaders.end()) if (iter != PixelShaders.end())
{ {
iter->second.frameCount = frameCount;
PSCacheEntry &entry = iter->second; PSCacheEntry &entry = iter->second;
if (&entry.shader != pShaderLast) last_entry = &entry;
{
pShaderLast = &entry.shader;
}
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return pShaderLast; ValidatePixelShaderIDs(API_OPENGL, entry.safe_uid, entry.shader.strprog, dstAlphaMode, components);
return &last_entry->shader;
} }
// Make an entry in the table // Make an entry in the table
PSCacheEntry& newentry = PixelShaders[uid]; PSCacheEntry& newentry = PixelShaders[uid];
newentry.frameCount = frameCount; last_entry = &newentry;
pShaderLast = &newentry.shader;
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components); const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components);
if (g_ActiveConfig.bEnableShaderDebugging && code)
{
GetSafePixelShaderId(&newentry.safe_uid, dstAlphaMode, components);
newentry.shader.strprog = code;
}
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
static int counter = 0; static int counter = 0;
@ -234,7 +239,7 @@ FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 comp
INCSTAT(stats.numPixelShadersCreated); INCSTAT(stats.numPixelShadersCreated);
SETSTAT(stats.numPixelShadersAlive, PixelShaders.size()); SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return pShaderLast; return &last_entry->shader;
} }
bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram) bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram)
@ -318,9 +323,6 @@ bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpr
cgDestroyProgram(tempprog); cgDestroyProgram(tempprog);
#endif #endif
#if defined(_DEBUG) || defined(DEBUGFAST)
ps.strprog = pstrprogram;
#endif
return true; return true;
} }

View File

@ -39,9 +39,7 @@ struct FRAGMENTSHADER
} }
} }
GLuint glprogid; // opengl program id GLuint glprogid; // opengl program id
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string strprog; std::string strprog;
#endif
}; };
class PixelShaderCache class PixelShaderCache
@ -49,13 +47,13 @@ class PixelShaderCache
struct PSCacheEntry struct PSCacheEntry
{ {
FRAGMENTSHADER shader; FRAGMENTSHADER shader;
int frameCount; PSCacheEntry() {}
PSCacheEntry() : frameCount(0) {}
~PSCacheEntry() {} ~PSCacheEntry() {}
void Destroy() void Destroy()
{ {
shader.Destroy(); shader.Destroy();
} }
PIXELSHADERUIDSAFE safe_uid;
}; };
typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache; typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache;
@ -67,6 +65,8 @@ class PixelShaderCache
static bool s_displayCompileAlert; static bool s_displayCompileAlert;
static GLuint CurrentShader; static GLuint CurrentShader;
static PSCacheEntry* last_entry;
static PIXELSHADERUID last_uid;
static bool ShaderEnabled; static bool ShaderEnabled;

View File

@ -41,7 +41,9 @@ VertexShaderCache::VSCache VertexShaderCache::vshaders;
GLuint VertexShaderCache::CurrentShader; GLuint VertexShaderCache::CurrentShader;
bool VertexShaderCache::ShaderEnabled; bool VertexShaderCache::ShaderEnabled;
static VERTEXSHADER *pShaderLast = NULL; VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry = NULL;
VERTEXSHADERUID VertexShaderCache::last_uid;
static int s_nMaxVertexInstructions; static int s_nMaxVertexInstructions;
@ -50,7 +52,7 @@ void VertexShaderCache::Init()
glEnable(GL_VERTEX_PROGRAM_ARB); glEnable(GL_VERTEX_PROGRAM_ARB);
ShaderEnabled = true; ShaderEnabled = true;
CurrentShader = 0; CurrentShader = 0;
memset(&last_vertex_shader_uid, 0xFF, sizeof(last_vertex_shader_uid)); last_entry = NULL;
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions); glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&s_nMaxVertexInstructions);
if (strstr((const char*)glGetString(GL_VENDOR), "Humper") != NULL) s_nMaxVertexInstructions = 4096; if (strstr((const char*)glGetString(GL_VENDOR), "Humper") != NULL) s_nMaxVertexInstructions = 4096;
@ -74,31 +76,34 @@ VERTEXSHADER* VertexShaderCache::SetShader(u32 components)
{ {
VERTEXSHADERUID uid; VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components); GetVertexShaderId(&uid, components);
if (uid == last_vertex_shader_uid && vshaders[uid].frameCount == frameCount) if (last_entry)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); if (uid == last_uid)
return pShaderLast; {
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_OPENGL, vshaders[uid].safe_uid, vshaders[uid].shader.strprog, components);
return &last_entry->shader;
}
} }
memcpy(&last_vertex_shader_uid, &uid, sizeof(VERTEXSHADERUID));
last_uid = uid;
VSCache::iterator iter = vshaders.find(uid); VSCache::iterator iter = vshaders.find(uid);
if (iter != vshaders.end()) if (iter != vshaders.end())
{ {
iter->second.frameCount = frameCount;
VSCacheEntry &entry = iter->second; VSCacheEntry &entry = iter->second;
if (&entry.shader != pShaderLast) { last_entry = &entry;
pShaderLast = &entry.shader;
}
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return pShaderLast; ValidateVertexShaderIDs(API_OPENGL, entry.safe_uid, entry.shader.strprog, components);
return &last_entry->shader;
} }
// Make an entry in the table // Make an entry in the table
VSCacheEntry& entry = vshaders[uid]; VSCacheEntry& entry = vshaders[uid];
entry.frameCount = frameCount; last_entry = &entry;
pShaderLast = &entry.shader;
const char *code = GenerateVertexShaderCode(components, API_OPENGL); const char *code = GenerateVertexShaderCode(components, API_OPENGL);
GetSafeVertexShaderId(&entry.safe_uid, components);
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
@ -118,7 +123,7 @@ VERTEXSHADER* VertexShaderCache::SetShader(u32 components)
INCSTAT(stats.numVertexShadersCreated); INCSTAT(stats.numVertexShadersCreated);
SETSTAT(stats.numVertexShadersAlive, vshaders.size()); SETSTAT(stats.numVertexShadersAlive, vshaders.size());
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return pShaderLast; return &last_entry->shader;
} }
bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrprogram) bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrprogram)
@ -182,9 +187,8 @@ bool VertexShaderCache::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpr
cgDestroyProgram(tempprog); cgDestroyProgram(tempprog);
#endif #endif
#if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.bEnableShaderDebugging)
vs.strprog = pstrprogram; vs.strprog = pstrprogram;
#endif
return true; return true;
} }

View File

@ -32,9 +32,7 @@ struct VERTEXSHADER
VERTEXSHADER() : glprogid(0) {} VERTEXSHADER() : glprogid(0) {}
GLuint glprogid; // opengl program id GLuint glprogid; // opengl program id
#if defined(_DEBUG) || defined(DEBUGFAST)
std::string strprog; std::string strprog;
#endif
}; };
class VertexShaderCache class VertexShaderCache
@ -42,8 +40,8 @@ class VertexShaderCache
struct VSCacheEntry struct VSCacheEntry
{ {
VERTEXSHADER shader; VERTEXSHADER shader;
int frameCount; VERTEXSHADERUIDSAFE safe_uid;
VSCacheEntry() : frameCount(0) {} VSCacheEntry() {}
void Destroy() { void Destroy() {
// printf("Destroying vs %i\n", shader.glprogid); // printf("Destroying vs %i\n", shader.glprogid);
glDeleteProgramsARB(1, &shader.glprogid); glDeleteProgramsARB(1, &shader.glprogid);
@ -55,6 +53,9 @@ class VertexShaderCache
static VSCache vshaders; static VSCache vshaders;
static VSCacheEntry* last_entry;
static VERTEXSHADERUID last_uid;
static GLuint CurrentShader; static GLuint CurrentShader;
static bool ShaderEnabled; static bool ShaderEnabled;