Merge branch 'shader-uids-awesome'.

Replaces the old, hardcoded shader ID generator with a semi-automatic mechanism that generates IDs from hints in the code generator.

Also introduces a flexible framework to do all kinds of funky stuff with the shader code generation logic. As an example, a uniform usage profile generation class is added (unused for now, though).

Functionality can still be tested by setting the EnableShaderDebugging field in the gfx config to True. Any two shaders which are identified with the same ID will be written to a file and an error message will be written to the Dolphin log.
This commit is contained in:
NeoBrainX 2013-06-17 13:27:22 +02:00
commit 88bc8255b8
28 changed files with 1630 additions and 1401 deletions

View File

@ -144,6 +144,11 @@
#define TEVALPHAARG_KONST 6 #define TEVALPHAARG_KONST 6
#define TEVALPHAARG_ZERO 7 #define TEVALPHAARG_ZERO 7
#define GX_TEVPREV 0
#define GX_TEVREG0 1
#define GX_TEVREG1 2
#define GX_TEVREG2 3
#define ALPHACMP_NEVER 0 #define ALPHACMP_NEVER 0
#define ALPHACMP_LESS 1 #define ALPHACMP_LESS 1
#define ALPHACMP_EQUAL 2 #define ALPHACMP_EQUAL 2

View File

@ -89,21 +89,21 @@ void GFXDebuggerBase::DumpPixelShader(const char* path)
if (!useDstAlpha) if (!useDstAlpha)
{ {
output = "Destination alpha disabled:\n"; output = "Destination alpha disabled:\n";
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); /// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
} }
else else
{ {
if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend) if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{ {
output = "Using dual source blending for destination alpha:\n"; output = "Using dual source blending for destination alpha:\n";
output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); /// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
} }
else else
{ {
output = "Using two passes for emulating destination alpha:\n"; output = "Using two passes for emulating destination alpha:\n";
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); /// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
output += "\n\nDestination alpha pass shader:\n"; output += "\n\nDestination alpha pass shader:\n";
output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); /// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
} }
} }
@ -117,7 +117,7 @@ void GFXDebuggerBase::DumpVertexShader(const char* path)
sprintf(filename, "%sdump_vs.txt", path); sprintf(filename, "%sdump_vs.txt", path);
File::CreateEmptyFile(filename); File::CreateEmptyFile(filename);
File::WriteStringToFile(true, GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename); /// File::WriteStringToFile(true, GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename);
} }
void GFXDebuggerBase::DumpPixelShaderConstants(const char* path) void GFXDebuggerBase::DumpPixelShaderConstants(const char* path)

View File

@ -5,217 +5,3 @@
#include "LightingShaderGen.h" #include "LightingShaderGen.h"
#include "NativeVertexFormat.h" #include "NativeVertexFormat.h"
#include "XFMemory.h" #include "XFMemory.h"
#define WRITE p+=sprintf
// Packs the lighting state of every active color channel into `out` and returns the number
// of entries written (xfregs.numChan.numColorChans).
//
// For channel i, out[i] receives either the channel's full register value (`hex`) when
// lighting is enabled, or only its `matsource` bit when it is not. The same selection for
// the matching alpha channel is shifted left by 15 bits and OR'ed into the same entry.
//
// `out` must provide room for numColorChans entries; the assertion limits that count to 2.
int GetLightingShaderId(u32* out)
{
    const u32 numChans = xfregs.numChan.numColorChans;

    // Validate the channel count BEFORE the loop writes through `out`, so an out-of-range
    // count is reported before any store happens instead of after the fact.
    _assert_(numChans <= 2);

    for (u32 i = 0; i < numChans; ++i)
    {
        out[i] = xfregs.color[i].enablelighting ?
            (u32)xfregs.color[i].hex :
            (u32)xfregs.color[i].matsource;
        out[i] |= (xfregs.alpha[i].enablelighting ?
            (u32)xfregs.alpha[i].hex :
            (u32)xfregs.alpha[i].matsource) << 15;
    }
    return numChans;
}
// Appends to `p` the shader statements that accumulate light number `index` into `lacc`
// for the lighting channel `chan`, and returns the advanced write pointer.
//
// coloralpha selects which components are written: 1 -> "xyz" (color), 2 -> "w" (alpha),
// any other value -> "xyzw".
// lightsName is the name of the lights uniform array; each light occupies five consecutive
// entries starting at index * 5.
char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char* lightsName, int coloralpha)
{
    const char* swizzle;
    switch (coloralpha)
    {
    case 1:  swizzle = "xyz";  break;
    case 2:  swizzle = "w";    break;
    default: swizzle = "xyzw"; break;
    }

    // First array entry that belongs to this light.
    const int base = index * 5;

    // Opening of the diffuse term; the format strings below supply the closing ')'.
    const char* const diffuseOpen = (chan.diffusefunc != LIGHTDIF_SIGN) ? "max(0.0f," : "(";

    if (chan.attnfunc & 1)
    {
        // spec and spot: compute the attenuation factor first
        if (chan.attnfunc == 3)
        { // spot
            p += sprintf(p, "ldir = %s[%d + 3].xyz - pos.xyz;\n", lightsName, base);
            p += sprintf(p, "dist2 = dot(ldir, ldir);\n"
                            "dist = sqrt(dist2);\n"
                            "ldir = ldir / dist;\n"
                            "attn = max(0.0f, dot(ldir, %s[%d + 4].xyz));\n", lightsName, base);
            p += sprintf(p, "attn = max(0.0f, dot(%s[%d + 1].xyz, float3(1.0f, attn, attn*attn))) / dot(%s[%d + 2].xyz, float3(1.0f,dist,dist2));\n", lightsName, base, lightsName, base);
        }
        else if (chan.attnfunc == 1)
        { // specular
            p += sprintf(p, "ldir = normalize(%s[%d + 3].xyz);\n", lightsName, base);
            p += sprintf(p, "attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s[%d + 4].xyz)) : 0.0f;\n", lightsName, base);
            p += sprintf(p, "attn = max(0.0f, dot(%s[%d + 1].xyz, float3(1,attn,attn*attn))) / dot(%s[%d + 2].xyz, float3(1,attn,attn*attn));\n", lightsName, base, lightsName, base);
        }

        // then scale the light color by it
        switch (chan.diffusefunc)
        {
        case LIGHTDIF_NONE:
            p += sprintf(p, "lacc.%s += attn * %s[%d].%s;\n", swizzle, lightsName, base, swizzle);
            break;
        case LIGHTDIF_SIGN:
        case LIGHTDIF_CLAMP:
            p += sprintf(p, "lacc.%s += attn * %sdot(ldir, _norm0)) * %s[%d].%s;\n",
                         swizzle, diffuseOpen, lightsName, base, swizzle);
            break;
        default: _assert_(0);
        }
    }
    else
    {
        // attenuation disabled
        switch (chan.diffusefunc)
        {
        case LIGHTDIF_NONE:
            p += sprintf(p, "lacc.%s += %s[%d].%s;\n", swizzle, lightsName, base, swizzle);
            break;
        case LIGHTDIF_SIGN:
        case LIGHTDIF_CLAMP:
            p += sprintf(p, "ldir = normalize(%s[%d + 3].xyz - pos.xyz);\n", lightsName, base);
            p += sprintf(p, "lacc.%s += %sdot(ldir, _norm0)) * %s[%d].%s;\n",
                         swizzle, diffuseOpen, lightsName, base, swizzle);
            break;
        default: _assert_(0);
        }
    }

    p += sprintf(p, "\n");
    return p;
}
// vertex shader
// lights/colors
// materials name is I_MATERIALS in vs and I_PMATERIALS in ps
// inColorName is color in vs and colors_ in ps
// dest is o.colors_ in vs and colors_ in ps
char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest)
{
for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++)
{
const LitChannel& color = xfregs.color[j];
const LitChannel& alpha = xfregs.alpha[j];
WRITE(p, "{\n");
if (color.matsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
WRITE(p, "mat = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0)
WRITE(p, "mat = %s0;\n", inColorName);
else
WRITE(p, "mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
else // from color
{
WRITE(p, "mat = %s[%d];\n", materialsName, j+2);
}
if (color.enablelighting)
{
if (color.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
WRITE(p, "lacc = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
WRITE(p, "lacc = %s0;\n", inColorName);
else
WRITE(p, "lacc = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
}
else // from color
{
WRITE(p, "lacc = %s[%d];\n", materialsName, j);
}
}
else
{
WRITE(p, "lacc = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
// check if alpha is different
if (alpha.matsource != color.matsource)
{
if (alpha.matsource) // from vertex
{
if (components & (VB_HAS_COL0<<j))
WRITE(p, "mat.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0)
WRITE(p, "mat.w = %s0.w;\n", inColorName);
else WRITE(p, "mat.w = 1.0f;\n");
}
else // from color
{
WRITE(p, "mat.w = %s[%d].w;\n", materialsName, j+2);
}
}
if (alpha.enablelighting)
{
if (alpha.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
WRITE(p, "lacc.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
WRITE(p, "lacc.w = %s0.w;\n", inColorName);
else
WRITE(p, "lacc.w = 0.0f;\n");
}
else // from color
{
WRITE(p, "lacc.w = %s[%d].w;\n", materialsName, j);
}
}
else
{
WRITE(p, "lacc.w = 1.0f;\n");
}
if(color.enablelighting && alpha.enablelighting)
{
// both have lighting, test if they use the same lights
int mask = 0;
if(color.lightparams == alpha.lightparams)
{
mask = color.GetFullLightMask() & alpha.GetFullLightMask();
if(mask)
{
for (int i = 0; i < 8; ++i)
{
if (mask & (1<<i))
p = GenerateLightShader(p, i, color, lightsName, 3);
}
}
}
// no shared lights
for (int i = 0; i < 8; ++i)
{
if (!(mask&(1<<i)) && (color.GetFullLightMask() & (1<<i)))
p = GenerateLightShader(p, i, color, lightsName, 1);
if (!(mask&(1<<i)) && (alpha.GetFullLightMask() & (1<<i)))
p = GenerateLightShader(p, i, alpha, lightsName, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
{
// lights are disabled on one channel so process only the active ones
const LitChannel& workingchannel = color.enablelighting ? color : alpha;
int coloralpha = color.enablelighting ? 1 : 2;
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1<<i))
p = GenerateLightShader(p, i, workingchannel, lightsName, coloralpha);
}
}
WRITE(p, "%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j);
WRITE(p, "}\n");
}
return p;
}

View File

@ -5,9 +5,255 @@
#ifndef _LIGHTINGSHADERGEN_H_ #ifndef _LIGHTINGSHADERGEN_H_
#define _LIGHTINGSHADERGEN_H_ #define _LIGHTINGSHADERGEN_H_
#include "CommonTypes.h" #include "ShaderGenCommon.h"
#include "NativeVertexFormat.h"
#include "XFMemory.h"
int GetLightingShaderId(u32* out); static const char* LightCol(const char* lightsName, unsigned int index, const char* swizzle)
char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest); {
// LightCol formats "<lightsName>[5*<index>].<swizzle>" into a function-local static buffer
// and returns a pointer to that buffer.
// The buffer is shared by every call to LightCol, so the result has to be consumed before
// the next call; snprintf truncates anything beyond sizeof(result) - 1 characters.
// NOTE(review): index is an unsigned int but is formatted with %d; %u would match the
// argument type.
static char result[32];
snprintf(result, sizeof(result), "%s[5*%d].%s", lightsName, index, swizzle);
return result;
}
// Formats the array element "<lightsName>[5*<index>+1]" (entry 1 of light `index`'s
// five-entry group) and returns it.
//
// The result lives in a function-local static buffer: it is overwritten by the next call to
// LightCosAtt, and snprintf truncates output longer than sizeof(result) - 1 characters.
static const char* LightCosAtt(const char* lightsName, unsigned int index)
{
    static char result[32];
    // index is unsigned, so it is printed with %u (the previous %d mismatched the argument type).
    snprintf(result, sizeof(result), "%s[5*%u+1]", lightsName, index);
    return result;
}
// Formats the array element "<lightsName>[5*<index>+2]" (entry 2 of light `index`'s
// five-entry group) and returns it.
//
// The result lives in a function-local static buffer: it is overwritten by the next call to
// LightDistAtt, and snprintf truncates output longer than sizeof(result) - 1 characters.
static const char* LightDistAtt(const char* lightsName, unsigned int index)
{
    static char result[32];
    // index is unsigned, so it is printed with %u (the previous %d mismatched the argument type).
    snprintf(result, sizeof(result), "%s[5*%u+2]", lightsName, index);
    return result;
}
// Formats the array element "<lightsName>[5*<index>+3]" (entry 3 of light `index`'s
// five-entry group) and returns it.
//
// The result lives in a function-local static buffer: it is overwritten by the next call to
// LightPos, and snprintf truncates output longer than sizeof(result) - 1 characters.
static const char* LightPos(const char* lightsName, unsigned int index)
{
    static char result[32];
    // index is unsigned, so it is printed with %u (the previous %d mismatched the argument type).
    snprintf(result, sizeof(result), "%s[5*%u+3]", lightsName, index);
    return result;
}
// Formats the array element "<lightsName>[5*<index>+4]" (entry 4 of light `index`'s
// five-entry group) and returns it.
//
// The result lives in a function-local static buffer: it is overwritten by the next call to
// LightDir, and snprintf truncates output longer than sizeof(result) - 1 characters.
static const char* LightDir(const char* lightsName, unsigned int index)
{
    static char result[32];
    // index is unsigned, so it is printed with %u (the previous %d mismatched the argument type).
    snprintf(result, sizeof(result), "%s[5*%u+4]", lightsName, index);
    return result;
}
// Emits, through object.Write(), the shader statements that accumulate light number `index`
// into `lacc`, and records the channel's attenuation/diffuse functions in `uid_data`.
//
// litchan_index - 0/1 select xfregs.color[0/1]; 2/3 select xfregs.alpha[0/1]. It also picks
//                 the 2-bit slot (at bit 2*litchan_index) used in uid_data.attnfunc and
//                 uid_data.diffusefunc.
// lightsName    - name of the lights uniform array, passed on to the Light* helpers
// coloralpha    - 1 writes "xyz", 2 writes "w", any other value writes "xyzw"
template<class T>
static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha)
{
    const bool isAlphaChannel = litchan_index > 1;
    const LitChannel& chan = isAlphaChannel ? xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index];

    const char* swizzle;
    switch (coloralpha)
    {
    case 1:  swizzle = "xyz";  break;
    case 2:  swizzle = "w";    break;
    default: swizzle = "xyzw"; break;
    }

    const int uidShift = 2*litchan_index;
    uid_data.attnfunc |= chan.attnfunc << uidShift;
    uid_data.diffusefunc |= chan.diffusefunc << uidShift;

    // Opening of the diffuse term; the format strings below supply the closing ')'.
    const char* const diffuseOpen = (chan.diffusefunc != LIGHTDIF_SIGN) ? "max(0.0f," : "(";

    if (chan.attnfunc & 1)
    {
        // spec and spot: compute the attenuation factor first
        if (chan.attnfunc == 3)
        { // spot
            object.Write("ldir = %s.xyz - pos.xyz;\n", LightPos(lightsName, index));
            object.Write("dist2 = dot(ldir, ldir);\n"
                         "dist = sqrt(dist2);\n"
                         "ldir = ldir / dist;\n"
                         "attn = max(0.0f, dot(ldir, %s.xyz));\n", LightDir(lightsName, index));
            object.Write("attn = max(0.0f, dot(%s.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.xyz, float3(1.0f,dist,dist2));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index));
        }
        else if (chan.attnfunc == 1)
        { // specular
            object.Write("ldir = normalize(%s.xyz);\n", LightPos(lightsName, index));
            object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s.xyz)) : 0.0f;\n", LightDir(lightsName, index));
            object.Write("attn = max(0.0f, dot(%s.xyz, float3(1,attn,attn*attn))) / dot(%s.xyz, float3(1,attn,attn*attn));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index));
        }

        // then scale the light color by it
        switch (chan.diffusefunc)
        {
        case LIGHTDIF_NONE:
            object.Write("lacc.%s += attn * %s;\n", swizzle, LightCol(lightsName, index, swizzle));
            break;
        case LIGHTDIF_SIGN:
        case LIGHTDIF_CLAMP:
            object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s;\n",
                         swizzle, diffuseOpen, LightCol(lightsName, index, swizzle));
            break;
        default: _assert_(0);
        }
    }
    else
    {
        // atten disabled
        switch (chan.diffusefunc)
        {
        case LIGHTDIF_NONE:
            object.Write("lacc.%s += %s;\n", swizzle, LightCol(lightsName, index, swizzle));
            break;
        case LIGHTDIF_SIGN:
        case LIGHTDIF_CLAMP:
            object.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(lightsName, index));
            object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s;\n",
                         swizzle, diffuseOpen, LightCol(lightsName, index, swizzle));
            break;
        default: _assert_(0);
        }
    }

    object.Write("\n");
}
// Emits the per-channel lighting code through object.Write() and, alongside it, records in
// uid_data every register field that influenced the emitted code.
//
// vertex shader
// lights/colors
// materials name is I_MATERIALS in vs and I_PMATERIALS in ps
// inColorName is color in vs and colors_ in ps
// dest is o.colors_ in vs and colors_ in ps
//
// uid_data layout as written below: for channel j, the color channel uses slot j and the
// alpha channel uses slot j+2. matsource/enablelighting/ambsource use 1 bit per slot,
// attnfunc/diffusefunc use 2 bits per slot, light_mask uses 8 bits per slot.
// All uid_data fields are only OR'ed into, never cleared here.
template<class T>
static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest)
{
for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++)
{
const LitChannel& color = xfregs.color[j];
const LitChannel& alpha = xfregs.alpha[j];
object.Write("{\n");
// material color: taken from the vertex color if present (falling back to color 0, then
// to opaque white), otherwise from the materials array at entry j+2
uid_data.matsource |= xfregs.color[j].matsource << j;
if (color.matsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
object.Write("mat = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat = %s0;\n", inColorName);
else
object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
else // from color
{
object.Write("mat = %s[%d];\n", materialsName, j+2);
}
// ambient term: start value of the light accumulator `lacc`
uid_data.enablelighting |= xfregs.color[j].enablelighting << j;
if (color.enablelighting)
{
// ambsource is only recorded when lighting is enabled, since it is only read in this branch
uid_data.ambsource |= xfregs.color[j].ambsource << j;
if (color.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
object.Write("lacc = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
object.Write("lacc = %s0;\n", inColorName);
else
object.Write("lacc = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
}
else // from color
{
object.Write("lacc = %s[%d];\n", materialsName, j);
}
}
else
{
object.Write("lacc = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
// check if alpha is different
// (mat.w is only overridden when the alpha material source differs from the color's)
uid_data.matsource |= xfregs.alpha[j].matsource << (j+2);
if (alpha.matsource != color.matsource)
{
if (alpha.matsource) // from vertex
{
if (components & (VB_HAS_COL0<<j))
object.Write("mat.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat.w = %s0.w;\n", inColorName);
else object.Write("mat.w = 1.0f;\n");
}
else // from color
{
object.Write("mat.w = %s[%d].w;\n", materialsName, j+2);
}
}
// ambient alpha: same selection as for the color, applied to lacc.w only
uid_data.enablelighting |= xfregs.alpha[j].enablelighting << (j+2);
if (alpha.enablelighting)
{
uid_data.ambsource |= xfregs.alpha[j].ambsource << (j+2);
if (alpha.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
object.Write("lacc.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
object.Write("lacc.w = %s0.w;\n", inColorName);
else
object.Write("lacc.w = 0.0f;\n");
}
else // from color
{
object.Write("lacc.w = %s[%d].w;\n", materialsName, j);
}
}
else
{
object.Write("lacc.w = 1.0f;\n");
}
if(color.enablelighting && alpha.enablelighting)
{
// both have lighting, test if they use the same lights
int mask = 0;
// Record both channels' functions and light masks up front. GenerateLightShader ORs the
// same attnfunc/diffusefunc bits in again for every light it emits, which leaves them unchanged.
uid_data.attnfunc |= color.attnfunc << (2*j);
uid_data.attnfunc |= alpha.attnfunc << (2*(j+2));
uid_data.diffusefunc |= color.diffusefunc << (2*j);
uid_data.diffusefunc |= alpha.diffusefunc << (2*(j+2));
uid_data.light_mask |= color.GetFullLightMask() << (8*j);
uid_data.light_mask |= alpha.GetFullLightMask() << (8*(j+2));
if(color.lightparams == alpha.lightparams)
{
// identical light parameters: lights enabled for both are emitted once using the color
// channel with coloralpha 3, which GenerateLightShader maps to the "xyzw" swizzle
mask = color.GetFullLightMask() & alpha.GetFullLightMask();
if(mask)
{
for (int i = 0; i < 8; ++i)
{
if (mask & (1<<i))
{
GenerateLightShader<T>(object, uid_data, i, j, lightsName, 3);
}
}
}
}
// no shared lights
// (every light not covered by `mask` is emitted separately: color with coloralpha 1,
// alpha with coloralpha 2 and lit-channel index j+2)
for (int i = 0; i < 8; ++i)
{
if (!(mask&(1<<i)) && (color.GetFullLightMask() & (1<<i)))
GenerateLightShader<T>(object, uid_data, i, j, lightsName, 1);
if (!(mask&(1<<i)) && (alpha.GetFullLightMask() & (1<<i)))
GenerateLightShader<T>(object, uid_data, i, j+2, lightsName, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
{
// lights are disabled on one channel so process only the active ones
const LitChannel& workingchannel = color.enablelighting ? color : alpha;
// lit-channel index of the active one: j for color, j+2 for alpha
const int lit_index = color.enablelighting ? j : (j+2);
int coloralpha = color.enablelighting ? 1 : 2;
uid_data.light_mask |= workingchannel.GetFullLightMask() << (8*lit_index);
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1<<i))
GenerateLightShader<T>(object, uid_data, i, lit_index, lightsName, coloralpha);
}
}
// final channel output: material color times the clamped light accumulator
object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j);
object.Write("}\n");
}
}
#endif // _LIGHTINGSHADERGEN_H_ #endif // _LIGHTINGSHADERGEN_H_

File diff suppressed because it is too large Load Diff

View File

@ -6,6 +6,8 @@
#define GCOGL_PIXELSHADER_H #define GCOGL_PIXELSHADER_H
#include "VideoCommon.h" #include "VideoCommon.h"
#include "ShaderGenCommon.h"
#include "BPMemory.h"
#define I_COLORS "color" #define I_COLORS "color"
#define I_KCOLORS "k" #define I_KCOLORS "k"
@ -31,8 +33,14 @@
#define C_PLIGHTS (C_FOG + 3) #define C_PLIGHTS (C_FOG + 3)
#define C_PMATERIALS (C_PLIGHTS + 40) #define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4) #define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES 70
#define PIXELSHADERUID_MAX_VALUES_SAFE 116 // Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE
{
DSTALPHA_NONE, // Render normally, without destination alpha
DSTALPHA_ALPHA_PASS, // Render normally first, then render again for alpha
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
};
// Annoying sure, can be removed once we get up to GLSL ~1.3 // Annoying sure, can be removed once we get up to GLSL ~1.3
const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 }, const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },
@ -47,90 +55,135 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },
{I_PMATERIALS, C_PMATERIALS, 4 }, {I_PMATERIALS, C_PMATERIALS, 4 },
}; };
// DO NOT make anything in this class virtual. // TODO: Should compact packing be enabled?
template<bool safe> //#pragma pack(4)
class _PIXELSHADERUID struct pixel_shader_uid_data
{ {
public: // TODO: Optimize field order for easy access!
u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES];
int num_values;
_PIXELSHADERUID() u32 components;
u32 dstAlphaMode : 2;
u32 Pretest : 2;
u32 genMode_numtexgens : 4;
u32 genMode_numtevstages : 4;
u32 genMode_numindstages : 3;
u32 nIndirectStagesUsed : 8;
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;
u32 tevindref_bc0 : 3;
u32 tevindref_bi1 : 3;
u32 tevindref_bc1 : 3;
u32 tevindref_bi2 : 3;
u32 tevindref_bc3 : 3;
u32 tevindref_bi4 : 3;
u32 tevindref_bc4 : 3;
inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap)
{ {
if (index == 0) { tevindref_bc0 = texcoord; tevindref_bi0 = texmap; }
else if (index == 1) { tevindref_bc1 = texcoord; tevindref_bi1 = texmap; }
else if (index == 2) { tevindref_bc3 = texcoord; tevindref_bi2 = texmap; }
else if (index == 3) { tevindref_bc4 = texcoord; tevindref_bi4 = texmap; }
}
inline void SetTevindrefTexmap(int index, u32 texmap)
{
if (index == 0) { tevindref_bi0 = texmap; }
else if (index == 1) { tevindref_bi1 = texmap; }
else if (index == 2) { tevindref_bi2 = texmap; }
else if (index == 3) { tevindref_bi4 = texmap; }
} }
_PIXELSHADERUID(const _PIXELSHADERUID& r) u64 tevorders_n_texcoord : 48; // 16 x 3 bits
{
num_values = r.num_values;
if (safe) u64 tevind_n_sw : 48; // 16 x 3 bits
memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE); u64 tevind_n_tw : 48; // 16 x 3 bits
else u32 tevind_n_fb_addprev : 16; // 16 x 1 bit
memcpy(values, r.values, r.GetNumValues() * sizeof(values[0])); u32 tevind_n_bs : 32; // 16 x 2 bits
u32 tevind_n_fmt : 32; // 16 x 2 bits
u32 tevind_n_bt : 32; // 16 x 2 bits
u64 tevind_n_bias : 48; // 16 x 3 bits
u64 tevind_n_mid : 64; // 16 x 4 bits
// NOTE: These assume that the affected bits are zero before calling
void Set_tevind_sw(int index, u64 val)
{
tevind_n_sw |= val << (3*index);
}
void Set_tevind_tw(int index, u64 val)
{
tevind_n_tw |= val << (3*index);
}
void Set_tevind_bias(int index, u64 val)
{
tevind_n_bias |= val << (3*index);
}
void Set_tevind_mid(int index, u64 val)
{
tevind_n_mid |= val << (4*index);
} }
int GetNumValues() const u32 tevksel_n_swap1 : 16; // 8x2 bits
{ u32 tevksel_n_swap2 : 16; // 8x2 bits
if (safe) u64 tevksel_n_kcsel0 : 40; // 8x5 bits
return (sizeof(values) / sizeof(u32)); u64 tevksel_n_kasel0 : 40; // 8x5 bits
else u64 tevksel_n_kcsel1 : 40; // 8x5 bits
return num_values; u64 tevksel_n_kasel1 : 40; // 8x5 bits
} void set_tevksel_kcsel(int index, int i, u64 value) { if (i) tevksel_n_kcsel1 |= value << (5*index); else tevksel_n_kcsel0 |= value << (5*index); }
void set_tevksel_kasel(int index, int i, u64 value) { if( i) tevksel_n_kasel1 |= value << (5*index); else tevksel_n_kasel0 |= value << (5*index); }
bool operator <(const _PIXELSHADERUID& _Right) const u64 cc_n_d : 64; // 16x4 bits
{ u64 cc_n_c : 64; // 16x4 bits
int N = GetNumValues(); u64 cc_n_b : 64; // 16x4 bits
u64 cc_n_a : 64; // 16x4 bits
u32 cc_n_bias : 32; // 16x2 bits
u32 cc_n_op : 16; // 16x1 bit
u32 cc_n_clamp : 16; // 16x1 bit
u32 cc_n_shift : 32; // 16x2 bits
u32 cc_n_dest : 32; // 16x2 bits
if (N < _Right.GetNumValues()) u32 ac_n_rswap : 32; // 16x2 bits
return true; u32 ac_n_tswap : 32; // 16x2 bits
else if (N > _Right.GetNumValues()) u64 ac_n_d : 48; // 16x3 bits
return false; u64 ac_n_c : 48; // 16x3 bits
u64 ac_n_b : 48; // 16x3 bits
u64 ac_n_a : 48; // 16x3 bits
u32 ac_n_bias : 32; // 16x2 bits
u32 ac_n_op : 16; // 16x1 bit
u32 ac_n_clamp : 16; // 16x1 bit
u32 ac_n_shift : 32; // 16x2 bits
u32 ac_n_dest : 32; // 16x2 bits
for (int i = 0; i < N; ++i) u32 alpha_test_comp0 : 3;
{ u32 alpha_test_comp1 : 3;
if (values[i] < _Right.values[i]) u32 alpha_test_logic : 2;
return true; u32 alpha_test_use_zcomploc_hack : 1;
else if (values[i] > _Right.values[i])
return false;
}
return false; u32 fog_proj : 1;
} u32 fog_fsel : 3;
u32 fog_RangeBaseEnabled : 1;
bool operator ==(const _PIXELSHADERUID& _Right) const u32 ztex_op : 2;
{
int N = GetNumValues();
if (N != _Right.GetNumValues()) u32 fast_depth_calc : 1;
return false; u32 per_pixel_depth : 1;
u32 bHasIndStage : 16;
for (int i = 0; i < N; ++i) u32 xfregs_numTexGen_numTexGens : 4;
{
if (values[i] != _Right.values[i])
return false;
}
return true; // TODO: I think we're fine without an enablePixelLighting field, should probably double check, though..
} LightingUidData lighting;
}; };
//#pragma pack()
typedef _PIXELSHADERUID<false> PIXELSHADERUID; typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
typedef _PIXELSHADERUID<true> PIXELSHADERUIDSAFE; typedef ShaderCode PixelShaderCode; // TODO: Obsolete
typedef ShaderConstantProfile PixelShaderConstantProfile; // TODO: Obsolete
// Different ways to achieve rendering with destination alpha void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
enum DSTALPHA_MODE void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
{ void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
DSTALPHA_NONE, // Render normally, without destination alpha
DSTALPHA_ALPHA_PASS, // Render normally first, then render again for alpha
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
};
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components);
#endif // GCOGL_PIXELSHADER_H #endif // GCOGL_PIXELSHADER_H

View File

@ -29,19 +29,45 @@ static u32 lastTexDims[8]; // width | height << 16 | wrap_s << 28 | wrap_t << 30
static u32 lastZBias; static u32 lastZBias;
static int nMaterialsChanged; static int nMaterialsChanged;
static float s_constant_cache[C_PENVCONST_END*4];
inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{ {
// if (s_constant_cache[const_number*4] == f1 && s_constant_cache[const_number*4+1] == f2 &&
// s_constant_cache[const_number*4+2] == f3 && s_constant_cache[const_number*4+3] == f4)
// return;
g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4); g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4);
s_constant_cache[const_number*4] = f1;
s_constant_cache[const_number*4+1] = f2;
s_constant_cache[const_number*4+2] = f3;
s_constant_cache[const_number*4+3] = f4;
} }
inline void SetPSConstant4fv(unsigned int const_number, const float *f) inline void SetPSConstant4fv(unsigned int const_number, const float *f)
{ {
// if (s_constant_cache[const_number*4] == f[0] && s_constant_cache[const_number*4+1] == f[1] &&
// s_constant_cache[const_number*4+2] == f[2] && s_constant_cache[const_number*4+3] == f[3])
// return;
g_renderer->SetPSConstant4fv(const_number, f); g_renderer->SetPSConstant4fv(const_number, f);
s_constant_cache[const_number*4] = f[0];
s_constant_cache[const_number*4+1] = f[1];
s_constant_cache[const_number*4+2] = f[2];
s_constant_cache[const_number*4+3] = f[3];
} }
inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f) inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
{ {
// for (unsigned int i = 0; i < 4*count; ++i)
// if (s_constant_cache[const_number*4+i] != f[i])
// break;
// else if (i == 4*count-1)
// return;
g_renderer->SetMultiPSConstant4fv(const_number, count, f); g_renderer->SetMultiPSConstant4fv(const_number, count, f);
for (unsigned int i = 0; i < 4*count; ++i)
s_constant_cache[const_number*4+i] = f[i];
} }
void PixelShaderManager::Init() void PixelShaderManager::Init()
@ -50,6 +76,7 @@ void PixelShaderManager::Init()
memset(lastTexDims, 0, sizeof(lastTexDims)); memset(lastTexDims, 0, sizeof(lastTexDims));
lastZBias = 0; lastZBias = 0;
memset(lastRGBAfull, 0, sizeof(lastRGBAfull)); memset(lastRGBAfull, 0, sizeof(lastRGBAfull));
memset(s_constant_cache, 0, sizeof(s_constant_cache)); // TODO: Should reflect that on the GPU side....
Dirty(); Dirty();
} }
@ -70,11 +97,24 @@ void PixelShaderManager::Shutdown()
} }
void PixelShaderManager::SetConstants() void PixelShaderManager::SetConstants(u32 components)
{ {
if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO) if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO)
Dirty(); Dirty();
// TODO: Probably broken in the non-UBO path
PixelShaderConstantProfile constant_profile(C_PENVCONST_END);
/// TODO: dst alpha/api/components type parameter...
GetPixelShaderConstantProfile(constant_profile, DSTALPHA_DUAL_SOURCE_BLEND, API_OPENGL, components);
static int saved_updates = 0;
static int necessary_updates = 0;
// TODO: Remove this!
#define IncStuff() { \
saved_updates++; \
/*printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates);*/ }
for (int i = 0; i < 2; ++i) for (int i = 0; i < 2; ++i)
{ {
if (s_nColorsChanged[i]) if (s_nColorsChanged[i])
@ -82,10 +122,13 @@ void PixelShaderManager::SetConstants()
int baseind = i ? C_KCOLORS : C_COLORS; int baseind = i ? C_KCOLORS : C_COLORS;
for (int j = 0; j < 4; ++j) for (int j = 0; j < 4; ++j)
{ {
if (s_nColorsChanged[i] & (1 << j)) if ((s_nColorsChanged[i] & (1 << j)) && constant_profile.ConstantIsUsed(baseind+j))
{
SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]); SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]);
s_nColorsChanged[i] &= ~(1<<j);
++necessary_updates;
} else if ((s_nColorsChanged[i] & (1 << j))) IncStuff();
} }
s_nColorsChanged[i] = 0;
} }
} }
@ -93,19 +136,23 @@ void PixelShaderManager::SetConstants()
{ {
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ {
if (s_nTexDimsChanged & (1<<i)) if ((s_nTexDimsChanged & (1<<i)) && constant_profile.ConstantIsUsed(C_TEXDIMS+i))
{
++necessary_updates;
SetPSTextureDims(i); SetPSTextureDims(i);
s_nTexDimsChanged &= ~(1<<i);
}else if (s_nTexDimsChanged & (1<<i)) IncStuff();
} }
s_nTexDimsChanged = 0;
} }
if (s_bAlphaChanged) if (s_bAlphaChanged && constant_profile.ConstantIsUsed(C_ALPHA))
{ {
++necessary_updates;
SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f);
s_bAlphaChanged = false; s_bAlphaChanged = false;
} } else if (s_bAlphaChanged) IncStuff();
if (s_bZTextureTypeChanged) if (s_bZTextureTypeChanged && constant_profile.ConstantIsUsed(C_ZBIAS))
{ {
float ftemp[4]; float ftemp[4];
switch (bpmem.ztex2.type) switch (bpmem.ztex2.type)
@ -123,11 +170,12 @@ void PixelShaderManager::SetConstants()
ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0; ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0;
break; break;
} }
++necessary_updates;
SetPSConstant4fv(C_ZBIAS, ftemp); SetPSConstant4fv(C_ZBIAS, ftemp);
s_bZTextureTypeChanged = false; s_bZTextureTypeChanged = false;
} } else if (s_bZTextureTypeChanged) IncStuff();
if (s_bZBiasChanged || s_bDepthRangeChanged) if ((s_bZBiasChanged || s_bDepthRangeChanged) && constant_profile.ConstantIsUsed(C_ZBIAS+1))
{ {
// reversed gxsetviewport(xorig, yorig, width, height, nearz, farz) // reversed gxsetviewport(xorig, yorig, width, height, nearz, farz)
// [0] = width/2 // [0] = width/2
@ -138,9 +186,10 @@ void PixelShaderManager::SetConstants()
// [5] = 16777215 * farz // [5] = 16777215 * farz
//ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias);
++necessary_updates;
SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f); SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f);
s_bZBiasChanged = s_bDepthRangeChanged = false; s_bZBiasChanged = s_bDepthRangeChanged = false;
} }else if ((s_bZBiasChanged || s_bDepthRangeChanged)) IncStuff();
// indirect incoming texture scales // indirect incoming texture scales
if (s_nIndTexScaleChanged) if (s_nIndTexScaleChanged)
@ -148,7 +197,7 @@ void PixelShaderManager::SetConstants()
// set as two sets of vec4s, each containing S and T of two ind stages. // set as two sets of vec4s, each containing S and T of two ind stages.
float f[8]; float f[8];
if (s_nIndTexScaleChanged & 0x03) if ((s_nIndTexScaleChanged & 0x03) && constant_profile.ConstantIsUsed(C_INDTEXSCALE))
{ {
for (u32 i = 0; i < 2; ++i) for (u32 i = 0; i < 2; ++i)
{ {
@ -156,10 +205,13 @@ void PixelShaderManager::SetConstants()
f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1); f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
} }
++necessary_updates;
SetPSConstant4fv(C_INDTEXSCALE, f); SetPSConstant4fv(C_INDTEXSCALE, f);
s_nIndTexScaleChanged &= ~0x03;
} }
else if ((s_nIndTexScaleChanged & 0x03)) IncStuff();
if (s_nIndTexScaleChanged & 0x0c) if ((s_nIndTexScaleChanged & 0x0c) && constant_profile.ConstantIsUsed(C_INDTEXSCALE+1))
{ {
for (u32 i = 2; i < 4; ++i) for (u32 i = 2; i < 4; ++i)
{ {
@ -167,17 +219,18 @@ void PixelShaderManager::SetConstants()
f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1); f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]); PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
} }
++necessary_updates;
SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]); SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]);
s_nIndTexScaleChanged &= ~0x0c;
} }
else if ((s_nIndTexScaleChanged & 0x0c)) IncStuff();
s_nIndTexScaleChanged = 0;
} }
if (s_nIndTexMtxChanged) if (s_nIndTexMtxChanged)
{ {
for (int i = 0; i < 3; ++i) for (int i = 0; i < 3; ++i)
{ {
if (s_nIndTexMtxChanged & (1 << i)) if ((s_nIndTexMtxChanged & (1 << i)) && (constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i) || constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i+1)))
{ {
int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) | int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) |
((u32)bpmem.indmtx[i].col1.s1 << 2) | ((u32)bpmem.indmtx[i].col1.s1 << 2) |
@ -187,6 +240,8 @@ void PixelShaderManager::SetConstants()
// xyz - static matrix // xyz - static matrix
// TODO w - dynamic matrix scale / 256...... somehow / 4 works better // TODO w - dynamic matrix scale / 256...... somehow / 4 works better
// rev 2972 - now using / 256.... verify that this works // rev 2972 - now using / 256.... verify that this works
++necessary_updates;
++necessary_updates;
SetPSConstant4f(C_INDTEXMTX + 2 * i, SetPSConstant4f(C_INDTEXMTX + 2 * i,
bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col0.ma * fscale,
bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col1.mc * fscale,
@ -202,19 +257,22 @@ void PixelShaderManager::SetConstants()
i, 1024.0f*fscale, i, 1024.0f*fscale,
bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale, bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale,
bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale); bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale);
s_nIndTexMtxChanged &= ~(1 << i);
}else if ((s_nIndTexMtxChanged & (1 << i))) {IncStuff();IncStuff();}
} }
} }
s_nIndTexMtxChanged = 0;
}
if (s_bFogColorChanged) if (s_bFogColorChanged && constant_profile.ConstantIsUsed(C_FOG))
{ {
++necessary_updates;
SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0); SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0);
s_bFogColorChanged = false; s_bFogColorChanged = false;
} }else if (s_bFogColorChanged) IncStuff();
if (s_bFogParamChanged) if (s_bFogParamChanged && constant_profile.ConstantIsUsed(C_FOG+1))
{ {
++necessary_updates;
if(!g_ActiveConfig.bDisableFog) if(!g_ActiveConfig.bDisableFog)
{ {
//downscale magnitude to 0.24 bits //downscale magnitude to 0.24 bits
@ -227,10 +285,11 @@ void PixelShaderManager::SetConstants()
SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0); SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0);
s_bFogParamChanged = false; s_bFogParamChanged = false;
} }else if ( s_bFogParamChanged) IncStuff();
if (s_bFogRangeAdjustChanged) if (s_bFogRangeAdjustChanged && constant_profile.ConstantIsUsed(C_FOG+2))
{ {
++necessary_updates;
if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1) if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1)
{ {
//bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342; //bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342;
@ -251,8 +310,9 @@ void PixelShaderManager::SetConstants()
} }
s_bFogRangeAdjustChanged = false; s_bFogRangeAdjustChanged = false;
} }else if ( s_bFogRangeAdjustChanged) IncStuff();
// TODO: use constant profile here!
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f
{ {
if (nLightsChanged[0] >= 0) if (nLightsChanged[0] >= 0)
@ -349,8 +409,10 @@ void PixelShaderManager::SetPSTextureDims(int texid)
SetPSConstant4fv(C_TEXDIMS + texid, fdims); SetPSConstant4fv(C_TEXDIMS + texid, fdims);
} }
// This one is high in profiles (0.5%). TODO: Move conversion out, only store the raw color value // This one is high in profiles (0.5%).
// TODO: Move conversion out, only store the raw color value
// and update it when the shader constant is set, only. // and update it when the shader constant is set, only.
// TODO: Conversion should be checked in the context of tev_fixes..
void PixelShaderManager::SetColorChanged(int type, int num, bool high) void PixelShaderManager::SetColorChanged(int type, int num, bool high)
{ {
float *pf = &lastRGBAfull[type][num][0]; float *pf = &lastRGBAfull[type][num][0];

View File

@ -21,7 +21,7 @@ public:
static void Shutdown(); static void Shutdown();
static void DoState(PointerWrap &p); static void DoState(PointerWrap &p);
static void SetConstants(); // sets pixel shader constants static void SetConstants(u32 components); // sets pixel shader constants
// constant management, should be called after memory is committed // constant management, should be called after memory is committed
static void SetColorChanged(int type, int index, bool high); static void SetColorChanged(int type, int index, bool high);

View File

@ -0,0 +1,286 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _SHADERGENCOMMON_H
#define _SHADERGENCOMMON_H
#include <stdio.h>
#include <stdarg.h>
#include <string>
#include <vector>
#include <algorithm>
#include "CommonTypes.h"
#include "VideoCommon.h"
/**
 * Common interface for classes that need to go through the shader generation path (GenerateVertexShader, GeneratePixelShader)
 * In particular, this includes the shader code generator (ShaderCode).
 * A different class (ShaderUid) can be used to uniquely identify each ShaderCode object.
 * More interesting things can be done with this, e.g. ShaderConstantProfile checks what shader constants are being used. This can be used to optimize buffer management.
 * Each of the ShaderCode, ShaderUid and ShaderConstantProfile child classes only implements the subset of ShaderGeneratorInterface methods that is required for its specific task.
 *
 * @note None of the methods are virtual: child classes simply redefine (hide) the ones they need, and the
 *       defaults below are no-ops. The methods are therefore only dispatched correctly when called through
 *       the concrete child type (e.g. from code templated on that type), not through a ShaderGeneratorInterface
 *       reference or pointer.
 */
class ShaderGeneratorInterface
{
public:
/*
 * Called whenever the shader generator would write a piece of shader code.
 * Takes a printf-style format string followed by its arguments.
 * The default implementation discards everything.
 * @note In the ShaderCode implementation, this does indeed write the formatted string to an internal buffer. However, implementations are free to do whatever they like with the parameters.
 */
void Write(const char* fmt, ...) {}
/*
 * Returns a read pointer to the internal buffer. The default implementation has no buffer and returns NULL.
 * @note When implementing this method in a child class, you likely want to return the argument of the last SetBuffer call here
 * @note SetBuffer() should be called before using GetBuffer().
 */
const char* GetBuffer() { return NULL; }
/*
 * Can be used to give the object a place to write to. This should be called before using Write().
 * The default implementation ignores the buffer.
 * @param buffer pointer to a char buffer that the object can write to
 */
void SetBuffer(char* buffer) { }
/*
 * Tells the object that a specific constant range is being used by the shader.
 * The range is inclusive on both ends, i.e. last_index itself is part of it.
 * The default implementation ignores the information.
 */
inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {}
/*
 * Returns a reference to an internally stored object of the uid_data type.
 * The default implementation stores no such object and returns a reference formed from a NULL pointer.
 * @warning since most child classes use the default implementation you shouldn't access this directly without adding precautions against NULL access (e.g. via adding a dummy structure, cf. the vertex/pixel shader generators)
 * NOTE(review): forming a reference from a NULL pointer is undefined behavior in C++, so a compiler may assume
 * that the address of the returned reference is never NULL and drop such precautions - confirm on all toolchains.
 */
template<class uid_data>
uid_data& GetUidData() { return *(uid_data*)NULL; }
};
/**
 * Shader UID class used to uniquely identify the ShaderCode output written in the shader generator.
 * uid_data can be any struct of parameters that uniquely identify each shader code output.
 * Unless performance is not an issue, uid_data should be tightly packed to reduce memory footprint.
 * Shader generators will write to specific uid_data fields; ShaderUid methods will only read raw u32 values from a union.
 *
 * Comparisons only look at the first sizeof(uid_data) / sizeof(u32) whole u32 words of the data.
 */
template<class uid_data>
class ShaderUid : public ShaderGeneratorInterface
{
public:
	// Starts out with every raw word set to zero.
	ShaderUid()
	{
		// TODO: Move to Shadergen => can be optimized out
		memset(values, 0, sizeof(values));
	}

	// Two uids are equal when all of their raw words are equal.
	bool operator == (const ShaderUid& obj) const
	{
		return 0 == memcmp(values, obj.values, sizeof(values));
	}

	bool operator != (const ShaderUid& obj) const
	{
		return !(*this == obj);
	}

	// determines the storage order inside STL containers:
	// word-by-word lexicographical ordering of the raw values
	bool operator < (const ShaderUid& obj) const
	{
		// TODO: Store last frame used and order by that? makes much more sense anyway...
		return std::lexicographical_compare(values, values + NUM_VALUES,
		                                    obj.values, obj.values + NUM_VALUES);
	}

	// Mutable access for the shader generators; T is expected to be uid_data.
	template<class T>
	inline T& GetUidData() { return data; }

	// Read-only access to the structured uid data.
	const uid_data& GetUidData() const { return data; }

	// Size of the raw value array in bytes (not in u32 words).
	size_t GetUidDataSize() const { return sizeof(values); }

private:
	// Number of whole u32 words covered by uid_data.
	enum { NUM_VALUES = sizeof(uid_data) / sizeof(u32) };

	// The same storage viewed either as structured fields or as raw words.
	union
	{
		uid_data data;
		u32 values[NUM_VALUES];
	};
};
/**
 * Shader generator backend that collects the generated shader source text.
 * The text is written printf-style into a caller-provided buffer (see SetBuffer()).
 * @warning Write() has no knowledge of the buffer's capacity; the caller must provide a buffer
 *          that is large enough for the complete shader.
 */
class ShaderCode : public ShaderGeneratorInterface
{
public:
	ShaderCode() : buf(NULL), write_ptr(NULL)
	{
	}

	/*
	 * Appends a printf-style formatted string at the current write position.
	 * Does nothing if no buffer has been set via SetBuffer() yet.
	 */
	void Write(const char* fmt, ...)
	{
		// Without a buffer there is nowhere to write to; formatting into NULL would crash.
		if (write_ptr == NULL)
			return;

		va_list arglist;
		va_start(arglist, fmt);
		const int num_written = vsprintf(write_ptr, fmt, arglist);
		va_end(arglist);

		// vsprintf returns a negative value on failure; never move the write position backwards.
		if (num_written > 0)
			write_ptr += num_written;
	}

	// Returns the start of the buffer passed to the last SetBuffer() call (NULL if there was none).
	const char* GetBuffer() { return buf; }

	// Sets the output buffer and resets the write position to its beginning.
	void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; }

private:
	const char* buf;  // start of the output buffer
	char* write_ptr;  // position at which the next Write() call will write
};
/**
 * Generates a shader constant profile which can be used to query which constants are used in a shader.
 * The shader generator reports used constant ranges via SetConstantsUsed().
 */
class ShaderConstantProfile : public ShaderGeneratorInterface
{
public:
	/*
	 * @param num_constants number of constant slots to track; negative values are treated as zero
	 */
	ShaderConstantProfile(int num_constants)
	{
		// A negative count would otherwise be converted to a huge unsigned size.
		constant_usage.resize(num_constants > 0 ? num_constants : 0);
	}

	/*
	 * Marks the constants first_index..last_index (both inclusive) as used.
	 * Indices beyond the number of tracked constants are ignored.
	 */
	inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index)
	{
		// Bound the loop by the container size: this avoids out-of-range writes and
		// the wrap-around of last_index+1 when last_index is the maximum unsigned value.
		const size_t num_tracked = constant_usage.size();
		for (size_t i = first_index; i < num_tracked && i <= last_index; ++i)
			constant_usage[i] = true;
	}

	/*
	 * Returns whether the constant with the given index is used by the shader.
	 * Currently always reports true because the profile is not ready for usage yet.
	 */
	inline bool ConstantIsUsed(unsigned int index)
	{
		// TODO: Not ready for usage yet
		return true;
//		return constant_usage[index];
	}

private:
	std::vector<bool> constant_usage; // TODO: Is vector<bool> appropriate here?
};
/*
 * Appends a " : register(<prefix><num>)" suffix to the current declaration.
 * Nothing is written when ApiType is API_OPENGL.
 */
template<class T>
static void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const u32 num)
{
	if (ApiType != API_OPENGL)
	{
		object.Write(" : register(%s%d)", prefix, num);
	}
}
/*
 * Writes the "uniform " qualifier in front of a declaration.
 * Nothing is written when using_ubos is set. ApiType is currently not looked at.
 */
template<class T>
static void WriteLocation(T& object, API_TYPE ApiType, bool using_ubos)
{
	if (!using_ubos)
	{
		object.Write("uniform ");
	}
}
/*
 * Writes one complete uniform declaration, terminated by ";\n":
 * the optional location qualifier, then "<type> <name> ", then the optional register suffix.
 * @param num  register index passed on to WriteRegister (with the register prefix "c")
 * @param type type of the uniform as it should appear in the shader source
 * @param name name of the uniform (may include an array suffix)
 */
template<class T>
static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const u32 num, const char* type, const char* name)
{
	const char* const register_prefix = "c";

	WriteLocation(object, api_type, using_ubos);     // qualifier (if any)
	object.Write("%s %s ", type, name);              // type and name
	WriteRegister(object, api_type, register_prefix, num); // register suffix (if any)
	object.Write(";\n");                             // end of declaration
}
// Pack with 4-byte alignment so the uid data contains as little padding as possible.
#pragma pack(4)
/**
 * Common uid data used for shader generators that use lighting calculations.
 * Expected to be stored as a member called "lighting".
 *
 * All members are bit-fields of type u32. The first five fields add up to 28 bits and
 * light_mask occupies a full 32 bits.
 * NOTE(review): the "4x..." remarks presumably refer to the four lighting channels
 * (2 color + 2 alpha), each contributing the stated number of bits - confirm against the lighting shader generator.
 * NOTE(review): the exact bit-field layout is implementation-defined - confirm it is identical on all supported compilers.
 */
struct LightingUidData
{
u32 matsource : 4; // 4x1 bit
u32 enablelighting : 4; // 4x1 bit
u32 ambsource : 4; // 4x1 bit
u32 diffusefunc : 8; // 4x2 bits
u32 attnfunc : 8; // 4x2 bits
u32 light_mask : 32; // 4x8 bits
};
// Restore the default packing.
#pragma pack()
/**
* Checks if there has been
*/
template<class UidT, class CodeT>
class UidChecker
{
public:
void Invalidate()
{
m_shaders.clear();
m_uids.clear();
}
void AddToIndexAndCheck(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix)
{
bool uid_is_indexed = std::find(m_uids.begin(), m_uids.end(), new_uid) != m_uids.end();
if (!uid_is_indexed)
{
m_uids.push_back(new_uid);
m_shaders[new_uid] = new_code.GetBuffer();
}
else
{
// uid is already in the index => check if there's a shader with the same uid but different code
auto& old_code = m_shaders[new_uid];
if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0)
{
static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
dump_prefix,
++num_failures);
// TODO: Should also dump uids
std::ofstream file;
OpenFStream(file, szTemp, std::ios_base::out);
file << "Old shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code.GetBuffer();
file << "\n\nShader uid:\n";
for (unsigned int i = 0; i < new_uid.GetUidDataSize(); ++i)
{
u32 value = ((u32*)&new_uid.GetUidData())[i];
if ((i % 4) == 0)
{
unsigned int last_value = (i+3 < new_uid.GetUidDataSize()-1) ? i+3 : new_uid.GetUidDataSize();
file << std::setfill(' ') << std::dec;
file << "Values " << std::setw(2) << i << " - " << last_value << ": ";
}
file << std::setw(8) << std::setfill('0') << std::hex << value << std::setw(1);
if ((i % 4) < 3)
file << ' ';
else
file << std::endl;
}
file.close();
ERROR_LOG(VIDEO, "%s shader uid mismatch! See %s for details", shader_type, szTemp);
}
}
}
private:
std::map<UidT,std::string> m_shaders;
std::vector<UidT> m_uids;
};
#endif // _SHADERGENCOMMON_H

View File

@ -17,235 +17,134 @@
#include "VertexShaderGen.h" #include "VertexShaderGen.h"
#include "VideoConfig.h" #include "VideoConfig.h"
// Mash together all the inputs that contribute to the code of a generated vertex shader into static char text[16768];
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components) template<class T>
static void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1)
{ {
memset(uid->values, 0, sizeof(uid->values)); object.Write(" %s %s", type, name);
uid->values[0] = components | if (var_index != -1)
(xfregs.numTexGen.numTexGens << 23) | object.Write("%d", var_index);
(xfregs.numChan.numColorChans << 27) |
(xfregs.dualTexTrans.enabled << 29);
// TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here? if (api_type == API_OPENGL)
GetLightingShaderId(&uid->values[1]); object.Write(";\n");
else
uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
u32 *pcurvalue = &uid->values[3];
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
{ {
TexMtxInfo tinfo = xfregs.texMtxInfo[i]; if (semantic_index != -1)
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) object.Write(" : %s%d;\n", semantic, semantic_index);
tinfo.hex &= 0x7ff; else
if (tinfo.texgentype != XF_TEXGEN_REGULAR) object.Write(" : %s;\n", semantic);
tinfo.projection = 0;
u32 val = ((tinfo.hex >> 1) & 0x1ffff);
if (xfregs.dualTexTrans.enabled && tinfo.texgentype == XF_TEXGEN_REGULAR)
{
// rewrite normalization and post index
val |= ((u32)xfregs.postMtxInfo[i].index << 17) | ((u32)xfregs.postMtxInfo[i].normalize << 23);
}
switch (i & 3)
{
case 0: pcurvalue[0] |= val; break;
case 1: pcurvalue[0] |= val << 24; pcurvalue[1] = val >> 8; ++pcurvalue; break;
case 2: pcurvalue[0] |= val << 16; pcurvalue[1] = val >> 16; ++pcurvalue; break;
case 3: pcurvalue[0] |= val << 8; ++pcurvalue; break;
}
} }
} }
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components) template<class T>
static void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type)
{ {
// Just store all used registers here without caring whether we need all bits or less. object.Write("struct VS_OUTPUT {\n");
memset(uid->values, 0, sizeof(uid->values)); DefineVSOutputStructMember(object, api_type, "float4", "pos", -1, "POSITION");
u32* ptr = uid->values; DefineVSOutputStructMember(object, api_type, "float4", "colors_", 0, "COLOR", 0);
*ptr++ = components; DefineVSOutputStructMember(object, api_type, "float4", "colors_", 1, "COLOR", 1);
*ptr++ = xfregs.numTexGen.hex;
*ptr++ = xfregs.numChan.hex;
*ptr++ = xfregs.dualTexTrans.hex;
for (int i = 0; i < 2; ++i)
{
*ptr++ = xfregs.color[i].hex;
*ptr++ = xfregs.alpha[i].hex;
}
*ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
for (unsigned int i = 0; i < 8; ++i)
{
*ptr++ = xfregs.texMtxInfo[i].hex;
*ptr++ = xfregs.postMtxInfo[i].hex;
}
_assert_((ptr - uid->values) == uid->GetNumValues());
}
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components)
{
if (!g_ActiveConfig.bEnableShaderDebugging)
return;
VERTEXSHADERUIDSAFE new_id;
GetSafeVertexShaderId(&new_id, components);
if (!(old_id == new_id))
{
std::string new_code(GenerateVertexShaderCode(components, api));
if (old_code != new_code)
{
_assert_(old_id.GetNumValues() == new_id.GetNumValues());
char msg[8192];
char* ptr = msg;
ptr += sprintf(ptr, "Vertex shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
const int N = new_id.GetNumValues();
for (int i = 0; i < N/2; ++i)
ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
new_id.values[2*i], new_id.values[2*i+1]);
if (N % 2)
ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);
static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file;
OpenFStream(file, szTemp, std::ios_base::out);
file << msg;
file << "\n\nOld shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code;
file.close();
PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp);
}
}
}
static char text[16384];
#define WRITE p+=sprintf
char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE ApiType)
{
// "centroid" attribute is only supported by D3D11
const char* optCentroid = (ApiType == API_D3D11 ? "centroid" : "");
// GLSL makes this ugly
// TODO: Make pretty
WRITE(p, "struct VS_OUTPUT {\n");
WRITE(p, " %s float4 pos %s POSITION;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":");
WRITE(p, " %s float4 colors_0 %s COLOR0;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":");
WRITE(p, " %s float4 colors_1 %s COLOR1;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":");
if (xfregs.numTexGen.numTexGens < 7) if (xfregs.numTexGen.numTexGens < 7)
{ {
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, " %s float3 tex%d %s TEXCOORD%d;\n", optCentroid, i, ApiType == API_OPENGL ? ";//" : ":", i); DefineVSOutputStructMember(object, api_type, "float3", "tex", i, "TEXCOORD", i);
WRITE(p, " %s float4 clipPos %s TEXCOORD%d;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":", xfregs.numTexGen.numTexGens); DefineVSOutputStructMember(object, api_type, "float4", "clipPos", -1, "TEXCOORD", xfregs.numTexGen.numTexGens);
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, " %s float4 Normal %s TEXCOORD%d;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":", xfregs.numTexGen.numTexGens + 1); DefineVSOutputStructMember(object, api_type, "float4", "Normal", -1, "TEXCOORD", xfregs.numTexGen.numTexGens + 1);
} }
else else
{ {
// clip position is in w of first 4 texcoords // Store clip position in the w component of first 4 texcoords
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) bool ppl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
{ int num_texcoords = ppl ? 8 : xfregs.numTexGen.numTexGens;
for (int i = 0; i < 8; ++i) for (int i = 0; i < num_texcoords; ++i)
WRITE(p, " %s float4 tex%d %s TEXCOORD%d;\n", optCentroid, i, ApiType == API_OPENGL? ";//" : ":", i); DefineVSOutputStructMember(object, api_type, (ppl || i < 4) ? "float4" : "float3", "tex", i, "TEXCOORD", i);
} }
else object.Write("};\n");
{
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, " %s float%d tex%d %s TEXCOORD%d;\n", optCentroid, i < 4 ? 4 : 3 , i, ApiType == API_OPENGL ? ";//" : ":", i);
}
}
WRITE(p, "};\n");
return p;
} }
extern const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num); template<class T>
extern const char *WriteLocation(API_TYPE ApiType); static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type)
const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
{ {
// Non-uid template parameters will write to the dummy data (=> gets optimized out)
vertex_shader_uid_data dummy_data;
vertex_shader_uid_data& uid_data = (&out.template GetUidData<vertex_shader_uid_data>() != NULL)
? out.template GetUidData<vertex_shader_uid_data>() : dummy_data;
out.SetBuffer(text);
#ifndef ANDROID #ifndef ANDROID
locale_t locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation locale_t locale;
locale_t old_locale = uselocale(locale); // Apply the locale for this thread locale_t old_locale;
if (out.GetBuffer() != NULL)
{
locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation
old_locale = uselocale(locale); // Apply the locale for this thread
}
#endif #endif
text[sizeof(text) - 1] = 0x7C; // canary text[sizeof(text) - 1] = 0x7C; // canary
_assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens); _assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans); _assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans);
bool is_d3d = (ApiType & API_D3D9 || ApiType == API_D3D11); bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11);
u32 lightMask = 0;
if (xfregs.numChan.numColorChans > 0)
lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask();
if (xfregs.numChan.numColorChans > 1)
lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask();
char *p = text;
WRITE(p, "//Vertex Shader: comp:%x, \n", components);
// uniforms // uniforms
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
WRITE(p, "layout(std140) uniform VSBlock {\n"); out.Write("layout(std140) uniform VSBlock {\n");
WRITE(p, "%sfloat4 " I_POSNORMALMATRIX"[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_POSNORMALMATRIX)); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]");
WRITE(p, "%sfloat4 " I_PROJECTION"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PROJECTION)); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PROJECTION, "float4", I_PROJECTION"[4]");
WRITE(p, "%sfloat4 " I_MATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_MATERIALS)); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_MATERIALS, "float4", I_MATERIALS"[4]");
WRITE(p, "%sfloat4 " I_LIGHTS"[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_LIGHTS)); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_LIGHTS, "float4", I_LIGHTS"[40]");
WRITE(p, "%sfloat4 " I_TEXMATRICES"[24] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_TEXMATRICES)); // also using tex matrices DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]");
WRITE(p, "%sfloat4 " I_TRANSFORMMATRICES"[64] %s;\n", WriteLocation(ApiType),WriteRegister(ApiType, "c", C_TRANSFORMMATRICES)); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]");
WRITE(p, "%sfloat4 " I_NORMALMATRICES"[32] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_NORMALMATRICES)); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]");
WRITE(p, "%sfloat4 " I_POSTTRANSFORMMATRICES"[64] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_POSTTRANSFORMMATRICES)); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]");
WRITE(p, "%sfloat4 " I_DEPTHPARAMS" %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_DEPTHPARAMS)); DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS);
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
WRITE(p, "};\n"); out.Write("};\n");
p = GenerateVSOutputStruct(p, components, ApiType); GenerateVSOutputStruct(out, components, api_type);
if(ApiType == API_OPENGL) uid_data.numTexGens = xfregs.numTexGen.numTexGens;
uid_data.components = components;
if(api_type == API_OPENGL)
{ {
WRITE(p, "ATTRIN float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); out.Write("ATTRIN float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
if (components & VB_HAS_POSMTXIDX) if (components & VB_HAS_POSMTXIDX)
WRITE(p, "ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); out.Write("ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
if (components & VB_HAS_NRM0) if (components & VB_HAS_NRM0)
WRITE(p, "ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); out.Write("ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
if (components & VB_HAS_NRM1) if (components & VB_HAS_NRM1)
WRITE(p, "ATTRIN float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); out.Write("ATTRIN float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
if (components & VB_HAS_NRM2) if (components & VB_HAS_NRM2)
WRITE(p, "ATTRIN float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); out.Write("ATTRIN float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
if (components & VB_HAS_COL0) if (components & VB_HAS_COL0)
WRITE(p, "ATTRIN float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); out.Write("ATTRIN float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
if (components & VB_HAS_COL1) if (components & VB_HAS_COL1)
WRITE(p, "ATTRIN float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); out.Write("ATTRIN float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ {
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i)); u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
if ((components & (VB_HAS_UV0<<i)) || hastexmtx) if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
WRITE(p, "ATTRIN float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i); out.Write("ATTRIN float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
} }
// Let's set up attributes // Let's set up attributes
if (xfregs.numTexGen.numTexGens < 7) if (xfregs.numTexGen.numTexGens < 7)
{ {
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
WRITE(p, "VARYOUT float3 uv%d_2;\n", i); out.Write("VARYOUT float3 uv%d_2;\n", i);
WRITE(p, "VARYOUT float4 clipPos_2;\n"); out.Write("VARYOUT float4 clipPos_2;\n");
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, "VARYOUT float4 Normal_2;\n"); out.Write("VARYOUT float4 Normal_2;\n");
} }
else else
{ {
@ -253,142 +152,140 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{ {
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
WRITE(p, "VARYOUT float4 uv%d_2;\n", i); out.Write("VARYOUT float4 uv%d_2;\n", i);
} }
else else
{ {
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, "VARYOUT float%d uv%d_2;\n", i < 4 ? 4 : 3 , i); out.Write("VARYOUT float%d uv%d_2;\n", i < 4 ? 4 : 3 , i);
} }
} }
WRITE(p, "VARYOUT float4 colors_02;\n"); out.Write("VARYOUT float4 colors_02;\n");
WRITE(p, "VARYOUT float4 colors_12;\n"); out.Write("VARYOUT float4 colors_12;\n");
WRITE(p, "void main()\n{\n"); out.Write("void main()\n{\n");
} }
else else
{ {
WRITE(p, "VS_OUTPUT main(\n"); out.Write("VS_OUTPUT main(\n");
// inputs // inputs
if (components & VB_HAS_NRM0) if (components & VB_HAS_NRM0)
WRITE(p, " float3 rawnorm0 : NORMAL0,\n"); out.Write(" float3 rawnorm0 : NORMAL0,\n");
if (components & VB_HAS_NRM1) if (components & VB_HAS_NRM1)
{ {
if (is_d3d) if (is_d3d)
WRITE(p, " float3 rawnorm1 : NORMAL1,\n"); out.Write(" float3 rawnorm1 : NORMAL1,\n");
else else
WRITE(p, " float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB); out.Write(" float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB);
} }
if (components & VB_HAS_NRM2) if (components & VB_HAS_NRM2)
{ {
if (is_d3d) if (is_d3d)
WRITE(p, " float3 rawnorm2 : NORMAL2,\n"); out.Write(" float3 rawnorm2 : NORMAL2,\n");
else else
WRITE(p, " float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB); out.Write(" float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB);
} }
if (components & VB_HAS_COL0) if (components & VB_HAS_COL0)
{ out.Write(" float4 color0 : COLOR0,\n");
WRITE(p, " float4 color0 : COLOR0,\n");
}
if (components & VB_HAS_COL1) if (components & VB_HAS_COL1)
{ out.Write(" float4 color1 : COLOR1,\n");
WRITE(p, " float4 color1 : COLOR1,\n");
}
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ {
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i)); u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
if ((components & (VB_HAS_UV0<<i)) || hastexmtx) if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
WRITE(p, " float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i); out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
} }
if (components & VB_HAS_POSMTXIDX) if (components & VB_HAS_POSMTXIDX)
{ {
if (is_d3d) if (is_d3d)
WRITE(p, " float4 blend_indices : BLENDINDICES,\n"); out.Write(" float4 blend_indices : BLENDINDICES,\n");
else else
WRITE(p, " float fposmtx : ATTR%d,\n", SHADER_POSMTX_ATTRIB); out.Write(" float fposmtx : ATTR%d,\n", SHADER_POSMTX_ATTRIB);
} }
WRITE(p, " float4 rawpos : POSITION) {\n"); out.Write(" float4 rawpos : POSITION) {\n");
} }
WRITE(p, "VS_OUTPUT o;\n"); out.Write("VS_OUTPUT o;\n");
// transforms // transforms
if (components & VB_HAS_POSMTXIDX) if (components & VB_HAS_POSMTXIDX)
{ {
if (ApiType & API_D3D9) if (api_type & API_D3D9)
{ {
WRITE(p, "int4 indices = D3DCOLORtoUBYTE4(blend_indices);\n"); out.Write("int4 indices = D3DCOLORtoUBYTE4(blend_indices);\n");
WRITE(p, "int posmtx = indices.x;\n"); out.Write("int posmtx = indices.x;\n");
} }
else if (ApiType == API_D3D11) else if (api_type == API_D3D11)
{ {
WRITE(p, "int posmtx = blend_indices.x * 255.0f;\n"); out.Write("int posmtx = blend_indices.x * 255.0f;\n");
} }
else else
{ {
WRITE(p, "int posmtx = int(fposmtx);\n"); out.Write("int posmtx = int(fposmtx);\n");
} }
if (DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS)) if (DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS))
{ {
// This'll cause issues, but it can't be helped // This'll cause issues, but it can't be helped
WRITE(p, "float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n"); out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n");
if (components & VB_HAS_NRMALL) if (components & VB_HAS_NRMALL)
WRITE(p, "float3 N0 = " I_NORMALMATRICES"[0].xyz, N1 = " I_NORMALMATRICES"[1].xyz, N2 = " I_NORMALMATRICES"[2].xyz;\n"); out.Write("float3 N0 = " I_NORMALMATRICES"[0].xyz, N1 = " I_NORMALMATRICES"[1].xyz, N2 = " I_NORMALMATRICES"[2].xyz;\n");
} }
else else
{ {
WRITE(p, "float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n"); out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");
if (components & VB_HAS_NRMALL) { if (components & VB_HAS_NRMALL) {
WRITE(p, "int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n"); out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
WRITE(p, "float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n"); out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
} }
} }
if (components & VB_HAS_NRM0) if (components & VB_HAS_NRM0)
WRITE(p, "float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"); out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
if (components & VB_HAS_NRM1) if (components & VB_HAS_NRM1)
WRITE(p, "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"); out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
if (components & VB_HAS_NRM2) if (components & VB_HAS_NRM2)
WRITE(p, "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"); out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
} }
else else
{ {
WRITE(p, "float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0f);\n"); out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0f);\n");
if (components & VB_HAS_NRM0) if (components & VB_HAS_NRM0)
WRITE(p, "float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n"); out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n");
if (components & VB_HAS_NRM1) if (components & VB_HAS_NRM1)
WRITE(p, "float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n"); out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n");
if (components & VB_HAS_NRM2) if (components & VB_HAS_NRM2)
WRITE(p, "float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n"); out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n");
} }
if (!(components & VB_HAS_NRM0)) if (!(components & VB_HAS_NRM0))
WRITE(p, "float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n"); out.Write("float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n");
WRITE(p, "o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
WRITE(p, "float4 mat, lacc;\n" out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
out.Write("float4 mat, lacc;\n"
"float3 ldir, h;\n" "float3 ldir, h;\n"
"float dist, dist2, attn;\n"); "float dist, dist2, attn;\n");
uid_data.numColorChans = xfregs.numChan.numColorChans;
if (xfregs.numChan.numColorChans == 0) if (xfregs.numChan.numColorChans == 0)
{ {
if (components & VB_HAS_COL0) if (components & VB_HAS_COL0)
WRITE(p, "o.colors_0 = color0;\n"); out.Write("o.colors_0 = color0;\n");
else else
WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n"); out.Write("o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
} }
// TODO: This probably isn't necessary if pixel lighting is enabled. // TODO: This probably isn't necessary if pixel lighting is enabled.
p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); GenerateLightingShader<T>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_");
if (xfregs.numChan.numColorChans < 2) if (xfregs.numChan.numColorChans < 2)
{ {
if (components & VB_HAS_COL1) if (components & VB_HAS_COL1)
WRITE(p, "o.colors_1 = color1;\n"); out.Write("o.colors_1 = color1;\n");
else else
WRITE(p, "o.colors_1 = o.colors_0;\n"); out.Write("o.colors_1 = o.colors_0;\n");
} }
// special case if only pos and tex coord 0 and tex coord input is AB11 // special case if only pos and tex coord 0 and tex coord input is AB11
// donko - this has caused problems in some games. removed for now. // donko - this has caused problems in some games. removed for now.
@ -400,25 +297,25 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
*/ */
// transform texcoords // transform texcoords
WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); out.Write("float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
{ {
TexMtxInfo& texinfo = xfregs.texMtxInfo[i]; TexMtxInfo& texinfo = xfregs.texMtxInfo[i];
WRITE(p, "{\n"); out.Write("{\n");
WRITE(p, "coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n"); out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
uid_data.texMtxInfo[i].sourcerow = xfregs.texMtxInfo[i].sourcerow;
switch (texinfo.sourcerow) switch (texinfo.sourcerow)
{ {
case XF_SRCGEOM_INROW: case XF_SRCGEOM_INROW:
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "coord = rawpos;\n"); // pos.w is 1 out.Write("coord = rawpos;\n"); // pos.w is 1
break; break;
case XF_SRCNORMAL_INROW: case XF_SRCNORMAL_INROW:
if (components & VB_HAS_NRM0) if (components & VB_HAS_NRM0)
{ {
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "coord = float4(rawnorm0.xyz, 1.0f);\n"); out.Write("coord = float4(rawnorm0.xyz, 1.0f);\n");
} }
break; break;
case XF_SRCCOLORS_INROW: case XF_SRCCOLORS_INROW:
@ -428,24 +325,25 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
if (components & VB_HAS_NRM1) if (components & VB_HAS_NRM1)
{ {
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "coord = float4(rawnorm1.xyz, 1.0f);\n"); out.Write("coord = float4(rawnorm1.xyz, 1.0f);\n");
} }
break; break;
case XF_SRCBINORMAL_B_INROW: case XF_SRCBINORMAL_B_INROW:
if (components & VB_HAS_NRM2) if (components & VB_HAS_NRM2)
{ {
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 ); _assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "coord = float4(rawnorm2.xyz, 1.0f);\n"); out.Write("coord = float4(rawnorm2.xyz, 1.0f);\n");
} }
break; break;
default: default:
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW); _assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) ) if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) )
WRITE(p, "coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW); out.Write("coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
break; break;
} }
// first transformation // first transformation
uid_data.texMtxInfo[i].texgentype = xfregs.texMtxInfo[i].texgentype;
switch (texinfo.texgentype) switch (texinfo.texgentype)
{ {
case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
@ -453,51 +351,57 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
if (components & (VB_HAS_NRM1|VB_HAS_NRM2)) if (components & (VB_HAS_NRM1|VB_HAS_NRM2))
{ {
// transform the light dir into tangent space // transform the light dir into tangent space
WRITE(p, "ldir = normalize(" I_LIGHTS"[5*%d + 3].xyz - pos.xyz);\n", texinfo.embosslightshift); uid_data.texMtxInfo[i].embosslightshift = xfregs.texMtxInfo[i].embosslightshift;
WRITE(p, "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift); uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift;
out.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(I_LIGHTS, texinfo.embosslightshift));
out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift);
} }
else else
{ {
_assert_(0); // should have normals _assert_(0); // should have normals
WRITE(p, "o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift); uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift;
out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
} }
break; break;
case XF_TEXGEN_COLOR_STRGBC0: case XF_TEXGEN_COLOR_STRGBC0:
_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
WRITE(p, "o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i); out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
break; break;
case XF_TEXGEN_COLOR_STRGBC1: case XF_TEXGEN_COLOR_STRGBC1:
_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW); _assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
WRITE(p, "o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i); out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
break; break;
case XF_TEXGEN_REGULAR: case XF_TEXGEN_REGULAR:
default: default:
uid_data.texMtxInfo[i].projection = xfregs.texMtxInfo[i].projection;
if (components & (VB_HAS_TEXMTXIDX0<<i)) if (components & (VB_HAS_TEXMTXIDX0<<i))
{ {
WRITE(p, "int tmp = int(tex%d.z);\n", i); out.Write("int tmp = int(tex%d.z);\n", i);
if (texinfo.projection == XF_TEXPROJ_STQ) if (texinfo.projection == XF_TEXPROJ_STQ)
WRITE(p, "o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i); out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
else else
WRITE(p, "o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i); out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
} }
else else
{ {
if (texinfo.projection == XF_TEXPROJ_STQ) if (texinfo.projection == XF_TEXPROJ_STQ)
WRITE(p, "o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3*i, 3*i+1, 3*i+2); out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3*i, 3*i+1, 3*i+2);
else else
WRITE(p, "o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3*i, 3*i+1); out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3*i, 3*i+1);
} }
break; break;
} }
uid_data.dualTexTrans.enabled = xfregs.dualTexTrans.enabled;
// CHECKME: does this only work for regular tex gen types? // CHECKME: does this only work for regular tex gen types?
if (xfregs.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR) if (xfregs.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
{ {
const PostMtxInfo& postInfo = xfregs.postMtxInfo[i]; const PostMtxInfo& postInfo = xfregs.postMtxInfo[i];
uid_data.postMtxInfo[i].index = xfregs.postMtxInfo[i].index;
int postidx = postInfo.index; int postidx = postInfo.index;
WRITE(p, "float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n" out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n" "float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n", "float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n",
postidx&0x3f, (postidx+1)&0x3f, (postidx+2)&0x3f); postidx&0x3f, (postidx+1)&0x3f, (postidx+2)&0x3f);
@ -509,76 +413,77 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
// q of output is unknown // q of output is unknown
// multiply by postmatrix // multiply by postmatrix
WRITE(p, "o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0f);\n", i, i, i); out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0f);\n", i, i, i);
} }
else else
{ {
uid_data.postMtxInfo[i].normalize = xfregs.postMtxInfo[i].normalize;
if (postInfo.normalize) if (postInfo.normalize)
WRITE(p, "o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i); out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);
// multiply by postmatrix // multiply by postmatrix
WRITE(p, "o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i); out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
} }
} }
WRITE(p, "}\n"); out.Write("}\n");
} }
// clipPos/w needs to be done in pixel shader, not here // clipPos/w needs to be done in pixel shader, not here
if (xfregs.numTexGen.numTexGens < 7) if (xfregs.numTexGen.numTexGens < 7)
{ {
WRITE(p, "o.clipPos = float4(pos.x,pos.y,o.pos.z,o.pos.w);\n"); out.Write("o.clipPos = float4(pos.x,pos.y,o.pos.z,o.pos.w);\n");
} }
else else
{ {
WRITE(p, "o.tex0.w = pos.x;\n"); out.Write("o.tex0.w = pos.x;\n");
WRITE(p, "o.tex1.w = pos.y;\n"); out.Write("o.tex1.w = pos.y;\n");
WRITE(p, "o.tex2.w = o.pos.z;\n"); out.Write("o.tex2.w = o.pos.z;\n");
WRITE(p, "o.tex3.w = o.pos.w;\n"); out.Write("o.tex3.w = o.pos.w;\n");
} }
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{ {
if (xfregs.numTexGen.numTexGens < 7) if (xfregs.numTexGen.numTexGens < 7)
{ {
WRITE(p, "o.Normal = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n"); out.Write("o.Normal = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n");
} }
else else
{ {
WRITE(p, "o.tex4.w = _norm0.x;\n"); out.Write("o.tex4.w = _norm0.x;\n");
WRITE(p, "o.tex5.w = _norm0.y;\n"); out.Write("o.tex5.w = _norm0.y;\n");
WRITE(p, "o.tex6.w = _norm0.z;\n"); out.Write("o.tex6.w = _norm0.z;\n");
if (xfregs.numTexGen.numTexGens < 8) if (xfregs.numTexGen.numTexGens < 8)
WRITE(p, "o.tex7 = pos.xyzz;\n"); out.Write("o.tex7 = pos.xyzz;\n");
else else
WRITE(p, "o.tex7.w = pos.z;\n"); out.Write("o.tex7.w = pos.z;\n");
} }
if (components & VB_HAS_COL0) if (components & VB_HAS_COL0)
WRITE(p, "o.colors_0 = color0;\n"); out.Write("o.colors_0 = color0;\n");
if (components & VB_HAS_COL1) if (components & VB_HAS_COL1)
WRITE(p, "o.colors_1 = color1;\n"); out.Write("o.colors_1 = color1;\n");
} }
//write the true depth value, if the game uses depth textures pixel shaders will override with the correct values //write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
//if not early z culling will improve speed //if not early z culling will improve speed
if (is_d3d) if (is_d3d)
{ {
WRITE(p, "o.pos.z = " I_DEPTHPARAMS".x * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y;\n"); out.Write("o.pos.z = " I_DEPTHPARAMS".x * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y;\n");
} }
else else
{ {
// this results in a scale from -1..0 to -1..1 after perspective // this results in a scale from -1..0 to -1..1 after perspective
// divide // divide
WRITE(p, "o.pos.z = o.pos.w + o.pos.z * 2.0f;\n"); out.Write("o.pos.z = o.pos.w + o.pos.z * 2.0f;\n");
// Sonic Unleashed puts its final rendering at the near or // Sonic Unleashed puts its final rendering at the near or
// far plane of the viewing frustum (actually box, they use // far plane of the viewing frustum (actually box, they use
// orthogonal projection for that), and we end up putting it // orthogonal projection for that), and we end up putting it
// just beyond, and the rendering gets clipped away. (The // just beyond, and the rendering gets clipped away. (The
// primitive gets dropped) // primitive gets dropped)
WRITE(p, "o.pos.z = o.pos.z * 1048575.0f/1048576.0f;\n"); out.Write("o.pos.z = o.pos.z * 1048575.0f/1048576.0f;\n");
// the next steps of the OGL pipeline are: // the next steps of the OGL pipeline are:
// (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology // (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology
@ -591,14 +496,14 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
//seems to get rather complicated //seems to get rather complicated
} }
if (ApiType & API_D3D9) if (api_type & API_D3D9)
{ {
// D3D9 is addressing pixel centers instead of pixel boundaries in clip space. // D3D9 is addressing pixel centers instead of pixel boundaries in clip space.
// Thus we need to offset the final position by half a pixel // Thus we need to offset the final position by half a pixel
WRITE(p, "o.pos = o.pos + float4(" I_DEPTHPARAMS".z, " I_DEPTHPARAMS".w, 0.f, 0.f);\n"); out.Write("o.pos = o.pos + float4(" I_DEPTHPARAMS".z, " I_DEPTHPARAMS".w, 0.f, 0.f);\n");
} }
if(ApiType == API_OPENGL) if(api_type == API_OPENGL)
{ {
// Bit ugly here // Bit ugly here
// TODO: Make pretty // TODO: Make pretty
@ -610,15 +515,13 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
for (unsigned int i = 0; i < 8; ++i) for (unsigned int i = 0; i < 8; ++i)
{ {
if(i < xfregs.numTexGen.numTexGens) if(i < xfregs.numTexGen.numTexGens)
WRITE(p, " uv%d_2.xyz = o.tex%d;\n", i, i); out.Write(" uv%d_2.xyz = o.tex%d;\n", i, i);
else else
WRITE(p, " uv%d_2.xyz = float3(0.0f, 0.0f, 0.0f);\n", i); out.Write(" uv%d_2.xyz = float3(0.0f, 0.0f, 0.0f);\n", i);
} }
out.Write(" clipPos_2 = o.clipPos;\n");
WRITE(p, " clipPos_2 = o.clipPos;\n");
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, " Normal_2 = o.Normal;\n"); out.Write(" Normal_2 = o.Normal;\n");
} }
else else
{ {
@ -626,29 +529,47 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{ {
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
WRITE(p, " uv%d_2 = o.tex%d;\n", i, i); out.Write(" uv%d_2 = o.tex%d;\n", i, i);
} }
else else
{ {
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, " uv%d_2%s = o.tex%d;\n", i, i < 4 ? ".xyzw" : ".xyz" , i); out.Write(" uv%d_2%s = o.tex%d;\n", i, i < 4 ? ".xyzw" : ".xyz" , i);
} }
} }
WRITE(p, "colors_02 = o.colors_0;\n"); out.Write("colors_02 = o.colors_0;\n");
WRITE(p, "colors_12 = o.colors_1;\n"); out.Write("colors_12 = o.colors_1;\n");
WRITE(p, "gl_Position = o.pos;\n"); out.Write("gl_Position = o.pos;\n");
WRITE(p, "}\n"); out.Write("}\n");
} }
else else
{ {
WRITE(p, "return o;\n}\n"); out.Write("return o;\n}\n");
} }
if (text[sizeof(text) - 1] != 0x7C) if (text[sizeof(text) - 1] != 0x7C)
PanicAlert("VertexShader generator - buffer too small, canary has been eaten!"); PanicAlert("VertexShader generator - buffer too small, canary has been eaten!");
#ifndef ANDROID #ifndef ANDROID
if (out.GetBuffer() != NULL)
{
uselocale(old_locale); // restore locale uselocale(old_locale); // restore locale
freelocale(locale); freelocale(locale);
}
#endif #endif
return text; }
// Fills `object` with the vertex shader UID for the given vertex components and API.
// Forwards to the shared GenerateVertexShader template instantiated with VertexShaderUid,
// so the UID comes from the same generator routine that GenerateVertexShaderCode uses.
void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type)
{
GenerateVertexShader<VertexShaderUid>(object, components, api_type);
}
// Generates vertex shader code into `object` for the given vertex components and API.
// Forwards to the shared GenerateVertexShader template instantiated with VertexShaderCode,
// the same generator routine that GetVertexShaderUid uses.
void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type)
{
GenerateVertexShader<VertexShaderCode>(object, components, api_type);
}
// Non-template entry point that forwards to GenerateVSOutputStruct<ShaderCode>, writing into `object`.
// NOTE(review): judging by the name and its call site in LineGeometryShader::SetShader, this exists
// so geometry shader code can reuse the VS output struct - confirm against the template definition.
void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type)
{
GenerateVSOutputStruct<ShaderCode>(object, components, api_type);
} }

View File

@ -5,8 +5,10 @@
#ifndef GCOGL_VERTEXSHADER_H #ifndef GCOGL_VERTEXSHADER_H
#define GCOGL_VERTEXSHADER_H #define GCOGL_VERTEXSHADER_H
#include <stdarg.h>
#include "XFMemory.h" #include "XFMemory.h"
#include "VideoCommon.h" #include "VideoCommon.h"
#include "ShaderGenCommon.h"
// TODO should be reordered // TODO should be reordered
#define SHADER_POSITION_ATTRIB 0 #define SHADER_POSITION_ATTRIB 0
@ -49,6 +51,7 @@
#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32) #define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32)
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64) #define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 1) #define C_VENVCONST_END (C_DEPTHPARAMS + 1)
const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 }, const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
{I_PROJECTION , C_PROJECTION, 4 }, {I_PROJECTION , C_PROJECTION, 4 },
{I_MATERIALS, C_MATERIALS, 4 }, {I_MATERIALS, C_MATERIALS, 4 },
@ -59,75 +62,40 @@ const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
{I_POSTTRANSFORMMATRICES, C_POSTTRANSFORMMATRICES, 64 }, {I_POSTTRANSFORMMATRICES, C_POSTTRANSFORMMATRICES, 64 },
{I_DEPTHPARAMS, C_DEPTHPARAMS, 1 }, {I_DEPTHPARAMS, C_DEPTHPARAMS, 1 },
}; };
template<bool safe>
class _VERTEXSHADERUID #pragma pack(4)
struct vertex_shader_uid_data
{ {
#define NUM_VSUID_VALUES_SAFE 25 u32 components;
public: u32 numColorChans : 2;
u32 values[safe ? NUM_VSUID_VALUES_SAFE : 9]; u32 numTexGens : 4;
_VERTEXSHADERUID() struct {
{ u32 projection : 1; // XF_TEXPROJ_X
} u32 inputform : 2; // XF_TEXINPUT_X
u32 texgentype : 3; // XF_TEXGEN_X
u32 sourcerow : 5; // XF_SRCGEOM_X
u32 embosssourceshift : 3; // what generated texcoord to use
u32 embosslightshift : 3; // light index that is used
} texMtxInfo[8];
struct {
u32 index : 6; // base row of dual transform matrix
u32 normalize : 1; // normalize before send operation
} postMtxInfo[8];
struct {
u32 enabled : 1;
} dualTexTrans;
_VERTEXSHADERUID(const _VERTEXSHADERUID& r) LightingUidData lighting;
{
for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i)
values[i] = r.values[i];
}
int GetNumValues() const
{
if (safe) return NUM_VSUID_VALUES_SAFE;
else return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1
}
bool operator <(const _VERTEXSHADERUID& _Right) const
{
if (values[0] < _Right.values[0])
return true;
else if (values[0] > _Right.values[0])
return false;
int N = GetNumValues();
for (int i = 1; i < N; ++i)
{
if (values[i] < _Right.values[i])
return true;
else if (values[i] > _Right.values[i])
return false;
}
return false;
}
bool operator ==(const _VERTEXSHADERUID& _Right) const
{
if (values[0] != _Right.values[0])
return false;
int N = GetNumValues();
for (int i = 1; i < N; ++i)
{
if (values[i] != _Right.values[i])
return false;
}
return true;
}
}; };
typedef _VERTEXSHADERUID<false> VERTEXSHADERUID; #pragma pack()
typedef _VERTEXSHADERUID<true> VERTEXSHADERUIDSAFE;
typedef ShaderUid<vertex_shader_uid_data> VertexShaderUid;
typedef ShaderCode VertexShaderCode; // TODO: Obsolete..
// components is included in the uid. void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type);
char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type); void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type);
const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type); void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type);
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components);
// Used to make sure that our optimized vertex shader IDs don't lose any possible shader code changes
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components);
#endif // GCOGL_VERTEXSHADER_H #endif // GCOGL_VERTEXSHADER_H

View File

@ -252,6 +252,7 @@
<ClInclude Include="Src\PixelShaderGen.h" /> <ClInclude Include="Src\PixelShaderGen.h" />
<ClInclude Include="Src\PixelShaderManager.h" /> <ClInclude Include="Src\PixelShaderManager.h" />
<ClInclude Include="Src\RenderBase.h" /> <ClInclude Include="Src\RenderBase.h" />
<ClInclude Include="Src\ShaderGenCommon.h" />
<ClInclude Include="Src\Statistics.h" /> <ClInclude Include="Src\Statistics.h" />
<ClInclude Include="Src\TextureCacheBase.h" /> <ClInclude Include="Src\TextureCacheBase.h" />
<ClInclude Include="Src\TextureConversionShader.h" /> <ClInclude Include="Src\TextureConversionShader.h" />

View File

@ -258,6 +258,9 @@
<ClInclude Include="Src\FPSCounter.h"> <ClInclude Include="Src\FPSCounter.h">
<Filter>Util</Filter> <Filter>Util</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="Src\ShaderGenCommon.h">
<Filter>Shader Generators</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="CMakeLists.txt" /> <None Include="CMakeLists.txt" />

View File

@ -169,10 +169,11 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth,
if (shaderIt == m_shaders.end()) if (shaderIt == m_shaders.end())
{ {
// Generate new shader. Warning: not thread-safe. // Generate new shader. Warning: not thread-safe.
static char code[16384]; static char buffer[16384];
char* p = code; ShaderCode code;
p = GenerateVSOutputStruct(p, components, API_D3D11); code.SetBuffer(buffer);
p += sprintf(p, "\n%s", LINE_GS_COMMON); GenerateVSOutputStructForGS(code, components, API_D3D11);
code.Write("\n%s", LINE_GS_COMMON);
std::stringstream numTexCoordsStream; std::stringstream numTexCoordsStream;
numTexCoordsStream << xfregs.numTexGen.numTexGens; numTexCoordsStream << xfregs.numTexGen.numTexGens;
@ -185,7 +186,7 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth,
{ "NUM_TEXCOORDS", numTexCoordsStr.c_str() }, { "NUM_TEXCOORDS", numTexCoordsStr.c_str() },
{ NULL, NULL } { NULL, NULL }
}; };
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros); ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros);
if (!newShader) if (!newShader)
{ {
WARN_LOG(VIDEO, "Line geometry shader for components 0x%.08X failed to compile", components); WARN_LOG(VIDEO, "Line geometry shader for components 0x%.08X failed to compile", components);

View File

@ -28,9 +28,10 @@ namespace DX11
PixelShaderCache::PSCache PixelShaderCache::PixelShaders; PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry; const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid; PixelShaderUid PixelShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> PixelShaderCache::pixel_uid_checker;
LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache; LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
ID3D11PixelShader* s_ColorMatrixProgram[2] = {NULL}; ID3D11PixelShader* s_ColorMatrixProgram[2] = {NULL};
ID3D11PixelShader* s_ColorCopyProgram[2] = {NULL}; ID3D11PixelShader* s_ColorCopyProgram[2] = {NULL};
@ -352,10 +353,10 @@ ID3D11Buffer* &PixelShaderCache::GetConstantBuffer()
} }
// this class will load the precompiled shaders into our cache // this class will load the precompiled shaders into our cache
class PixelShaderCacheInserter : public LinearDiskCacheReader<PIXELSHADERUID, u8> class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{ {
public: public:
void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size) void Read(const PixelShaderUid &key, const u8 *value, u32 value_size)
{ {
PixelShaderCache::InsertByteCode(key, value, value_size); PixelShaderCache::InsertByteCode(key, value, value_size);
} }
@ -415,6 +416,7 @@ void PixelShaderCache::Clear()
for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++) for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++)
iter->second.Destroy(); iter->second.Destroy();
PixelShaders.clear(); PixelShaders.clear();
pixel_uid_checker.Invalidate();
last_entry = NULL; last_entry = NULL;
} }
@ -450,8 +452,14 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{ {
PIXELSHADERUID uid; PixelShaderUid uid;
GetPixelShaderId(&uid, dstAlphaMode, components); GetPixelShaderUid(uid, dstAlphaMode, API_D3D11, components);
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components);
pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
}
// Check if the shader is already set // Check if the shader is already set
if (last_entry) if (last_entry)
@ -459,7 +467,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (uid == last_uid) if (uid == last_uid)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return (last_entry->shader != NULL); return (last_entry->shader != NULL);
} }
} }
@ -475,15 +482,15 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
last_entry = &entry; last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL); return (entry.shader != NULL);
} }
// Need to compile a new shader // Need to compile a new shader
const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, components); PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components);
D3DBlob* pbytecode; D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code, (unsigned int)strlen(code), &pbytecode)) if (!D3D::CompilePixelShader(code.GetBuffer(), (unsigned int)strlen(code.GetBuffer()), &pbytecode))
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false; return false;
@ -497,15 +504,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (g_ActiveConfig.bEnableShaderDebugging && success) if (g_ActiveConfig.bEnableShaderDebugging && success)
{ {
PixelShaders[uid].code = code; PixelShaders[uid].code = code.GetBuffer();
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
} }
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return success; return success;
} }
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen) bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen)
{ {
ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen); ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
if (shader == NULL) if (shader == NULL)

View File

@ -22,7 +22,7 @@ public:
static void Clear(); static void Clear();
static void Shutdown(); static void Shutdown();
static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); // TODO: Should be renamed to LoadShader static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); // TODO: Should be renamed to LoadShader
static bool InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen); static bool InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen);
static ID3D11PixelShader* GetActiveShader() { return last_entry->shader; } static ID3D11PixelShader* GetActiveShader() { return last_entry->shader; }
static ID3D11Buffer* &GetConstantBuffer(); static ID3D11Buffer* &GetConstantBuffer();
@ -41,18 +41,19 @@ private:
{ {
ID3D11PixelShader* shader; ID3D11PixelShader* shader;
PIXELSHADERUIDSAFE safe_uid;
std::string code; std::string code;
PSCacheEntry() : shader(NULL) {} PSCacheEntry() : shader(NULL) {}
void Destroy() { SAFE_RELEASE(shader); } void Destroy() { SAFE_RELEASE(shader); }
}; };
typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache; typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;
static PSCache PixelShaders; static PSCache PixelShaders;
static const PSCacheEntry* last_entry; static const PSCacheEntry* last_entry;
static PIXELSHADERUID last_uid; static PixelShaderUid last_uid;
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
}; };
} // namespace DX11 } // namespace DX11

View File

@ -163,10 +163,11 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize,
if (shaderIt == m_shaders.end()) if (shaderIt == m_shaders.end())
{ {
// Generate new shader. Warning: not thread-safe. // Generate new shader. Warning: not thread-safe.
static char code[16384]; static char buffer[16384];
char* p = code; ShaderCode code;
p = GenerateVSOutputStruct(p, components, API_D3D11); code.SetBuffer(buffer);
p += sprintf(p, "\n%s", POINT_GS_COMMON); GenerateVSOutputStructForGS(code, components, API_D3D11);
code.Write("\n%s", POINT_GS_COMMON);
std::stringstream numTexCoordsStream; std::stringstream numTexCoordsStream;
numTexCoordsStream << xfregs.numTexGen.numTexGens; numTexCoordsStream << xfregs.numTexGen.numTexGens;
@ -179,7 +180,7 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize,
{ "NUM_TEXCOORDS", numTexCoordsStr.c_str() }, { "NUM_TEXCOORDS", numTexCoordsStr.c_str() },
{ NULL, NULL } { NULL, NULL }
}; };
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros); ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros);
if (!newShader) if (!newShader)
{ {
WARN_LOG(VIDEO, "Point geometry shader for components 0x%.08X failed to compile", components); WARN_LOG(VIDEO, "Point geometry shader for components 0x%.08X failed to compile", components);

View File

@ -238,7 +238,7 @@ void VertexManager::vFlush()
// set global constants // set global constants
VertexShaderManager::SetConstants(); VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants(); PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;

View File

@ -24,14 +24,15 @@ namespace DX11 {
VertexShaderCache::VSCache VertexShaderCache::vshaders; VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid; VertexShaderUid VertexShaderCache::last_uid;
UidChecker<VertexShaderUid,VertexShaderCode> VertexShaderCache::vertex_uid_checker;
static ID3D11VertexShader* SimpleVertexShader = NULL; static ID3D11VertexShader* SimpleVertexShader = NULL;
static ID3D11VertexShader* ClearVertexShader = NULL; static ID3D11VertexShader* ClearVertexShader = NULL;
static ID3D11InputLayout* SimpleLayout = NULL; static ID3D11InputLayout* SimpleLayout = NULL;
static ID3D11InputLayout* ClearLayout = NULL; static ID3D11InputLayout* ClearLayout = NULL;
LinearDiskCache<VERTEXSHADERUID, u8> g_vs_disk_cache; LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;
ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader() { return SimpleVertexShader; } ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader() { return SimpleVertexShader; }
ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() { return ClearVertexShader; } ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() { return ClearVertexShader; }
@ -57,10 +58,10 @@ ID3D11Buffer* &VertexShaderCache::GetConstantBuffer()
} }
// this class will load the precompiled shaders into our cache // this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VERTEXSHADERUID, u8> class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
{ {
public: public:
void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size) void Read(const VertexShaderUid &key, const u8 *value, u32 value_size)
{ {
D3DBlob* blob = new D3DBlob(value_size, value); D3DBlob* blob = new D3DBlob(value_size, value);
VertexShaderCache::InsertByteCode(key, blob); VertexShaderCache::InsertByteCode(key, blob);
@ -176,6 +177,7 @@ void VertexShaderCache::Clear()
for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter) for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
iter->second.Destroy(); iter->second.Destroy();
vshaders.clear(); vshaders.clear();
vertex_uid_checker.Invalidate();
last_entry = NULL; last_entry = NULL;
} }
@ -197,14 +199,20 @@ void VertexShaderCache::Shutdown()
bool VertexShaderCache::SetShader(u32 components) bool VertexShaderCache::SetShader(u32 components)
{ {
VERTEXSHADERUID uid; VertexShaderUid uid;
GetVertexShaderId(&uid, components); GetVertexShaderUid(uid, components, API_D3D11);
if (g_ActiveConfig.bEnableShaderDebugging)
{
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D11);
vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v");
}
if (last_entry) if (last_entry)
{ {
if (uid == last_uid) if (uid == last_uid)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL); return (last_entry->shader != NULL);
} }
} }
@ -218,14 +226,14 @@ bool VertexShaderCache::SetShader(u32 components)
last_entry = &entry; last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, entry.safe_uid, entry.code, components);
return (entry.shader != NULL); return (entry.shader != NULL);
} }
const char *code = GenerateVertexShaderCode(components, API_D3D11); VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D11);
D3DBlob* pbytecode = NULL; D3DBlob* pbytecode = NULL;
D3D::CompileVertexShader(code, (int)strlen(code), &pbytecode); D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &pbytecode);
if (pbytecode == NULL) if (pbytecode == NULL)
{ {
@ -239,15 +247,14 @@ bool VertexShaderCache::SetShader(u32 components)
if (g_ActiveConfig.bEnableShaderDebugging && success) if (g_ActiveConfig.bEnableShaderDebugging && success)
{ {
vshaders[uid].code = code; vshaders[uid].code = code.GetBuffer();
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
} }
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return success; return success;
} }
bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob) bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob)
{ {
ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob); ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
if (shader == NULL) if (shader == NULL)

View File

@ -31,7 +31,7 @@ public:
static ID3D11InputLayout* GetSimpleInputLayout(); static ID3D11InputLayout* GetSimpleInputLayout();
static ID3D11InputLayout* GetClearInputLayout(); static ID3D11InputLayout* GetClearInputLayout();
static bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob); static bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob);
private: private:
struct VSCacheEntry struct VSCacheEntry
@ -39,7 +39,6 @@ private:
ID3D11VertexShader* shader; ID3D11VertexShader* shader;
D3DBlob* bytecode; // needed to initialize the input layout D3DBlob* bytecode; // needed to initialize the input layout
VERTEXSHADERUIDSAFE safe_uid;
std::string code; std::string code;
VSCacheEntry() : shader(NULL), bytecode(NULL) {} VSCacheEntry() : shader(NULL), bytecode(NULL) {}
@ -55,11 +54,13 @@ private:
SAFE_RELEASE(bytecode); SAFE_RELEASE(bytecode);
} }
}; };
typedef std::map<VERTEXSHADERUID, VSCacheEntry> VSCache; typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;
static VSCache vshaders; static VSCache vshaders;
static const VSCacheEntry* last_entry; static const VSCacheEntry* last_entry;
static VERTEXSHADERUID last_uid; static VertexShaderUid last_uid;
static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;
}; };
} // namespace DX11 } // namespace DX11

View File

@ -31,9 +31,10 @@ namespace DX9
PixelShaderCache::PSCache PixelShaderCache::PixelShaders; PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry; const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid; PixelShaderUid PixelShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> PixelShaderCache::pixel_uid_checker;
static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache; static LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
static std::set<u32> unique_shaders; static std::set<u32> unique_shaders;
#define MAX_SSAA_SHADERS 3 #define MAX_SSAA_SHADERS 3
@ -55,10 +56,10 @@ static LPDIRECT3DPIXELSHADER9 s_ClearProgram = NULL;
static LPDIRECT3DPIXELSHADER9 s_rgba6_to_rgb8 = NULL; static LPDIRECT3DPIXELSHADER9 s_rgba6_to_rgb8 = NULL;
static LPDIRECT3DPIXELSHADER9 s_rgb8_to_rgba6 = NULL; static LPDIRECT3DPIXELSHADER9 s_rgb8_to_rgba6 = NULL;
class PixelShaderCacheInserter : public LinearDiskCacheReader<PIXELSHADERUID, u8> class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{ {
public: public:
void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size) void Read(const PixelShaderUid &key, const u8 *value, u32 value_size)
{ {
PixelShaderCache::InsertByteCode(key, value, value_size, false); PixelShaderCache::InsertByteCode(key, value, value_size, false);
} }
@ -287,6 +288,7 @@ void PixelShaderCache::Clear()
for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++) for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++)
iter->second.Destroy(); iter->second.Destroy();
PixelShaders.clear(); PixelShaders.clear();
pixel_uid_checker.Invalidate();
last_entry = NULL; last_entry = NULL;
} }
@ -323,8 +325,14 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components) bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{ {
const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30; const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30;
PIXELSHADERUID uid; PixelShaderUid uid;
GetPixelShaderId(&uid, dstAlphaMode, components); GetPixelShaderUid(uid, dstAlphaMode, API_D3D9, components);
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D9, components);
pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
}
// Check if the shader is already set // Check if the shader is already set
if (last_entry) if (last_entry)
@ -332,7 +340,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (uid == last_uid) if (uid == last_uid)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return last_entry->shader != NULL; return last_entry->shader != NULL;
} }
} }
@ -349,34 +356,34 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (entry.shader) D3D::SetPixelShader(entry.shader); if (entry.shader) D3D::SetPixelShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL); return (entry.shader != NULL);
} }
// Need to compile a new shader // Need to compile a new shader
const char *code = GeneratePixelShaderCode(dstAlphaMode, api, components); PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, api, components);
if (g_ActiveConfig.bEnableShaderDebugging) if (g_ActiveConfig.bEnableShaderDebugging)
{ {
u32 code_hash = HashAdler32((const u8 *)code, strlen(code)); u32 code_hash = HashAdler32((const u8 *)code.GetBuffer(), strlen(code.GetBuffer()));
unique_shaders.insert(code_hash); unique_shaders.insert(code_hash);
SETSTAT(stats.numUniquePixelShaders, unique_shaders.size()); SETSTAT(stats.numUniquePixelShaders, unique_shaders.size());
} }
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) { if (g_ActiveConfig.iLog & CONF_SAVESHADERS) {
static int counter = 0; static int counter = 0;
char szTemp[MAX_PATH]; char szTemp[MAX_PATH];
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
SaveData(szTemp, code); SaveData(szTemp, code.GetBuffer());
} }
#endif #endif
u8 *bytecode = 0; u8 *bytecode = 0;
int bytecodelen = 0; int bytecodelen = 0;
if (!D3D::CompilePixelShader(code, (int)strlen(code), &bytecode, &bytecodelen)) { if (!D3D::CompilePixelShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen)) {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false; return false;
} }
@ -390,15 +397,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (g_ActiveConfig.bEnableShaderDebugging && success) if (g_ActiveConfig.bEnableShaderDebugging && success)
{ {
PixelShaders[uid].code = code; PixelShaders[uid].code = code.GetBuffer();
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
} }
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return success; return success;
} }
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate)
{ {
LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen); LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);

View File

@ -28,7 +28,6 @@ private:
LPDIRECT3DPIXELSHADER9 shader; LPDIRECT3DPIXELSHADER9 shader;
bool owns_shader; bool owns_shader;
PIXELSHADERUIDSAFE safe_uid;
std::string code; std::string code;
PSCacheEntry() : shader(NULL), owns_shader(true) {} PSCacheEntry() : shader(NULL), owns_shader(true) {}
@ -40,18 +39,20 @@ private:
} }
}; };
typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache; typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;
static PSCache PixelShaders; static PSCache PixelShaders;
static const PSCacheEntry *last_entry; static const PSCacheEntry *last_entry;
static PIXELSHADERUID last_uid; static PixelShaderUid last_uid;
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
static void Clear(); static void Clear();
public: public:
static void Init(); static void Init();
static void Shutdown(); static void Shutdown();
static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 componets); static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 componets);
static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); static bool InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate);
static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode); static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode); static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode, bool depthConversion); static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode, bool depthConversion);

View File

@ -348,7 +348,7 @@ void VertexManager::vFlush()
// set global constants // set global constants
VertexShaderManager::SetConstants(); VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants(); PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
u32 stride = g_nativeVertexFmt->GetVertexStride(); u32 stride = g_nativeVertexFmt->GetVertexStride();
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24; bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;

View File

@ -25,14 +25,15 @@ namespace DX9
VertexShaderCache::VSCache VertexShaderCache::vshaders; VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid; VertexShaderUid VertexShaderCache::last_uid;
UidChecker<VertexShaderUid,VertexShaderCode> VertexShaderCache::vertex_uid_checker;
#define MAX_SSAA_SHADERS 3 #define MAX_SSAA_SHADERS 3
static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS]; static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS];
static LPDIRECT3DVERTEXSHADER9 ClearVertexShader; static LPDIRECT3DVERTEXSHADER9 ClearVertexShader;
LinearDiskCache<VERTEXSHADERUID, u8> g_vs_disk_cache; LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;
LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level) LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level)
{ {
@ -45,10 +46,10 @@ LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader()
} }
// this class will load the precompiled shaders into our cache // this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VERTEXSHADERUID, u8> class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
{ {
public: public:
void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size) void Read(const VertexShaderUid &key, const u8 *value, u32 value_size)
{ {
VertexShaderCache::InsertByteCode(key, value, value_size, false); VertexShaderCache::InsertByteCode(key, value, value_size, false);
} }
@ -150,6 +151,7 @@ void VertexShaderCache::Clear()
for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter) for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
iter->second.Destroy(); iter->second.Destroy();
vshaders.clear(); vshaders.clear();
vertex_uid_checker.Invalidate();
last_entry = NULL; last_entry = NULL;
} }
@ -174,14 +176,20 @@ void VertexShaderCache::Shutdown()
bool VertexShaderCache::SetShader(u32 components) bool VertexShaderCache::SetShader(u32 components)
{ {
VERTEXSHADERUID uid; VertexShaderUid uid;
GetVertexShaderId(&uid, components); GetVertexShaderUid(uid, components, API_D3D9);
if (g_ActiveConfig.bEnableShaderDebugging)
{
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D9);
vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v");
}
if (last_entry) if (last_entry)
{ {
if (uid == last_uid) if (uid == last_uid)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL); return (last_entry->shader != NULL);
} }
} }
@ -196,14 +204,15 @@ bool VertexShaderCache::SetShader(u32 components)
if (entry.shader) D3D::SetVertexShader(entry.shader); if (entry.shader) D3D::SetVertexShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, entry.safe_uid, entry.code, components);
return (entry.shader != NULL); return (entry.shader != NULL);
} }
const char *code = GenerateVertexShaderCode(components, API_D3D9); VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D9);
u8 *bytecode; u8 *bytecode;
int bytecodelen; int bytecodelen;
if (!D3D::CompileVertexShader(code, (int)strlen(code), &bytecode, &bytecodelen)) if (!D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen))
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false; return false;
@ -213,15 +222,14 @@ bool VertexShaderCache::SetShader(u32 components)
bool success = InsertByteCode(uid, bytecode, bytecodelen, true); bool success = InsertByteCode(uid, bytecode, bytecodelen, true);
if (g_ActiveConfig.bEnableShaderDebugging && success) if (g_ActiveConfig.bEnableShaderDebugging && success)
{ {
vshaders[uid].code = code; vshaders[uid].code = code.GetBuffer();
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
} }
delete [] bytecode; delete [] bytecode;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return success; return success;
} }
bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) { bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate) {
LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen); LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen);
// Make an entry in the table // Make an entry in the table

View File

@ -23,7 +23,6 @@ private:
LPDIRECT3DVERTEXSHADER9 shader; LPDIRECT3DVERTEXSHADER9 shader;
std::string code; std::string code;
VERTEXSHADERUIDSAFE safe_uid;
VSCacheEntry() : shader(NULL) {} VSCacheEntry() : shader(NULL) {}
void Destroy() void Destroy()
@ -34,11 +33,14 @@ private:
} }
}; };
typedef std::map<VERTEXSHADERUID, VSCacheEntry> VSCache; typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;
static VSCache vshaders; static VSCache vshaders;
static const VSCacheEntry *last_entry; static const VSCacheEntry *last_entry;
static VERTEXSHADERUID last_uid; static VertexShaderUid last_uid;
static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;
static void Clear(); static void Clear();
public: public:
@ -47,7 +49,7 @@ public:
static bool SetShader(u32 components); static bool SetShader(u32 components);
static LPDIRECT3DVERTEXSHADER9 GetSimpleVertexShader(int level); static LPDIRECT3DVERTEXSHADER9 GetSimpleVertexShader(int level);
static LPDIRECT3DVERTEXSHADER9 GetClearVertexShader(); static LPDIRECT3DVERTEXSHADER9 GetClearVertexShader();
static bool InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); static bool InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate);
static std::string GetCurrentShaderCode(); static std::string GetCurrentShaderCode();
}; };

View File

@ -31,6 +31,8 @@ static GLuint CurrentProgram = 0;
ProgramShaderCache::PCache ProgramShaderCache::pshaders; ProgramShaderCache::PCache ProgramShaderCache::pshaders;
ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry; ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry;
SHADERUID ProgramShaderCache::last_uid; SHADERUID ProgramShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> ProgramShaderCache::pixel_uid_checker;
UidChecker<VertexShaderUid,VertexShaderCode> ProgramShaderCache::vertex_uid_checker;
static char s_glsl_header[1024] = ""; static char s_glsl_header[1024] = "";
@ -193,7 +195,6 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
if (uid == last_uid) if (uid == last_uid)
{ {
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidateShaderIDs(last_entry, dstAlphaMode, components);
last_entry->shader.Bind(); last_entry->shader.Bind();
return &last_entry->shader; return &last_entry->shader;
} }
@ -209,7 +210,6 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
last_entry = entry; last_entry = entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidateShaderIDs(entry, dstAlphaMode, components);
last_entry->shader.Bind(); last_entry->shader.Bind();
return &last_entry->shader; return &last_entry->shader;
} }
@ -219,14 +219,15 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
last_entry = &newentry; last_entry = &newentry;
newentry.in_cache = 0; newentry.in_cache = 0;
const char *vcode = GenerateVertexShaderCode(components, API_OPENGL); VertexShaderCode vcode;
const char *pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components); PixelShaderCode pcode;
GenerateVertexShaderCode(vcode, components, API_OPENGL);
GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);
if (g_ActiveConfig.bEnableShaderDebugging) if (g_ActiveConfig.bEnableShaderDebugging)
{ {
GetSafeShaderId(&newentry.safe_uid, dstAlphaMode, components); newentry.shader.strvprog = vcode.GetBuffer();
newentry.shader.strvprog = vcode; newentry.shader.strpprog = pcode.GetBuffer();
newentry.shader.strpprog = pcode;
} }
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
@ -234,13 +235,13 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
static int counter = 0; static int counter = 0;
char szTemp[MAX_PATH]; char szTemp[MAX_PATH];
sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
SaveData(szTemp, vcode); SaveData(szTemp, vcode.GetBuffer());
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
SaveData(szTemp, pcode); SaveData(szTemp, pcode.GetBuffer());
} }
#endif #endif
if (!CompileShader(newentry.shader, vcode, pcode)) { if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer())) {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return NULL; return NULL;
} }
@ -380,28 +381,23 @@ GLuint ProgramShaderCache::CompileSingleShader (GLuint type, const char* code )
return result; return result;
} }
void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components)
void ProgramShaderCache::GetShaderId ( SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components )
{ {
GetPixelShaderId(&uid->puid, dstAlphaMode, components); GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components);
GetVertexShaderId(&uid->vuid, components); GetVertexShaderUid(uid->vuid, components, API_OPENGL);
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode pcode;
GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);
pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p");
VertexShaderCode vcode;
GenerateVertexShaderCode(vcode, components, API_OPENGL);
vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v");
}
} }
void ProgramShaderCache::GetSafeShaderId ( SHADERUIDSAFE* uid, DSTALPHA_MODE dstAlphaMode, u32 components )
{
GetSafePixelShaderId(&uid->puid, dstAlphaMode, components);
GetSafeVertexShaderId(&uid->vuid, components);
}
void ProgramShaderCache::ValidateShaderIDs ( PCacheEntry *entry, DSTALPHA_MODE dstAlphaMode, u32 components )
{
ValidateVertexShaderIDs(API_OPENGL, entry->safe_uid.vuid, entry->shader.strvprog, components);
ValidatePixelShaderIDs(API_OPENGL, entry->safe_uid.puid, entry->shader.strpprog, dstAlphaMode, components);
}
ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void) ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void)
{ {
return *last_entry; return *last_entry;
@ -497,6 +493,9 @@ void ProgramShaderCache::Shutdown(void)
iter->second.Destroy(); iter->second.Destroy();
pshaders.clear(); pshaders.clear();
pixel_uid_checker.Invalidate();
vertex_uid_checker.Invalidate();
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO) if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{ {
delete s_buffer; delete s_buffer;

View File

@ -16,18 +16,17 @@
namespace OGL namespace OGL
{ {
template<bool safe> class SHADERUID
class _SHADERUID
{ {
public: public:
_VERTEXSHADERUID<safe> vuid; VertexShaderUid vuid;
_PIXELSHADERUID<safe> puid; PixelShaderUid puid;
_SHADERUID() {} SHADERUID() {}
_SHADERUID(const _SHADERUID& r) : vuid(r.vuid), puid(r.puid) {} SHADERUID(const SHADERUID& r) : vuid(r.vuid), puid(r.puid) {}
bool operator <(const _SHADERUID& r) const bool operator <(const SHADERUID& r) const
{ {
if(puid < r.puid) return true; if(puid < r.puid) return true;
if(r.puid < puid) return false; if(r.puid < puid) return false;
@ -35,13 +34,11 @@ public:
return false; return false;
} }
bool operator ==(const _SHADERUID& r) const bool operator ==(const SHADERUID& r) const
{ {
return puid == r.puid && vuid == r.vuid; return puid == r.puid && vuid == r.vuid;
} }
}; };
typedef _SHADERUID<false> SHADERUID;
typedef _SHADERUID<true> SHADERUIDSAFE;
const int NUM_UNIFORMS = 19; const int NUM_UNIFORMS = 19;
@ -72,7 +69,6 @@ public:
struct PCacheEntry struct PCacheEntry
{ {
SHADER shader; SHADER shader;
SHADERUIDSAFE safe_uid;
bool in_cache; bool in_cache;
void Destroy() void Destroy()
@ -81,12 +77,12 @@ public:
} }
}; };
typedef std::map<SHADERUID, PCacheEntry> PCache;
static PCacheEntry GetShaderProgram(void); static PCacheEntry GetShaderProgram(void);
static GLuint GetCurrentProgram(void); static GLuint GetCurrentProgram(void);
static SHADER* SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); static SHADER* SetShader(DSTALPHA_MODE dstAlphaMode, u32 components);
static void GetShaderId(SHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components); static void GetShaderId(SHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
static void GetSafeShaderId(SHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
static void ValidateShaderIDs(PCacheEntry *entry, DSTALPHA_MODE dstAlphaMode, u32 components);
static bool CompileShader(SHADER &shader, const char* vcode, const char* pcode); static bool CompileShader(SHADER &shader, const char* vcode, const char* pcode);
static GLuint CompileSingleShader(GLuint type, const char *code); static GLuint CompileSingleShader(GLuint type, const char *code);
@ -106,12 +102,13 @@ private:
void Read(const SHADERUID &key, const u8 *value, u32 value_size); void Read(const SHADERUID &key, const u8 *value, u32 value_size);
}; };
typedef std::map<SHADERUID, PCacheEntry> PCache;
static PCache pshaders; static PCache pshaders;
static PCacheEntry* last_entry; static PCacheEntry* last_entry;
static SHADERUID last_uid; static SHADERUID last_uid;
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;
static GLintptr s_vs_data_size; static GLintptr s_vs_data_size;
static GLintptr s_ps_data_size; static GLintptr s_ps_data_size;
static GLintptr s_vs_data_offset; static GLintptr s_vs_data_offset;

View File

@ -257,7 +257,7 @@ void VertexManager::vFlush()
// set global constants // set global constants
VertexShaderManager::SetConstants(); VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants(); PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
ProgramShaderCache::UploadConstants(); ProgramShaderCache::UploadConstants();
// setup the pointers // setup the pointers
@ -278,7 +278,7 @@ void VertexManager::vFlush()
{ {
// Need to set these again, if we don't support UBO // Need to set these again, if we don't support UBO
VertexShaderManager::SetConstants(); VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants(); PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
} }
// only update alpha // only update alpha