From a1837662a05ba050ae3f1d77e163138268479fa7 Mon Sep 17 00:00:00 2001 From: donkopunchstania Date: Thu, 2 Oct 2008 03:26:08 +0000 Subject: [PATCH] Added shader dump option to GL plugin. glScissor is always set, even when copying to EFB. Indirect texturing and alpha blending fixes. Changed determination if texture dimension recalculation is needed - this might break stuff! Let me know if there are issues so it can be tweaked. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@739 8ced0084-cf51-0410-be5f-012b33b47a6e --- .../Plugins/Plugin_VideoOGL/Src/BPStructs.cpp | 12 +- .../Plugins/Plugin_VideoOGL/Src/Globals.cpp | 13 ++ Source/Plugins/Plugin_VideoOGL/Src/Globals.h | 2 + .../Plugin_VideoOGL/Src/PixelShader.cpp | 27 +++- .../Src/PixelShaderManager.cpp | 132 ++++++++++-------- .../Plugin_VideoOGL/Src/PixelShaderManager.h | 3 + .../Plugin_VideoOGL/Src/TextureMngr.cpp | 2 +- .../Src/VertexShaderManager.cpp | 11 ++ 8 files changed, 131 insertions(+), 71 deletions(-) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp index 50db2d00b7..dd52edb40b 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp @@ -675,10 +675,11 @@ void LoadBPReg(u32 value0) u32 nRestoreZBufferTarget = Renderer::GetZBufferTarget(); glViewport(0, 0, Renderer::GetTargetWidth(), Renderer::GetTargetHeight()); - // if copied to texture, set the dimensions to the source copy dims, otherwise, clear the entire buffer - if( PE_copy.copy_to_xfb == 0 ) - glScissor(multirc.left, (Renderer::GetTargetHeight() - multirc.bottom), - (multirc.right - multirc.left), (multirc.bottom - multirc.top)); + + // Always set the scissor in case it was set by the game and has not been reset + glScissor(multirc.left, (Renderer::GetTargetHeight() - multirc.bottom), + (multirc.right - multirc.left), (multirc.bottom - multirc.top)); + VertexShaderMngr::SetViewportChanged(); // since clear operations use the source rectangle, have to do regular renders (glClear clears the entire buffer) @@ -724,8 +725,7 @@ void LoadBPReg(u32 value0) glDrawBuffers(2, s_drawbuffers); } - if( PE_copy.copy_to_xfb == 0 ) - SetScissorRect(); // reset the scissor rect + SetScissorRect(); // reset the scissor rect } } break; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Globals.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Globals.cpp index 441ddf8a19..e96ca12e4c 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Globals.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Globals.cpp @@ -71,6 +71,7 @@ void Config::Load() iniFile.Get("Settings", "DLOptimize", &iCompileDLsLevel, 0); iniFile.Get("Settings", "DumpTextures", &bDumpTextures, 0); iniFile.Get("Settings", "ShowShaderErrors", &bShowShaderErrors, 0); + iniFile.Get("Settings", "LogLevel", &iLog, 0); iniFile.Get("Settings", "Multisample", &iMultisampleMode, 0); if(iMultisampleMode == 0) iMultisampleMode = 1; @@ -107,6 +108,7 @@ void Config::Save() iniFile.Set("Settings", "DLOptimize", iCompileDLsLevel); iniFile.Set("Settings", "DumpTextures", bDumpTextures); iniFile.Set("Settings", "ShowShaderErrors", bShowShaderErrors); + iniFile.Set("Settings", "LogLevel", iLog); iniFile.Set("Settings", "Multisample", iMultisampleMode); iniFile.Set("Settings", "TexDumpPath", texDumpPath); iniFile.Set("Settings", "TexFmtOverlayEnable", bTexFmtOverlayEnable); @@ -185,6 +187,17 @@ bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int he return SaveTGA(filename, width, height, &data[0]); } +bool SaveData(const char* filename, const char* data) +{ + FILE* f = fopen(filename, "wb"); + if (f == NULL) + return false; + + fwrite(data, strlen(data), 1, f); + fclose(f); + return true; +} + #ifdef _WIN32 // The one for Linux is in Linux/Linux.cpp static HANDLE hConsole = NULL; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Globals.h b/Source/Plugins/Plugin_VideoOGL/Src/Globals.h index df96cec24e..7dfe5eaaf7 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Globals.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/Globals.h @@ -117,6 +117,7 @@ extern int frameCount; #define CONF_PRIMLOG 2 #define CONF_SAVETEXTURES 4 #define CONF_SAVETARGETS 8 +#define CONF_SAVESHADERS 16 struct Config { @@ -226,6 +227,7 @@ void HandleGLError(); void InitLUTs(); bool SaveTGA(const char* filename, int width, int height, void* pdata); bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height); +bool SaveData(const char* filename, const char* pdata); #if defined(_MSC_VER) && !defined(__x86_64__) && !defined(_M_X64) void * memcpy_amd(void *dest, const void *src, size_t n); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp index 1c7f06301e..e26fd72580 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp @@ -422,6 +422,7 @@ char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRende if( !WriteAlphaTest(p) ) { // alpha test will always fail, so restart the shader and just make it an empty function p = pmainstart; + WRITE(p, "discard;\n"); WRITE(p, "ocol0 = 0;\n"); } else { @@ -449,7 +450,6 @@ char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRende WRITE(p, "ocol1 = frac(float4(256.0f*256.0f, 256.0f, 1.0f, 0.0f) * uv%d.w);\n", ztexcoord); } } - WRITE(p,"}\n"); return text; @@ -470,10 +470,25 @@ void WriteStage(char *&p, int n, u32 texture_mask) bHasIndStage = true; int texmap = bpmem.tevorders[n/2].getEnable(n&1) ? bpmem.tevorders[n/2].getTexMap(n&1) : bpmem.tevindref.getTexMap(bpmem.tevind[n].bt); - if( bpmem.tevind[n].bs != ITBA_OFF ) + if( bpmem.tevind[n].bs != ITBA_OFF ) { // write the bump alpha - WRITE(p, "alphabump = %s (indtex%d.%s %s);\n", bpmem.tevind[n].fmt==ITF_8?"":"frac", bpmem.tevind[n].bt, - tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]); + + if( bpmem.tevind[n].fmt == ITF_8 ) { + WRITE(p, "alphabump = indtex%d.%s %s;\n", bpmem.tevind[n].bt, + tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]); + } + else { + // donkopunchstania: really bad way to do this + // cannot always use fract because fract(1.0) is 0.0 when it needs to be 1.0 + // omitting fract seems to work as well + WRITE(p, "if( indtex%d.%s >= 1.0f )\n", bpmem.tevind[n].bt, + tevIndAlphaSel[bpmem.tevind[n].bs]); + WRITE(p, " alphabump = 1.0f;\n"); + WRITE(p, "else\n"); + WRITE(p, " alphabump = fract ( indtex%d.%s %s );\n", bpmem.tevind[n].bt, + tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaScale[bpmem.tevind[n].fmt]); + } + } // bias WRITE(p, "float3 indtevcrd%d = indtex%d;\n", n, bpmem.tevind[n].bt); @@ -757,9 +772,9 @@ void WriteAlphaCompare(char *&p, int num, int comp) case ALPHACMP_ALWAYS: WRITE(p,"(false)"); break; case ALPHACMP_NEVER: WRITE(p,"(true)"); break; case ALPHACMP_LEQUAL: WRITE(p,"(prev.a > %s)",alphaRef[num]); break; - case ALPHACMP_LESS: WRITE(p,"(prev.a >= %s+%f)",alphaRef[num],epsilon*0.5f);break; + case ALPHACMP_LESS: WRITE(p,"(prev.a >= %s - %f)",alphaRef[num],epsilon*0.5f);break; case ALPHACMP_GEQUAL: WRITE(p,"(prev.a < %s)",alphaRef[num]); break; - case ALPHACMP_GREATER: WRITE(p,"(prev.a <= %s - %f)",alphaRef[num],epsilon*0.5f);break; + case ALPHACMP_GREATER: WRITE(p,"(prev.a <= %s + %f)",alphaRef[num],epsilon*0.5f);break; case ALPHACMP_EQUAL: WRITE(p,"(abs(prev.a-%s)>%f)",alphaRef[num],epsilon*2); break; case ALPHACMP_NEQUAL: WRITE(p,"(abs(prev.a-%s)<%f)",alphaRef[num],epsilon*2); break; } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp index 3dcf743b44..b8695c8afc 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp @@ -32,9 +32,10 @@ PixelShaderMngr::PIXELSHADERUID PixelShaderMngr::s_curuid; static int s_nMaxPixelInstructions; static int s_nColorsChanged[2]; // 0 - regular colors, 1 - k colors -static int s_nTexDimsChanged[2], s_nIndTexMtxChanged = 0; //min, max +static int s_nIndTexMtxChanged = 0; static bool s_bAlphaChanged, s_bZBiasChanged, s_bIndTexScaleChanged; static float lastRGBAfull[2][4][4]; +static u8 s_nTexDimsChanged; static u32 lastAlpha = 0; static u32 lastTexDims[8]={0}; static u32 lastZBias = 0; @@ -60,7 +61,7 @@ void PixelShaderMngr::SetPSConstant4fv(int const_number, const float *f) { void PixelShaderMngr::Init() { s_nColorsChanged[0] = s_nColorsChanged[1] = 0; - s_nTexDimsChanged[0] = s_nTexDimsChanged[1] = -1; + s_nTexDimsChanged = 0; s_nIndTexMtxChanged = 15; s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = true; GL_REPORT_ERRORD(); @@ -133,12 +134,23 @@ FRAGMENTSHADER* PixelShaderMngr::GetShader() char *code = GeneratePixelShader(s_texturemask, Renderer::GetZBufferTarget() != 0, Renderer::GetRenderMode() != Renderer::RM_Normal); -// printf("Compiling pixel shader. size = %i\n", strlen(code)); + +#ifdef _DEBUG + if( g_Config.iLog & CONF_SAVESHADERS && code ) { + static int counter = 0; + char szTemp[MAX_PATH]; + sprintf(szTemp, "%s/ps_%04i.txt", g_Config.texDumpPath, counter++); + + SaveData(szTemp, code); + } +#endif + + // printf("Compiling pixel shader. size = %i\n", strlen(code)); if (!code || !CompilePixelShader(newentry.shader, code)) { ERROR_LOG("failed to create pixel shader\n"); return NULL; } - + //Make an entry in the table newentry.frameCount = frameCount; @@ -243,61 +255,30 @@ void PixelShaderMngr::SetConstants(FRAGMENTSHADER& ps) int texmap = bpmem.tevorders[i/2].getTexMap(i&1); maptocoord[texmap] = bpmem.tevorders[i/2].getTexCoord(i&1); newmask |= 1<= 0 ) { - float fdims[4]; - for(int i = s_nTexDimsChanged[0]; i <= s_nTexDimsChanged[1]; ++i) { - if( s_texturemask & (1<= 0 ) { - TCoordInfo& tc = bpmem.texcoords[maptocoord[i]]; - fdims[0] = (float)(lastTexDims[i]&0xffff); - fdims[1] = (float)((lastTexDims[i]>>16)&0xfff); - fdims[2] = (float)(tc.s.scale_minus_1+1)/(float)(lastTexDims[i]&0xffff); - fdims[3] = (float)(tc.t.scale_minus_1+1)/(float)((lastTexDims[i]>>16)&0xfff); - } - else { - fdims[0] = (float)(lastTexDims[i]&0xffff); - fdims[1] = (float)((lastTexDims[i]>>16)&0xfff); - fdims[2] = 1.0f; - fdims[3] = 1.0f; - } - } - else { - if( maptocoord[i] >= 0 ) { - TCoordInfo& tc = bpmem.texcoords[maptocoord[i]]; - fdims[0] = (float)(tc.s.scale_minus_1+1)/(float)(lastTexDims[i]&0xffff); - fdims[1] = (float)(tc.t.scale_minus_1+1)/(float)((lastTexDims[i]>>16)&0xfff); - fdims[2] = 1.0f/(float)(tc.s.scale_minus_1+1); - fdims[3] = 1.0f/(float)(tc.t.scale_minus_1+1); - } - else { - fdims[0] = 1.0f; - fdims[1] = 1.0f; - fdims[2] = 1.0f/(float)(lastTexDims[i]&0xffff); - fdims[3] = 1.0f/(float)((lastTexDims[i]>>16)&0xfff); - } - } - - PRIM_LOG("texdims%d: %f %f %f %f\n", i, fdims[0], fdims[1], fdims[2], fdims[3]); - SetPSConstant4fv(C_TEXDIMS + i, fdims); + if( s_nTexDimsChanged ) { + for(int i = 0; i < 8; ++i) { + if( s_nTexDimsChanged & (1<= 0 ) { + TCoordInfo& tc = bpmem.texcoords[maptocoord[texid]]; + fdims[0] = (float)(lastTexDims[texid]&0xffff); + fdims[1] = (float)((lastTexDims[texid]>>16)&0xfff); + fdims[2] = (float)(tc.s.scale_minus_1+1)/(float)(lastTexDims[texid]&0xffff); + fdims[3] = (float)(tc.t.scale_minus_1+1)/(float)((lastTexDims[texid]>>16)&0xfff); + } + else { + fdims[0] = (float)(lastTexDims[texid]&0xffff); + fdims[1] = (float)((lastTexDims[texid]>>16)&0xfff); + fdims[2] = 1.0f; + fdims[3] = 1.0f; + } + } + else { + if( maptocoord[texid] >= 0 ) { + TCoordInfo& tc = bpmem.texcoords[maptocoord[texid]]; + fdims[0] = (float)(tc.s.scale_minus_1+1)/(float)(lastTexDims[texid]&0xffff); + fdims[1] = (float)(tc.t.scale_minus_1+1)/(float)((lastTexDims[texid]>>16)&0xfff); + fdims[2] = 1.0f/(float)(tc.s.scale_minus_1+1); + fdims[3] = 1.0f/(float)(tc.t.scale_minus_1+1); + } + else { + fdims[0] = 1.0f; + fdims[1] = 1.0f; + fdims[2] = 1.0f/(float)(lastTexDims[texid]&0xffff); + fdims[3] = 1.0f/(float)((lastTexDims[texid]>>16)&0xfff); + } + } + + PRIM_LOG("texdims%d: %f %f %f %f\n", texid, fdims[0], fdims[1], fdims[2], fdims[3]); + SetPSConstant4fv(C_TEXDIMS + texid, fdims); +} + void PixelShaderMngr::SetColorChanged(int type, int num) { int r=bpmem.tevregs[num].low.a, a=bpmem.tevregs[num].low.b; @@ -406,13 +426,7 @@ void PixelShaderMngr::SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wh = width|(height<<16)|(wraps<<28)|(wrapt<<30); if( lastTexDims[texmapid] != wh ) { lastTexDims[texmapid] = wh; - if( s_nTexDimsChanged[0] == -1 ) { - s_nTexDimsChanged[0] = s_nTexDimsChanged[1] = texmapid; - } - else { - if( s_nTexDimsChanged[0] > texmapid ) s_nTexDimsChanged[0] = texmapid; - else if( s_nTexDimsChanged[1] < texmapid ) s_nTexDimsChanged[1] = texmapid; - } + s_nTexDimsChanged |= 1< i ) s_nTexDimsChanged[0] = i; - else if( s_nTexDimsChanged[1] < i ) s_nTexDimsChanged[1] = i; + if( nonpow2tex & (0x10101< texmapid ) s_nTexDimsChanged[0] = texmapid; - else if( s_nTexDimsChanged[1] < texmapid ) s_nTexDimsChanged[1] = texmapid; + // this check was previously implicit, but should it be here? + if( s_nTexDimsChanged ) + s_nTexDimsChanged |= 1<