diff --git a/Source/Core/Core/Src/MemTools.cpp b/Source/Core/Core/Src/MemTools.cpp index 69f97d3792..e9e091a14e 100644 --- a/Source/Core/Core/Src/MemTools.cpp +++ b/Source/Core/Core/Src/MemTools.cpp @@ -233,7 +233,12 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) //Figure out what address was hit DWORD_PTR badAddress = (DWORD_PTR)pPtrs->ExceptionRecord->ExceptionInformation[1]; //TODO: First examine the address, make sure it's within the emulated memory space + if (badAddress < memspaceBottom) { + PanicAlert("Exception handler - access below memory space. %08x%08x", + badAddress >> 32, badAddress); + } u32 emAddress = (u32)(badAddress - memspaceBottom); + //Now we have the emulated address. //_assert_msg_(DYNA_REC,0,"MT : %08x",emAddress); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp index c16c88457d..c17e526afc 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp @@ -347,7 +347,6 @@ namespace Jit64 SetCodePtr(prev_code); } - #define BLR_OP 0x4e800020 void InvalidateCodeRange(u32 address, u32 length) @@ -368,7 +367,6 @@ namespace Jit64 void ClearCache() { - // Is destroying the blocks really necessary? for (int i = 0; i < numBlocks; i++) { @@ -393,8 +391,8 @@ namespace Jit64 ++counter; if (counter == 30) { - counter ++; - counter --; + counter++; + counter--; } // TODO: also mark and remember the instruction address as known HW memory access, for use in later compiles. diff --git a/Source/Dolphin.sln b/Source/Dolphin.sln index 8a11a9a47d..f26c19bddb 100644 --- a/Source/Dolphin.sln +++ b/Source/Dolphin.sln @@ -2,19 +2,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00 # Visual Studio 2005 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Core", "Core\Core\Core.vcproj", "{F0B874CB-4476-4199-9315-8343D05AE684}" ProjectSection(ProjectDependencies) = postProject - {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C} = {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C} - {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} = {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} + {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} + {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C} = {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Plugin_VideoDX9", "Plugins\Plugin_VideoDX9\Plugin_VideoDX9.vcproj", "{636FAD5F-02D1-4E9A-BE67-FB8EA99B9A18}" ProjectSection(ProjectDependencies) = postProject - {9A183B48-ECC2-4121-876A-9B3793686073} = {9A183B48-ECC2-4121-876A-9B3793686073} - {3E03C179-8251-46E4-81F4-466F114BAC63} = {3E03C179-8251-46E4-81F4-466F114BAC63} - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8} = {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8} - {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} = {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} - {F0B874CB-4476-4199-9315-8343D05AE684} = {F0B874CB-4476-4199-9315-8343D05AE684} {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} + {F0B874CB-4476-4199-9315-8343D05AE684} = {F0B874CB-4476-4199-9315-8343D05AE684} + {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} = {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} + {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8} = {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8} + {3E03C179-8251-46E4-81F4-466F114BAC63} = {3E03C179-8251-46E4-81F4-466F114BAC63} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Plugin_PadSimple", "Plugins\Plugin_PadSimple\Plugin_PadSimple.vcproj", "{9A183B48-ECC2-4121-876A-9B3793686073}" @@ -40,21 +39,21 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Plugin_VideoOGL", "Plugins\ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Common", "Core\Common\Common.vcproj", "{C573CAF7-EE6A-458E-8049-16C0BF34C2E9}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wxDolphin", "Core\DolphinWX\DolphinWX.vcproj", "{A72606EF-C5C1-4954-90AD-F0F93A8D97D9}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DolphinWX", "Core\DolphinWX\DolphinWX.vcproj", "{A72606EF-C5C1-4954-90AD-F0F93A8D97D9}" ProjectSection(ProjectDependencies) = postProject - {9AC65CBE-7854-4A86-AA10-D73FF9E5D61F} = {9AC65CBE-7854-4A86-AA10-D73FF9E5D61F} - {636FAD5F-02D1-4E9A-BE67-FB8EA99B9A18} = {636FAD5F-02D1-4E9A-BE67-FB8EA99B9A18} - {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8} = {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8} - {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} = {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} - {48AD7E0A-25B1-4974-A1E3-03F8C438D34F} = {48AD7E0A-25B1-4974-A1E3-03F8C438D34F} - {CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160} = {CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160} - {9A183B48-ECC2-4121-876A-9B3793686073} = {9A183B48-ECC2-4121-876A-9B3793686073} - {3E03C179-8251-46E4-81F4-466F114BAC63} = {3E03C179-8251-46E4-81F4-466F114BAC63} - {0318BA30-EF48-441A-9E10-DC85EFAE39F0} = {0318BA30-EF48-441A-9E10-DC85EFAE39F0} - {4D3CD4C5-412B-4B49-9B1B-A68A2A129C77} = {4D3CD4C5-412B-4B49-9B1B-A68A2A129C77} - {F0B874CB-4476-4199-9315-8343D05AE684} = {F0B874CB-4476-4199-9315-8343D05AE684} - {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C} = {B7F1A9FB-BEA8-416E-9460-AE35A6A5165C} + {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} + {F0B874CB-4476-4199-9315-8343D05AE684} = {F0B874CB-4476-4199-9315-8343D05AE684} + {4D3CD4C5-412B-4B49-9B1B-A68A2A129C77} = {4D3CD4C5-412B-4B49-9B1B-A68A2A129C77} + {0318BA30-EF48-441A-9E10-DC85EFAE39F0} = {0318BA30-EF48-441A-9E10-DC85EFAE39F0} + {3E03C179-8251-46E4-81F4-466F114BAC63} = {3E03C179-8251-46E4-81F4-466F114BAC63} + {9A183B48-ECC2-4121-876A-9B3793686073} = {9A183B48-ECC2-4121-876A-9B3793686073} + {CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160} = {CFDCEE0E-FA45-4F72-9FCC-0B88F5A75160} + {48AD7E0A-25B1-4974-A1E3-03F8C438D34F} = {48AD7E0A-25B1-4974-A1E3-03F8C438D34F} + {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} = {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} + {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8} = {C60D0E7A-ED05-4C67-9EE7-3A6C0D7801C8} + {636FAD5F-02D1-4E9A-BE67-FB8EA99B9A18} = {636FAD5F-02D1-4E9A-BE67-FB8EA99B9A18} + {9AC65CBE-7854-4A86-AA10-D73FF9E5D61F} = {9AC65CBE-7854-4A86-AA10-D73FF9E5D61F} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wxBase28", "..\Externals\wxWidgets\build\msw\wx_base.vcproj", "{48AD7E0A-25B1-4974-A1E3-03F8C438D34F}" @@ -63,11 +62,11 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wxCore28", "..\Externals\wx EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DebuggerWX", "Core\DebuggerWX\DebuggerWX.vcproj", "{4D3CD4C5-412B-4B49-9B1B-A68A2A129C77}" ProjectSection(ProjectDependencies) = postProject - {48AD7E0A-25B1-4974-A1E3-03F8C438D34F} = {48AD7E0A-25B1-4974-A1E3-03F8C438D34F} - {0318BA30-EF48-441A-9E10-DC85EFAE39F0} = {0318BA30-EF48-441A-9E10-DC85EFAE39F0} - {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} = {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} - {F0B874CB-4476-4199-9315-8343D05AE684} = {F0B874CB-4476-4199-9315-8343D05AE684} {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} = {C573CAF7-EE6A-458E-8049-16C0BF34C2E9} + {F0B874CB-4476-4199-9315-8343D05AE684} = {F0B874CB-4476-4199-9315-8343D05AE684} + {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} = {29C2ABC1-ADA5-42CD-A5FC-96022D52A510} + {0318BA30-EF48-441A-9E10-DC85EFAE39F0} = {0318BA30-EF48-441A-9E10-DC85EFAE39F0} + {48AD7E0A-25B1-4974-A1E3-03F8C438D34F} = {48AD7E0A-25B1-4974-A1E3-03F8C438D34F} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Plugin_DSP_NULL", "Plugins\Plugin_DSP_NULL\Plugin_DSP_NULL.vcproj", "{9AC65CBE-7854-4A86-AA10-D73FF9E5D61F}" diff --git a/Source/Dolphin.suo b/Source/Dolphin.suo deleted file mode 100644 index 0dbc6be5bf..0000000000 Binary files a/Source/Dolphin.suo and /dev/null differ diff --git a/Source/Plugins/Plugin_VideoDX9/Src/BPStructs.cpp b/Source/Plugins/Plugin_VideoDX9/Src/BPStructs.cpp index a94ecc600c..7c8d2dbd65 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/BPStructs.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/BPStructs.cpp @@ -18,6 +18,9 @@ BPMemory bpmem; bool textureChanged[8]; #define BPMEM_GENMODE 0x00 +#define BPMEM_IND_MTX 0x06 +#define BPMEM_RAS1_SS0 0x25 // ind tex coord scale 0 +#define BPMEM_RAS1_SS1 0x26 // ind tex coord scale 1 #define BPMEM_ZMODE 0x40 #define BPMEM_BLENDMODE 0x41 #define BPMEM_CONSTANTALPHA 0x42 @@ -138,6 +141,29 @@ void BPWritten(int addr, int changes, int newval) } } break; + case BPMEM_IND_MTX+0: + case BPMEM_IND_MTX+1: + case BPMEM_IND_MTX+2: + case BPMEM_IND_MTX+3: + case BPMEM_IND_MTX+4: + case BPMEM_IND_MTX+5: + case BPMEM_IND_MTX+6: + case BPMEM_IND_MTX+7: + case BPMEM_IND_MTX+8: + if (changes) { + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; + // PixelShaderMngr::SetIndMatrixChanged((addr-BPMEM_IND_MTX)/3); + } + break; + case BPMEM_RAS1_SS0: + case BPMEM_RAS1_SS1: + if (changes) { + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; + // PixelShaderMngr::SetIndTexScaleChanged(); + } + break; case BPMEM_ZMODE: if (changes) @@ -195,63 +221,91 @@ void BPWritten(int addr, int changes, int newval) break; case BPMEM_LINEPTWIDTH: -// glPointSize(1); //bpmem.lineptwidth.pointsize); -// glLineWidth(1); //bpmem.lineptwidth.linesize); + // We can't change line width in D3D. However, we can change point size. TODO + //bpmem.lineptwidth.pointsize); + //bpmem.lineptwidth.linesize); break; - + + case 0x43: + if (changes) { + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; + } + break; + case BPMEM_BLENDMODE: if (changes & 0xFFFF) { CVertexHandler::Flush(); ((u32*)&bpmem)[addr] = newval; if (changes & 1) dev->SetRenderState(D3DRS_ALPHABLENDENABLE,bpmem.blendmode.blendenable); - if (changes & 4) dev->SetRenderState(D3DRS_DITHERENABLE,bpmem.blendmode.dither); + if (changes & 2) ; // Logic op blending. D3D can't do this but can fake some modes. + if (changes & 4) { + // Dithering is pointless. Will make things uglier and will be different from GC. + // dev->SetRenderState(D3DRS_DITHERENABLE,bpmem.blendmode.dither); + } D3DBLEND src = d3dSrcFactors[bpmem.blendmode.srcfactor]; D3DBLEND dst = d3dDestFactors[bpmem.blendmode.dstfactor]; - if (changes & 0x700) dev->SetRenderState(D3DRS_SRCBLEND, src); - if (changes & 0xE0) dev->SetRenderState(D3DRS_DESTBLEND, dst); - if (changes & 0x800) - dev->SetRenderState(D3DRS_BLENDOP,bpmem.blendmode.subtract?D3DBLENDOP_SUBTRACT:D3DBLENDOP_ADD); + if (changes & 0x700) { + dev->SetRenderState(D3DRS_SRCBLEND, src); + } + if (changes & 0xE0) { + if (!bpmem.blendmode.subtract) + dev->SetRenderState(D3DRS_DESTBLEND, dst); + else + dev->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE); + } + if (changes & 0x800) { + if (bpmem.blendmode.subtract) { + dev->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE); + dev->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE); + } else { + dev->SetRenderState(D3DRS_SRCBLEND, src); + dev->SetRenderState(D3DRS_DESTBLEND, dst); + } + dev->SetRenderState(D3DRS_BLENDOP,bpmem.blendmode.subtract?D3DBLENDOP_SUBTRACT:D3DBLENDOP_ADD); + } //if (bpmem.blendmode.logicopenable) // && bpmem.blendmode.logicmode == 4) // MessageBox(0,"LOGIC",0,0); if (changes & 0x18) { + // Color Mask DWORD write = 0; if (bpmem.blendmode.alphaupdate) write = D3DCOLORWRITEENABLE_ALPHA; if (bpmem.blendmode.colorupdate) write |= D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE; - dev->SetRenderState(D3DRS_COLORWRITEENABLE,write); + dev->SetRenderState(D3DRS_COLORWRITEENABLE, write); } } break; case BPMEM_FOGPARAM0: { - u32 fogATemp = bpmem.fog.a<<12; - float fogA = *(float*)(&fogATemp); + // u32 fogATemp = bpmem.fog.a<<12; + // float fogA = *(float*)(&fogATemp); + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; } break; case BPMEM_FOGBEXPONENT: - { - - } - break; - case BPMEM_FOGBMAGNITUDE: { - + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; } break; case BPMEM_FOGPARAM3: //fog settings { - u32 fogCTemp = bpmem.fog.c_proj_fsel.cShifted12 << 12; - float fogC = *(float*)(&fogCTemp); + /// u32 fogCTemp = bpmem.fog.c_proj_fsel.cShifted12 << 12; + // float fogC = *(float*)(&fogCTemp); + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; } break; @@ -272,11 +326,6 @@ void BPWritten(int addr, int changes, int newval) { CVertexHandler::Flush(); ((u32*)&bpmem)[addr] = newval; - int x=bpmem.scissorOffset.x*2-342; - int y=bpmem.scissorOffset.y*2-342; - char temp[256]; - sprintf(temp,"ScissorOffset: %i %i",x,y); - g_VideoInitialize.pLog(temp, FALSE); } break; @@ -304,9 +353,41 @@ void BPWritten(int addr, int changes, int newval) } break; case BPMEM_ZTEX1: + if (changes) { + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; + //PRIM_LOG("ztex bias=0x%x\n", bpmem.ztex1.bias); + //PixelShaderMngr::SetZTextureBias(bpmem.ztex1.bias); + } break; case BPMEM_ZTEX2: + if (changes) { + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; +#ifdef _DEBUG + const char* pzop[] = {"DISABLE", "ADD", "REPLACE", "?"}; + const char* pztype[] = {"Z8", "Z16", "Z24", "?"}; + PRIM_LOG("ztex op=%s, type=%s\n", pzop[bpmem.ztex2.op], pztype[bpmem.ztex2.type]); +#endif + } break; + + case 0xf6: // ksel0 + case 0xf7: // ksel1 + case 0xf8: // ksel2 + case 0xf9: // ksel3 + case 0xfa: // ksel4 + case 0xfb: // ksel5 + case 0xfc: // ksel6 + case 0xfd: // ksel7 + if (changes) + { + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; + // PixelShaderMngr::SetTevKSelChanged(addr-0xf6); + } + break; + default: switch(addr & 0xF8) //texture sampler filter { @@ -412,6 +493,15 @@ void BPWritten(int addr, int changes, int newval) //dev->SetRenderState(D3DRS_WRAP0+stage, D3DWRAPCOORD_0); } break; + case 0xC0: + case 0xD0: + if (changes) + { + CVertexHandler::Flush(); + ((u32*)&bpmem)[addr] = newval; + // PixelShaderMngr::SetTevCombinerChanged((addr&0x1f)/2); + } + break; case 0xE0: if (addr<0xe8) @@ -446,8 +536,6 @@ void BPWritten(int addr, int changes, int newval) } break; case 0x20: - case 0xC0: - case 0xD0: case 0x80: case 0x90: case 0xA0: @@ -510,14 +598,8 @@ void LoadBPReg(u32 value0) DebugLog("SetPEToken + INT 0x%04x", (value0 & 0xFFFF)); break; - case 0x67: - { -// char test[256]; -// sprintf(test, "Setgpmetric: 0x%08x", value0); -// MessageBox(0, test, "Setgpmetric", 0); - } - //Setgpmetric - break; + case 0x67: // set gp metric? + break; case 0x52: { @@ -551,6 +633,7 @@ void LoadBPReg(u32 value0) if (PE_copy.copy_to_xfb == 0) // bpmem.triggerEFBCopy & EFBCOPY_EFBTOTEXTURE) { // EFB to texture + // for some reason it sets bpmem.zcontrol.pixel_format to PIXELFMT_Z24 every time a zbuffer format is given as a dest to GXSetTexCopyDst TextureCache::CopyEFBToRenderTarget(bpmem.copyTexDest<<5, &rc); } else diff --git a/Source/Plugins/Plugin_VideoDX9/Src/BPStructs.h b/Source/Plugins/Plugin_VideoDX9/Src/BPStructs.h index 223e6973f0..68c28be18e 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/BPStructs.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/BPStructs.h @@ -36,6 +36,17 @@ enum Compare COMPARE_ALWAYS }; +#define TEVCMP_R8_GT 8 +#define TEVCMP_R8_EQ 9 +#define TEVCMP_GR16_GT 10 +#define TEVCMP_GR16_EQ 11 +#define TEVCMP_BGR24_GT 12 +#define TEVCMP_BGR24_EQ 13 +#define TEVCMP_RGB8_GT 14 +#define TEVCMP_RGB8_EQ 15 +#define TEVCMP_A8_GT 14 +#define TEVCMP_A8_EQ 15 + //color chan above: // rasterized color selections #define RAS1_CC_0 0x00000000 /* color channel 0 */ @@ -49,6 +60,63 @@ enum Compare #define TEV_Z_TYPE_U16 0x00000001 #define TEV_Z_TYPE_U24 0x00000002 +#define ZTEXTURE_DISABLE 0 +#define ZTEXTURE_ADD 1 +#define ZTEXTURE_REPLACE 2 + + +union IND_MTXA +{ + struct + { + signed ma : 11; + signed mb : 11; + unsigned s0 : 2; // bits 0-1 of scale factor + unsigned rid : 8; + }; + u32 hex; +}; + +union IND_MTXB +{ + struct + { + signed mc : 11; + signed md : 11; + unsigned s1 : 2; // bits 2-3 of scale factor + unsigned rid : 8; + }; + u32 hex; +}; + +union IND_MTXC +{ + struct + { + signed me : 11; + signed mf : 11; + unsigned s2 : 2; // bits 4-5 of scale factor + unsigned rid : 8; + }; + u32 hex; +}; + +struct IND_MTX +{ + IND_MTXA col0; + IND_MTXB col1; + IND_MTXC col2; +}; + +union IND_IMASK +{ + struct + { + unsigned mask : 24; + unsigned rid : 8; + }; + u32 hex; +}; struct TevStageCombiner { @@ -65,7 +133,7 @@ struct TevStageCombiner unsigned op : 1; unsigned clamp : 1; - unsigned scale : 2; + unsigned shift : 2; unsigned outreg : 2; //1,2,3 }; @@ -86,7 +154,7 @@ struct TevStageCombiner unsigned op : 1; unsigned clamp : 1; - unsigned scale : 2; + unsigned shift : 2; unsigned outreg : 2; //1,2,3 }; u32 hex; @@ -96,6 +164,68 @@ struct TevStageCombiner AlphaCombiner alphaC; }; +#define ITF_8 0 +#define ITF_5 1 +#define ITF_4 2 +#define ITF_3 3 + +#define ITB_NONE 0 +#define ITB_S 1 +#define ITB_T 2 +#define ITB_ST 3 +#define ITB_U 4 +#define ITB_SU 5 +#define ITB_TU 6 +#define ITB_STU 7 + +#define ITBA_OFF 0 +#define ITBA_S 1 +#define ITBA_T 2 +#define ITBA_U 3 + +#define ITW_OFF 0 +#define ITW_256 1 +#define ITW_128 2 +#define ITW_64 3 +#define ITW_32 4 +#define ITW_16 5 +#define ITW_0 6 + +// several discoveries: +// GXSetTevIndBumpST(tevstage, indstage, matrixind) +// if( matrix == 2 ) realmat = 6; // 10 +// else if( matrix == 3 ) realmat = 7; // 11 +// else if( matrix == 1 ) realmat = 5; // 9 +// GXSetTevIndirect(tevstage, indstage, 0, 3, realmat, 6, 6, 0, 0, 0) +// GXSetTevIndirect(tevstage+1, indstage, 0, 3, realmat+4, 6, 6, 1, 0, 0) +// GXSetTevIndirect(tevstage+2, indstage, 0, 0, 0, 0, 0, 1, 0, 0) + +union TevStageIndirect +{ + // if mid, sw, tw, and addprev are 0, then no indirect stage is used, mask = 0x17fe00 + struct + { + unsigned bt : 2; // indirect tex stage ID + unsigned fmt : 2; // format: ITF_X + unsigned bias : 3; // ITB_X + unsigned bs : 2; // ITBA_X, indicates which coordinate will become the 'bump alpha' + unsigned mid : 4; // matrix id to multiply offsets with + unsigned sw : 3; // ITW_X, wrapping factor for S of regular coord + unsigned tw : 3; // ITW_X, wrapping factor for T of regular coord + unsigned lb_utclod : 1; // use modified or unmodified texture coordinates for LOD computation + unsigned fb_addprev : 1; // 1 if the texture coordinate results from the previous TEV stage should be added + unsigned pad0 : 3; + unsigned rid : 8; + }; + struct + { + u32 hex : 21; + u32 unused : 11; + }; + + bool IsActive() { return (hex&0x17fe00)!=0; } +}; + union TwoTevStageOrders { struct @@ -121,7 +251,42 @@ union TwoTevStageOrders int getColorChan(int i){return i?colorchan1:colorchan0;} }; +union TEXSCALE +{ + struct + { + unsigned ss0 : 4; // indirect tex stage 0, 2^(-ss0) + unsigned ts0 : 4; // indirect tex stage 0 + unsigned ss1 : 4; // indirect tex stage 1 + unsigned ts1 : 4; // indirect tex stage 1 + unsigned pad : 8; + unsigned rid : 8; + }; + u32 hex; + float getScaleS(int i){return 1.0f/(float)(1<<(i?ss1:ss0));} + float getScaleT(int i){return 1.0f/(float)(1<<(i?ts1:ts0));} +}; + +union RAS1_IREF +{ + struct + { + unsigned bi0 : 3; // indirect tex stage 0 ntexmap + unsigned bc0 : 3; // indirect tex stage 0 ntexcoord + unsigned bi1 : 3; + unsigned bc1 : 3; + unsigned bi2 : 3; + unsigned bc3 : 3; + unsigned bi4 : 3; + unsigned bc4 : 3; + unsigned rid : 8; + }; + u32 hex; + + u32 getTexCoord(int i) { return (hex>>(6*i+3))&3; } + u32 getTexMap(int i) { return (hex>>(6*i))&3; } +}; ////////////////////////////////////////////////////////////////////////// // Texture structs @@ -328,17 +493,27 @@ union FogParam0 }; union FogParam3 { - struct - { - unsigned cShifted12 : 20; - unsigned proj : 1; - unsigned fsel : 3; - }; - u32 hex; + struct + { + unsigned c_mant : 11; + unsigned c_exp : 8; + unsigned c_sign : 1; + unsigned proj : 1; // 0 - perspective, 1 - orthographic + unsigned fsel : 3; // 0 - off, 2 - linear, 4 - exp, 5 - exp2, 6 - backward exp, 7 - backward exp2 + }; + + // amount to subtract from eyespacez after range adjustment + float GetC() { + union { u32 i; float f; } dummy; + dummy.i = ((u32)c_sign << 31) | ((u32)c_exp << 23) | ((u32)c_mant << 12); + return dummy.f; + } + + u32 hex; }; struct FogParams { - u32 a; + FogParam0 a; u32 b_magnitude; u32 b_exponent; FogParam3 c_proj_fsel; @@ -366,6 +541,29 @@ union ConstantAlpha u32 hex; }; +#define PIXELFMT_RGB8_Z24 0 +#define PIXELFMT_RGBA6_Z24 1 +#define PIXELFMT_RGB565_Z16 2 +#define PIXELFMT_Z24 3 +#define PIXELFMT_Y8 4 +#define PIXELFMT_U8 5 +#define PIXELFMT_V8 6 +#define PIXELFMT_YUV420 7 + +union PE_CONTROL +{ + struct + { + unsigned pixel_format : 3; // PIXELFMT_X + unsigned zformat : 3; // 0 - linear, 1 - near, 2 - mid, 3 - far + unsigned zcomploc : 1; // 1: before tex stage + unsigned unused : 17; + unsigned rid : 8; + }; + u32 hex; +}; + + ////////////////////////////////////////////////////////////////////////// // Texture coordinate stuff ////////////////////////////////////////////////////////////////////////// @@ -385,11 +583,6 @@ struct TCoordInfo TCInfo t; }; - -////////////////////////////////////////////////////////////////////////// -// All of BP memory -////////////////////////////////////////////////////////////////////////// - union ColReg { u32 hex; @@ -460,31 +653,48 @@ union UPE_Copy #define EFBCOPY_CLEAR 0x800 #define EFBCOPY_GENERATEMIPS 0x200 + +////////////////////////////////////////////////////////////////////////// +// All of BP memory +////////////////////////////////////////////////////////////////////////// + struct BPMemory { GenMode genMode; - u32 unknown[15]; //0f = flushtexturestate - u32 tevind[16]; + u32 display_copy_filter[4]; //01-04 + u32 unknown; //05 + // indirect matrices (set by GXSetIndTexMtx, selected by TevStageIndirect::mid) + // abc form a 2x3 offset matrix, there's 3 such matrices + // the 3 offset matrices can either be indirect type, S-type, or T-type + // 6bit scale factor s is distributed across IND_MTXA/B/C. + // before using matrices scale by 2^-(s-17) + IND_MTX indmtx[3];//06-0e GXSetIndTexMtx, 2x3 matrices + IND_IMASK imask;//0f + TevStageIndirect tevind[16];//10 GXSetTevIndirect X12Y12 scissorTL; //20 X12Y12 scissorBR; //21 LPSize lineptwidth; //22 line and point width - u32 unknown1[2]; //23-24 - u32 unknown2[3]; //25-27 + u32 sucounter; //23 + u32 rascounter; //24 + TEXSCALE texscale[2]; //25-26 GXSetIndTexCoordScale + RAS1_IREF tevindref; //27 GXSetIndTexOrder TwoTevStageOrders tevorders[8]; //28-2F TCoordInfo texcoords[8]; //0x30 s,t,s,t,s,t,s,t... ZMode zmode; //40 BlendMode blendmode; //41 ConstantAlpha dstalpha; //42 - u32 unknown4; //43 // GXSetZCompLoc, GXPixModeSync + PE_CONTROL zcontrol; //43 GXSetZCompLoc, GXPixModeSync u32 fieldmask; //44 - u32 drawdone; //45 - u32 unknown5; //46 - u32 drawsync1; //47 - u32 drawsync2; //48 + u32 drawdone; //45, bit1=1 if end of list + u32 unknown5; //46 clock? + u32 petoken; //47 + u32 petokenint; //48 X10Y10 copyTexSrcXY; //49 X10Y10 copyTexSrcWH; //4a u32 copyTexDest; //4b// 4b == CopyAddress (GXDispCopy and GXTexCopy use it) - u32 unknown6[2]; //4c, 4d + u32 unknown6; //4c, 4d + u32 copyMipMapStrideChannels; // 4d usually set to 4 when dest is single channel, 8 when dest is 2 channel, 16 when dest is RGBA + // also, doubles whenever mipmap box filter option is set (excent on RGBA). Probably to do with number of bytes to look at when smoothing u32 dispcopyyscale; //4e u32 clearcolorAR; //4f u32 clearcolorGB; //50 @@ -500,13 +710,16 @@ struct BPMemory u32 tlutXferDest; //65 u32 texinvalidate;//66 u32 unknown9; //67 - u32 unknown10[8];//68-6F + u32 fieldmode; //68 + u32 unknown10[7];//69-6F u32 unknown11[16];//70-7F FourTexUnits tex[2]; //80-bf TevStageCombiner combiners[16]; //0xC0-0xDF TevReg tevregs[4]; //0xE0 u32 fogRangeAdj; //0xE8 - u32 unknown15[5]; //0xe9,0xea,0xeb,0xec,0xed + u32 unknown15[3]; //0xe9,0xea,0xeb,0xec,0xed + u32 tev_range_adj_c; //0xec - screenx center for range adjustment, range adjustment enable + u32 tev_range_adj_k; //0xed - specifies range adjustment function = sqrt(x*x+k*k)/k FogParams fog; //0xEE,0xEF,0xF0,0xF1,0xF2 AlphaFunc alphaFunc; //0xF3 ZTex1 ztex1; //0xf4,0xf5 diff --git a/Source/Plugins/Plugin_VideoDX9/Src/D3DShader.cpp b/Source/Plugins/Plugin_VideoDX9/Src/D3DShader.cpp index e71c4d5d2f..ce7056b837 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/D3DShader.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/D3DShader.cpp @@ -1,4 +1,5 @@ #include +#include #include "Globals.h" #include "D3DShader.h" @@ -9,7 +10,7 @@ namespace D3D LPDIRECT3DVERTEXSHADER9 CompileVShader(const char *code, int len) { //try to compile - LPD3DXBUFFER shaderBuffer=0,errorBuffer=0; + LPD3DXBUFFER shaderBuffer = 0, errorBuffer = 0; LPDIRECT3DVERTEXSHADER9 vShader = 0; HRESULT hr = D3DXCompileShader(code,len,0,0,"main","vs_1_1",0,&shaderBuffer,&errorBuffer,0); if (FAILED(hr)) @@ -20,10 +21,13 @@ namespace D3D if (FAILED(hr)) { - //compilation error, damnit + //compilation error + std::string hello = (char*)errorBuffer->GetBufferPointer(); + hello += "\n\n"; + hello += code; if (g_Config.bShowShaderErrors) - MessageBox(0,(char*)errorBuffer->GetBufferPointer(),"VS compilation error",MB_ICONERROR); - vShader=0; + MessageBox(0, hello.c_str(), "Error compiling vertex shader", MB_ICONERROR); + vShader = 0; } else if (SUCCEEDED(hr)) { @@ -72,7 +76,7 @@ namespace D3D LPDIRECT3DPIXELSHADER9 CompilePShader(const char *code, int len) { - LPD3DXBUFFER shaderBuffer=0,errorBuffer=0; + LPD3DXBUFFER shaderBuffer = 0, errorBuffer = 0; LPDIRECT3DPIXELSHADER9 pShader = 0; static char *versions[6] = {"ERROR","ps_1_1","ps_1_4","ps_2_0","ps_3_0","ps_4_0"}; HRESULT hr = D3DXCompileShader(code,len,0,0, @@ -81,10 +85,12 @@ namespace D3D if (FAILED(hr)) { - // We should not be getting these - MessageBox(0,code,(char*)errorBuffer->GetBufferPointer(),MB_ICONERROR); - - pShader=0; + std::string hello = (char*)errorBuffer->GetBufferPointer(); + hello += "\n\n"; + hello += code; + if (g_Config.bShowShaderErrors) + MessageBox(0, hello.c_str(), "Error compiling pixel shader", MB_ICONERROR); + pShader = 0; } else { diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShader.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShader.cpp index 29cd924f8a..9eb66329a9 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShader.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShader.cpp @@ -183,22 +183,26 @@ const char *tevAInputTable[] = "rastemp.a", //RASA, "konsttemp.a", //KONST, (hw1 had quarter) "0.0", //ZERO - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", - "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", +}; +const char *tevAInputTable2[] = +{ + "prev", //APREV, + "c0", //A0, + "c1", //A1, + "c2", //A2, + "textemp", //TEXA, + "rastemp", //RASA, + "konsttemp", //KONST, (hw1 had quarter) + "0.0", //ZERO + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", }; const char *tevRasTable[] = @@ -208,48 +212,49 @@ const char *tevRasTable[] = "ERROR", //2 "ERROR", //3 "ERROR", //4 - "float4(0,1,0,1)", //RAS1_CC_B 0x00000005 /* indirect texture bump alpha */ //green cuz unsupported - "float4(0,1,0,1)", //RAS1_CC_BN 0x00000006 /* ind tex bump alpha, normalized 0-255 *///green cuz unsupported + "alphabump", //RAS1_CC_B 0x00000005 /* indirect texture bump alpha */ //green cuz unsupported + "(alphabump*(255.0f/248.0f))", //RAS1_CC_BN 0x00000006 /* ind tex bump alpha, normalized 0-255 *///green cuz unsupported "float4(0,0,0,0)", //RAS1_CC_Z 0x00000007 /* set color value to zero */ }; -const char *tevCOutputTable[] = -{ - "prev.rgb", - "c0.rgb", - "c1.rgb", - "c2.rgb", -}; -const char *tevAOutputTable[] = -{ - "prev.a", - "c0.a", - "c1.a", - "c2.a", -}; +const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; +const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; +const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; +const char *tevIndAlphaScale[] = {"", "*32","*16","*8"}; +const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias +const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt +const char *tevIndWrapStart[] = {"0", "256", "128", "64", "32", "16", "0.001" }; +const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "8.0f" }; -const char *texFuncs[] = +const char *tevTexFuncs[] = { "tex2D", "tex2Dproj" }; +const char *alphaRef[2] = +{ + "alphaRef.x", + "alphaRef.y" +}; + + //I hope we don't get too many hash collisions :p //all these magic numbers are primes, it should help a bit tevhash GetCurrentTEV() { u32 hash = bpmem.genMode.numindstages + bpmem.genMode.numtevstages*11 + bpmem.genMode.numtexgens*8*17; - for (int i=0; i<(int)bpmem.genMode.numtevstages+1; i++) + for (int i = 0; i < (int)bpmem.genMode.numtevstages+1; i++) { hash = _rotl(hash,3) ^ (bpmem.combiners[i].colorC.hex*13); hash = _rotl(hash,7) ^ ((bpmem.combiners[i].alphaC.hex&0xFFFFFFFC)*3); hash = _rotl(hash,9) ^ texcoords[i].texmtxinfo.projection*451; } - for (int i=0; i<(int)bpmem.genMode.numtevstages/2+1; i++) + for (int i = 0; i < (int)bpmem.genMode.numtevstages/2+1; i++) { hash = _rotl(hash,13) ^ (bpmem.tevorders[i].hex*7); } - for (int i=0; i<8; i++) + for (int i = 0; i < 8; i++) { hash = _rotl(hash,3) ^ bpmem.tevksel[i].swap1; hash = _rotl(hash,3) ^ bpmem.tevksel[i].swap2; @@ -275,7 +280,7 @@ char swapModeTable[4][5]; void BuildSwapModeTable() { //bpmem.tevregs[0]. - for (int i=0; i<4; i++) + for (int i = 0; i < 4; i++) { swapModeTable[i][0]=swapColors[bpmem.tevksel[i*2].swap1]; swapModeTable[i][1]=swapColors[bpmem.tevksel[i*2].swap2]; @@ -302,10 +307,10 @@ LPDIRECT3DPIXELSHADER9 GeneratePixelShader() bpmem.genMode.numtevstages,bpmem.genMode.numtexgens,bpmem.genMode.numindstages,bpmem.genMode.numcolchans); //write kcolor declarations - for (int i=0; i<4; i++) + for (int i = 0; i < 4; i++) WRITE(p,"float4 k%i : register(c%i);\n",i,PS_CONST_KCOLORS+i); - for (int i=0; i<3; i++) + for (int i = 0; i < 3; i++) WRITE(p,"float4 color%i : register(c%i);\n",i,PS_CONST_COLORS+i+1); WRITE(p,"float constalpha : register(c%i);\n",PS_CONST_CONSTALPHA); @@ -330,7 +335,7 @@ LPDIRECT3DPIXELSHADER9 GeneratePixelShader() //WRITE(p, "return 1;}\n"); //return D3D::CompilePShader(text,(int)(p-text)); - for (int i=0; i %s) ? %s : 0)\n", - tevAInputTable[ac.d],tevAInputTable[ac.a], - tevAInputTable[ac.b],tevAInputTable[ac.c]); - else - WRITE(p," %s + (abs(%s - %s)<%f ? %s : 0)\n", - tevAInputTable[ac.d],tevAInputTable[ac.a], - tevAInputTable[ac.b],epsilon,tevAInputTable[ac.c]); - + switch(cmp) { + case TEVCMP_R8_GT: + case TEVCMP_A8_GT: + WRITE(p," %s + ((%s.%s > %s.%s) ? %s : 0)\n", + tevAInputTable[ac.d],tevAInputTable2[ac.a], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable2[ac.b], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable[ac.c]); + break; + case TEVCMP_R8_EQ: + case TEVCMP_A8_EQ: + WRITE(p," %s + (abs(%s.r - %s.r)<%f ? %s : 0)\n", + tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],epsilon,tevAInputTable[ac.c]); + break; + + case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte) + case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r + WRITE(p," %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)\n", + tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b], cmp==TEVCMP_GR16_GT?"16":"24", tevAInputTable[ac.c]); + break; + case TEVCMP_GR16_EQ: + case TEVCMP_BGR24_EQ: + WRITE(p," %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : 0)\n", + tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],cmp==TEVCMP_GR16_GT?"16":"24",epsilon,tevAInputTable[ac.c]); + break; + default: + WRITE(p,"0)\n"); + break; + } } - WRITE(p,");"); - //end of alpha - WRITE(p,"\n"); + WRITE(p, ");"); + if (ac.clamp) + WRITE(p, "%s = clamp(%s, 0.0f, 1.0f);\n", tevAOutputTable[ac.outreg], tevAOutputTable[ac.outreg]); + WRITE(p, "\n"); } -char *alphaRef[2] = -{ - "alphaRef.x", - "alphaRef.y" -}; - void WriteAlphaCompare(char *&p, int num, int comp) { WRITE(p," res%i = ",num); @@ -478,7 +496,7 @@ void WriteAlphaTest(char *&p) //first kill all the simple cases if (op == ALPHAOP_AND && (comp[0] == COMPARE_ALWAYS && comp[1] == COMPARE_ALWAYS)) return; if (op == ALPHAOP_OR && (comp[0] == COMPARE_ALWAYS || comp[1] == COMPARE_ALWAYS)) return; - for (int i=0; i<2; i++) + for (int i = 0; i < 2; i++) { int one = i; int other = 1-i; @@ -495,11 +513,11 @@ void WriteAlphaTest(char *&p) //Ok, didn't get to do the easy way out :P // do the general way - WRITE(p,"float res0,res1;\n"); - WriteAlphaCompare(p,0,bpmem.alphaFunc.comp0); - WriteAlphaCompare(p,1,bpmem.alphaFunc.comp1); - WRITE(p,"res0=max(res0,0);\n"); - WRITE(p,"res1=max(res1,0);\n"); + WRITE(p,"float res0, res1;\n"); + WriteAlphaCompare(p, 0, bpmem.alphaFunc.comp0); + WriteAlphaCompare(p, 1, bpmem.alphaFunc.comp1); + WRITE(p,"res0 = max(res0, 0);\n"); + WRITE(p,"res1 = max(res1, 0);\n"); //probably should use lookup textures for some of these :P switch(bpmem.alphaFunc.logic) { case ALPHAOP_AND: // if both are 0 diff --git a/Source/Plugins/Plugin_VideoDX9/Src/ShaderManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/ShaderManager.cpp index 7f4eec81d0..fdc034eb20 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/ShaderManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/ShaderManager.cpp @@ -37,7 +37,7 @@ void PShaderCache::SetShader() if (iter != pshaders.end()) { - iter->second.frameCount=frameCount; + iter->second.frameCount = frameCount; PSCacheEntry &entry = iter->second; if (!lastShader || entry.shader != lastShader) { @@ -54,7 +54,7 @@ void PShaderCache::SetShader() //Make an entry in the table PSCacheEntry newentry; newentry.shader = shader; - newentry.frameCount=frameCount; + newentry.frameCount = frameCount; pshaders[currentHash] = newentry; } @@ -66,13 +66,13 @@ void PShaderCache::SetShader() void PShaderCache::Cleanup() { - PSCache::iterator iter; - iter = pshaders.begin(); + PSCache::iterator iter; + iter = pshaders.begin(); - while(iter!=pshaders.end()) + while(iter != pshaders.end()) { PSCacheEntry &entry = iter->second; - if (entry.frameCountSetPosNrmIdx(index); } -#define MAKETEX(n) \ -void LOADERDECL TexMtx_ReadDirect_UByte##n(void* _p) \ -{ \ - TVtxAttr* pVtxAttr = (TVtxAttr*)_p; \ - int index = ReadBuffer8(); \ - varray->SetTcIdx(n, index); \ +int s_texmtxread = 0, s_texmtxwrite = 0; +void LOADERDECL TexMtx_ReadDirect_UByte(void* _p) +{ + TVtxAttr* pVtxAttr = (TVtxAttr*)_p; + int index = ReadBuffer8() & 0x3f; + varray->SetTcIdx(s_texmtxread++, index); } - -MAKETEX(0) -MAKETEX(1) -MAKETEX(2) -MAKETEX(3) -MAKETEX(4) -MAKETEX(5) -MAKETEX(6) -MAKETEX(7) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.h b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.h index 2341127338..ffcb48af91 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.h @@ -630,10 +630,6 @@ struct TCoordInfo }; -////////////////////////////////////////////////////////////////////////// -// All of BP memory -////////////////////////////////////////////////////////////////////////// - union ColReg { u32 hex; @@ -701,6 +697,10 @@ union UPE_Copy }; }; +////////////////////////////////////////////////////////////////////////// +// All of BP memory +////////////////////////////////////////////////////////////////////////// + struct BPMemory { GenMode genMode; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp index 79f3860fab..e1d5b990f0 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp @@ -15,1362 +15,1362 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#include "Globals.h" -#include -#include - -#include "VertexShader.h" -#include "PixelShader.h" - -#define I_COLORS "color" -#define I_KCOLORS "k" -#define I_ALPHA "alphaRef" -#define I_TEXDIMS "texdim" -#define I_ZBIAS "czbias" -#define I_INDTEXSCALE "cindscale" -#define I_INDTEXMTX "cindmtx" - -#define C_COLORS 0 -#define C_KCOLORS (C_COLORS+4) -#define C_ALPHA (C_KCOLORS+4) -#define C_TEXDIMS (C_ALPHA+1) -#define C_ZBIAS (C_TEXDIMS+8) -#define C_INDTEXSCALE (C_ZBIAS+2) -#define C_INDTEXMTX (C_INDTEXSCALE+2) -#define C_ENVCONST_END (C_INDTEXMTX+6) - -#define C_COLORMATRIX (C_INDTEXMTX+6) - - -void WriteStage(char *&p, int n); -void WrapNonPow2Tex(char* &p, const char* var, int texmap); -void WriteAlphaCompare(char *&p, int num, int comp); -bool WriteAlphaTest(char *&p); - -PixelShaderMngr::PSCache PixelShaderMngr::pshaders; -FRAGMENTSHADER* PixelShaderMngr::pShaderLast = NULL; -PixelShaderMngr::PIXELSHADERUID PixelShaderMngr::s_curuid; - -static int s_nMaxPixelInstructions; -static int s_nColorsChanged[2]; // 0 - regular colors, 1 - k colors -static int s_nTexDimsChanged[2], s_nIndTexMtxChanged = 0; //min, max -static bool s_bAlphaChanged, s_bZBiasChanged, s_bIndTexScaleChanged; -static float lastRGBAfull[2][4][4] = {0}; -static u32 lastAlpha = 0; -static u32 lastTexDims[8]={0}; -static u32 lastZBias = 0; - -// lower byte describes if a texture is nonpow2 or pow2 -// next byte describes whether the repeat wrap mode is enabled for the s channel -// next byte is for t channel -static u32 s_texturemask = 0; - -static int maptocoord[8]; // indexed by texture map, holds the texcoord associated with the map -static u32 maptocoord_mask=0; - -static GLuint s_ColorMatrixProgram=0; - -void PixelShaderMngr::Init() -{ - s_nColorsChanged[0] = s_nColorsChanged[1] = 0; - s_nTexDimsChanged[0] = s_nTexDimsChanged[1] = -1; - s_nIndTexMtxChanged = 15; - s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = true; - GL_REPORT_ERRORD(); - for(int i = 0; i < 8; ++i) maptocoord[i] = -1; - maptocoord_mask = 0; - memset(lastRGBAfull, 0, sizeof(lastRGBAfull)); - - glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, &s_nMaxPixelInstructions); - - int maxinst, maxattribs; - glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, &maxinst); - glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, &maxattribs); - ERROR_LOG("pixel max_alu=%d, max_inst=%d, max_attrib=%d\n", s_nMaxPixelInstructions, maxinst, maxattribs); - - char pmatrixprog[1024]; - sprintf(pmatrixprog, "!!ARBfp1.0" - "TEMP R0;\n" - "TEMP R1;\n" - "TEX R0, fragment.texcoord[0], texture[0], RECT;\n" - "DP4 R1.w, R0, program.env[%d];\n" - "DP4 R1.z, R0, program.env[%d];\n" - "DP4 R1.x, R0, program.env[%d];\n" - "DP4 R1.y, R0, program.env[%d];\n" - "ADD result.color, R1, program.env[%d];\n" - "END\n", C_COLORMATRIX+3, C_COLORMATRIX+2, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+4); - glGenProgramsARB( 1, &s_ColorMatrixProgram ); - glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, s_ColorMatrixProgram ); - - glProgramStringARB( GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); - - GLenum err=GL_NO_ERROR; - GL_REPORT_ERROR(); - if( err != GL_NO_ERROR ) { - ERROR_LOG("Failed to create color matrix fragment program\n"); - - SAFE_RELEASE_PROG(s_ColorMatrixProgram); - } -} - -void PixelShaderMngr::Shutdown() -{ - SAFE_RELEASE_PROG(s_ColorMatrixProgram); - PSCache::iterator iter = pshaders.begin(); - for (;iter!=pshaders.end();iter++) - iter->second.Destroy(); - pshaders.clear(); -} - -FRAGMENTSHADER* PixelShaderMngr::GetShader() -{ - DVSTARTPROFILE(); - PIXELSHADERUID uid; - GetPixelShaderId(uid); - - PSCache::iterator iter = pshaders.find(uid); - - if (iter != pshaders.end()) { - iter->second.frameCount=frameCount; - PSCacheEntry &entry = iter->second; - if (&entry.shader != pShaderLast) - { - pShaderLast = &entry.shader; - } - return pShaderLast; - } - - PSCacheEntry& newentry = pshaders[uid]; - - if (!GeneratePixelShader(newentry.shader)) { - ERROR_LOG("failed to create pixel shader\n"); - return NULL; - } - - //Make an entry in the table - newentry.frameCount=frameCount; - - pShaderLast = &newentry.shader; - INCSTAT(stats.numPixelShadersCreated); - SETSTAT(stats.numPixelShadersAlive, pshaders.size()); - return pShaderLast; -} - -void PixelShaderMngr::Cleanup() -{ - PSCache::iterator iter = pshaders.begin(); - while(iter != pshaders.end()) { - PSCacheEntry &entry = iter->second; - if (entry.frameCount= 0 ) { - float fdims[4]; - for(int i = s_nTexDimsChanged[0]; i <= s_nTexDimsChanged[1]; ++i) { - if( s_texturemask & (1<= 0 ) { - TCoordInfo& tc = bpmem.texcoords[maptocoord[i]]; - fdims[0] = (float)(lastTexDims[i]&0xffff); - fdims[1] = (float)((lastTexDims[i]>>16)&0xfff); - fdims[2] = (float)(tc.s.scale_minus_1+1)/(float)(lastTexDims[i]&0xffff); - fdims[3] = (float)(tc.t.scale_minus_1+1)/(float)((lastTexDims[i]>>16)&0xfff); - } - else { - fdims[0] = (float)(lastTexDims[i]&0xffff); - fdims[1] = (float)((lastTexDims[i]>>16)&0xfff); - fdims[2] = 1.0f; - fdims[3] = 1.0f; - } - } - else { - if( maptocoord[i] >= 0 ) { - TCoordInfo& tc = bpmem.texcoords[maptocoord[i]]; - fdims[0] = (float)(tc.s.scale_minus_1+1)/(float)(lastTexDims[i]&0xffff); - fdims[1] = (float)(tc.t.scale_minus_1+1)/(float)((lastTexDims[i]>>16)&0xfff); - fdims[2] = 1.0f/(float)(tc.s.scale_minus_1+1); - fdims[3] = 1.0f/(float)(tc.t.scale_minus_1+1); - } - else { - fdims[0] = 1.0f; - fdims[1] = 1.0f; - fdims[2] = 1.0f/(float)(lastTexDims[i]&0xffff); - fdims[3] = 1.0f/(float)((lastTexDims[i]>>16)&0xfff); - } - } - - PRIM_LOG("texdims%d: %f %f %f %f\n", i, fdims[0], fdims[1], fdims[2], fdims[3]); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_TEXDIMS+i, fdims); - } - s_nTexDimsChanged[0] = s_nTexDimsChanged[1] = -1; - } - - if( s_bAlphaChanged ) { - glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); - } - - if( s_bZBiasChanged ) { - u32 bits; - float ffrac = 255.0f/256.0f; - float ftemp[4]; - switch(bpmem.ztex2.type) { - case 0: - bits = 8; - ftemp[0] = ffrac/(256.0f*256.0f); ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; - break; - case 1: - bits = 16; - ftemp[0] = 0; ftemp[1] = ffrac/(256.0f*256.0f); ftemp[2] = ffrac/256.0f; ftemp[3] = ffrac; - break; - case 2: - bits = 24; - ftemp[0] = ffrac/(256.0f*256.0f); ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; - break; - } - //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_ZBIAS, ftemp); - glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, C_ZBIAS+1, 0, 0, 0, (float)( (((int)lastZBias<<8)>>8))/16777216.0f); - } - - // indirect incoming texture scales, update all! - if( s_bIndTexScaleChanged ) { - float f[8]; - - for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { - int srctexmap = bpmem.tevindref.getTexMap(i); - int texcoord = bpmem.tevindref.getTexCoord(i); - TCoordInfo& tc = bpmem.texcoords[texcoord]; - - f[2*i] = bpmem.texscale[i/2].getScaleS(i&1) * (float)(tc.s.scale_minus_1+1) / (float)(lastTexDims[srctexmap]&0xffff); - f[2*i+1] = bpmem.texscale[i/2].getScaleT(i&1) * (float)(tc.t.scale_minus_1+1) / (float)((lastTexDims[srctexmap]>>16)&0xfff); - - PRIM_LOG("tex indscale%d: %f %f\n", i, f[2*i], f[2*i+1]); - } - - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_INDTEXSCALE, f); - - if( bpmem.genMode.numindstages > 2 ) - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_INDTEXSCALE+1, &f[4]); - - s_bIndTexScaleChanged = false; - } - - if( s_nIndTexMtxChanged ) { - for(int i = 0; i < 3; ++i) { - if( s_nIndTexMtxChanged & (1<>16) ) { - lastAlpha = (lastAlpha&~0xff0000)|((alpha.hex&0xff)<<16); - s_bAlphaChanged = true; - } -} - -void PixelShaderMngr::SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt) -{ - u32 wh = width|(height<<16)|(wraps<<28)|(wrapt<<30); - if( lastTexDims[texmapid] != wh ) { - lastTexDims[texmapid] = wh; - if( s_nTexDimsChanged[0] == -1 ) { - s_nTexDimsChanged[0] = s_nTexDimsChanged[1] = texmapid; - } - else { - if( s_nTexDimsChanged[0] > texmapid ) s_nTexDimsChanged[0] = texmapid; - else if( s_nTexDimsChanged[1] < texmapid ) s_nTexDimsChanged[1] = texmapid; - } - } -} - -void PixelShaderMngr::SetZTetureBias(u32 bias) -{ - if( lastZBias != bias ) { - s_bZBiasChanged = true; - lastZBias = bias; - } -} - -void PixelShaderMngr::SetIndTexScaleChanged() -{ - s_bIndTexScaleChanged = true; -} - -void PixelShaderMngr::SetIndMatrixChanged(int matrixidx) -{ - s_nIndTexMtxChanged |= 1 << matrixidx; -} - -void PixelShaderMngr::SetGenModeChanged() -{ -} - -void PixelShaderMngr::SetTevCombinerChanged(int id) -{ -} - -void PixelShaderMngr::SetTevKSelChanged(int id) -{ -} - -void PixelShaderMngr::SetTevOrderChanged(int id) -{ -} - -void PixelShaderMngr::SetTevIndirectChanged(int id) -{ -} - -void PixelShaderMngr::SetZTetureOpChanged() -{ - s_bZBiasChanged = true; -} - -void PixelShaderMngr::SetTexturesUsed(u32 nonpow2tex) -{ - if( s_texturemask != nonpow2tex ) { - u32 mask = s_texturemask ^ nonpow2tex; - for(int i = 0; i < 8; ++i) { - if( mask & (0x10101< i ) s_nTexDimsChanged[0] = i; - else if( s_nTexDimsChanged[1] < i ) s_nTexDimsChanged[1] = i; - } - } - s_texturemask = nonpow2tex; - } -} - -void PixelShaderMngr::SetTexDimsChanged(int texmapid) -{ - if( s_nTexDimsChanged[0] > texmapid ) s_nTexDimsChanged[0] = texmapid; - else if( s_nTexDimsChanged[1] < texmapid ) s_nTexDimsChanged[1] = texmapid; - SetIndTexScaleChanged(); -} - -void PixelShaderMngr::SetColorMatrix(const float* pmatrix, const float* pfConstAdd) -{ - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX, pmatrix); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+1, pmatrix+4); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+2, pmatrix+8); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+3, pmatrix+12); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+4, pfConstAdd); -} - -GLuint PixelShaderMngr::GetColorMatrixProgram() -{ - return s_ColorMatrixProgram; -} - -// old tev->pixelshader notes -// -// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 -// konstant for this stage (alpha, color) is given by bpmem.tevksel -// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current chan color -// according to GXTevColorArg table above -// output is given by .outreg -// tevtemp is set according to swapmodetables and -const float epsilon = 1.0f/255.0f; - -const char *tevKSelTableC[] = // KCSEL -{ - "1.0f,1.0f,1.0f", //1 = 0x00 - "0.875,0.875,0.875",//7_8 = 0x01 - "0.75,0.75,0.75", //3_4 = 0x02 - "0.625,0.625,0.625",//5_8 = 0x03 - "0.5,0.5,0.5", //1_2 = 0x04 - "0.375,0.375,0.375",//3_8 = 0x05 - "0.25,0.25,0.25", //1_4 = 0x06 - "0.125,0.125,0.125",//1_8 = 0x07 - "ERROR", //0x08 - "ERROR", //0x09 - "ERROR", //0x0a - "ERROR", //0x0b - I_KCOLORS"[0].rgb",//K0 = 0x0C - I_KCOLORS"[1].rgb",//K1 = 0x0D - I_KCOLORS"[2].rgb",//K2 = 0x0E - I_KCOLORS"[3].rgb",//K3 = 0x0F - I_KCOLORS"[0].rrr",//K0_R = 0x10 - I_KCOLORS"[1].rrr",//K1_R = 0x11 - I_KCOLORS"[2].rrr",//K2_R = 0x12 - I_KCOLORS"[3].rrr",//K3_R = 0x13 - I_KCOLORS"[0].ggg",//K0_G = 0x14 - I_KCOLORS"[1].ggg",//K1_G = 0x15 - I_KCOLORS"[2].ggg",//K2_G = 0x16 - I_KCOLORS"[3].ggg",//K3_G = 0x17 - I_KCOLORS"[0].bbb",//K0_B = 0x18 - I_KCOLORS"[1].bbb",//K1_B = 0x19 - I_KCOLORS"[2].bbb",//K2_B = 0x1A - I_KCOLORS"[3].bbb",//K3_B = 0x1B - I_KCOLORS"[0].aaa",//K0_A = 0x1C - I_KCOLORS"[1].aaa",//K1_A = 0x1D - I_KCOLORS"[2].aaa",//K2_A = 0x1E - I_KCOLORS"[3].aaa",//K3_A = 0x1F -}; - -const char *tevKSelTableA[] = // KASEL -{ - "1.0f", //1 = 0x00 - "0.875f",//7_8 = 0x01 - "0.75f", //3_4 = 0x02 - "0.625f",//5_8 = 0x03 - "0.5f", //1_2 = 0x04 - "0.375f",//3_8 = 0x05 - "0.25f", //1_4 = 0x06 - "0.125f",//1_8 = 0x07 - "ERROR", //0x08 - "ERROR", //0x09 - "ERROR", //0x0a - "ERROR", //0x0b - "ERROR", //0x0c - "ERROR", //0x0d - "ERROR", //0x0e - "ERROR", //0x0f - I_KCOLORS"[0].r",//K0_R = 0x10 - I_KCOLORS"[1].r",//K1_R = 0x11 - I_KCOLORS"[2].r",//K2_R = 0x12 - I_KCOLORS"[3].r",//K3_R = 0x13 - I_KCOLORS"[0].g",//K0_G = 0x14 - I_KCOLORS"[1].g",//K1_G = 0x15 - I_KCOLORS"[2].g",//K2_G = 0x16 - I_KCOLORS"[3].g",//K3_G = 0x17 - I_KCOLORS"[0].b",//K0_B = 0x18 - I_KCOLORS"[1].b",//K1_B = 0x19 - I_KCOLORS"[2].b",//K2_B = 0x1A - I_KCOLORS"[3].b",//K3_B = 0x1B - I_KCOLORS"[0].a",//K0_A = 0x1C - I_KCOLORS"[1].a",//K1_A = 0x1D - I_KCOLORS"[2].a",//K2_A = 0x1E - I_KCOLORS"[3].a",//K3_A = 0x1F -}; - -const char *tevScaleTable[] = // CS -{ - "1.0f", //SCALE_1 - "2.0f", //SCALE_2 - "4.0f", //SCALE_4 - "0.5f",//DIVIDE_2 -}; - -const char *tevBiasTable[] = // TB -{ - "", //ZERO, - "+0.5f", //ADDHALF, - "-0.5f", //SUBHALF, - "", -}; - -const char *tevOpTable[] = { // TEV - "+", //TEVOP_ADD = 0, - "-", //TEVOP_SUB = 1, -}; - -const char *tevCompOpTable[] = { ">", "==" }; - -#define TEVCMP_R8 0 -#define TEVCMP_GR16 1 -#define TEVCMP_BGR24 2 -#define TEVCMP_RGB8 3 - -const char *tevCInputTable[] = // CC -{ - "prev.rgb", //CPREV, - "prev.aaa", //APREV, - "c0.rgb", //C0, - "c0.aaa", //A0, - "c1.rgb", //C1, - "c1.aaa", //A1, - "c2.rgb", //C2, - "c2.aaa", //A2, - "textemp.rgb", //TEXC, - "textemp.aaa", //TEXA, - "rastemp.rgb", //RASC, - "rastemp.aaa", //RASA, - "float3(1.0f,1.0f,1.0f)", //ONE, - "float3(.5f,.5f,.5f)", //HALF, - "konsttemp.rgb", //KONST, - "float3(0.0f,0.0f,0.0f)", //ZERO - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", -}; -const char *tevCInputTable2[] = // CC -{ - "prev", //CPREV, - "(prev.aaa)", //APREV, - "c0", //C0, - "(c0.aaa)", //A0, - "c1", //C1, - "(c1.aaa)", //A1, - "c2", //C2, - "(c2.aaa)", //A2, - "textemp", //TEXC, - "(textemp.aaa)", //TEXA, - "rastemp", //RASC, - "(rastemp.aaa)", //RASA, - "float3(1.0f,1.0f,1.0f)", //ONE, - "float3(.5f,.5f,.5f)", //HALF, - "konsttemp", //"konsttemp.rgb", //KONST, - "float3(0.0f,0.0f,0.0f)", //ZERO - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", -}; - -const char *tevAInputTable[] = // CA -{ - "prev.a", //APREV, - "c0.a", //A0, - "c1.a", //A1, - "c2.a", //A2, - "textemp.a", //TEXA, - "rastemp.a", //RASA, - "konsttemp.a", //KONST - "0.0", //ZERO - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", -}; - -const char *tevAInputTable2[] = // CA -{ - "prev", //APREV, - "c0", //A0, - "c1", //A1, - "c2", //A2, - "textemp", //TEXA, - "rastemp", //RASA, - "konsttemp", //KONST, (hw1 had quarter) - "float4(0,0,0,0)", //ZERO - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", "PADERROR", - "PADERROR", "PADERROR", "PADERROR", -}; - -const char *tevRasTable[] = -{ - "colors[0]", - "colors[1]", - "ERROR", //2 - "ERROR", //3 - "ERROR", //4 - "alphabump", // use bump alpha - "(alphabump*(255.0f/248.0f))", //normalized - "float4(0,0,0,0)", // zero -}; - -const char* tevTexFunc[] = { "tex2D", "texRECT" }; - -const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; -const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; -const char* tevIndAlphaSel[] = {"", "x", "y", "z"}; -const char* tevIndAlphaScale[] = {"", "*32","*16","*8"}; -const char* tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias -const char* tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexecd by fmt -const char* tevIndWrapStart[] = {"0", "256", "128", "64", "32", "16", "0.001" }; -const char* tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "8.0f" }; - -void PixelShaderMngr::GetPixelShaderId(PixelShaderMngr::PIXELSHADERUID& uid) -{ - u32 projtexcoords = 0; - for (u32 i = 0; i < bpmem.genMode.numtevstages+1; i++) { - if( bpmem.tevorders[i/2].getEnable(i&1) ) { - int texcoord = bpmem.tevorders[i/2].getTexCoord(i&1); - if( xfregs.texcoords[texcoord].texmtxinfo.projection ) - projtexcoords |= 1<>16)&0xff)<<12)|(projtexcoords<<20)|((u32)bpmem.ztex2.op<<28) - |(zbufrender<<30)|(zBufRenderToCol0<<31); - - s_curuid.values[0] = (s_curuid.values[0]&~0x0ff00000)|(projtexcoords<<20); - - // swap table - for(int i = 0; i < 8; i += 2) - ((u8*)&uid.values[1])[i/2] = (bpmem.tevksel[i].hex&0xf)|((bpmem.tevksel[i+1].hex&0xf)<<4); - - uid.values[2] = s_texturemask; - int hdr = 3; - - u32* pcurvalue = &uid.values[hdr]; - for(u32 i = 0; i < bpmem.genMode.numtevstages+1; ++i) { - TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC; - TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[i].alphaC; - - u32 val0 = cc.hex&0xffffff; - u32 val1 = ac.hex&0xffffff; - val0 |= bpmem.tevksel[i/2].getKC(i&1)<<24; - val1 |= bpmem.tevksel[i/2].getKA(i&1)<<24; - - pcurvalue[0] = val0; - pcurvalue[1] = val1; - pcurvalue+=2; - } - - for(u32 i = 0; i < (bpmem.genMode.numtevstages+1)/2; ++i) { - u32 val0, val1; - if( bpmem.tevorders[i].hex&0x40 ) val0 = bpmem.tevorders[i].hex&0x3ff; - else val0 = bpmem.tevorders[i].hex&0x380; - if( bpmem.tevorders[i].hex&0x40000 ) val1 = (bpmem.tevorders[i].hex&0x3ff000)>>12; - else val1 = (bpmem.tevorders[i].hex&0x380000)>>12; - - switch(i % 3) { - case 0: pcurvalue[0] = val0|(val1<<10); break; - case 1: pcurvalue[0] |= val0<<20; pcurvalue[1] = val1; pcurvalue++; break; - case 2: pcurvalue[1] |= (val0<<10)|(val1<<20); pcurvalue++; break; - } - } - - if( (bpmem.genMode.numtevstages+1)&1 ) { // odd - u32 val0; - if( bpmem.tevorders[bpmem.genMode.numtevstages/2].hex&0x40 ) val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex&0x3ff; - else val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex&0x380; - - switch(bpmem.genMode.numtevstages % 3) { - case 0: pcurvalue[0] = val0; break; - case 1: pcurvalue[0] |= val0<<20; break; - case 2: pcurvalue[1] |= (val0<<10); pcurvalue++; break; - } - } - - if( (bpmem.genMode.numtevstages % 3) != 2 ) - ++pcurvalue; - - uid.tevstages = (u32)(pcurvalue-&uid.values[0]-hdr); - - for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { - u32 val = bpmem.tevind[i].hex&0x1fffff; // 21 bits - switch(i%3) { - case 0: pcurvalue[0] = val; break; - case 1: pcurvalue[0] |= val<<21; pcurvalue[1] = val>>11; ++pcurvalue; break; - case 2: pcurvalue[0] |= val<<10; ++pcurvalue; break; - } - } - - uid.indstages = (u32)(pcurvalue-&uid.values[0]-2-uid.tevstages); -} - -#define WRITE p+=sprintf - -const char *swapColors = "rgba"; -char swapModeTable[4][5]; - -void BuildSwapModeTable() -{ - //bpmem.tevregs[0]. - for (int i=0; i<4; i++) - { - swapModeTable[i][0]=swapColors[bpmem.tevksel[i*2].swap1]; - swapModeTable[i][1]=swapColors[bpmem.tevksel[i*2].swap2]; - swapModeTable[i][2]=swapColors[bpmem.tevksel[i*2+1].swap1]; - swapModeTable[i][3]=swapColors[bpmem.tevksel[i*2+1].swap2]; - swapModeTable[i][4]=0; - } -} - -static char text[16384]; -bool PixelShaderMngr::GeneratePixelShader(FRAGMENTSHADER& ps) -{ - DVSTARTPROFILE(); - - BuildSwapModeTable(); - int numStages = bpmem.genMode.numtevstages + 1; - int numTexgen = bpmem.genMode.numtexgens; - - char *p = text; - WRITE(p,"//Pixel Shader for TEV stages\n"); - WRITE(p,"//%i TEV stages, %i texgens, %i IND stages\n", - numStages,numTexgen,bpmem.genMode.numindstages); - - bool bRenderZ = Renderer::GetZBufferTarget() != 0 && bpmem.zmode.updateenable; - bool bOutputZ = bpmem.ztex2.op != ZTEXTURE_DISABLE; - bool bInputZ = bpmem.ztex2.op==ZTEXTURE_ADD || bRenderZ; - - bool bRenderZToCol0 = Renderer::GetRenderMode()!=Renderer::RM_Normal; // output z and alpha to color0 - assert( !bRenderZToCol0 || bRenderZ ); - - int ztexcoord = -1; - if( bInputZ ) - ztexcoord = numTexgen == 0 ? 0 : numTexgen-1; - - int nIndirectStagesUsed = 0; - if( bpmem.genMode.numindstages > 0 ) { - for(int i = 0; i < numStages; ++i) { - if( bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages ) { - nIndirectStagesUsed |= 1< %s.%s) ? %s : float3(0.0f,0.0f,0.0f));\n", - tevCInputTable[cc.d],tevCInputTable2[cc.a], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable2[cc.b], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable[cc.c]); - break; - case TEVCMP_R8_EQ: - case TEVCMP_RGB8_EQ: - WRITE(p," %s + (abs(%s.r - %s.r)<%f ? %s : float3(0.0f,0.0f,0.0f));\n", - tevCInputTable[cc.d],tevCInputTable2[cc.a], tevCInputTable2[cc.b],epsilon,tevCInputTable[cc.c]); - break; - - case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte) - case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r - WRITE(p," %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : float3(0.0f,0.0f,0.0f));\n", - tevCInputTable[cc.d],tevCInputTable2[cc.a], tevCInputTable2[cc.b], cmp==TEVCMP_GR16_GT?"16":"24", tevCInputTable[cc.c]); - break; - case TEVCMP_GR16_EQ: - case TEVCMP_BGR24_EQ: - WRITE(p," %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : float3(0.0f,0.0f,0.0f));\n", - tevCInputTable[cc.d],tevCInputTable2[cc.a], tevCInputTable2[cc.b],cmp==TEVCMP_GR16_GT?"16":"24",epsilon,tevCInputTable[cc.c]); - break; - default: - WRITE(p,"float3(0.0f,0.0f,0.0f);\n"); - break; - } - } - - if( cc.clamp ) - WRITE(p, "%s = clamp(%s,0.0f,1.0f);\n", tevCOutputTable[cc.dest],tevCOutputTable[cc.dest]); - - // combine the alpha channel - WRITE(p,"%s= ", tevAOutputTable[ac.dest]); - - if (ac.bias != 3) { // if not compare - //normal alpha combiner goes here - WRITE(p," %s*(%s%s",tevScaleTable[ac.shift],tevAInputTable[ac.d],tevOpTable[ac.op]); - WRITE(p,"lerp(%s,%s,%s) %s)\n", - tevAInputTable[ac.a],tevAInputTable[ac.b], - tevAInputTable[ac.c],tevBiasTable[ac.bias]); - } - else { - //compare alpha combiner goes here - int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here - switch(cmp) { - case TEVCMP_R8_GT: - case TEVCMP_A8_GT: - WRITE(p," %s + ((%s.%s > %s.%s) ? %s : 0)\n", - tevAInputTable[ac.d],tevAInputTable2[ac.a], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable2[ac.b], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable[ac.c]); - break; - case TEVCMP_R8_EQ: - case TEVCMP_A8_EQ: - WRITE(p," %s + (abs(%s.r - %s.r)<%f ? %s : 0)\n", - tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],epsilon,tevAInputTable[ac.c]); - break; - - case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte) - case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r - WRITE(p," %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)\n", - tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b], cmp==TEVCMP_GR16_GT?"16":"24", tevAInputTable[ac.c]); - break; - case TEVCMP_GR16_EQ: - case TEVCMP_BGR24_EQ: - WRITE(p," %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : 0)\n", - tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],cmp==TEVCMP_GR16_GT?"16":"24",epsilon,tevAInputTable[ac.c]); - break; - default: - WRITE(p,"0)\n"); - break; - } - } - - WRITE(p,";\n"); - - if( ac.clamp ) - WRITE(p, "%s = clamp(%s,0.0f,1.0f);\n", tevAOutputTable[ac.dest],tevAOutputTable[ac.dest]); - WRITE(p, "\n"); -} - -void WrapNonPow2Tex(char* &p, const char* var, int texmap) -{ - _assert_(s_texturemask & (1< %s)",alphaRef[num]); break; - case ALPHACMP_LESS: WRITE(p,"(prev.a >= %s+%f)",alphaRef[num],epsilon*0.5f);break; - case ALPHACMP_GEQUAL: WRITE(p,"(prev.a < %s)",alphaRef[num]); break; - case ALPHACMP_GREATER: WRITE(p,"(prev.a <= %s - %f)",alphaRef[num],epsilon*0.5f);break; - case ALPHACMP_EQUAL: WRITE(p,"(abs(prev.a-%s)>%f)",alphaRef[num],epsilon*2); break; - case ALPHACMP_NEQUAL: WRITE(p,"(abs(prev.a-%s)<%f)",alphaRef[num],epsilon*2); break; - } -} - -bool WriteAlphaTest(char *&p) -{ - u32 op = bpmem.alphaFunc.logic; - u32 comp[2] = {bpmem.alphaFunc.comp0,bpmem.alphaFunc.comp1}; - - //first kill all the simple cases - switch(op) { - case 0: // and - if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true; - if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) { - WRITE(p, "discard;\n"); - return false; - } - break; - case 1: // or - if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true; - if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) { - WRITE(p, "discard;\n"); - return false; - } - break; - case 2: // xor - if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS) ) return true; - if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) ) { - WRITE(p, "discard;\n"); - return false; - } - break; - case 3: // xnor - if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS) ) { - WRITE(p, "discard;\n"); - return false; - } - if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) ) - return true; - break; - } - - bool bFirst = false; - WRITE(p, "discard( "); - WriteAlphaCompare(p, 0, bpmem.alphaFunc.comp0); - - // negated because testing the inverse condition - switch(bpmem.alphaFunc.logic) { - case 0: WRITE(p, " || "); break; // and - case 1: WRITE(p, " && "); break; // or - case 2: WRITE(p, " == "); break; // xor - case 3: WRITE(p, " != "); break; // xnor - } - WriteAlphaCompare(p, 1, bpmem.alphaFunc.comp1); - WRITE(p, ");\n"); - return true; -} +#include "Globals.h" +#include +#include + +#include "VertexShader.h" +#include "PixelShader.h" + +#define I_COLORS "color" +#define I_KCOLORS "k" +#define I_ALPHA "alphaRef" +#define I_TEXDIMS "texdim" +#define I_ZBIAS "czbias" +#define I_INDTEXSCALE "cindscale" +#define I_INDTEXMTX "cindmtx" + +#define C_COLORS 0 +#define C_KCOLORS (C_COLORS+4) +#define C_ALPHA (C_KCOLORS+4) +#define C_TEXDIMS (C_ALPHA+1) +#define C_ZBIAS (C_TEXDIMS+8) +#define C_INDTEXSCALE (C_ZBIAS+2) +#define C_INDTEXMTX (C_INDTEXSCALE+2) +#define C_ENVCONST_END (C_INDTEXMTX+6) + +#define C_COLORMATRIX (C_INDTEXMTX+6) + + +void WriteStage(char *&p, int n); +void WrapNonPow2Tex(char* &p, const char* var, int texmap); +void WriteAlphaCompare(char *&p, int num, int comp); +bool WriteAlphaTest(char *&p); + +PixelShaderMngr::PSCache PixelShaderMngr::pshaders; +FRAGMENTSHADER* PixelShaderMngr::pShaderLast = NULL; +PixelShaderMngr::PIXELSHADERUID PixelShaderMngr::s_curuid; + +static int s_nMaxPixelInstructions; +static int s_nColorsChanged[2]; // 0 - regular colors, 1 - k colors +static int s_nTexDimsChanged[2], s_nIndTexMtxChanged = 0; //min, max +static bool s_bAlphaChanged, s_bZBiasChanged, s_bIndTexScaleChanged; +static float lastRGBAfull[2][4][4] = {0}; +static u32 lastAlpha = 0; +static u32 lastTexDims[8]={0}; +static u32 lastZBias = 0; + +// lower byte describes if a texture is nonpow2 or pow2 +// next byte describes whether the repeat wrap mode is enabled for the s channel +// next byte is for t channel +static u32 s_texturemask = 0; + +static int maptocoord[8]; // indexed by texture map, holds the texcoord associated with the map +static u32 maptocoord_mask=0; + +static GLuint s_ColorMatrixProgram=0; + +void PixelShaderMngr::Init() +{ + s_nColorsChanged[0] = s_nColorsChanged[1] = 0; + s_nTexDimsChanged[0] = s_nTexDimsChanged[1] = -1; + s_nIndTexMtxChanged = 15; + s_bAlphaChanged = s_bZBiasChanged = s_bIndTexScaleChanged = true; + GL_REPORT_ERRORD(); + for(int i = 0; i < 8; ++i) maptocoord[i] = -1; + maptocoord_mask = 0; + memset(lastRGBAfull, 0, sizeof(lastRGBAfull)); + + glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, &s_nMaxPixelInstructions); + + int maxinst, maxattribs; + glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, &maxinst); + glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, &maxattribs); + ERROR_LOG("pixel max_alu=%d, max_inst=%d, max_attrib=%d\n", s_nMaxPixelInstructions, maxinst, maxattribs); + + char pmatrixprog[1024]; + sprintf(pmatrixprog, "!!ARBfp1.0" + "TEMP R0;\n" + "TEMP R1;\n" + "TEX R0, fragment.texcoord[0], texture[0], RECT;\n" + "DP4 R1.w, R0, program.env[%d];\n" + "DP4 R1.z, R0, program.env[%d];\n" + "DP4 R1.x, R0, program.env[%d];\n" + "DP4 R1.y, R0, program.env[%d];\n" + "ADD result.color, R1, program.env[%d];\n" + "END\n", C_COLORMATRIX+3, C_COLORMATRIX+2, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+4); + glGenProgramsARB( 1, &s_ColorMatrixProgram ); + glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, s_ColorMatrixProgram ); + + glProgramStringARB( GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); + + GLenum err=GL_NO_ERROR; + GL_REPORT_ERROR(); + if( err != GL_NO_ERROR ) { + ERROR_LOG("Failed to create color matrix fragment program\n"); + + SAFE_RELEASE_PROG(s_ColorMatrixProgram); + } +} + +void PixelShaderMngr::Shutdown() +{ + SAFE_RELEASE_PROG(s_ColorMatrixProgram); + PSCache::iterator iter = pshaders.begin(); + for (;iter!=pshaders.end();iter++) + iter->second.Destroy(); + pshaders.clear(); +} + +FRAGMENTSHADER* PixelShaderMngr::GetShader() +{ + DVSTARTPROFILE(); + PIXELSHADERUID uid; + GetPixelShaderId(uid); + + PSCache::iterator iter = pshaders.find(uid); + + if (iter != pshaders.end()) { + iter->second.frameCount=frameCount; + PSCacheEntry &entry = iter->second; + if (&entry.shader != pShaderLast) + { + pShaderLast = &entry.shader; + } + return pShaderLast; + } + + PSCacheEntry& newentry = pshaders[uid]; + + if (!GeneratePixelShader(newentry.shader)) { + ERROR_LOG("failed to create pixel shader\n"); + return NULL; + } + + //Make an entry in the table + newentry.frameCount=frameCount; + + pShaderLast = &newentry.shader; + INCSTAT(stats.numPixelShadersCreated); + SETSTAT(stats.numPixelShadersAlive, pshaders.size()); + return pShaderLast; +} + +void PixelShaderMngr::Cleanup() +{ + PSCache::iterator iter = pshaders.begin(); + while(iter != pshaders.end()) { + PSCacheEntry &entry = iter->second; + if (entry.frameCount= 0 ) { + float fdims[4]; + for(int i = s_nTexDimsChanged[0]; i <= s_nTexDimsChanged[1]; ++i) { + if( s_texturemask & (1<= 0 ) { + TCoordInfo& tc = bpmem.texcoords[maptocoord[i]]; + fdims[0] = (float)(lastTexDims[i]&0xffff); + fdims[1] = (float)((lastTexDims[i]>>16)&0xfff); + fdims[2] = (float)(tc.s.scale_minus_1+1)/(float)(lastTexDims[i]&0xffff); + fdims[3] = (float)(tc.t.scale_minus_1+1)/(float)((lastTexDims[i]>>16)&0xfff); + } + else { + fdims[0] = (float)(lastTexDims[i]&0xffff); + fdims[1] = (float)((lastTexDims[i]>>16)&0xfff); + fdims[2] = 1.0f; + fdims[3] = 1.0f; + } + } + else { + if( maptocoord[i] >= 0 ) { + TCoordInfo& tc = bpmem.texcoords[maptocoord[i]]; + fdims[0] = (float)(tc.s.scale_minus_1+1)/(float)(lastTexDims[i]&0xffff); + fdims[1] = (float)(tc.t.scale_minus_1+1)/(float)((lastTexDims[i]>>16)&0xfff); + fdims[2] = 1.0f/(float)(tc.s.scale_minus_1+1); + fdims[3] = 1.0f/(float)(tc.t.scale_minus_1+1); + } + else { + fdims[0] = 1.0f; + fdims[1] = 1.0f; + fdims[2] = 1.0f/(float)(lastTexDims[i]&0xffff); + fdims[3] = 1.0f/(float)((lastTexDims[i]>>16)&0xfff); + } + } + + PRIM_LOG("texdims%d: %f %f %f %f\n", i, fdims[0], fdims[1], fdims[2], fdims[3]); + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_TEXDIMS+i, fdims); + } + s_nTexDimsChanged[0] = s_nTexDimsChanged[1] = -1; + } + + if( s_bAlphaChanged ) { + glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); + } + + if( s_bZBiasChanged ) { + u32 bits; + float ffrac = 255.0f/256.0f; + float ftemp[4]; + switch(bpmem.ztex2.type) { + case 0: + bits = 8; + ftemp[0] = ffrac/(256.0f*256.0f); ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; + break; + case 1: + bits = 16; + ftemp[0] = 0; ftemp[1] = ffrac/(256.0f*256.0f); ftemp[2] = ffrac/256.0f; ftemp[3] = ffrac; + break; + case 2: + bits = 24; + ftemp[0] = ffrac/(256.0f*256.0f); ftemp[1] = ffrac/256.0f; ftemp[2] = ffrac; ftemp[3] = 0; + break; + } + //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_ZBIAS, ftemp); + glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, C_ZBIAS+1, 0, 0, 0, (float)( (((int)lastZBias<<8)>>8))/16777216.0f); + } + + // indirect incoming texture scales, update all! + if( s_bIndTexScaleChanged ) { + float f[8]; + + for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { + int srctexmap = bpmem.tevindref.getTexMap(i); + int texcoord = bpmem.tevindref.getTexCoord(i); + TCoordInfo& tc = bpmem.texcoords[texcoord]; + + f[2*i] = bpmem.texscale[i/2].getScaleS(i&1) * (float)(tc.s.scale_minus_1+1) / (float)(lastTexDims[srctexmap]&0xffff); + f[2*i+1] = bpmem.texscale[i/2].getScaleT(i&1) * (float)(tc.t.scale_minus_1+1) / (float)((lastTexDims[srctexmap]>>16)&0xfff); + + PRIM_LOG("tex indscale%d: %f %f\n", i, f[2*i], f[2*i+1]); + } + + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_INDTEXSCALE, f); + + if( bpmem.genMode.numindstages > 2 ) + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_INDTEXSCALE+1, &f[4]); + + s_bIndTexScaleChanged = false; + } + + if( s_nIndTexMtxChanged ) { + for(int i = 0; i < 3; ++i) { + if( s_nIndTexMtxChanged & (1<>16) ) { + lastAlpha = (lastAlpha&~0xff0000)|((alpha.hex&0xff)<<16); + s_bAlphaChanged = true; + } +} + +void PixelShaderMngr::SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt) +{ + u32 wh = width|(height<<16)|(wraps<<28)|(wrapt<<30); + if( lastTexDims[texmapid] != wh ) { + lastTexDims[texmapid] = wh; + if( s_nTexDimsChanged[0] == -1 ) { + s_nTexDimsChanged[0] = s_nTexDimsChanged[1] = texmapid; + } + else { + if( s_nTexDimsChanged[0] > texmapid ) s_nTexDimsChanged[0] = texmapid; + else if( s_nTexDimsChanged[1] < texmapid ) s_nTexDimsChanged[1] = texmapid; + } + } +} + +void PixelShaderMngr::SetZTetureBias(u32 bias) +{ + if( lastZBias != bias ) { + s_bZBiasChanged = true; + lastZBias = bias; + } +} + +void PixelShaderMngr::SetIndTexScaleChanged() +{ + s_bIndTexScaleChanged = true; +} + +void PixelShaderMngr::SetIndMatrixChanged(int matrixidx) +{ + s_nIndTexMtxChanged |= 1 << matrixidx; +} + +void PixelShaderMngr::SetGenModeChanged() +{ +} + +void PixelShaderMngr::SetTevCombinerChanged(int id) +{ +} + +void PixelShaderMngr::SetTevKSelChanged(int id) +{ +} + +void PixelShaderMngr::SetTevOrderChanged(int id) +{ +} + +void PixelShaderMngr::SetTevIndirectChanged(int id) +{ +} + +void PixelShaderMngr::SetZTetureOpChanged() +{ + s_bZBiasChanged = true; +} + +void PixelShaderMngr::SetTexturesUsed(u32 nonpow2tex) +{ + if( s_texturemask != nonpow2tex ) { + u32 mask = s_texturemask ^ nonpow2tex; + for(int i = 0; i < 8; ++i) { + if( mask & (0x10101< i ) s_nTexDimsChanged[0] = i; + else if( s_nTexDimsChanged[1] < i ) s_nTexDimsChanged[1] = i; + } + } + s_texturemask = nonpow2tex; + } +} + +void PixelShaderMngr::SetTexDimsChanged(int texmapid) +{ + if( s_nTexDimsChanged[0] > texmapid ) s_nTexDimsChanged[0] = texmapid; + else if( s_nTexDimsChanged[1] < texmapid ) s_nTexDimsChanged[1] = texmapid; + SetIndTexScaleChanged(); +} + +void PixelShaderMngr::SetColorMatrix(const float* pmatrix, const float* pfConstAdd) +{ + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX, pmatrix); + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+1, pmatrix+4); + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+2, pmatrix+8); + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+3, pmatrix+12); + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+4, pfConstAdd); +} + +GLuint PixelShaderMngr::GetColorMatrixProgram() +{ + return s_ColorMatrixProgram; +} + +// old tev->pixelshader notes +// +// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 +// konstant for this stage (alpha, color) is given by bpmem.tevksel +// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current chan color +// according to GXTevColorArg table above +// output is given by .outreg +// tevtemp is set according to swapmodetables and +const float epsilon = 1.0f/255.0f; + +const char *tevKSelTableC[] = // KCSEL +{ + "1.0f,1.0f,1.0f", //1 = 0x00 + "0.875,0.875,0.875",//7_8 = 0x01 + "0.75,0.75,0.75", //3_4 = 0x02 + "0.625,0.625,0.625",//5_8 = 0x03 + "0.5,0.5,0.5", //1_2 = 0x04 + "0.375,0.375,0.375",//3_8 = 0x05 + "0.25,0.25,0.25", //1_4 = 0x06 + "0.125,0.125,0.125",//1_8 = 0x07 + "ERROR", //0x08 + "ERROR", //0x09 + "ERROR", //0x0a + "ERROR", //0x0b + I_KCOLORS"[0].rgb",//K0 = 0x0C + I_KCOLORS"[1].rgb",//K1 = 0x0D + I_KCOLORS"[2].rgb",//K2 = 0x0E + I_KCOLORS"[3].rgb",//K3 = 0x0F + I_KCOLORS"[0].rrr",//K0_R = 0x10 + I_KCOLORS"[1].rrr",//K1_R = 0x11 + I_KCOLORS"[2].rrr",//K2_R = 0x12 + I_KCOLORS"[3].rrr",//K3_R = 0x13 + I_KCOLORS"[0].ggg",//K0_G = 0x14 + I_KCOLORS"[1].ggg",//K1_G = 0x15 + I_KCOLORS"[2].ggg",//K2_G = 0x16 + I_KCOLORS"[3].ggg",//K3_G = 0x17 + I_KCOLORS"[0].bbb",//K0_B = 0x18 + I_KCOLORS"[1].bbb",//K1_B = 0x19 + I_KCOLORS"[2].bbb",//K2_B = 0x1A + I_KCOLORS"[3].bbb",//K3_B = 0x1B + I_KCOLORS"[0].aaa",//K0_A = 0x1C + I_KCOLORS"[1].aaa",//K1_A = 0x1D + I_KCOLORS"[2].aaa",//K2_A = 0x1E + I_KCOLORS"[3].aaa",//K3_A = 0x1F +}; + +const char *tevKSelTableA[] = // KASEL +{ + "1.0f", //1 = 0x00 + "0.875f",//7_8 = 0x01 + "0.75f", //3_4 = 0x02 + "0.625f",//5_8 = 0x03 + "0.5f", //1_2 = 0x04 + "0.375f",//3_8 = 0x05 + "0.25f", //1_4 = 0x06 + "0.125f",//1_8 = 0x07 + "ERROR", //0x08 + "ERROR", //0x09 + "ERROR", //0x0a + "ERROR", //0x0b + "ERROR", //0x0c + "ERROR", //0x0d + "ERROR", //0x0e + "ERROR", //0x0f + I_KCOLORS"[0].r",//K0_R = 0x10 + I_KCOLORS"[1].r",//K1_R = 0x11 + I_KCOLORS"[2].r",//K2_R = 0x12 + I_KCOLORS"[3].r",//K3_R = 0x13 + I_KCOLORS"[0].g",//K0_G = 0x14 + I_KCOLORS"[1].g",//K1_G = 0x15 + I_KCOLORS"[2].g",//K2_G = 0x16 + I_KCOLORS"[3].g",//K3_G = 0x17 + I_KCOLORS"[0].b",//K0_B = 0x18 + I_KCOLORS"[1].b",//K1_B = 0x19 + I_KCOLORS"[2].b",//K2_B = 0x1A + I_KCOLORS"[3].b",//K3_B = 0x1B + I_KCOLORS"[0].a",//K0_A = 0x1C + I_KCOLORS"[1].a",//K1_A = 0x1D + I_KCOLORS"[2].a",//K2_A = 0x1E + I_KCOLORS"[3].a",//K3_A = 0x1F +}; + +const char *tevScaleTable[] = // CS +{ + "1.0f", //SCALE_1 + "2.0f", //SCALE_2 + "4.0f", //SCALE_4 + "0.5f",//DIVIDE_2 +}; + +const char *tevBiasTable[] = // TB +{ + "", //ZERO, + "+0.5f", //ADDHALF, + "-0.5f", //SUBHALF, + "", +}; + +const char *tevOpTable[] = { // TEV + "+", //TEVOP_ADD = 0, + "-", //TEVOP_SUB = 1, +}; + +const char *tevCompOpTable[] = { ">", "==" }; + +#define TEVCMP_R8 0 +#define TEVCMP_GR16 1 +#define TEVCMP_BGR24 2 +#define TEVCMP_RGB8 3 + +const char *tevCInputTable[] = // CC +{ + "prev.rgb", //CPREV, + "prev.aaa", //APREV, + "c0.rgb", //C0, + "c0.aaa", //A0, + "c1.rgb", //C1, + "c1.aaa", //A1, + "c2.rgb", //C2, + "c2.aaa", //A2, + "textemp.rgb", //TEXC, + "textemp.aaa", //TEXA, + "rastemp.rgb", //RASC, + "rastemp.aaa", //RASA, + "float3(1.0f,1.0f,1.0f)", //ONE, + "float3(.5f,.5f,.5f)", //HALF, + "konsttemp.rgb", //KONST, + "float3(0.0f,0.0f,0.0f)", //ZERO + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", +}; +const char *tevCInputTable2[] = // CC +{ + "prev", //CPREV, + "(prev.aaa)", //APREV, + "c0", //C0, + "(c0.aaa)", //A0, + "c1", //C1, + "(c1.aaa)", //A1, + "c2", //C2, + "(c2.aaa)", //A2, + "textemp", //TEXC, + "(textemp.aaa)", //TEXA, + "rastemp", //RASC, + "(rastemp.aaa)", //RASA, + "float3(1.0f,1.0f,1.0f)", //ONE, + "float3(.5f,.5f,.5f)", //HALF, + "konsttemp", //"konsttemp.rgb", //KONST, + "float3(0.0f,0.0f,0.0f)", //ZERO + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", +}; + +const char *tevAInputTable[] = // CA +{ + "prev.a", //APREV, + "c0.a", //A0, + "c1.a", //A1, + "c2.a", //A2, + "textemp.a", //TEXA, + "rastemp.a", //RASA, + "konsttemp.a", //KONST + "0.0", //ZERO + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", +}; + +const char *tevAInputTable2[] = // CA +{ + "prev", //APREV, + "c0", //A0, + "c1", //A1, + "c2", //A2, + "textemp", //TEXA, + "rastemp", //RASA, + "konsttemp", //KONST, (hw1 had quarter) + "float4(0,0,0,0)", //ZERO + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", + "PADERROR", "PADERROR", "PADERROR", "PADERROR", +}; + +const char *tevRasTable[] = +{ + "colors[0]", + "colors[1]", + "ERROR", //2 + "ERROR", //3 + "ERROR", //4 + "alphabump", // use bump alpha + "(alphabump*(255.0f/248.0f))", //normalized + "float4(0,0,0,0)", // zero +}; + +const char *tevTexFunc[] = { "tex2D", "texRECT" }; + +const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; +const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; +const char* tevIndAlphaSel[] = {"", "x", "y", "z"}; +const char* tevIndAlphaScale[] = {"", "*32","*16","*8"}; +const char* tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias +const char* tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt +const char* tevIndWrapStart[] = {"0", "256", "128", "64", "32", "16", "0.001" }; +const char* tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "8.0f" }; + +void PixelShaderMngr::GetPixelShaderId(PixelShaderMngr::PIXELSHADERUID& uid) +{ + u32 projtexcoords = 0; + for (u32 i = 0; i < bpmem.genMode.numtevstages+1; i++) { + if( bpmem.tevorders[i/2].getEnable(i&1) ) { + int texcoord = bpmem.tevorders[i/2].getTexCoord(i&1); + if( xfregs.texcoords[texcoord].texmtxinfo.projection ) + projtexcoords |= 1<>16)&0xff)<<12)|(projtexcoords<<20)|((u32)bpmem.ztex2.op<<28) + |(zbufrender<<30)|(zBufRenderToCol0<<31); + + s_curuid.values[0] = (s_curuid.values[0]&~0x0ff00000)|(projtexcoords<<20); + + // swap table + for(int i = 0; i < 8; i += 2) + ((u8*)&uid.values[1])[i/2] = (bpmem.tevksel[i].hex&0xf)|((bpmem.tevksel[i+1].hex&0xf)<<4); + + uid.values[2] = s_texturemask; + int hdr = 3; + + u32* pcurvalue = &uid.values[hdr]; + for(u32 i = 0; i < bpmem.genMode.numtevstages+1; ++i) { + TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC; + TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[i].alphaC; + + u32 val0 = cc.hex&0xffffff; + u32 val1 = ac.hex&0xffffff; + val0 |= bpmem.tevksel[i/2].getKC(i&1)<<24; + val1 |= bpmem.tevksel[i/2].getKA(i&1)<<24; + + pcurvalue[0] = val0; + pcurvalue[1] = val1; + pcurvalue+=2; + } + + for(u32 i = 0; i < (bpmem.genMode.numtevstages+1)/2; ++i) { + u32 val0, val1; + if( bpmem.tevorders[i].hex&0x40 ) val0 = bpmem.tevorders[i].hex&0x3ff; + else val0 = bpmem.tevorders[i].hex&0x380; + if( bpmem.tevorders[i].hex&0x40000 ) val1 = (bpmem.tevorders[i].hex&0x3ff000)>>12; + else val1 = (bpmem.tevorders[i].hex&0x380000)>>12; + + switch(i % 3) { + case 0: pcurvalue[0] = val0|(val1<<10); break; + case 1: pcurvalue[0] |= val0<<20; pcurvalue[1] = val1; pcurvalue++; break; + case 2: pcurvalue[1] |= (val0<<10)|(val1<<20); pcurvalue++; break; + } + } + + if( (bpmem.genMode.numtevstages+1)&1 ) { // odd + u32 val0; + if( bpmem.tevorders[bpmem.genMode.numtevstages/2].hex&0x40 ) val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex&0x3ff; + else val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex&0x380; + + switch(bpmem.genMode.numtevstages % 3) { + case 0: pcurvalue[0] = val0; break; + case 1: pcurvalue[0] |= val0<<20; break; + case 2: pcurvalue[1] |= (val0<<10); pcurvalue++; break; + } + } + + if( (bpmem.genMode.numtevstages % 3) != 2 ) + ++pcurvalue; + + uid.tevstages = (u32)(pcurvalue-&uid.values[0]-hdr); + + for(u32 i = 0; i < bpmem.genMode.numindstages; ++i) { + u32 val = bpmem.tevind[i].hex&0x1fffff; // 21 bits + switch(i%3) { + case 0: pcurvalue[0] = val; break; + case 1: pcurvalue[0] |= val<<21; pcurvalue[1] = val>>11; ++pcurvalue; break; + case 2: pcurvalue[0] |= val<<10; ++pcurvalue; break; + } + } + + uid.indstages = (u32)(pcurvalue-&uid.values[0]-2-uid.tevstages); +} + +#define WRITE p+=sprintf + +const char *swapColors = "rgba"; +char swapModeTable[4][5]; + +void BuildSwapModeTable() +{ + //bpmem.tevregs[0]. + for (int i=0; i<4; i++) + { + swapModeTable[i][0]=swapColors[bpmem.tevksel[i*2].swap1]; + swapModeTable[i][1]=swapColors[bpmem.tevksel[i*2].swap2]; + swapModeTable[i][2]=swapColors[bpmem.tevksel[i*2+1].swap1]; + swapModeTable[i][3]=swapColors[bpmem.tevksel[i*2+1].swap2]; + swapModeTable[i][4]=0; + } +} + +static char text[16384]; +bool PixelShaderMngr::GeneratePixelShader(FRAGMENTSHADER& ps) +{ + DVSTARTPROFILE(); + + BuildSwapModeTable(); + int numStages = bpmem.genMode.numtevstages + 1; + int numTexgen = bpmem.genMode.numtexgens; + + char *p = text; + WRITE(p,"//Pixel Shader for TEV stages\n"); + WRITE(p,"//%i TEV stages, %i texgens, %i IND stages\n", + numStages,numTexgen,bpmem.genMode.numindstages); + + bool bRenderZ = Renderer::GetZBufferTarget() != 0 && bpmem.zmode.updateenable; + bool bOutputZ = bpmem.ztex2.op != ZTEXTURE_DISABLE; + bool bInputZ = bpmem.ztex2.op==ZTEXTURE_ADD || bRenderZ; + + bool bRenderZToCol0 = Renderer::GetRenderMode()!=Renderer::RM_Normal; // output z and alpha to color0 + assert( !bRenderZToCol0 || bRenderZ ); + + int ztexcoord = -1; + if( bInputZ ) + ztexcoord = numTexgen == 0 ? 0 : numTexgen-1; + + int nIndirectStagesUsed = 0; + if( bpmem.genMode.numindstages > 0 ) { + for(int i = 0; i < numStages; ++i) { + if( bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages ) { + nIndirectStagesUsed |= 1< %s.%s) ? %s : float3(0.0f,0.0f,0.0f));\n", + tevCInputTable[cc.d],tevCInputTable2[cc.a], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable2[cc.b], cmp==TEVCMP_R8_GT?"r":"rgb", tevCInputTable[cc.c]); + break; + case TEVCMP_R8_EQ: + case TEVCMP_RGB8_EQ: + WRITE(p," %s + (abs(%s.r - %s.r)<%f ? %s : float3(0.0f,0.0f,0.0f));\n", + tevCInputTable[cc.d],tevCInputTable2[cc.a], tevCInputTable2[cc.b],epsilon,tevCInputTable[cc.c]); + break; + + case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte) + case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r + WRITE(p," %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : float3(0.0f,0.0f,0.0f));\n", + tevCInputTable[cc.d],tevCInputTable2[cc.a], tevCInputTable2[cc.b], cmp==TEVCMP_GR16_GT?"16":"24", tevCInputTable[cc.c]); + break; + case TEVCMP_GR16_EQ: + case TEVCMP_BGR24_EQ: + WRITE(p," %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : float3(0.0f,0.0f,0.0f));\n", + tevCInputTable[cc.d],tevCInputTable2[cc.a], tevCInputTable2[cc.b],cmp==TEVCMP_GR16_GT?"16":"24",epsilon,tevCInputTable[cc.c]); + break; + default: + WRITE(p,"float3(0.0f,0.0f,0.0f);\n"); + break; + } + } + + if( cc.clamp ) + WRITE(p, "%s = clamp(%s,0.0f,1.0f);\n", tevCOutputTable[cc.dest],tevCOutputTable[cc.dest]); + + // combine the alpha channel + WRITE(p,"%s= ", tevAOutputTable[ac.dest]); + + if (ac.bias != 3) { // if not compare + //normal alpha combiner goes here + WRITE(p," %s*(%s%s",tevScaleTable[ac.shift],tevAInputTable[ac.d],tevOpTable[ac.op]); + WRITE(p,"lerp(%s,%s,%s) %s)\n", + tevAInputTable[ac.a],tevAInputTable[ac.b], + tevAInputTable[ac.c],tevBiasTable[ac.bias]); + } + else { + //compare alpha combiner goes here + int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here + switch(cmp) { + case TEVCMP_R8_GT: + case TEVCMP_A8_GT: + WRITE(p," %s + ((%s.%s > %s.%s) ? %s : 0)\n", + tevAInputTable[ac.d],tevAInputTable2[ac.a], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable2[ac.b], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable[ac.c]); + break; + case TEVCMP_R8_EQ: + case TEVCMP_A8_EQ: + WRITE(p," %s + (abs(%s.r - %s.r)<%f ? %s : 0)\n", + tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],epsilon,tevAInputTable[ac.c]); + break; + + case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte) + case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r + WRITE(p," %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)\n", + tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b], cmp==TEVCMP_GR16_GT?"16":"24", tevAInputTable[ac.c]); + break; + case TEVCMP_GR16_EQ: + case TEVCMP_BGR24_EQ: + WRITE(p," %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : 0)\n", + tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],cmp==TEVCMP_GR16_GT?"16":"24",epsilon,tevAInputTable[ac.c]); + break; + default: + WRITE(p,"0)\n"); + break; + } + } + + WRITE(p,";\n"); + + if( ac.clamp ) + WRITE(p, "%s = clamp(%s,0.0f,1.0f);\n", tevAOutputTable[ac.dest],tevAOutputTable[ac.dest]); + WRITE(p, "\n"); +} + +void WrapNonPow2Tex(char* &p, const char* var, int texmap) +{ + _assert_(s_texturemask & (1< %s)",alphaRef[num]); break; + case ALPHACMP_LESS: WRITE(p,"(prev.a >= %s+%f)",alphaRef[num],epsilon*0.5f);break; + case ALPHACMP_GEQUAL: WRITE(p,"(prev.a < %s)",alphaRef[num]); break; + case ALPHACMP_GREATER: WRITE(p,"(prev.a <= %s - %f)",alphaRef[num],epsilon*0.5f);break; + case ALPHACMP_EQUAL: WRITE(p,"(abs(prev.a-%s)>%f)",alphaRef[num],epsilon*2); break; + case ALPHACMP_NEQUAL: WRITE(p,"(abs(prev.a-%s)<%f)",alphaRef[num],epsilon*2); break; + } +} + +bool WriteAlphaTest(char *&p) +{ + u32 op = bpmem.alphaFunc.logic; + u32 comp[2] = {bpmem.alphaFunc.comp0,bpmem.alphaFunc.comp1}; + + //first kill all the simple cases + switch(op) { + case 0: // and + if (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) return true; + if (comp[0] == ALPHACMP_NEVER || comp[1] == ALPHACMP_NEVER) { + WRITE(p, "discard;\n"); + return false; + } + break; + case 1: // or + if (comp[0] == ALPHACMP_ALWAYS || comp[1] == ALPHACMP_ALWAYS) return true; + if (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) { + WRITE(p, "discard;\n"); + return false; + } + break; + case 2: // xor + if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS) ) return true; + if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) ) { + WRITE(p, "discard;\n"); + return false; + } + break; + case 3: // xnor + if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_NEVER) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_ALWAYS) ) { + WRITE(p, "discard;\n"); + return false; + } + if ( (comp[0] == ALPHACMP_ALWAYS && comp[1] == ALPHACMP_ALWAYS) || (comp[0] == ALPHACMP_NEVER && comp[1] == ALPHACMP_NEVER) ) + return true; + break; + } + + bool bFirst = false; + WRITE(p, "discard( "); + WriteAlphaCompare(p, 0, bpmem.alphaFunc.comp0); + + // negated because testing the inverse condition + switch(bpmem.alphaFunc.logic) { + case 0: WRITE(p, " || "); break; // and + case 1: WRITE(p, " && "); break; // or + case 2: WRITE(p, " == "); break; // xor + case 3: WRITE(p, " != "); break; // xnor + } + WriteAlphaCompare(p, 1, bpmem.alphaFunc.comp1); + WRITE(p, ");\n"); + return true; +} diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp index b6421086ee..3525c310a3 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp @@ -15,564 +15,564 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#ifdef _WIN32 -#include -#endif - -#include "Globals.h" - -#include "Render.h" - -#include "MemoryUtil.h" -#include "BPStructs.h" -#include "TextureDecoder.h" -#include "TextureMngr.h" -#include "PixelShader.h" -#include "VertexShader.h" - -u8 *TextureMngr::temp = NULL; -TextureMngr::TexCache TextureMngr::textures; -std::map TextureMngr::mapDepthTargets; -int TextureMngr::nTex2DEnabled, TextureMngr::nTexRECTEnabled; - -extern int frameCount; -static u32 s_TempFramebuffer = 0; -#define TEMP_SIZE (1024*1024*4) - -const GLint c_MinLinearFilter[8] = { - GL_NEAREST, GL_NEAREST_MIPMAP_NEAREST, GL_NEAREST_MIPMAP_LINEAR, GL_NEAREST, - GL_LINEAR, GL_LINEAR_MIPMAP_NEAREST, GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR}; - -const GLint c_WrapSettings[4] = { GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT, GL_REPEAT }; - -void TextureMngr::TCacheEntry::SetTextureParameters(TexMode0& newmode) -{ - mode = newmode; - if( isNonPow2 ) { - // very limited! - glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, (newmode.mag_filter||g_Config.bForceFiltering)?GL_LINEAR:GL_NEAREST); - glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, (g_Config.bForceFiltering||newmode.min_filter>=4)?GL_LINEAR:GL_NEAREST); - if( newmode.wrap_s == 2 || newmode.wrap_t == 2 ) { - DEBUG_LOG("cannot support mirrorred repeat mode\n"); - } - } - else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, (newmode.mag_filter||g_Config.bForceFiltering)?GL_LINEAR:GL_NEAREST); - - if( bHaveMipMaps ) { - int filt = newmode.min_filter; - if( g_Config.bForceFiltering && newmode.min_filter < 4 ) - newmode.min_filter += 4; // take equivalent forced linear - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, c_MinLinearFilter[filt]); - } - else - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (g_Config.bForceFiltering||newmode.min_filter>=4)?GL_LINEAR:GL_NEAREST); - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, c_WrapSettings[newmode.wrap_s]); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, c_WrapSettings[newmode.wrap_t]); - } - - if (g_Config.bForceMaxAniso) - { - // not used for now, check out GL_EXT_texture_filter_anisotropic - } -} - -void TextureMngr::TCacheEntry::Destroy() -{ - SAFE_RELEASE_TEX(texture); -} - -void TextureMngr::Init() -{ - temp = (u8*)AllocateMemoryPages(TEMP_SIZE); - nTex2DEnabled = nTexRECTEnabled = 0; -} - -void TextureMngr::Invalidate() -{ - TexCache::iterator iter = textures.begin(); - for (;iter!=textures.end();iter++) - iter->second.Destroy(); - textures.clear(); -} - -void TextureMngr::Shutdown() -{ - Invalidate(); - std::map::iterator itdepth = mapDepthTargets.begin(); - for (itdepth = mapDepthTargets.begin(); itdepth != mapDepthTargets.end(); ++itdepth) { - glDeleteRenderbuffersEXT(1, &itdepth->second.targ); - } - mapDepthTargets.clear(); - - if( s_TempFramebuffer ) { - glDeleteFramebuffersEXT(1, &s_TempFramebuffer); - s_TempFramebuffer = 0; - } - - FreeMemoryPages(temp, TEMP_SIZE); - temp = NULL; -} - -void TextureMngr::Cleanup() -{ - TexCache::iterator iter = textures.begin(); - - while(iter!=textures.end()) { - if (frameCount > 20 + iter->second.frameCount) { - if (!iter->second.isRenderTarget) { - u32 *ptr = (u32*)g_VideoInitialize.pGetMemoryPointer(iter->second.addr + iter->second.hashoffset*4); - if (*ptr == iter->second.hash) - *ptr = iter->second.oldpixel; - iter->second.Destroy(); -#ifdef _WIN32 - iter = textures.erase(iter); -#else - textures.erase(iter++); -#endif - } - else { - iter->second.Destroy(); -#ifdef _WIN32 - iter = textures.erase(iter); -#else - textures.erase(iter++); -#endif - } - } - else - iter++; - } - - std::map::iterator itdepth = mapDepthTargets.begin(); - while(itdepth != mapDepthTargets.end()) { - if( frameCount > 20 + itdepth->second.framecount) { -#ifdef _WIN32 - itdepth = mapDepthTargets.erase(itdepth); -#else - mapDepthTargets.erase(itdepth++); -#endif - } - else ++itdepth; - } -} - -#ifndef _WIN32 -inline u32 _rotl(u32 x, int shift) { - return (x << shift) | (x >> (32 - shift)); -} -#endif -TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width, int height, int format, int tlutaddr, int tlutfmt) -{ - if (address == 0 ) - return NULL; - - TexCache::iterator iter = textures.find(address); - TexMode0 &tm0 = bpmem.tex[texstage>3].texMode0[texstage&3]; - u8 *ptr = g_VideoInitialize.pGetMemoryPointer(address); - - int palSize = TexDecoder_GetPaletteSize(format); - u32 palhash = 0xc0debabe; - - if (palSize) { - if (palSize>16) - palSize = 16; //let's not do excessive amount of checking - u8 *pal = g_VideoInitialize.pGetMemoryPointer(tlutaddr); - if (pal != 0) { - for (int i=0; isecond; - - if( entry.isRenderTarget || ((u32 *)ptr)[entry.hashoffset] == entry.hash && palhash == entry.paletteHash) { //stupid, improve - entry.frameCount = frameCount; - //glEnable(entry.isNonPow2?GL_TEXTURE_RECTANGLE_NV:GL_TEXTURE_2D); - glBindTexture(entry.isNonPow2?GL_TEXTURE_RECTANGLE_NV:GL_TEXTURE_2D, entry.texture); - if (entry.mode.hex != tm0.hex) - entry.SetTextureParameters(tm0); - return &entry; - } - else - { - // can potentially do some caching - - //TCacheEntry &entry = entry; - /*if (width == entry.w && height==entry.h && format==entry.fmt) - { - LPDIRECT3DTEXTURE9 tex = entry.texture; - int bs = TexDecoder_GetBlockWidthInTexels(format)-1; //TexelSizeInNibbles(format)*width*height/16; - int expandedWidth = (width+bs) & (~bs); - D3DFORMAT dfmt = TexDecoder_Decode(temp,ptr,expandedWidth,height,format, tlutaddr, tlutfmt); - ReplaceTexture2D(tex,temp,width,height,expandedWidth,dfmt); - dev->SetTexture(texstage, stage,tex); - return; - } - else - {*/ - entry.Destroy(); - textures.erase(iter); - //} - } - } - - int bs = TexDecoder_GetBlockWidthInTexels(format)-1; //TexelSizeInNibbles(format)*width*height/16; - int expandedWidth = (width+bs) & (~bs); - TEXTUREFMT dfmt = TexDecoder_Decode(temp,ptr,expandedWidth,height,format, tlutaddr, tlutfmt); - - //Make an entry in the table - TCacheEntry& entry = textures[address]; - - entry.hashoffset = 0; - entry.hash = (u32)(((double)rand() / RAND_MAX) * 0xFFFFFFFF); - entry.paletteHash = palhash; - entry.oldpixel = ((u32 *)ptr)[entry.hashoffset]; - ((u32 *)ptr)[entry.hashoffset] = entry.hash; - - entry.addr = address; - entry.isRenderTarget=false; - - entry.isNonPow2 = ((width&(width-1)) || (height&(height-1))); - - glGenTextures(1, &entry.texture); - GLenum target = entry.isNonPow2 ? GL_TEXTURE_RECTANGLE_NV : GL_TEXTURE_2D; - glBindTexture(target, entry.texture); - - if (expandedWidth != width) - glPixelStorei(GL_UNPACK_ROW_LENGTH, expandedWidth); - - if( !entry.isNonPow2 && ((tm0.min_filter&3)==1||(tm0.min_filter&3)==2) ) { - gluBuild2DMipmaps(GL_TEXTURE_2D, 4, width, height, GL_BGRA, GL_UNSIGNED_BYTE, temp); - entry.bHaveMipMaps = true; - } - else - glTexImage2D(target, 0, 4, width, height, 0, dfmt.format, dfmt.type, temp); - - if (expandedWidth != width) // reset - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - - entry.frameCount = frameCount; - entry.w=width; - entry.h=height; - entry.fmt=format; - entry.SetTextureParameters(tm0); - - if (g_Config.bDumpTextures) { // dump texture to file - static int counter = 0; - char szTemp[MAX_PATH]; - sprintf(szTemp, "%s\\txt_%04i_%i.png", g_Config.texDumpPath, counter++, format); - - SaveTexture(szTemp,target, entry.texture, width, height); - } - - INCSTAT(stats.numTexturesCreated); - SETSTAT(stats.numTexturesAlive,textures.size()); - - //glEnable(entry.isNonPow2?GL_TEXTURE_RECTANGLE_NV:GL_TEXTURE_2D); - - //SaveTexture("tex.tga", target, entry.texture, entry.w, entry.h); - return &entry; -} - -void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, bool bScaleByHalf, TRectangle *source) -{ - DVSTARTPROFILE(); - GL_REPORT_ERRORD(); - - // for intensity values, use Y of YUV format! - // for all purposes, treat 4bit equivalents as 8bit (probably just used for compression) - // RGBA8 - RGBA8 - // RGB565 - RGB565 - // RGB5A3 - RGB5A3 - // I4,R4,Z4 - I4 - // IA4,RA4 - IA4 - // Z8M,G8,I8,A8,Z8,R8,B8,Z8L - I8 - // Z16,GB8,RG8,Z16L,IA8,RA8 - IA8 - bool bIsInit = textures.find(address) != textures.end(); - - PRIM_LOG("copytarg: addr=0x%x, fromz=%d, intfmt=%d, copyfmt=%d\n", address, (int)bFromZBuffer,(int)bIsIntensityFmt,copyfmt); - - TCacheEntry& entry = textures[address]; - entry.isNonPow2 = true; - entry.hash = 0; - entry.hashoffset = 0; - entry.frameCount = frameCount; - - int mult = bScaleByHalf?2:1; - int w = (abs(source->right-source->left)/mult+7)&~7; - int h = (abs(source->bottom-source->top)/mult+7)&~7; - - GL_REPORT_ERRORD(); - - if( !bIsInit ) { - glGenTextures(1, &entry.texture); - glBindTexture(GL_TEXTURE_RECTANGLE_NV, entry.texture); - glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - GL_REPORT_ERRORD(); - } - else { - _assert_(entry.texture); - bool bReInit = true; - - if( entry.w == w && entry.h == h ) { - glBindTexture(GL_TEXTURE_RECTANGLE_NV, entry.texture); - // for some reason mario sunshine errors here... - GLenum err = GL_NO_ERROR; - GL_REPORT_ERROR(); - if( err == GL_NO_ERROR ) - bReInit = false; - } - - if( bReInit ) { - // necessary, for some reason opengl gives errors when texture isn't deleted - glDeleteTextures(1,&entry.texture); - glGenTextures(1, &entry.texture); - glBindTexture(GL_TEXTURE_RECTANGLE_NV, entry.texture); - glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - GL_REPORT_ERRORD(); - } - } - - if( !bIsInit || !entry.isRenderTarget ) { - glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - - glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - if( glGetError() != GL_NO_ERROR) { - glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP); - glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP); - GL_REPORT_ERRORD(); - } - } - - entry.w = w; - entry.h = h; - entry.isRenderTarget=true; - entry.fmt = copyfmt; - - float colmat[16]; - float fConstAdd[4] = {0}; - memset(colmat, 0, sizeof(colmat)); - - if( bFromZBuffer ) { - switch(copyfmt) { - case 0: // Z4 - case 1: // Z8 - colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; - break; - - case 3: // Z16 //? - case 11: // Z16 - colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; - break; - case 6: // Z24X8 - colmat[0] = 1; - colmat[5] = 1; - colmat[10] = 1; - break; - case 9: // Z8M - colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1; - break; - case 10: // Z8L - colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; - break; - case 12: // Z16L - colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1; - break; - default: - ERROR_LOG("Unknown copy zbuf format: 0x%x\n", copyfmt); - colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; - break; - } - } - else if( bIsIntensityFmt ) { - fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f/255.0f; - switch(copyfmt) { - case 0: // I4 - case 1: // I8 - case 2: // IA4 - case 3: // IA8 - colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f; - colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f; - colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f; - if( copyfmt < 2 ) { - fConstAdd[3] = 16.0f/255.0f; - colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f; - } - else { // alpha - colmat[15] = 1; - } - break; - default: - ERROR_LOG("Unknown copy intensity format: 0x%x\n", copyfmt); - colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; - break; - } - } - else { - switch(copyfmt) { - case 0: // R4 - case 8: // R8 - colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; - break; - case 2: // RA4 - case 3: // RA8 - colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1; - break; - - case 7: // A8 - colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1; - break; - case 9: // G8 - colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1; - break; - case 10: // B8 - colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; - break; - case 11: // RG8 - colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1; - break; - case 12: // GB8 - colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; - break; - - case 4: // RGB565 - colmat[0] = colmat[5] = colmat[10] = 1; - fConstAdd[3] = 1; // set alpha to 1 - break; - case 5: // RGB5A3 - case 6: // RGBA8 - colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; - break; - - default: - ERROR_LOG("Unknown copy color format: 0x%x\n", copyfmt); - colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; - break; - } - } - -// if( bCopyToTarget ) { -// _assert_( glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) == GL_FRAMEBUFFER_COMPLETE_EXT ); -// glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); -// GL_REPORT_ERRORD(); -// glCopyTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, source->left, source->top, source->right-source->left, source->bottom-source->top); -// entry.isUpsideDown = true; // note that the copy is upside down!! -// GL_REPORT_ERRORD(); -// return; -// } - - Renderer::SetRenderMode(Renderer::RM_Normal); // set back to normal - GL_REPORT_ERRORD(); - - // have to run a pixel shader - - Renderer::ResetGLState(); // reset any game specific settings - - if( s_TempFramebuffer == 0 ) - glGenFramebuffersEXT( 1, &s_TempFramebuffer); - - Renderer::SetFramebuffer(s_TempFramebuffer); - Renderer::SetRenderTarget(entry.texture); - GL_REPORT_ERRORD(); - - // create and attach the render target - std::map::iterator itdepth = mapDepthTargets.find((h<<16)|w); - - if( itdepth == mapDepthTargets.end() ) { - DEPTHTARGET& depth = mapDepthTargets[(h<<16)|w]; - depth.framecount = frameCount; - glGenRenderbuffersEXT( 1, &depth.targ); - glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, depth.targ); - glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT/*GL_DEPTH24_STENCIL8_EXT*/, w, h); - GL_REPORT_ERRORD(); - Renderer::SetDepthTarget(depth.targ); - GL_REPORT_ERRORD(); - } - else { - itdepth->second.framecount = frameCount; - Renderer::SetDepthTarget(itdepth->second.targ); - GL_REPORT_ERRORD(); - } - - glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_RECTANGLE_NV, bFromZBuffer?Renderer::GetZBufferTarget():Renderer::GetRenderTarget()); - TextureMngr::EnableTexRECT(0); - - glViewport(0, 0, w, h); - - glEnable(GL_FRAGMENT_PROGRAM_ARB); - glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, PixelShaderMngr::GetColorMatrixProgram()); - PixelShaderMngr::SetColorMatrix(colmat, fConstAdd); // set transformation - GL_REPORT_ERRORD(); - - glBegin(GL_QUADS); - glTexCoord2f((float)source->left, Renderer::GetTargetHeight()-(float)source->bottom); glVertex2f(-1,1); - glTexCoord2f((float)source->left, Renderer::GetTargetHeight()-(float)source->top); glVertex2f(-1,-1); - glTexCoord2f((float)source->right, Renderer::GetTargetHeight()-(float)source->top); glVertex2f(1,-1); - glTexCoord2f((float)source->right, Renderer::GetTargetHeight()-(float)source->bottom); glVertex2f(1,1); - glEnd(); - - GL_REPORT_ERRORD(); - - Renderer::SetFramebuffer(0); - Renderer::RestoreGLState(); - VertexShaderMngr::SetViewportChanged(); - - TextureMngr::DisableStage(0); - - if( bFromZBuffer ) - Renderer::SetZBufferRender(); // notify for future settings - - GL_REPORT_ERRORD(); - //SaveTexture("frame.tga", GL_TEXTURE_RECTANGLE_NV, entry.texture, entry.w, entry.h); - //SaveTexture("tex.tga", GL_TEXTURE_RECTANGLE_NV, Renderer::GetZBufferTarget(), Renderer::GetTargetWidth(), Renderer::GetTargetHeight()); -} - -void TextureMngr::EnableTex2D(int stage) -{ - if( !(nTex2DEnabled & (1< +#endif + +#include "Globals.h" + +#include "Render.h" + +#include "MemoryUtil.h" +#include "BPStructs.h" +#include "TextureDecoder.h" +#include "TextureMngr.h" +#include "PixelShader.h" +#include "VertexShader.h" + +u8 *TextureMngr::temp = NULL; +TextureMngr::TexCache TextureMngr::textures; +std::map TextureMngr::mapDepthTargets; +int TextureMngr::nTex2DEnabled, TextureMngr::nTexRECTEnabled; + +extern int frameCount; +static u32 s_TempFramebuffer = 0; +#define TEMP_SIZE (1024*1024*4) + +const GLint c_MinLinearFilter[8] = { + GL_NEAREST, GL_NEAREST_MIPMAP_NEAREST, GL_NEAREST_MIPMAP_LINEAR, GL_NEAREST, + GL_LINEAR, GL_LINEAR_MIPMAP_NEAREST, GL_LINEAR_MIPMAP_LINEAR, GL_LINEAR}; + +const GLint c_WrapSettings[4] = { GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT, GL_REPEAT }; + +void TextureMngr::TCacheEntry::SetTextureParameters(TexMode0& newmode) +{ + mode = newmode; + if( isNonPow2 ) { + // very limited! + glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, (newmode.mag_filter||g_Config.bForceFiltering)?GL_LINEAR:GL_NEAREST); + glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, (g_Config.bForceFiltering||newmode.min_filter>=4)?GL_LINEAR:GL_NEAREST); + if( newmode.wrap_s == 2 || newmode.wrap_t == 2 ) { + DEBUG_LOG("cannot support mirrorred repeat mode\n"); + } + } + else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, (newmode.mag_filter||g_Config.bForceFiltering)?GL_LINEAR:GL_NEAREST); + + if( bHaveMipMaps ) { + int filt = newmode.min_filter; + if( g_Config.bForceFiltering && newmode.min_filter < 4 ) + newmode.min_filter += 4; // take equivalent forced linear + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, c_MinLinearFilter[filt]); + } + else + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (g_Config.bForceFiltering||newmode.min_filter>=4)?GL_LINEAR:GL_NEAREST); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, c_WrapSettings[newmode.wrap_s]); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, c_WrapSettings[newmode.wrap_t]); + } + + if (g_Config.bForceMaxAniso) + { + // not used for now, check out GL_EXT_texture_filter_anisotropic + } +} + +void TextureMngr::TCacheEntry::Destroy() +{ + SAFE_RELEASE_TEX(texture); +} + +void TextureMngr::Init() +{ + temp = (u8*)AllocateMemoryPages(TEMP_SIZE); + nTex2DEnabled = nTexRECTEnabled = 0; +} + +void TextureMngr::Invalidate() +{ + TexCache::iterator iter = textures.begin(); + for (;iter!=textures.end();iter++) + iter->second.Destroy(); + textures.clear(); +} + +void TextureMngr::Shutdown() +{ + Invalidate(); + std::map::iterator itdepth = mapDepthTargets.begin(); + for (itdepth = mapDepthTargets.begin(); itdepth != mapDepthTargets.end(); ++itdepth) { + glDeleteRenderbuffersEXT(1, &itdepth->second.targ); + } + mapDepthTargets.clear(); + + if( s_TempFramebuffer ) { + glDeleteFramebuffersEXT(1, &s_TempFramebuffer); + s_TempFramebuffer = 0; + } + + FreeMemoryPages(temp, TEMP_SIZE); + temp = NULL; +} + +void TextureMngr::Cleanup() +{ + TexCache::iterator iter = textures.begin(); + + while(iter!=textures.end()) { + if (frameCount > 20 + iter->second.frameCount) { + if (!iter->second.isRenderTarget) { + u32 *ptr = (u32*)g_VideoInitialize.pGetMemoryPointer(iter->second.addr + iter->second.hashoffset*4); + if (*ptr == iter->second.hash) + *ptr = iter->second.oldpixel; + iter->second.Destroy(); +#ifdef _WIN32 + iter = textures.erase(iter); +#else + textures.erase(iter++); +#endif + } + else { + iter->second.Destroy(); +#ifdef _WIN32 + iter = textures.erase(iter); +#else + textures.erase(iter++); +#endif + } + } + else + iter++; + } + + std::map::iterator itdepth = mapDepthTargets.begin(); + while(itdepth != mapDepthTargets.end()) { + if( frameCount > 20 + itdepth->second.framecount) { +#ifdef _WIN32 + itdepth = mapDepthTargets.erase(itdepth); +#else + mapDepthTargets.erase(itdepth++); +#endif + } + else ++itdepth; + } +} + +#ifndef _WIN32 +inline u32 _rotl(u32 x, int shift) { + return (x << shift) | (x >> (32 - shift)); +} +#endif +TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width, int height, int format, int tlutaddr, int tlutfmt) +{ + if (address == 0 ) + return NULL; + + TexCache::iterator iter = textures.find(address); + TexMode0 &tm0 = bpmem.tex[texstage>3].texMode0[texstage&3]; + u8 *ptr = g_VideoInitialize.pGetMemoryPointer(address); + + int palSize = TexDecoder_GetPaletteSize(format); + u32 palhash = 0xc0debabe; + + if (palSize) { + if (palSize>16) + palSize = 16; //let's not do excessive amount of checking + u8 *pal = g_VideoInitialize.pGetMemoryPointer(tlutaddr); + if (pal != 0) { + for (int i=0; isecond; + + if( entry.isRenderTarget || ((u32 *)ptr)[entry.hashoffset] == entry.hash && palhash == entry.paletteHash) { //stupid, improve + entry.frameCount = frameCount; + //glEnable(entry.isNonPow2?GL_TEXTURE_RECTANGLE_NV:GL_TEXTURE_2D); + glBindTexture(entry.isNonPow2?GL_TEXTURE_RECTANGLE_NV:GL_TEXTURE_2D, entry.texture); + if (entry.mode.hex != tm0.hex) + entry.SetTextureParameters(tm0); + return &entry; + } + else + { + // can potentially do some caching + + //TCacheEntry &entry = entry; + /*if (width == entry.w && height==entry.h && format==entry.fmt) + { + LPDIRECT3DTEXTURE9 tex = entry.texture; + int bs = TexDecoder_GetBlockWidthInTexels(format)-1; //TexelSizeInNibbles(format)*width*height/16; + int expandedWidth = (width+bs) & (~bs); + D3DFORMAT dfmt = TexDecoder_Decode(temp,ptr,expandedWidth,height,format, tlutaddr, tlutfmt); + ReplaceTexture2D(tex,temp,width,height,expandedWidth,dfmt); + dev->SetTexture(texstage, stage,tex); + return; + } + else + {*/ + entry.Destroy(); + textures.erase(iter); + //} + } + } + + int bs = TexDecoder_GetBlockWidthInTexels(format)-1; //TexelSizeInNibbles(format)*width*height/16; + int expandedWidth = (width+bs) & (~bs); + TEXTUREFMT dfmt = TexDecoder_Decode(temp,ptr,expandedWidth,height,format, tlutaddr, tlutfmt); + + //Make an entry in the table + TCacheEntry& entry = textures[address]; + + entry.hashoffset = 0; + entry.hash = (u32)(((double)rand() / RAND_MAX) * 0xFFFFFFFF); + entry.paletteHash = palhash; + entry.oldpixel = ((u32 *)ptr)[entry.hashoffset]; + ((u32 *)ptr)[entry.hashoffset] = entry.hash; + + entry.addr = address; + entry.isRenderTarget=false; + + entry.isNonPow2 = ((width&(width-1)) || (height&(height-1))); + + glGenTextures(1, &entry.texture); + GLenum target = entry.isNonPow2 ? GL_TEXTURE_RECTANGLE_NV : GL_TEXTURE_2D; + glBindTexture(target, entry.texture); + + if (expandedWidth != width) + glPixelStorei(GL_UNPACK_ROW_LENGTH, expandedWidth); + + if( !entry.isNonPow2 && ((tm0.min_filter&3)==1||(tm0.min_filter&3)==2) ) { + gluBuild2DMipmaps(GL_TEXTURE_2D, 4, width, height, GL_BGRA, GL_UNSIGNED_BYTE, temp); + entry.bHaveMipMaps = true; + } + else + glTexImage2D(target, 0, 4, width, height, 0, dfmt.format, dfmt.type, temp); + + if (expandedWidth != width) // reset + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + entry.frameCount = frameCount; + entry.w=width; + entry.h=height; + entry.fmt=format; + entry.SetTextureParameters(tm0); + + if (g_Config.bDumpTextures) { // dump texture to file + static int counter = 0; + char szTemp[MAX_PATH]; + sprintf(szTemp, "%s\\txt_%04i_%i.png", g_Config.texDumpPath, counter++, format); + + SaveTexture(szTemp,target, entry.texture, width, height); + } + + INCSTAT(stats.numTexturesCreated); + SETSTAT(stats.numTexturesAlive,textures.size()); + + //glEnable(entry.isNonPow2?GL_TEXTURE_RECTANGLE_NV:GL_TEXTURE_2D); + + //SaveTexture("tex.tga", target, entry.texture, entry.w, entry.h); + return &entry; +} + +void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, bool bScaleByHalf, TRectangle *source) +{ + DVSTARTPROFILE(); + GL_REPORT_ERRORD(); + + // for intensity values, use Y of YUV format! + // for all purposes, treat 4bit equivalents as 8bit (probably just used for compression) + // RGBA8 - RGBA8 + // RGB565 - RGB565 + // RGB5A3 - RGB5A3 + // I4,R4,Z4 - I4 + // IA4,RA4 - IA4 + // Z8M,G8,I8,A8,Z8,R8,B8,Z8L - I8 + // Z16,GB8,RG8,Z16L,IA8,RA8 - IA8 + bool bIsInit = textures.find(address) != textures.end(); + + PRIM_LOG("copytarg: addr=0x%x, fromz=%d, intfmt=%d, copyfmt=%d\n", address, (int)bFromZBuffer,(int)bIsIntensityFmt,copyfmt); + + TCacheEntry& entry = textures[address]; + entry.isNonPow2 = true; + entry.hash = 0; + entry.hashoffset = 0; + entry.frameCount = frameCount; + + int mult = bScaleByHalf?2:1; + int w = (abs(source->right-source->left)/mult+7)&~7; + int h = (abs(source->bottom-source->top)/mult+7)&~7; + + GL_REPORT_ERRORD(); + + if( !bIsInit ) { + glGenTextures(1, &entry.texture); + glBindTexture(GL_TEXTURE_RECTANGLE_NV, entry.texture); + glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + GL_REPORT_ERRORD(); + } + else { + _assert_(entry.texture); + bool bReInit = true; + + if( entry.w == w && entry.h == h ) { + glBindTexture(GL_TEXTURE_RECTANGLE_NV, entry.texture); + // for some reason mario sunshine errors here... + GLenum err = GL_NO_ERROR; + GL_REPORT_ERROR(); + if( err == GL_NO_ERROR ) + bReInit = false; + } + + if( bReInit ) { + // necessary, for some reason opengl gives errors when texture isn't deleted + glDeleteTextures(1,&entry.texture); + glGenTextures(1, &entry.texture); + glBindTexture(GL_TEXTURE_RECTANGLE_NV, entry.texture); + glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 4, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + GL_REPORT_ERRORD(); + } + } + + if( !bIsInit || !entry.isRenderTarget ) { + glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + if( glGetError() != GL_NO_ERROR) { + glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP); + GL_REPORT_ERRORD(); + } + } + + entry.w = w; + entry.h = h; + entry.isRenderTarget=true; + entry.fmt = copyfmt; + + float colmat[16]; + float fConstAdd[4] = {0}; + memset(colmat, 0, sizeof(colmat)); + + if( bFromZBuffer ) { + switch(copyfmt) { + case 0: // Z4 + case 1: // Z8 + colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; + break; + + case 3: // Z16 //? + case 11: // Z16 + colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; + break; + case 6: // Z24X8 + colmat[0] = 1; + colmat[5] = 1; + colmat[10] = 1; + break; + case 9: // Z8M + colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1; + break; + case 10: // Z8L + colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; + break; + case 12: // Z16L + colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1; + break; + default: + ERROR_LOG("Unknown copy zbuf format: 0x%x\n", copyfmt); + colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; + break; + } + } + else if( bIsIntensityFmt ) { + fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f/255.0f; + switch(copyfmt) { + case 0: // I4 + case 1: // I8 + case 2: // IA4 + case 3: // IA8 + colmat[0] = 0.257f; colmat[1] = 0.504f; colmat[2] = 0.098f; + colmat[4] = 0.257f; colmat[5] = 0.504f; colmat[6] = 0.098f; + colmat[8] = 0.257f; colmat[9] = 0.504f; colmat[10] = 0.098f; + if( copyfmt < 2 ) { + fConstAdd[3] = 16.0f/255.0f; + colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f; + } + else { // alpha + colmat[15] = 1; + } + break; + default: + ERROR_LOG("Unknown copy intensity format: 0x%x\n", copyfmt); + colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; + break; + } + } + else { + switch(copyfmt) { + case 0: // R4 + case 8: // R8 + colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; + break; + case 2: // RA4 + case 3: // RA8 + colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1; + break; + + case 7: // A8 + colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1; + break; + case 9: // G8 + colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1; + break; + case 10: // B8 + colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; + break; + case 11: // RG8 + colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1; + break; + case 12: // GB8 + colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; + break; + + case 4: // RGB565 + colmat[0] = colmat[5] = colmat[10] = 1; + fConstAdd[3] = 1; // set alpha to 1 + break; + case 5: // RGB5A3 + case 6: // RGBA8 + colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; + break; + + default: + ERROR_LOG("Unknown copy color format: 0x%x\n", copyfmt); + colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; + break; + } + } + +// if( bCopyToTarget ) { +// _assert_( glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) == GL_FRAMEBUFFER_COMPLETE_EXT ); +// glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); +// GL_REPORT_ERRORD(); +// glCopyTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, source->left, source->top, source->right-source->left, source->bottom-source->top); +// entry.isUpsideDown = true; // note that the copy is upside down!! +// GL_REPORT_ERRORD(); +// return; +// } + + Renderer::SetRenderMode(Renderer::RM_Normal); // set back to normal + GL_REPORT_ERRORD(); + + // have to run a pixel shader + + Renderer::ResetGLState(); // reset any game specific settings + + if( s_TempFramebuffer == 0 ) + glGenFramebuffersEXT( 1, &s_TempFramebuffer); + + Renderer::SetFramebuffer(s_TempFramebuffer); + Renderer::SetRenderTarget(entry.texture); + GL_REPORT_ERRORD(); + + // create and attach the render target + std::map::iterator itdepth = mapDepthTargets.find((h<<16)|w); + + if( itdepth == mapDepthTargets.end() ) { + DEPTHTARGET& depth = mapDepthTargets[(h<<16)|w]; + depth.framecount = frameCount; + glGenRenderbuffersEXT( 1, &depth.targ); + glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, depth.targ); + glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT/*GL_DEPTH24_STENCIL8_EXT*/, w, h); + GL_REPORT_ERRORD(); + Renderer::SetDepthTarget(depth.targ); + GL_REPORT_ERRORD(); + } + else { + itdepth->second.framecount = frameCount; + Renderer::SetDepthTarget(itdepth->second.targ); + GL_REPORT_ERRORD(); + } + + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_RECTANGLE_NV, bFromZBuffer?Renderer::GetZBufferTarget():Renderer::GetRenderTarget()); + TextureMngr::EnableTexRECT(0); + + glViewport(0, 0, w, h); + + glEnable(GL_FRAGMENT_PROGRAM_ARB); + glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, PixelShaderMngr::GetColorMatrixProgram()); + PixelShaderMngr::SetColorMatrix(colmat, fConstAdd); // set transformation + GL_REPORT_ERRORD(); + + glBegin(GL_QUADS); + glTexCoord2f((float)source->left, Renderer::GetTargetHeight()-(float)source->bottom); glVertex2f(-1,1); + glTexCoord2f((float)source->left, Renderer::GetTargetHeight()-(float)source->top); glVertex2f(-1,-1); + glTexCoord2f((float)source->right, Renderer::GetTargetHeight()-(float)source->top); glVertex2f(1,-1); + glTexCoord2f((float)source->right, Renderer::GetTargetHeight()-(float)source->bottom); glVertex2f(1,1); + glEnd(); + + GL_REPORT_ERRORD(); + + Renderer::SetFramebuffer(0); + Renderer::RestoreGLState(); + VertexShaderMngr::SetViewportChanged(); + + TextureMngr::DisableStage(0); + + if( bFromZBuffer ) + Renderer::SetZBufferRender(); // notify for future settings + + GL_REPORT_ERRORD(); + //SaveTexture("frame.tga", GL_TEXTURE_RECTANGLE_NV, entry.texture, entry.w, entry.h); + //SaveTexture("tex.tga", GL_TEXTURE_RECTANGLE_NV, Renderer::GetZBufferTarget(), Renderer::GetTargetWidth(), Renderer::GetTargetHeight()); +} + +void TextureMngr::EnableTex2D(int stage) +{ + if( !(nTex2DEnabled & (1<