#include "Globals.h" #include "D3DShader.h" #include "PixelShader.h" #include "BPStructs.h" #include "XFStructs.h" #include "W32Util/Misc.h" #include "Utils.h" /* old tev->pixelshader notes color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 konstant for this stage (alpha, color) is given by bpmem.tevksel inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current chan color according to GXTevColorArg table above output is given by .outreg tevtemp is set according to swapmodetables and */ const float epsilon = 1.0f/255.0f; const char *tevKSelTableC[] = { "1,1,1", //KCSEL_1 = 0x00 "0.875,0.875,0.875",//KCSEL_7_8 = 0x01 "0.75,0.75,0.75", //KCSEL_3_4 = 0x02 "0.625,0.625,0.625",//KCSEL_5_8 = 0x03 "0.5,0.5,0.5", //KCSEL_1_2 = 0x04 "0.375,0.375,0.375",//KCSEL_3_8 = 0x05 "0.25,0.25,0.25", //KCSEL_1_4 = 0x06 "0.125,0.125,0.125",//KCSEL_1_8 = 0x07 "ERROR", //0x08 "ERROR", //0x09 "ERROR", //0x0a "ERROR", //0x0b "k0.rgb",//KCSEL_K0 = 0x0C "k1.rgb",//KCSEL_K1 = 0x0D "k2.rgb",//KCSEL_K2 = 0x0E "k3.rgb",//KCSEL_K3 = 0x0F "k0.rrr",//KCSEL_K0_R = 0x10 "k1.rrr",//KCSEL_K1_R = 0x11 "k2.rrr",//KCSEL_K2_R = 0x12 "k3.rrr",//KCSEL_K3_R = 0x13 "k0.ggg",//KCSEL_K0_G = 0x14 "k1.ggg",//KCSEL_K1_G = 0x15 "k2.ggg",//KCSEL_K2_G = 0x16 "k3.ggg",//KCSEL_K3_G = 0x17 "k0.bbb",//KCSEL_K0_B = 0x18 "k1.bbb",//KCSEL_K1_B = 0x19 "k2.bbb",//KCSEL_K2_B = 0x1A "k3.bbb",//KCSEL_K3_B = 0x1B "k0.aaa",//KCSEL_K0_A = 0x1C "k1.aaa",//KCSEL_K1_A = 0x1D "k2.aaa",//KCSEL_K2_A = 0x1E "k3.aaa",//KCSEL_K3_A = 0x1F }; const char *tevKSelTableA[] = { "1", //KASEL_1 = 0x00 "0.875",//KASEL_7_8 = 0x01 "0.75", //KASEL_3_4 = 0x02 "0.625",//KASEL_5_8 = 0x03 "0.5", //KASEL_1_2 = 0x04 "0.375",//KASEL_3_8 = 0x05 "0.25", //KASEL_1_4 = 0x06 "0.125",//KASEL_1_8 = 0x07 "ERROR",//0x08 "ERROR",//0x09 "ERROR",//0x0a "ERROR",//0x0b "ERROR",//0x0c "ERROR",//0x0d "ERROR",//0x0e "ERROR",//0x0f "k0.r", //KASEL_K0_R = 0x10 "k1.r", //KASEL_K1_R = 0x11 "k2.r", //KASEL_K2_R = 0x12 "k3.r", //KASEL_K3_R = 0x13 "k0.g", //KASEL_K0_G = 0x14 "k1.g", //KASEL_K1_G = 0x15 "k2.g", //KASEL_K2_G = 0x16 "k3.g", //KASEL_K3_G = 0x17 "k0.b", //KASEL_K0_B = 0x18 "k1.b", //KASEL_K1_B = 0x19 "k2.b", //KASEL_K2_B = 0x1A "k3.b", //KASEL_K3_B = 0x1B "k0.a", //KASEL_K0_A = 0x1C "k1.a", //KASEL_K1_A = 0x1D "k2.a", //KASEL_K2_A = 0x1E "k3.a", //KASEL_K3_A = 0x1F }; const char *tevScaleTable[] = { "1", //SCALE_1 "2", //SCALE_2 "4", //SCALE_4 "0.5", //DIVIDE_2 }; const char *tevBiasTable[] = { "", //ZERO, "+0.5", //ADD_HALF, "-0.5", //SUB_HALF, "", //WTF? seen in shadow2 }; const char *tevOpTable[] = { "+", //ADD = 0, "-", //SUB = 1, }; const char *tevCompOpTable[] = { ">", "==", }; #define TEV_COMP_R8 0 #define TEV_COMP_GR16 1 #define TEV_COMP_BGR24 2 #define TEV_COMP_RGB8 3 const char *tevCInputTable[] = { "prev.rgb", //CPREV, "prev.aaa", //APREV, "c0.rgb", //C0, "c0.aaa", //A0, "c1.rgb", //C1, "c1.aaa", //A1, "c2.rgb", //C2, "c2.aaa", //A2, "textemp.rgb", //TEXC, "textemp.aaa", //TEXA, "rastemp.rgb", //RASC, "rastemp.aaa", //RASA, "float3(1,1,1)", //ONE, "float3(.5,.5,.5)", //HALF, "konsttemp.rgb", //KONST, "float3(0,0,0)", //ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", }; const char *tevCInputTable2[] = { "prev", //CPREV, "(prev.aaa)", //APREV, "c0", //C0, "(c0.aaa)", //A0, "c1", //C1, "(c1.aaa)", //A1, "c2", //C2, "(c2.aaa)", //A2, "textemp", //TEXC, "(textemp.aaa)", //TEXA, "rastemp", //RASC, "(rastemp.aaa)", //RASA, "float3(1,1,1)", //ONE, "float3(.5,.5,.5)", //HALF, "konsttemp", //KONST, "float3(0,0,0)", //ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", }; const char *tevAInputTable[] = { "prev.a", //APREV, "c0.a", //A0, "c1.a", //A1, "c2.a", //A2, "textemp.a", //TEXA, "rastemp.a", //RASA, "konsttemp.a", //KONST, (hw1 had quarter) "0.0", //ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", }; const char *tevAInputTable2[] = { "prev", //APREV, "c0", //A0, "c1", //A1, "c2", //A2, "textemp", //TEXA, "rastemp", //RASA, "konsttemp", //KONST, (hw1 had quarter) "0.0", //ZERO "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", }; const char *tevRasTable[] = { "colors[0]",//RAS1_CC_0 0x00000000 /* color channel 0 */ "colors[1]",//RAS1_CC_1 0x00000001 /* color channel 1 */ "ERROR", //2 "ERROR", //3 "ERROR", //4 "alphabump", //RAS1_CC_B 0x00000005 /* indirect texture bump alpha */ //green cuz unsupported "(alphabump*(255.0f/248.0f))", //RAS1_CC_BN 0x00000006 /* ind tex bump alpha, normalized 0-255 *///green cuz unsupported "float4(0,0,0,0)", //RAS1_CC_Z 0x00000007 /* set color value to zero */ }; const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; const char *tevIndAlphaScale[] = {"", "*32","*16","*8"}; const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt const char *tevIndWrapStart[] = {"0", "256", "128", "64", "32", "16", "0.001" }; const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "8.0f" }; const char *tevTexFuncs[] = { "tex2D", "tex2Dproj" }; const char *alphaRef[2] = { "alphaRef.x", "alphaRef.y" }; //I hope we don't get too many hash collisions :p //all these magic numbers are primes, it should help a bit tevhash GetCurrentTEV() { u32 hash = bpmem.genMode.numindstages + bpmem.genMode.numtevstages*11 + bpmem.genMode.numtexgens*8*17; for (int i = 0; i < (int)bpmem.genMode.numtevstages+1; i++) { hash = _rotl(hash,3) ^ (bpmem.combiners[i].colorC.hex*13); hash = _rotl(hash,7) ^ ((bpmem.combiners[i].alphaC.hex&0xFFFFFFFC)*3); hash = _rotl(hash,9) ^ texcoords[i].texmtxinfo.projection*451; } for (int i = 0; i < (int)bpmem.genMode.numtevstages/2+1; i++) { hash = _rotl(hash,13) ^ (bpmem.tevorders[i].hex*7); } for (int i = 0; i < 8; i++) { hash = _rotl(hash,3) ^ bpmem.tevksel[i].swap1; hash = _rotl(hash,3) ^ bpmem.tevksel[i].swap2; } hash ^= bpmem.dstalpha.enable ^ 0xc0debabe; hash = _rotl(hash,4) ^ bpmem.alphaFunc.comp0*7; hash = _rotl(hash,4) ^ bpmem.alphaFunc.comp1*13; hash = _rotl(hash,4) ^ bpmem.alphaFunc.logic*11; return hash; } char text[65536]; #define WRITE p+=sprintf void WriteStage(char *&p, int n); void WriteAlphaTest(char *&p); char *swapColors = "rgba"; char swapModeTable[4][5]; void BuildSwapModeTable() { //bpmem.tevregs[0]. for (int i = 0; i < 4; i++) { swapModeTable[i][0]=swapColors[bpmem.tevksel[i*2].swap1]; swapModeTable[i][1]=swapColors[bpmem.tevksel[i*2].swap2]; swapModeTable[i][2]=swapColors[bpmem.tevksel[i*2+1].swap1]; swapModeTable[i][3]=swapColors[bpmem.tevksel[i*2+1].swap2]; swapModeTable[i][4]=0; } } LPDIRECT3DPIXELSHADER9 GeneratePixelShader() { DVSTARTPROFILE(); BuildSwapModeTable(); int numStages = bpmem.genMode.numtevstages + 1; int numTexgen = bpmem.genMode.numtexgens; int numSamplers = 8; char *p = text; WRITE(p,"//Pixel Shader for TEV stages\n"); WRITE(p,"//%i TEV stages, %i texgens, %i IND stages, %i COL channels\n", bpmem.genMode.numtevstages,bpmem.genMode.numtexgens,bpmem.genMode.numindstages,bpmem.genMode.numcolchans); //write kcolor declarations for (int i = 0; i < 4; i++) WRITE(p,"float4 k%i : register(c%i);\n",i,PS_CONST_KCOLORS+i); for (int i = 0; i < 3; i++) WRITE(p,"float4 color%i : register(c%i);\n",i,PS_CONST_COLORS+i+1); WRITE(p,"float constalpha : register(c%i);\n",PS_CONST_CONSTALPHA); WRITE(p,"float2 alphaRef : register(c%i);\n",PS_CONST_ALPHAREF); WRITE(p,"\n"); WRITE(p,"sampler samp[%i] : register(s0);\n",numSamplers,numSamplers); WRITE(p,"\n"); WRITE(p,"float4 main(in float4 colors[2] : COLOR0"); if (numTexgen) WRITE(p,", float4 uv[%i] : TEXCOORD0",numTexgen); else WRITE(p,", float4 uv[1] : TEXCOORD0"); //HACK WRITE(p,") : COLOR\n"); WRITE(p,"{\n"); WRITE(p,"float4 c0=color0,c1=color1,c2=color2,prev=float4(0.0f,0.0f,0.0f,0.0f),textemp,rastemp,konsttemp;\n"); WRITE(p,"\n"); //WRITE(p, "return 1;}\n"); //return D3D::CompilePShader(text,(int)(p-text)); for (int i = 0; i < numStages; i++) WriteStage(p,i); //build the equation for this stage //WRITE(p, "prev = textemp;\n"); //WRITE(p, "prev = float4(uv[0].x,uv[0].y,0,1);\n"); WriteAlphaTest(p); if (bpmem.dstalpha.enable) WRITE(p," return float4(prev.rgb,constalpha.x);\n"); else WRITE(p," return prev;\n"); WRITE(p,"}\n"); WRITE(p,"\0"); //#ifndef TEASER /* FILE *f=fopen("D:\\dlistlogs.txt","a"); fprintf(f,"===========================================\n"); fprintf(f,"%s",text); fclose(f); */ //W32Util::CopyTextToClipboard(0,text); //#endif //MessageBox(0,text,0,0); return D3D::CompilePShader(text,(int)(p-text)); } void WriteStage(char *&p, int n) { char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; int texfun = texcoords[n].texmtxinfo.projection; WRITE(p,"rastemp=%s.%s;\n",tevRasTable[bpmem.tevorders[n/2].getColorChan(n&1)],rasswap); if (bpmem.tevorders[n/2].getEnable(n&1)) WRITE(p,"textemp=%s(samp[%i],uv[%i]).%s;\n", tevTexFuncs[texfun], bpmem.tevorders[n/2].getTexMap(n&1), bpmem.tevorders[n/2].getTexCoord(n&1),texswap); else WRITE(p,"textemp=float4(1,1,1,1);\n"); int kc = bpmem.tevksel[n/2].getKC(n&1); int ka = bpmem.tevksel[n/2].getKA(n&1); WRITE(p,"konsttemp=float4(%s,%s);\n",tevKSelTableC[kc],tevKSelTableA[ka]); TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC; WRITE(p,"float4(%s,%s)=", tevCOutputTable[cc.outreg], tevAOutputTable[ac.outreg]); ////////////////////////////////////////////////////////////////////////// //start of color ////////////////////////////////////////////////////////////////////////// WRITE(p,"float4(\n"); if (cc.bias != TB_COMPARE) { //normal color combiner goes here WRITE(p," %s*(%s%s",tevScaleTable[cc.shift],tevCInputTable[cc.d],tevOpTable[cc.op]); WRITE(p,"(lerp(%s,%s,%s)%s)),\n", tevCInputTable[cc.a],tevCInputTable[cc.b], tevCInputTable[cc.c],tevBiasTable[cc.bias]); } else { //compare color combiner goes here switch(cc.shift) // yep comparemode stored here :P { case TEV_COMP_R8: if (cc.op == 0) //equality check needs tolerance, fp in gpu has drawbacks :( WRITE(p," %s + ((%s.r > %s.r) ? %s : float3(0,0,0)),\n", tevCInputTable[cc.d],tevCInputTable2[cc.a], tevCInputTable2[cc.b],tevCInputTable[cc.c]); else WRITE(p," %s + (abs(%s.r - %s.r)<%f ? %s : float3(0,0,0)),\n", tevCInputTable[cc.d],tevCInputTable2[cc.a], tevCInputTable2[cc.b],epsilon,tevCInputTable[cc.c]); break; default: WRITE(p,"float3(0,0,0),\n"); break; } } //end of color ////////////////////////////////////////////////////////////////////////// //start of alpha ////////////////////////////////////////////////////////////////////////// if (ac.bias != TB_COMPARE) { //normal alpha combiner goes here WRITE(p," %s*(%s%s",tevScaleTable[ac.shift],tevAInputTable[ac.d],tevOpTable[ac.op]); WRITE(p,"lerp(%s,%s,%s) %s)\n", tevAInputTable[ac.a],tevAInputTable[ac.b], tevAInputTable[ac.c],tevBiasTable[ac.bias]); } else { int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here //compare alpha combiner goes here switch(cmp) { case TEVCMP_R8_GT: case TEVCMP_A8_GT: WRITE(p," %s + ((%s.%s > %s.%s) ? %s : 0)\n", tevAInputTable[ac.d],tevAInputTable2[ac.a], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable2[ac.b], cmp==TEVCMP_R8_GT?"r":"a", tevAInputTable[ac.c]); break; case TEVCMP_R8_EQ: case TEVCMP_A8_EQ: WRITE(p," %s + (abs(%s.r - %s.r)<%f ? %s : 0)\n", tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],epsilon,tevAInputTable[ac.c]); break; case TEVCMP_GR16_GT: // 16 bit compares: 255*g+r (probably used for ztextures, so make sure in ztextures, g is the most significant byte) case TEVCMP_BGR24_GT: // 24 bit compares: 255*255*b+255*g+r WRITE(p," %s + (( dot(%s.rgb-%s.rgb, comp%s) > 0) ? %s : 0)\n", tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b], cmp==TEVCMP_GR16_GT?"16":"24", tevAInputTable[ac.c]); break; case TEVCMP_GR16_EQ: case TEVCMP_BGR24_EQ: WRITE(p," %s + (abs(dot(%s.rgb - %s.rgb, comp%s))<%f ? %s : 0)\n", tevAInputTable[ac.d],tevAInputTable2[ac.a], tevAInputTable2[ac.b],cmp==TEVCMP_GR16_GT?"16":"24",epsilon,tevAInputTable[ac.c]); break; default: WRITE(p,"0)\n"); break; } } WRITE(p, ");"); if (ac.clamp) WRITE(p, "%s = clamp(%s, 0.0f, 1.0f);\n", tevAOutputTable[ac.outreg], tevAOutputTable[ac.outreg]); WRITE(p, "\n"); } void WriteAlphaCompare(char *&p, int num, int comp) { WRITE(p," res%i = ",num); switch(comp) { case COMPARE_ALWAYS: WRITE(p,"0;\n"); break; case COMPARE_NEVER: WRITE(p,"1;\n"); break; case COMPARE_LEQUAL: WRITE(p,"prev.a - %s.x;\n",alphaRef[num]); break; case COMPARE_LESS: WRITE(p,"prev.a - %s.x + %f;\n",alphaRef[num],epsilon*2);break; case COMPARE_GEQUAL: WRITE(p,"%s - prev.a;\n",alphaRef[num]); break; case COMPARE_GREATER: WRITE(p,"%s - prev.a + %f;\n",alphaRef[num],epsilon*2);break; case COMPARE_EQUAL: WRITE(p,"abs(%s-prev.a)-%f;\n",alphaRef[num],epsilon*2); break; case COMPARE_NEQUAL: WRITE(p,"%f-abs(%s-prev.a);\n",epsilon*2,alphaRef[num]); break; } } void WriteAlphaTest(char *&p) { AlphaOp op = (AlphaOp)bpmem.alphaFunc.logic; Compare comp[2] = {(Compare)bpmem.alphaFunc.comp0,(Compare)bpmem.alphaFunc.comp1}; //first kill all the simple cases if (op == ALPHAOP_AND && (comp[0] == COMPARE_ALWAYS && comp[1] == COMPARE_ALWAYS)) return; if (op == ALPHAOP_OR && (comp[0] == COMPARE_ALWAYS || comp[1] == COMPARE_ALWAYS)) return; for (int i = 0; i < 2; i++) { int one = i; int other = 1-i; switch(op) { case ALPHAOP_XOR: if (comp[one] == COMPARE_ALWAYS && comp[other] == COMPARE_NEVER) return; break; case ALPHAOP_XNOR: if (comp[one] == COMPARE_ALWAYS && comp[other] == COMPARE_ALWAYS) return; if (comp[one] == COMPARE_ALWAYS && comp[other] == COMPARE_NEVER) return; break; } } //Ok, didn't get to do the easy way out :P // do the general way WRITE(p,"float res0, res1;\n"); WriteAlphaCompare(p, 0, bpmem.alphaFunc.comp0); WriteAlphaCompare(p, 1, bpmem.alphaFunc.comp1); WRITE(p,"res0 = max(res0, 0);\n"); WRITE(p,"res1 = max(res1, 0);\n"); //probably should use lookup textures for some of these :P switch(bpmem.alphaFunc.logic) { case ALPHAOP_AND: // if both are 0 WRITE(p,"clip(-(res0+res1)+%f);\n",epsilon); break; case ALPHAOP_OR: //if either is 0 WRITE(p,"clip(-res0*res1+%f);\n",epsilon*epsilon); break; case ALPHAOP_XOR: //hmm, this might work: WRITE(p,"res0=(res0>0?1:0)-.5;\n"); WRITE(p,"res1=(res1>0?1:0)-.5;\n"); WRITE(p,"clip(-res0*res1);\n",epsilon); break; case ALPHAOP_XNOR: WRITE(p,"res0=(res0>0?1:0)-.5;\n"); WRITE(p,"res1=(res1>0?1:0)-.5;\n"); WRITE(p,"clip(res0*res1);\n",epsilon); break; } }