diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index a615f5d3a7..312bc2d01d 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -321,7 +321,6 @@ struct TevStageCombiner union TevStageIndirect { - // if mid, sw, tw, and addprev are 0, then no indirect stage is used, mask = 0x17fe00 struct { u32 bt : 2; // Indirect tex stage ID @@ -342,7 +341,9 @@ struct TevStageCombiner u32 unused : 11; }; - bool IsActive() { return (hex & 0x17fe00) != 0; } + // If bs and mid are zero, the result of the stage is independent of + // the texture sample data, so we can skip sampling the texture. + bool IsActive() { return bs != ITBA_OFF || mid != 0; } }; union TwoTevStageOrders diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6211066f4d..f5ccc22a4d 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -550,7 +550,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; - bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages; + bool bHasIndStage = bpmem.tevind[n].bt < bpmem.genMode.numindstages; // HACK to handle cases where the tex gen is not enabled if (!bHasTexCoord) texcoord = 0; @@ -579,24 +579,24 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP // TODO: Should we reset alphabump to 0 here? } - // format - const char *tevIndFmtMask[] = {"255", "31", "15", "7" }; - out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]); - - // bias - TODO: Check if this needs to be this complicated.. - const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias - const char *tevIndBiasAdd[] = {"-128", "1", "1", "1" }; // indexed by fmt - if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || bpmem.tevind[n].bias == ITB_U) - out.Write("\tiindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); - else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || bpmem.tevind[n].bias == ITB_TU) - out.Write("\tiindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); - else if (bpmem.tevind[n].bias == ITB_STU) - out.Write("\tiindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); - - // multiply by offset matrix and scale - calculations are likely to overflow badly, - // yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result if (bpmem.tevind[n].mid != 0) { + // format + const char *tevIndFmtMask[] = { "255", "31", "15", "7" }; + out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]); + + // bias - TODO: Check if this needs to be this complicated.. + const char *tevIndBiasField[] = { "", "x", "y", "xy", "z", "xz", "yz", "xyz" }; // indexed by bias + const char *tevIndBiasAdd[] = { "-128", "1", "1", "1" }; // indexed by fmt + if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || bpmem.tevind[n].bias == ITB_U) + out.Write("\tiindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); + else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || bpmem.tevind[n].bias == ITB_TU) + out.Write("\tiindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); + else if (bpmem.tevind[n].bias == ITB_STU) + out.Write("\tiindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); + + // multiply by offset matrix and scale - calculations are likely to overflow badly, + // yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result if (bpmem.tevind[n].mid <= 3) { int mtxidx = 2*(bpmem.tevind[n].mid-1);