From 18c1bf19cac3b4299c55db35117f1383a5c80318 Mon Sep 17 00:00:00 2001
From: Stenzek <stenzek@gmail.com>
Date: Wed, 4 Jan 2017 21:34:27 +1000
Subject: [PATCH 1/2] VideoCommon: Use constant for number of color channels in
 XFMemory

---
 Source/Core/VideoCommon/XFMemory.h | 74 +++++++++++++++---------------
 1 file changed, 38 insertions(+), 36 deletions(-)

diff --git a/Source/Core/VideoCommon/XFMemory.h b/Source/Core/VideoCommon/XFMemory.h
index 461a63e368..be43d099d8 100644
--- a/Source/Core/VideoCommon/XFMemory.h
+++ b/Source/Core/VideoCommon/XFMemory.h
@@ -10,6 +10,8 @@
 
 class DataReader;
 
+constexpr size_t NUM_XF_COLOR_CHANNELS = 2;  // length of XFMemory's per-channel arrays
+
 // Lighting
 
 // Projection
@@ -249,42 +251,42 @@ struct Projection
 
 struct XFMemory
 {
-  float posMatrices[256];      // 0x0000 - 0x00ff
-  u32 unk0[768];               // 0x0100 - 0x03ff
-  float normalMatrices[96];    // 0x0400 - 0x045f
-  u32 unk1[160];               // 0x0460 - 0x04ff
-  float postMatrices[256];     // 0x0500 - 0x05ff
-  Light lights[8];             // 0x0600 - 0x067f
-  u32 unk2[2432];              // 0x0680 - 0x0fff
-  u32 error;                   // 0x1000
-  u32 diag;                    // 0x1001
-  u32 state0;                  // 0x1002
-  u32 state1;                  // 0x1003
-  u32 xfClock;                 // 0x1004
-  u32 clipDisable;             // 0x1005
-  u32 perf0;                   // 0x1006
-  u32 perf1;                   // 0x1007
-  INVTXSPEC hostinfo;          // 0x1008 number of textures,colors,normals from vertex input
-  NumColorChannel numChan;     // 0x1009
-  u32 ambColor[2];             // 0x100a, 0x100b
-  u32 matColor[2];             // 0x100c, 0x100d
-  LitChannel color[2];         // 0x100e, 0x100f
-  LitChannel alpha[2];         // 0x1010, 0x1011
-  DualTexInfo dualTexTrans;    // 0x1012
-  u32 unk3;                    // 0x1013
-  u32 unk4;                    // 0x1014
-  u32 unk5;                    // 0x1015
-  u32 unk6;                    // 0x1016
-  u32 unk7;                    // 0x1017
-  TMatrixIndexA MatrixIndexA;  // 0x1018
-  TMatrixIndexB MatrixIndexB;  // 0x1019
-  Viewport viewport;           // 0x101a - 0x101f
-  Projection projection;       // 0x1020 - 0x1026
-  u32 unk8[24];                // 0x1027 - 0x103e
-  NumTexGen numTexGen;         // 0x103f
-  TexMtxInfo texMtxInfo[8];    // 0x1040 - 0x1047
-  u32 unk9[8];                 // 0x1048 - 0x104f
-  PostMtxInfo postMtxInfo[8];  // 0x1050 - 0x1057
+  float posMatrices[256];    // 0x0000 - 0x00ff
+  u32 unk0[768];             // 0x0100 - 0x03ff
+  float normalMatrices[96];  // 0x0400 - 0x045f
+  u32 unk1[160];             // 0x0460 - 0x04ff
+  float postMatrices[256];   // 0x0500 - 0x05ff
+  Light lights[8];           // 0x0600 - 0x067f
+  u32 unk2[2432];            // 0x0680 - 0x0fff
+  u32 error;                 // 0x1000
+  u32 diag;                  // 0x1001
+  u32 state0;                // 0x1002
+  u32 state1;                // 0x1003
+  u32 xfClock;               // 0x1004
+  u32 clipDisable;           // 0x1005
+  u32 perf0;                 // 0x1006
+  u32 perf1;                 // 0x1007
+  INVTXSPEC hostinfo;        // 0x1008 number of textures,colors,normals from vertex input
+  NumColorChannel numChan;   // 0x1009
+  u32 ambColor[NUM_XF_COLOR_CHANNELS];      // 0x100a, 0x100b
+  u32 matColor[NUM_XF_COLOR_CHANNELS];      // 0x100c, 0x100d
+  LitChannel color[NUM_XF_COLOR_CHANNELS];  // 0x100e, 0x100f
+  LitChannel alpha[NUM_XF_COLOR_CHANNELS];  // 0x1010, 0x1011
+  DualTexInfo dualTexTrans;                 // 0x1012
+  u32 unk3;                                 // 0x1013
+  u32 unk4;                                 // 0x1014
+  u32 unk5;                                 // 0x1015
+  u32 unk6;                                 // 0x1016
+  u32 unk7;                                 // 0x1017
+  TMatrixIndexA MatrixIndexA;               // 0x1018
+  TMatrixIndexB MatrixIndexB;               // 0x1019
+  Viewport viewport;                        // 0x101a - 0x101f
+  Projection projection;                    // 0x1020 - 0x1026
+  u32 unk8[24];                             // 0x1027 - 0x103e
+  NumTexGen numTexGen;                      // 0x103f
+  TexMtxInfo texMtxInfo[8];                 // 0x1040 - 0x1047
+  u32 unk9[8];                              // 0x1048 - 0x104f
+  PostMtxInfo postMtxInfo[8];               // 0x1050 - 0x1057
 };
 
 extern XFMemory xfmem;

From efb97598629b96378edd783af82936185d8a99ce Mon Sep 17 00:00:00 2001
From: Stenzek <stenzek@gmail.com>
Date: Tue, 21 Nov 2017 18:54:11 +1000
Subject: [PATCH 2/2] LightingShaderGen: Always calculate lighting for both
 color channels

Cel Damage uses the colors produced by the lighting stage of the vertex
pipeline as texture coordinates, but sets numColorChans to zero.

We now calculate the colors for both channels in all cases, but override
the output color of any channel whose index is >= numColorChans before it
is written from the vertex shader.
---
 .../VideoBackends/Software/TransformUnit.cpp  |  2 +-
 Source/Core/VideoCommon/LightingShaderGen.cpp |  6 ++--
 Source/Core/VideoCommon/LightingShaderGen.h   |  2 +-
 Source/Core/VideoCommon/PixelShaderGen.cpp    |  2 +-
 Source/Core/VideoCommon/UberShaderCommon.cpp  |  7 ++--
 Source/Core/VideoCommon/UberShaderVertex.cpp  | 13 +++++++
 Source/Core/VideoCommon/VertexShaderGen.cpp   | 35 +++++++++----------
 7 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/Source/Core/VideoBackends/Software/TransformUnit.cpp b/Source/Core/VideoBackends/Software/TransformUnit.cpp
index e9196798fc..5de48f4f05 100644
--- a/Source/Core/VideoBackends/Software/TransformUnit.cpp
+++ b/Source/Core/VideoBackends/Software/TransformUnit.cpp
@@ -321,7 +321,7 @@ static void LightAlpha(const Vec3& pos, const Vec3& normal, u8 lightNum, const L
 
 void TransformColor(const InputVertexData* src, OutputVertexData* dst)
 {
-  for (u32 chan = 0; chan < xfmem.numChan.numColorChans; chan++)
+  for (u32 chan = 0; chan < NUM_XF_COLOR_CHANNELS; chan++)
   {
     // abgr
     std::array<u8, 4> matcolor;
diff --git a/Source/Core/VideoCommon/LightingShaderGen.cpp b/Source/Core/VideoCommon/LightingShaderGen.cpp
index b5556214f0..4139018552 100644
--- a/Source/Core/VideoCommon/LightingShaderGen.cpp
+++ b/Source/Core/VideoCommon/LightingShaderGen.cpp
@@ -79,9 +79,9 @@ static void GenerateLightShader(ShaderCode& object, const LightingUidData& uid_d
 // inColorName is color in vs and colors_ in ps
 // dest is o.colors_ in vs and colors_ in ps
 void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data, int components,
-                                u32 numColorChans, const char* inColorName, const char* dest)
+                                const char* inColorName, const char* dest)
 {
-  for (unsigned int j = 0; j < numColorChans; j++)
+  for (unsigned int j = 0; j < NUM_XF_COLOR_CHANNELS; j++)
   {
     object.Write("{\n");
 
@@ -185,7 +185,7 @@ void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_d
 
 void GetLightingShaderUid(LightingUidData& uid_data)
 {
-  for (unsigned int j = 0; j < xfmem.numChan.numColorChans; j++)
+  for (unsigned int j = 0; j < NUM_XF_COLOR_CHANNELS; j++)
   {
     uid_data.matsource |= xfmem.color[j].matsource << j;
     uid_data.matsource |= xfmem.alpha[j].matsource << (j + 2);
diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h
index 4bd4e52016..096b5d10dd 100644
--- a/Source/Core/VideoCommon/LightingShaderGen.h
+++ b/Source/Core/VideoCommon/LightingShaderGen.h
@@ -45,5 +45,5 @@ static const char s_lighting_struct[] = "struct Light {\n"
                                         "};\n";
 
 void GenerateLightingShaderCode(ShaderCode& object, const LightingUidData& uid_data, int components,
-                                u32 numColorChans, const char* inColorName, const char* dest);
+                                const char* inColorName, const char* dest);
 void GetLightingShaderUid(LightingUidData& uid_data);
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index b217e6f176..0ccfb09679 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -643,7 +643,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
     // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further
     // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3);
     GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT,
-                               uid_data->numColorChans, "colors_", "col");
+                               "colors_", "col");
   }
 
   // HACK to handle cases where the tex gen is not enabled
diff --git a/Source/Core/VideoCommon/UberShaderCommon.cpp b/Source/Core/VideoCommon/UberShaderCommon.cpp
index 58e33e5942..645712fd4f 100644
--- a/Source/Core/VideoCommon/UberShaderCommon.cpp
+++ b/Source/Core/VideoCommon/UberShaderCommon.cpp
@@ -94,8 +94,8 @@ void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_po
                          const char* out_color_1_var)
 {
   out.Write("// Lighting\n");
-  out.Write("%sfor (uint chan = 0u; chan < xfmem_numColorChans; chan++) {\n",
-            api_type == APIType::D3D ? "[loop] " : "");
+  out.Write("%sfor (uint chan = 0u; chan < %zuu; chan++) {\n",  // emit a uint literal
+            api_type == APIType::D3D ? "[loop] " : "", NUM_XF_COLOR_CHANNELS);
   out.Write("  uint colorreg = xfmem_color(chan);\n"
             "  uint alphareg = xfmem_alpha(chan);\n"
             "  int4 mat = " I_MATERIALS "[chan + 2u]; \n"
@@ -196,8 +196,5 @@ void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_po
   out.Write("  }\n"
             "}\n"
             "\n");
-
-  out.Write("if (xfmem_numColorChans < 2u && (components & %uu) == 0u)\n", VB_HAS_COL1);
-  out.Write("  %s = %s;\n\n", out_color_1_var, out_color_0_var);
 }
 }
diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp
index ebc9c80f0e..bf24f6afda 100644
--- a/Source/Core/VideoCommon/UberShaderVertex.cpp
+++ b/Source/Core/VideoCommon/UberShaderVertex.cpp
@@ -171,6 +171,19 @@ ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
   if (numTexgen > 0)
     GenVertexShaderTexGens(ApiType, numTexgen, out);
 
+  out.Write("if (xfmem_numColorChans == 0u) {\n");  // channel 0 unused: override lit color
+  out.Write("  if ((components & %uu) != 0u)\n", VB_HAS_COL0);
+  out.Write("    o.colors_0 = rawcolor0;\n");
+  out.Write("  else\n");
+  out.Write("    o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
+  out.Write("}\n");
+  out.Write("if (xfmem_numColorChans < 2u) {\n");  // channel 1 unused: override lit color
+  out.Write("  if ((components & %uu) != 0u)\n", VB_HAS_COL1);
+  out.Write("    o.colors_1 = rawcolor1;\n");
+  out.Write("  else\n");
+  out.Write("    o.colors_1 = o.colors_0;\n");
+  out.Write("}\n");
+
   // clipPos/w needs to be done in pixel shader, not here
   out.Write("o.clipPos = o.pos;\n");
 
diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp
index a02f801896..d6f65b12b4 100644
--- a/Source/Core/VideoCommon/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/VertexShaderGen.cpp
@@ -239,24 +239,8 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
             "float3 ldir, h, cosAttn, distAttn;\n"
             "float dist, dist2, attn;\n");
 
-  if (uid_data->numColorChans == 0)
-  {
-    if (uid_data->components & VB_HAS_COL0)
-      out.Write("o.colors_0 = rawcolor0;\n");
-    else
-      out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
-  }
-
-  GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, uid_data->numColorChans,
-                             "rawcolor", "o.colors_");
-
-  if (uid_data->numColorChans < 2)
-  {
-    if (uid_data->components & VB_HAS_COL1)
-      out.Write("o.colors_1 = rawcolor1;\n");
-    else
-      out.Write("o.colors_1 = o.colors_0;\n");
-  }
+  GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, "rawcolor",
+                             "o.colors_");
 
   // transform texcoords
   out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
@@ -398,6 +382,21 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
     out.Write("}\n");
   }
 
+  if (uid_data->numColorChans == 0)
+  {
+    if (uid_data->components & VB_HAS_COL0)
+      out.Write("o.colors_0 = rawcolor0;\n");
+    else
+      out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
+  }
+  if (uid_data->numColorChans < 2)
+  {
+    if (uid_data->components & VB_HAS_COL1)
+      out.Write("o.colors_1 = rawcolor1;\n");
+    else
+      out.Write("o.colors_1 = o.colors_0;\n");
+  }
+
   // clipPos/w needs to be done in pixel shader, not here
   out.Write("o.clipPos = o.pos;\n");