From 33568494afe22f241f381c9741f3a355ab8dcba7 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 22 Mar 2015 00:31:40 +0100 Subject: [PATCH] Pica/Shader: Add geometry shader definitions. --- .../debugger/graphics_vertex_shader.cpp | 2 +- src/video_core/command_processor.cpp | 78 +++--- src/video_core/pica.h | 224 ++++++++++-------- src/video_core/primitive_assembly.cpp | 3 +- src/video_core/vertex_shader.cpp | 16 +- src/video_core/vertex_shader.h | 2 +- 6 files changed, 169 insertions(+), 156 deletions(-) diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 14d3f8f39..db622d846 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp @@ -259,7 +259,7 @@ void GraphicsVertexShaderModel::OnUpdate() for (auto pattern : Pica::g_state.vs.swizzle_data) info.swizzle_info.push_back({pattern}); - info.labels.insert({ Pica::g_state.regs.vs_main_offset, "main" }); + info.labels.insert({ Pica::g_state.regs.vs.main_offset, "main" }); endResetModel(); } diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 2a1c885a7..668127055 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -45,7 +45,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq)) return; - // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value + // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value u32 old_value = regs[id]; regs[id] = (old_value & ~mask) | (value & mask); @@ -242,7 +242,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { &geometry_dumper, _1, _2, _3)); // Send to vertex shader - VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); + VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs); if (is_indexed) { // TODO: Add processed vertex to vertex cache! @@ -281,35 +281,35 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { break; } - case PICA_REG_INDEX(vs_bool_uniforms): + case PICA_REG_INDEX(vs.bool_uniforms): for (unsigned i = 0; i < 16; ++i) - g_state.vs.uniforms.b[i] = (regs.vs_bool_uniforms.Value() & (1 << i)) != 0; + g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; break; - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { - int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); - auto values = regs.vs_int_uniforms[index]; + int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); + auto values = regs.vs.int_uniforms[index]; g_state.vs.uniforms.i[index] = Math::Vec4(values.x, values.y, values.z, values.w); LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); break; } - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { - auto& uniform_setup = regs.vs_uniform_setup; + auto& uniform_setup = regs.vs.uniform_setup; // TODO: Does actual hardware indeed keep an intermediate buffer or does // it directly write the values? @@ -392,32 +392,32 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } // Load shader program code - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { - g_state.vs.program_code[regs.vs_program.offset] = value; - regs.vs_program.offset++; + g_state.vs.program_code[regs.vs.program.offset] = value; + regs.vs.program.offset++; break; } // Load swizzle pattern data - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { - g_state.vs.swizzle_data[regs.vs_swizzle_patterns.offset] = value; - regs.vs_swizzle_patterns.offset++; + g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; + regs.vs.swizzle_patterns.offset++; break; } diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 026b10a62..b32db76eb 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -784,112 +784,119 @@ struct Regs { INSERT_PADDING_WORDS(0x20); enum class TriangleTopology : u32 { - List = 0, - Strip = 1, - Fan = 2, - ListIndexed = 3, // TODO: No idea if this is correct + List = 0, + Strip = 1, + Fan = 2, + Shader = 3, // Programmable setup unit implemented in a geometry shader }; BitField<8, 2, TriangleTopology> triangle_topology; - INSERT_PADDING_WORDS(0x51); + INSERT_PADDING_WORDS(0x21); - BitField<0, 16, u32> vs_bool_uniforms; - union { - BitField< 0, 8, u32> x; - BitField< 8, 8, u32> y; - BitField<16, 8, u32> z; - BitField<24, 8, u32> w; - } vs_int_uniforms[4]; - - INSERT_PADDING_WORDS(0x5); - - // Offset to shader program entry point (in words) - BitField<0, 16, u32> vs_main_offset; - - union { - BitField< 0, 4, u64> attribute0_register; - BitField< 4, 4, u64> attribute1_register; - BitField< 8, 4, u64> attribute2_register; - BitField<12, 4, u64> attribute3_register; - BitField<16, 4, u64> attribute4_register; - BitField<20, 4, u64> attribute5_register; - BitField<24, 4, u64> attribute6_register; - BitField<28, 4, u64> attribute7_register; - BitField<32, 4, u64> attribute8_register; - BitField<36, 4, u64> attribute9_register; - BitField<40, 4, u64> attribute10_register; - BitField<44, 4, u64> attribute11_register; - BitField<48, 4, u64> attribute12_register; - BitField<52, 4, u64> attribute13_register; - BitField<56, 4, u64> attribute14_register; - BitField<60, 4, u64> attribute15_register; - - int GetRegisterForAttribute(int attribute_index) const { - u64 fields[] = { - attribute0_register, attribute1_register, attribute2_register, attribute3_register, - attribute4_register, attribute5_register, attribute6_register, attribute7_register, - attribute8_register, attribute9_register, attribute10_register, attribute11_register, - attribute12_register, attribute13_register, attribute14_register, attribute15_register, - }; - return (int)fields[attribute_index]; - } - } vs_input_register_map; - - INSERT_PADDING_WORDS(0x3); - - struct { - enum Format : u32 - { - FLOAT24 = 0, - FLOAT32 = 1 - }; - - bool IsFloat32() const { - return format == FLOAT32; - } + struct ShaderConfig { + BitField<0, 16, u32> bool_uniforms; union { - // Index of the next uniform to write to - // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices - BitField<0, 7, u32> index; + BitField< 0, 8, u32> x; + BitField< 8, 8, u32> y; + BitField<16, 8, u32> z; + BitField<24, 8, u32> w; + } int_uniforms[4]; - BitField<31, 1, Format> format; - }; + INSERT_PADDING_WORDS(0x5); - // Writing to these registers sets the "current" uniform. - // TODO: It's not clear how the hardware stores what the "current" uniform is. - u32 set_value[8]; + // Offset to shader program entry point (in words) + BitField<0, 16, u32> main_offset; - } vs_uniform_setup; + union { + BitField< 0, 4, u64> attribute0_register; + BitField< 4, 4, u64> attribute1_register; + BitField< 8, 4, u64> attribute2_register; + BitField<12, 4, u64> attribute3_register; + BitField<16, 4, u64> attribute4_register; + BitField<20, 4, u64> attribute5_register; + BitField<24, 4, u64> attribute6_register; + BitField<28, 4, u64> attribute7_register; + BitField<32, 4, u64> attribute8_register; + BitField<36, 4, u64> attribute9_register; + BitField<40, 4, u64> attribute10_register; + BitField<44, 4, u64> attribute11_register; + BitField<48, 4, u64> attribute12_register; + BitField<52, 4, u64> attribute13_register; + BitField<56, 4, u64> attribute14_register; + BitField<60, 4, u64> attribute15_register; - INSERT_PADDING_WORDS(0x2); + int GetRegisterForAttribute(int attribute_index) const { + u64 fields[] = { + attribute0_register, attribute1_register, attribute2_register, attribute3_register, + attribute4_register, attribute5_register, attribute6_register, attribute7_register, + attribute8_register, attribute9_register, attribute10_register, attribute11_register, + attribute12_register, attribute13_register, attribute14_register, attribute15_register, + }; + return (int)fields[attribute_index]; + } + } input_register_map; - struct { - // Offset of the next instruction to write code to. - // Incremented with each instruction write. - u32 offset; + // OUTMAP_MASK, 0x28E, CODETRANSFER_END + INSERT_PADDING_WORDS(0x3); - // Writing to these registers sets the "current" word in the shader program. - // TODO: It's not clear how the hardware stores what the "current" word is. - u32 set_word[8]; - } vs_program; + struct { + enum Format : u32 + { + FLOAT24 = 0, + FLOAT32 = 1 + }; - INSERT_PADDING_WORDS(0x1); + bool IsFloat32() const { + return format == FLOAT32; + } - // This register group is used to load an internal table of swizzling patterns, - // which are indexed by each shader instruction to specify vector component swizzling. - struct { - // Offset of the next swizzle pattern to write code to. - // Incremented with each instruction write. - u32 offset; + union { + // Index of the next uniform to write to + // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices + // TODO: Maybe the uppermost index is for the geometry shader? Investigate! + BitField<0, 7, u32> index; - // Writing to these registers sets the "current" swizzle pattern in the table. - // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is. - u32 set_word[8]; - } vs_swizzle_patterns; + BitField<31, 1, Format> format; + }; - INSERT_PADDING_WORDS(0x22); + // Writing to these registers sets the current uniform. + u32 set_value[8]; + + } uniform_setup; + + INSERT_PADDING_WORDS(0x2); + + struct { + // Offset of the next instruction to write code to. + // Incremented with each instruction write. + u32 offset; + + // Writing to these registers sets the "current" word in the shader program. + u32 set_word[8]; + } program; + + INSERT_PADDING_WORDS(0x1); + + // This register group is used to load an internal table of swizzling patterns, + // which are indexed by each shader instruction to specify vector component swizzling. + struct { + // Offset of the next swizzle pattern to write code to. + // Incremented with each instruction write. + u32 offset; + + // Writing to these registers sets the current swizzle pattern in the table. + u32 set_word[8]; + } swizzle_patterns; + + INSERT_PADDING_WORDS(0x2); + }; + + ShaderConfig gs; + ShaderConfig vs; + + INSERT_PADDING_WORDS(0x20); // Map register indices to names readable by humans // Used for debugging purposes, so performance is not an issue here @@ -936,13 +943,20 @@ struct Regs { ADD_FIELD(vs_default_attributes_setup); ADD_FIELD(command_buffer); ADD_FIELD(triangle_topology); - ADD_FIELD(vs_bool_uniforms); - ADD_FIELD(vs_int_uniforms); - ADD_FIELD(vs_main_offset); - ADD_FIELD(vs_input_register_map); - ADD_FIELD(vs_uniform_setup); - ADD_FIELD(vs_program); - ADD_FIELD(vs_swizzle_patterns); + ADD_FIELD(gs.bool_uniforms); + ADD_FIELD(gs.int_uniforms); + ADD_FIELD(gs.main_offset); + ADD_FIELD(gs.input_register_map); + ADD_FIELD(gs.uniform_setup); + ADD_FIELD(gs.program); + ADD_FIELD(gs.swizzle_patterns); + ADD_FIELD(vs.bool_uniforms); + ADD_FIELD(vs.int_uniforms); + ADD_FIELD(vs.main_offset); + ADD_FIELD(vs.input_register_map); + ADD_FIELD(vs.uniform_setup); + ADD_FIELD(vs.program); + ADD_FIELD(vs.swizzle_patterns); #undef ADD_FIELD @@ -1014,17 +1028,14 @@ ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); ASSERT_REG_POSITION(command_buffer, 0x238); ASSERT_REG_POSITION(triangle_topology, 0x25e); -ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); -ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); -ASSERT_REG_POSITION(vs_main_offset, 0x2ba); -ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); -ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); -ASSERT_REG_POSITION(vs_program, 0x2cb); -ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5); +ASSERT_REG_POSITION(gs, 0x280); +ASSERT_REG_POSITION(vs, 0x2b0); #undef ASSERT_REG_POSITION #endif // !defined(_MSC_VER) +static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size"); + // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); @@ -1134,7 +1145,7 @@ struct State { Regs regs; /// Vertex shader memory - struct { + struct ShaderSetup { struct { Math::Vec4 f[96]; std::array b; @@ -1145,7 +1156,10 @@ struct State { std::array program_code; std::array swizzle_data; - } vs; + }; + + ShaderSetup vs; + ShaderSetup gs; /// Current Pica command list struct { diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 0120f2896..2f22bdcce 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -20,8 +20,9 @@ template void PrimitiveAssembler::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) { switch (topology) { + // TODO: Figure out what's different with TriangleTopology::Shader. case Regs::TriangleTopology::List: - case Regs::TriangleTopology::ListIndexed: + case Regs::TriangleTopology::Shader: if (buffer_index < 2) { buffer[buffer_index++] = vtx; } else { diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 87006a832..c00f0c10b 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -546,20 +546,18 @@ static void ProcessShaderCode(VertexShaderState& state) { static Common::Profiling::TimingCategory shader_category("Vertex Shader"); -OutputVertex RunShader(const InputVertex& input, int num_attributes) { +OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { Common::Profiling::ScopeTimer timer(shader_category); - const auto& regs = g_state.regs; - const auto& vs = g_state.vs; VertexShaderState state; - const u32* main = &vs.program_code[regs.vs_main_offset]; + const u32* main = &setup.program_code[config.main_offset]; state.program_counter = (u32*)main; state.debug.max_offset = 0; state.debug.max_opdesc_id = 0; // Setup input register table - const auto& attribute_register_map = regs.vs_input_register_map; + const auto& attribute_register_map = config.input_register_map; float24 dummy_register; boost::fill(state.input_register_table, &dummy_register); @@ -584,16 +582,16 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { state.conditional_code[1] = false; ProcessShaderCode(state); - DebugUtils::DumpShader(vs.program_code.data(), state.debug.max_offset, vs.swizzle_data.data(), - state.debug.max_opdesc_id, regs.vs_main_offset, - regs.vs_output_attributes); + DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), + state.debug.max_opdesc_id, config.main_offset, + g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here // Setup output data OutputVertex ret; // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to // figure out what those circumstances are and enable the remaining outputs then. for (int i = 0; i < 7; ++i) { - const auto& output_register_map = regs.vs_output_attributes[i]; + const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here u32 semantics[4] = { output_register_map.map_x, output_register_map.map_y, diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index c997e6a77..97f9250dd 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -65,7 +65,7 @@ struct OutputVertex { static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); -OutputVertex RunShader(const InputVertex& input, int num_attributes); +OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); } // namespace