From 3cc460ab34ac6f818b7269428c0dbd0522729648 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 17 Apr 2018 09:35:43 +0300 Subject: [PATCH 1/2] shader_jit: change passing ShaderSetup to passing uniforms struct into the program We are going to add private members to ShaderSetup, which forbids the usage of offsetof. The JIT program only uses the uniform part of the setup, so we can just isolate it. --- src/video_core/shader/shader.h | 24 ++++++++++--------- .../shader/shader_jit_x64_compiler.cpp | 18 +++++++------- .../shader/shader_jit_x64_compiler.h | 2 +- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index dc5072713..8b212100c 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -173,27 +173,29 @@ struct GSUnitState : public UnitState { GSEmitter emitter; }; -struct ShaderSetup { - struct { - // The float uniforms are accessed by the shader JIT using SSE instructions, and are - // therefore required to be 16-byte aligned. - alignas(16) Math::Vec4 f[96]; +struct Uniforms { + // The float uniforms are accessed by the shader JIT using SSE instructions, and are + // therefore required to be 16-byte aligned. 
+ alignas(16) Math::Vec4 f[96]; - std::array b; - std::array, 4> i; - } uniforms; + std::array b; + std::array, 4> i; static size_t GetFloatUniformOffset(unsigned index) { - return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4); + return offsetof(Uniforms, f) + index * sizeof(Math::Vec4); } static size_t GetBoolUniformOffset(unsigned index) { - return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); + return offsetof(Uniforms, b) + index * sizeof(bool); } static size_t GetIntUniformOffset(unsigned index) { - return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4); + return offsetof(Uniforms, i) + index * sizeof(Math::Vec4); } +}; + +struct ShaderSetup { + Uniforms uniforms; std::array program_code; std::array swizzle_data; diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index f497990f1..06c6f52e5 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -104,7 +104,7 @@ const JitFunction instr_table[64] = { // purposes, as documented below: /// Pointer to the uniform memory -static const Reg64 SETUP = r9; +static const Reg64 UNIFORMS = r9; /// The two 32-bit VS address offset registers set by the MOVA instruction static const Reg64 ADDROFFS_REG_0 = r10; static const Reg64 ADDROFFS_REG_1 = r11; @@ -139,7 +139,7 @@ static const Xmm NEGBIT = xmm15; // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed static const BitSet32 persistent_regs = BuildRegSet({ // Pointers to register blocks - SETUP, + UNIFORMS, STATE, // Cached registers ADDROFFS_REG_0, @@ -184,8 +184,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe size_t src_offset; if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { - src_ptr = SETUP; - src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); + src_ptr = UNIFORMS; + src_offset = 
Uniforms::GetFloatUniformOffset(src_reg.GetIndex()); } else { src_ptr = STATE; src_offset = UnitState::InputOffset(src_reg); @@ -354,8 +354,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { } void JitShader::Compile_UniformCondition(Instruction instr) { - size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); - cmp(byte[SETUP + offset], 0); + size_t offset = Uniforms::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); + cmp(byte[UNIFORMS + offset], 0); } BitSet32 JitShader::PersistentCallerSavedRegs() { @@ -713,8 +713,8 @@ void JitShader::Compile_LOOP(Instruction instr) { // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by // 4 bits) to be used as an offset into the 16-byte vector registers later - size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); - mov(LOOPCOUNT, dword[SETUP + offset]); + size_t offset = Uniforms::GetIntUniformOffset(instr.flow_control.int_uniform_id); + mov(LOOPCOUNT, dword[UNIFORMS + offset]); mov(LOOPCOUNT_REG, LOOPCOUNT); shr(LOOPCOUNT_REG, 4); and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start @@ -882,7 +882,7 @@ void JitShader::Compile(const std::array* program_ ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL); - mov(SETUP, ABI_PARAM1); + mov(UNIFORMS, ABI_PARAM1); mov(STATE, ABI_PARAM2); // Zero address/loop registers diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 93e65fe26..923c34f0d 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -34,7 +34,7 @@ public: JitShader(); void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { - program(&setup, &state, instruction_labels[offset].getAddress()); 
+ program(&setup.uniforms, &state, instruction_labels[offset].getAddress()); } void Compile(const std::array* program_code, From d52ddd0ec48a0e8c321070158540a2c5a609d9f8 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 17 Apr 2018 09:47:59 +0300 Subject: [PATCH 2/2] shader: avoid recomputing hash for the same program --- src/video_core/command_processor.cpp | 6 +++++ src/video_core/shader/shader.h | 31 ++++++++++++++++++++++++ src/video_core/shader/shader_jit_x64.cpp | 5 ++-- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 022d8dee3..8612a758f 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -451,6 +451,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); } else { g_state.gs.program_code[offset] = value; + g_state.gs.MarkProgramCodeDirty(); offset++; } break; @@ -469,6 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); } else { g_state.gs.swizzle_data[offset] = value; + g_state.gs.MarkSwizzleDataDirty(); offset++; } break; @@ -518,8 +520,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); } else { g_state.vs.program_code[offset] = value; + g_state.vs.MarkProgramCodeDirty(); if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { g_state.gs.program_code[offset] = value; + g_state.gs.MarkProgramCodeDirty(); } offset++; } @@ -539,8 +543,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); } else { g_state.vs.swizzle_data[offset] = value; + g_state.vs.MarkSwizzleDataDirty(); if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { g_state.gs.swizzle_data[offset] = value; + g_state.gs.MarkSwizzleDataDirty(); } offset++; } diff 
--git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 8b212100c..86e902a1c 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/hash.h" #include "common/vector_math.h" #include "video_core/pica_types.h" #include "video_core/regs_rasterizer.h" @@ -206,6 +207,36 @@ struct ShaderSetup { /// Used by the JIT, points to a compiled shader object. const void* cached_shader = nullptr; } engine_data; + + void MarkProgramCodeDirty() { + program_code_hash_dirty = true; + } + + void MarkSwizzleDataDirty() { + swizzle_data_hash_dirty = true; + } + + u64 GetProgramCodeHash() { + if (program_code_hash_dirty) { + program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash_dirty = false; + } + return program_code_hash; + } + + u64 GetSwizzleDataHash() { + if (swizzle_data_hash_dirty) { + swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); + swizzle_data_hash_dirty = false; + } + return swizzle_data_hash; + } + +private: + bool program_code_hash_dirty = true; + bool swizzle_data_hash_dirty = true; + u64 program_code_hash = 0xDEADC0DE; + u64 swizzle_data_hash = 0xDEADC0DE; }; class ShaderEngine { diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 73c21871c..696fe11da 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include "common/hash.h" #include "common/microprofile.h" #include "video_core/shader/shader.h" #include "video_core/shader/shader_jit_x64.h" @@ -18,8 +17,8 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); setup.engine_data.entry_point = entry_point; - u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); - u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data)); + u64 code_hash = setup.GetProgramCodeHash(); + u64 swizzle_hash = setup.GetSwizzleDataHash(); u64 cache_key = code_hash ^ swizzle_hash; auto iter = cache.find(cache_key);