diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 022d8dee3..8612a758f 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -451,6 +451,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); } else { g_state.gs.program_code[offset] = value; + g_state.gs.MarkProgramCodeDirty(); offset++; } break; @@ -469,6 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); } else { g_state.gs.swizzle_data[offset] = value; + g_state.gs.MarkSwizzleDataDirty(); offset++; } break; @@ -518,8 +520,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); } else { g_state.vs.program_code[offset] = value; + g_state.vs.MarkProgramCodeDirty(); if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { g_state.gs.program_code[offset] = value; + g_state.gs.MarkProgramCodeDirty(); } offset++; } @@ -539,8 +543,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); } else { g_state.vs.swizzle_data[offset] = value; + g_state.vs.MarkSwizzleDataDirty(); if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { g_state.gs.swizzle_data[offset] = value; + g_state.gs.MarkSwizzleDataDirty(); } offset++; } diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index dc5072713..86e902a1c 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -12,6 +12,7 @@ #include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/hash.h" #include "common/vector_math.h" #include "video_core/pica_types.h" #include "video_core/regs_rasterizer.h" @@ -173,27 +174,29 @@ struct GSUnitState : public UnitState { GSEmitter emitter; }; -struct ShaderSetup { - struct { - // The float uniforms are accessed by the shader JIT using SSE instructions, and are - // therefore required to be 16-byte aligned. - alignas(16) Math::Vec4 f[96]; +struct Uniforms { + // The float uniforms are accessed by the shader JIT using SSE instructions, and are + // therefore required to be 16-byte aligned. + alignas(16) Math::Vec4 f[96]; - std::array b; - std::array, 4> i; - } uniforms; + std::array b; + std::array, 4> i; static size_t GetFloatUniformOffset(unsigned index) { - return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4); + return offsetof(Uniforms, f) + index * sizeof(Math::Vec4); } static size_t GetBoolUniformOffset(unsigned index) { - return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool); + return offsetof(Uniforms, b) + index * sizeof(bool); } static size_t GetIntUniformOffset(unsigned index) { - return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4); + return offsetof(Uniforms, i) + index * sizeof(Math::Vec4); } +}; + +struct ShaderSetup { + Uniforms uniforms; std::array program_code; std::array swizzle_data; @@ -204,6 +207,36 @@ struct ShaderSetup { /// Used by the JIT, points to a compiled shader object. const void* cached_shader = nullptr; } engine_data; + + void MarkProgramCodeDirty() { + program_code_hash_dirty = true; + } + + void MarkSwizzleDataDirty() { + swizzle_data_hash_dirty = true; + } + + u64 GetProgramCodeHash() { + if (program_code_hash_dirty) { + program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash_dirty = false; + } + return program_code_hash; + } + + u64 GetSwizzleDataHash() { + if (swizzle_data_hash_dirty) { + swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); + swizzle_data_hash_dirty = false; + } + return swizzle_data_hash; + } + +private: + bool program_code_hash_dirty = true; + bool swizzle_data_hash_dirty = true; + u64 program_code_hash = 0xDEADC0DE; + u64 swizzle_data_hash = 0xDEADC0DE; }; class ShaderEngine { diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 73c21871c..696fe11da 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/hash.h" #include "common/microprofile.h" #include "video_core/shader/shader.h" #include "video_core/shader/shader_jit_x64.h" @@ -18,8 +17,8 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); setup.engine_data.entry_point = entry_point; - u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); - u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data)); + u64 code_hash = setup.GetProgramCodeHash(); + u64 swizzle_hash = setup.GetSwizzleDataHash(); u64 cache_key = code_hash ^ swizzle_hash; auto iter = cache.find(cache_key); diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index f497990f1..06c6f52e5 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -104,7 +104,7 @@ const JitFunction instr_table[64] = { // purposes, as documented below: /// Pointer to the uniform memory -static const Reg64 SETUP = r9; +static const Reg64 UNIFORMS = r9; /// The two 32-bit VS address offset registers set by the MOVA instruction static const Reg64 ADDROFFS_REG_0 = r10; static const Reg64 ADDROFFS_REG_1 = r11; @@ -139,7 +139,7 @@ static const Xmm NEGBIT = xmm15; // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed static const BitSet32 persistent_regs = BuildRegSet({ // Pointers to register blocks - SETUP, + UNIFORMS, STATE, // Cached registers ADDROFFS_REG_0, @@ -184,8 +184,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe size_t src_offset; if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { - src_ptr = SETUP; - src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex()); + src_ptr = UNIFORMS; + src_offset = Uniforms::GetFloatUniformOffset(src_reg.GetIndex()); } else { src_ptr = STATE; src_offset = UnitState::InputOffset(src_reg); @@ -354,8 +354,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { } void JitShader::Compile_UniformCondition(Instruction instr) { - size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); - cmp(byte[SETUP + offset], 0); + size_t offset = Uniforms::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); + cmp(byte[UNIFORMS + offset], 0); } BitSet32 JitShader::PersistentCallerSavedRegs() { @@ -713,8 +713,8 @@ void JitShader::Compile_LOOP(Instruction instr) { // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by // 4 bits) to be used as an offset into the 16-byte vector registers later - size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id); - mov(LOOPCOUNT, dword[SETUP + offset]); + size_t offset = Uniforms::GetIntUniformOffset(instr.flow_control.int_uniform_id); + mov(LOOPCOUNT, dword[UNIFORMS + offset]); mov(LOOPCOUNT_REG, LOOPCOUNT); shr(LOOPCOUNT_REG, 4); and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start @@ -882,7 +882,7 @@ void JitShader::Compile(const std::array* program_ ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16); mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL); - mov(SETUP, ABI_PARAM1); + mov(UNIFORMS, ABI_PARAM1); mov(STATE, ABI_PARAM2); // Zero address/loop registers diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 93e65fe26..923c34f0d 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -34,7 +34,7 @@ public: JitShader(); void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { - program(&setup, &state, instruction_labels[offset].getAddress()); + program(&setup.uniforms, &state, instruction_labels[offset].getAddress()); } void Compile(const std::array* program_code,