From 46c3a11d01d37ee7c5ec89d046c50622d1854f9a Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 7 Jun 2015 19:53:40 -0500 Subject: [PATCH] [AArch64] Optimize gatherpipe stores This can be even more optimized in the future when the register cache supports constant caching. --- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index f86f02ff63..8e4aaf59e9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -360,10 +360,17 @@ void JitArm64::stfXX(UGeckoInstruction inst) ARM64Reg XA = EncodeRegTo64(addr_reg); - if (is_immediate) + if (is_immediate && !(jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))) + { MOVI2R(XA, imm_addr); - if (update) + if (update) + { + gpr.BindToRegister(a, false); + MOV(gpr.R(a), addr_reg); + } + } + else if (!is_immediate && update) { gpr.BindToRegister(a, false); MOV(gpr.R(a), addr_reg); @@ -386,25 +393,44 @@ void JitArm64::stfXX(UGeckoInstruction inst) else accessSize = 32; - MOVI2R(X30, (u64)&GPFifo::m_gatherPipeCount); - MOVI2R(X1, (u64)GPFifo::m_gatherPipe); - LDR(INDEX_UNSIGNED, W0, X30, 0); - ADD(X1, X1, X0); + u64 base_ptr = std::min((u64)&GPFifo::m_gatherPipeCount, (u64)&GPFifo::m_gatherPipe); + u32 count_off = (u64)&GPFifo::m_gatherPipeCount - base_ptr; + u32 pipe_off = (u64)&GPFifo::m_gatherPipe - base_ptr; + + MOVI2R(X30, base_ptr); + + if (pipe_off) + ADD(X1, X30, pipe_off); + + LDR(INDEX_UNSIGNED, W0, X30, count_off); if (accessSize == 64) { m_float_emit.REV64(8, Q0, V0); - m_float_emit.STR(64, INDEX_UNSIGNED, Q0, X1, 0); + if (pipe_off) + m_float_emit.STR(64, Q0, X1, ArithOption(X0)); + else + m_float_emit.STR(64, Q0, X30, ArithOption(X0)); } else if (accessSize == 32) { m_float_emit.FCVT(32, 64, D0, EncodeRegToDouble(V0)); m_float_emit.REV32(8, D0, D0); - m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0); + if (pipe_off) + m_float_emit.STR(32, D0, X1, ArithOption(X0)); + else + m_float_emit.STR(32, D0, X30, ArithOption(X0)); + } ADD(W0, W0, accessSize >> 3); - STR(INDEX_UNSIGNED, W0, X30, 0); + STR(INDEX_UNSIGNED, W0, X30, count_off); jit->js.fifoBytesThisBlock += accessSize >> 3; + if (update) + { + // Chance of this happening is fairly low, but support it + gpr.BindToRegister(a, false); + MOVI2R(gpr.R(a), imm_addr); + } } else if (PowerPC::IsOptimizableRAMAddress(imm_addr)) {