From fdf7744a53f829d5488003a18964c27e39ef5813 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Tue, 2 Feb 2021 22:17:44 +0100
Subject: [PATCH] JitArm64: Move float conversion code out of EmitBackpatchRoutine

This simplifies some of the following commits. It does require an extra
register, but hey, we have 32 of them.

Something I think would be nice to add to the register cache in the
future is the ability to keep both the single and double version of a
guest register in two different host registers when that is useful.
That way, the extra register we write to here can be read by a later
instruction, saving us from having to perform the same conversion
again.
---
 .../PowerPC/JitArm64/JitArm64_BackPatch.cpp   | 31 +---------
 .../JitArm64/JitArm64_LoadStoreFloating.cpp   | 37 ++++++------
 .../JitArm64/JitArm64_LoadStorePaired.cpp     | 57 ++++++++++++-------
 .../Core/PowerPC/JitArmCommon/BackPatch.h     | 17 +++---
 4 files changed, 69 insertions(+), 73 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index de3a8bf683..4f1aca8e60 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -61,23 +61,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
     if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT)
     {
       if (flags & BackPatchInfo::FLAG_SIZE_F32)
-      {
-        m_float_emit.FCVT(32, 64, ARM64Reg::D0, RS);
-        m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
-        m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr);
-      }
-      else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
       {
         m_float_emit.REV32(8, ARM64Reg::D0, RS);
         m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr);
       }
       else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
-      {
-        m_float_emit.FCVTN(32, ARM64Reg::D0, RS);
-        m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
-        m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
-      }
-      else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
       {
         m_float_emit.REV32(8, ARM64Reg::D0, RS);
         m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
@@ -184,37 +172,22 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
     if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT)
     {
       if (flags & BackPatchInfo::FLAG_SIZE_F32)
-      {
-        m_float_emit.FCVT(32, 64, ARM64Reg::D0, RS);
-        m_float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0);
-        MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
-        BLR(ARM64Reg::X8);
-      }
-      else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
       {
         m_float_emit.UMOV(32, ARM64Reg::W0, RS, 0);
         MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
         BLR(ARM64Reg::X8);
       }
       else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
-      {
-        m_float_emit.FCVTN(32, ARM64Reg::D0, RS);
-        m_float_emit.UMOV(64, ARM64Reg::X0, ARM64Reg::D0, 0);
-        ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
-        MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
-        BLR(ARM64Reg::X8);
-      }
-      else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
       {
         m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
-        ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
         MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
+        ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
         BLR(ARM64Reg::X8);
       }
       else
       {
-        MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
         m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
+        MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
         BLR(ARM64Reg::X8);
       }
     }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
index e881551f64..5056c3b1ca 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -189,6 +189,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 
   u32 a = inst.RA, b = inst.RB;
 
+  bool want_single = false;
   s32 offset = inst.SIMM_16;
   u32 flags = BackPatchInfo::FLAG_STORE;
   bool update = false;
@@ -200,10 +201,12 @@ void JitArm64::stfXX(UGeckoInstruction inst)
     switch (inst.SUBOP10)
     {
     case 663:  // stfsx
+      want_single = true;
       flags |= BackPatchInfo::FLAG_SIZE_F32;
       offset_reg = b;
       break;
     case 695:  // stfsux
+      want_single = true;
       flags |= BackPatchInfo::FLAG_SIZE_F32;
       update = true;
       offset_reg = b;
@@ -218,16 +221,19 @@ void JitArm64::stfXX(UGeckoInstruction inst)
       offset_reg = b;
       break;
     case 983:  // stfiwx
-      flags |= BackPatchInfo::FLAG_SIZE_F32I;
+      // This instruction writes the lower 32 bits of a double. want_single must be false
+      flags |= BackPatchInfo::FLAG_SIZE_F32;
       offset_reg = b;
       break;
     }
     break;
   case 53:  // stfsu
+    want_single = true;
    flags |= BackPatchInfo::FLAG_SIZE_F32;
     update = true;
     break;
   case 52:  // stfs
+    want_single = true;
     flags |= BackPatchInfo::FLAG_SIZE_F32;
     break;
   case 55:  // stfdu
@@ -242,19 +248,22 @@ void JitArm64::stfXX(UGeckoInstruction inst)
   u32 imm_addr = 0;
   bool is_immediate = false;
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
   fpr.Lock(ARM64Reg::Q0);
 
-  const bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true);
+  const bool have_single = fpr.IsSingle(inst.FS, true);
 
-  const ARM64Reg V0 = fpr.R(inst.FS, single ? RegType::LowerPairSingle : RegType::LowerPair);
+  ARM64Reg V0 =
+      fpr.R(inst.FS, want_single && have_single ? RegType::LowerPairSingle : RegType::LowerPair);
 
-  if (single)
+  if (want_single && !have_single)
   {
-    flags &= ~BackPatchInfo::FLAG_SIZE_F32;
-    flags |= BackPatchInfo::FLAG_SIZE_F32I;
+    const ARM64Reg single_reg = fpr.GetReg();
+    m_float_emit.FCVT(32, 64, EncodeRegToDouble(single_reg), EncodeRegToDouble(V0));
+    V0 = single_reg;
   }
 
+  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
+
   ARM64Reg addr_reg = ARM64Reg::W1;
 
   if (update)
@@ -359,19 +368,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
       accessSize = 32;
 
     LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
+
     if (flags & BackPatchInfo::FLAG_SIZE_F64)
-    {
       m_float_emit.REV64(8, ARM64Reg::Q0, V0);
-    }
     else if (flags & BackPatchInfo::FLAG_SIZE_F32)
-    {
-      m_float_emit.FCVT(32, 64, ARM64Reg::D0, EncodeRegToDouble(V0));
-      m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
-    }
-    else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
-    {
       m_float_emit.REV32(8, ARM64Reg::D0, V0);
-    }
 
     m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0,
                      ARM64Reg::X0, accessSize >> 3);
@@ -399,6 +400,10 @@ void JitArm64::stfXX(UGeckoInstruction inst)
   {
     EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use);
   }
+
+  if (want_single && !have_single)
+    fpr.Unlock(V0);
+
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0);
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
index 1b4fcc3f85..c3778e330e 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -116,13 +116,44 @@ void JitArm64::psq_st(UGeckoInstruction inst)
   const bool update = inst.OPCD == 61;
   const s32 offset = inst.SIMM_12;
 
-  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
 
-  const bool single = fpr.IsSingle(inst.RS);
+  const bool have_single = fpr.IsSingle(inst.RS);
+
+  ARM64Reg VS = fpr.R(inst.RS, have_single ? RegType::Single : RegType::Register);
+
+  if (js.assumeNoPairedQuantize)
+  {
+    if (!have_single)
+    {
+      const ARM64Reg single_reg = fpr.GetReg();
+
+      if (inst.W)
+        m_float_emit.FCVT(32, 64, EncodeRegToDouble(single_reg), EncodeRegToDouble(VS));
+      else
+        m_float_emit.FCVTN(32, EncodeRegToDouble(single_reg), EncodeRegToDouble(VS));
+
+      VS = single_reg;
+    }
+  }
+  else
+  {
+    if (have_single)
+    {
+      m_float_emit.ORR(ARM64Reg::D0, VS, VS);
+    }
+    else
+    {
+      if (inst.W)
+        m_float_emit.FCVT(32, 64, ARM64Reg::D0, VS);
+      else
+        m_float_emit.FCVTN(32, ARM64Reg::D0, VS);
+    }
+  }
+
+  gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
 
   const ARM64Reg arm_addr = gpr.R(inst.RA);
-  const ARM64Reg VS = fpr.R(inst.RS, single ? RegType::Single : RegType::Register);
 
   constexpr ARM64Reg scale_reg = ARM64Reg::W0;
   constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@@ -157,28 +188,13 @@ void JitArm64::psq_st(UGeckoInstruction inst)
   {
     u32 flags = BackPatchInfo::FLAG_STORE;
 
-    if (single)
-      flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32I : BackPatchInfo::FLAG_SIZE_F32X2I);
-    else
-      flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2);
+    flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2);
 
     EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use,
                         fprs_in_use);
   }
   else
   {
-    if (single)
-    {
-      m_float_emit.ORR(ARM64Reg::D0, VS, VS);
-    }
-    else
-    {
-      if (inst.W)
-        m_float_emit.FCVT(32, 64, ARM64Reg::D0, VS);
-      else
-        m_float_emit.FCVTN(32, ARM64Reg::D0, VS);
-    }
-
     LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + inst.I));
     UBFM(type_reg, scale_reg, 0, 2);    // Type
     UBFM(scale_reg, scale_reg, 8, 13);  // Scale
@@ -212,6 +228,9 @@ void JitArm64::psq_st(UGeckoInstruction inst)
     SetJumpTarget(continue1);
   }
 
+  if (js.assumeNoPairedQuantize && !have_single)
+    fpr.Unlock(VS);
+
   gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
   fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
 }
diff --git a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h
index 58833eb687..a3a9f8b470 100644
--- a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h
+++ b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h
@@ -16,14 +16,11 @@ struct BackPatchInfo
     FLAG_SIZE_32 = (1 << 4),
     FLAG_SIZE_F32 = (1 << 5),
     FLAG_SIZE_F32X2 = (1 << 6),
-    FLAG_SIZE_F32X2I = (1 << 7),
-    FLAG_SIZE_F64 = (1 << 8),
-    FLAG_REVERSE = (1 << 9),
-    FLAG_EXTEND = (1 << 10),
-    FLAG_SIZE_F32I = (1 << 11),
-    FLAG_ZERO_256 = (1 << 12),
-    FLAG_MASK_FLOAT =
-        FLAG_SIZE_F32 | FLAG_SIZE_F32X2 | FLAG_SIZE_F32X2I | FLAG_SIZE_F64 | FLAG_SIZE_F32I,
+    FLAG_SIZE_F64 = (1 << 7),
+    FLAG_REVERSE = (1 << 8),
+    FLAG_EXTEND = (1 << 9),
+    FLAG_ZERO_256 = (1 << 10),
+    FLAG_MASK_FLOAT = FLAG_SIZE_F32 | FLAG_SIZE_F32X2 | FLAG_SIZE_F64,
   };
 
   static u32 GetFlagSize(u32 flags)
@@ -34,8 +31,10 @@ struct BackPatchInfo
       return 16;
     if (flags & FLAG_SIZE_32)
       return 32;
-    if (flags & FLAG_SIZE_F32 || flags & FLAG_SIZE_F32I)
+    if (flags & FLAG_SIZE_F32)
       return 32;
+    if (flags & FLAG_SIZE_F32X2)
+      return 64;
     if (flags & FLAG_SIZE_F64)
      return 64;
     if (flags & FLAG_ZERO_256)
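
Illustration (not part of the patch, keep it out of anything fed to git am): the BackPatch.h hunks summarize the contract change described in the commit message above. The *I flag variants disappear because callers now hand EmitBackpatchRoutine data that has already been converted, and FLAG_SIZE_F32X2 reports a 64-bit access on its own. The stand-alone C++ sketch below mirrors only the float-related bits of the patched header plus the matching branch of GetFlagSize as a sanity check; the struct name, the omitted non-float flags, the trailing return 0, and main() are illustrative assumptions, not Dolphin code.

// Stand-alone sketch of the float-size flags after this patch. Values are
// copied from the new Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h
// hunks; everything else here is scaffolding for the sake of a runnable demo.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct FloatFlagsAfterPatch
{
  enum : uint32_t
  {
    FLAG_SIZE_F32 = (1 << 5),    // single store; the caller has already run FCVT if needed
    FLAG_SIZE_F32X2 = (1 << 6),  // paired singles; the caller has already run FCVTN if needed
    FLAG_SIZE_F64 = (1 << 7),
    FLAG_MASK_FLOAT = FLAG_SIZE_F32 | FLAG_SIZE_F32X2 | FLAG_SIZE_F64,
  };

  // Same shape as the float cases of BackPatchInfo::GetFlagSize() in the last hunk.
  static uint32_t GetFlagSize(uint32_t flags)
  {
    if (flags & FLAG_SIZE_F32)
      return 32;
    if (flags & FLAG_SIZE_F32X2)
      return 64;
    if (flags & FLAG_SIZE_F64)
      return 64;
    return 0;  // assumed tail; the hunk above ends before this point
  }
};

int main()
{
  // stfs/stfsx/stfiwx all use FLAG_SIZE_F32 now; whether FCVT ran first is the caller's concern.
  assert(FloatFlagsAfterPatch::GetFlagSize(FloatFlagsAfterPatch::FLAG_SIZE_F32) == 32);
  // psq_st (assumeNoPairedQuantize path) with W=0 uses FLAG_SIZE_F32X2: two singles, one 64-bit access.
  assert(FloatFlagsAfterPatch::GetFlagSize(FloatFlagsAfterPatch::FLAG_SIZE_F32X2) == 64);
  std::puts("float flag sizes match the patched GetFlagSize");
  return 0;
}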