From 39a8645ffce5204b248dccc979d47a21687d37df Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 17 Sep 2013 22:07:57 +0000 Subject: [PATCH] [ARM] Fix the FPR cache to not have to dump registers after every instruction. Add mullwox instruction. --- Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp | 1 - .../Src/PowerPC/JitArm32/JitArm_Integer.cpp | 3 +++ .../Src/PowerPC/JitArm32/JitArm_Tables.cpp | 2 +- .../Core/Src/PowerPC/JitArm32/JitFPRCache.cpp | 24 +++++++------------ 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp index c7cdb81d21..8b11840682 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp @@ -483,7 +483,6 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo BKPT(0x7777); } JitArmTables::CompileInstruction(ops[i]); - fpr.Flush(); if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) { // Don't do this yet diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp index 38a6f0c537..3aeea58e50 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp @@ -249,6 +249,7 @@ void JitArm::arith(UGeckoInstruction inst) isUnsigned = true; case 235: // mullwx case 266: + case 747: // mullwox case 778: // both addx if (gpr.IsImm(a)) { @@ -325,6 +326,7 @@ void JitArm::arith(UGeckoInstruction inst) gpr.SetImmediate(a, ~Or(Imm[0], Imm[1])); dest = a; break; + case 747: case 235: gpr.SetImmediate(d, Mul(Imm[0], Imm[1])); break; @@ -475,6 +477,7 @@ void JitArm::arith(UGeckoInstruction inst) ORR(RA, RS, RB); MVNS(RA, RA); break; + case 747: case 235: RD = gpr.R(d); RA = gpr.R(a); diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp index 514cce035b..675907d199 
100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp @@ -320,7 +320,7 @@ static GekkoOPTemplate table31_2[] = {75, &JitArm::Default}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {11, &JitArm::mulhwux}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {235, &JitArm::arith}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, - {747, &JitArm::Default}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {747, &JitArm::arith}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, {104, &JitArm::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {40, &JitArm::arith}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {552, &JitArm::arith}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitFPRCache.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitFPRCache.cpp index 802f767f72..27363112fc 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitFPRCache.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitFPRCache.cpp @@ -53,8 +53,8 @@ ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count) // the ppc side. 
static ARMReg allocationOrder[] = { - D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, - D14, D15, D16, D17, D18, D19, D20, D21, D22, + D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, + D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 }; count = sizeof(allocationOrder) / sizeof(const int); @@ -126,17 +126,11 @@ bool ArmFPRCache::FindFreeRegister(u32 &regindex) ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad) { u32 lastRegIndex = GetLeastUsedRegister(true); - + if (_regs[preg][PS1].GetType() != REG_NOTLOADED) { u8 a = _regs[preg][PS1].GetRegIndex(); ArmCRegs[a].LastLoad = 0; - if (_regs[preg][PS1].GetType() == REG_AWAY && preLoad) - { - s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0); - emit->VLDR(ArmCRegs[a].Reg, R9, offset); - _regs[preg][PS1].LoadToReg(a); - } return ArmCRegs[a].Reg; } @@ -144,12 +138,13 @@ ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad) if (FindFreeRegister(regindex)) { s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0); - emit->VLDR(ArmCRegs[regindex].Reg, R9, offset); ArmCRegs[regindex].PPCReg = preg; ArmCRegs[regindex].LastLoad = 0; + ArmCRegs[regindex].PS1 = PS1; _regs[preg][PS1].LoadToReg(regindex); + emit->VLDR(ArmCRegs[regindex].Reg, R9, offset); return ArmCRegs[regindex].Reg; } @@ -158,16 +153,15 @@ ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad) s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ?
8 : 0); emit->VSTR(ArmCRegs[lastRegIndex].Reg, R9, offsetOld); - emit->VLDR(ArmCRegs[lastRegIndex].Reg, R9, offsetNew); - _regs[ArmCRegs[lastRegIndex].PPCReg][PS1].Flush(); + _regs[ArmCRegs[lastRegIndex].PPCReg][ArmCRegs[lastRegIndex].PS1].Flush(); ArmCRegs[lastRegIndex].PPCReg = preg; ArmCRegs[lastRegIndex].LastLoad = 0; ArmCRegs[lastRegIndex].PS1 = PS1; _regs[preg][PS1].LoadToReg(lastRegIndex); - + emit->VLDR(ArmCRegs[lastRegIndex].Reg, R9, offsetNew); return ArmCRegs[lastRegIndex].Reg; } @@ -185,7 +179,7 @@ void ArmFPRCache::Flush() { for (u8 a = 0; a < 32; ++a) { - if (_regs[a][0].GetType() == REG_REG) + if (_regs[a][0].GetType() != REG_NOTLOADED) { s16 offset = PPCSTATE_OFF(ps) + (a * 16); u32 regindex = _regs[a][0].GetRegIndex(); @@ -195,7 +189,7 @@ void ArmFPRCache::Flush() ArmCRegs[regindex].LastLoad = 0; _regs[a][0].Flush(); } - if (_regs[a][1].GetType() == REG_REG) + if (_regs[a][1].GetType() != REG_NOTLOADED) { s16 offset = PPCSTATE_OFF(ps) + (a * 16) + 8; u32 regindex = _regs[a][1].GetRegIndex();