JIT64: compile the indexed paired-single quantized loads/stores.

* Gekko.h: names six of the seven previously unnamed low bits of the
  quantized load/store view as SUBOP6.
* Jit.h / Jit64_Tables.cpp: psq_l/psq_st become psq_lXX/psq_stXX; table4_3
  entries 6, 7, 38 and 39 (psq_lx, psq_stx, psq_lux, psq_stux) now point at
  them instead of FallBackToInterpreter.
* Jit_LoadStorePaired.cpp: when OPCD == 4 the address is rA + rB and
  SUBOP6 & 32 selects the updating form; otherwise the SIMM_12 displacement
  path is kept.
* psq_lXX picks the quantization register and the W flag from Ix/Wx for the
  indexed forms and from I/W otherwise.

NOTE(review): Wx is assumed to be declared next to Ix in Gekko.h; only Ix is
visible in this patch. The GQR/W selection of psq_stXX sits outside these
hunks and presumably needs the same indexed handling; confirm.

diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h index ba81004e1a..5d333add40 100644 --- a/Source/Core/Core/PowerPC/Gekko.h +++ b/Source/Core/Core/PowerPC/Gekko.h @@ -267,7 +267,8 @@ union UGeckoInstruction // paired single quantized load/store struct { - u32 : 7; + u32 : 1; + u32 SUBOP6 : 6; // Graphics quantization register to use u32 Ix : 3; // 0: paired single, 1: scalar diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 704a8cf943..15ddc6b954 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -220,8 +220,8 @@ public: void lfXXX(UGeckoInstruction inst); void stfXXX(UGeckoInstruction inst); void stfiwx(UGeckoInstruction inst); - void psq_l(UGeckoInstruction inst); - void psq_st(UGeckoInstruction inst); + void psq_lXX(UGeckoInstruction inst); + void psq_stXX(UGeckoInstruction inst); void fmaddXX(UGeckoInstruction inst); void fsign(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 3aede22215..dd2331e8e9 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -92,10 +92,10 @@ static GekkoOPTemplate primarytable[] = {54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, {55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - {56, &Jit64::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}}, - {57, &Jit64::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, - {60, &Jit64::psq_st}, //"psq_st", OPTYPE_PS, FL_IN_A}}, - {61, &Jit64::psq_st}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, + {56, &Jit64::psq_lXX}, //"psq_l", OPTYPE_PS, FL_IN_A}}, + {57, &Jit64::psq_lXX}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, + {60, &Jit64::psq_stXX}, //"psq_st", OPTYPE_PS, FL_IN_A}}, + {61, &Jit64::psq_stXX}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, //missing: 0, 5, 6, 9, 22, 30, 62, 58 {0, 
&Jit64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, @@ -150,10 +150,10 @@ static GekkoOPTemplate table4_2[] = static GekkoOPTemplate table4_3[] = { - {6, &Jit64::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}}, - {7, &Jit64::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}}, - {38, &Jit64::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}}, - {39, &Jit64::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}}, + {6, &Jit64::psq_lXX}, //"psq_lx", OPTYPE_PS, 0}}, + {7, &Jit64::psq_stXX}, //"psq_stx", OPTYPE_PS, 0}}, + {38, &Jit64::psq_lXX}, //"psq_lux", OPTYPE_PS, 0}}, + {39, &Jit64::psq_stXX}, //"psq_stux", OPTYPE_PS, 0}}, }; static GekkoOPTemplate table19[] = diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index b207de9ad4..fbe90fb8e7 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -16,33 +16,41 @@ using namespace Gen; // The big problem is likely instructions that set the quantizers in the same block. // We will have to break block after quantizers are written to. -void Jit64::psq_st(UGeckoInstruction inst) +void Jit64::psq_stXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStorePairedOff); FALLBACK_IF(!inst.RA); s32 offset = inst.SIMM_12; - bool update = inst.OPCD == 61 && offset; + bool indexed = inst.OPCD == 4; + bool update = (inst.OPCD == 61 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32); int a = inst.RA; - int s = inst.RS; + int b = indexed ? 
inst.RB : a; + int s = inst.FS; + gpr.Lock(a, b); gpr.FlushLockX(RSCRATCH_EXTRA); if (update) gpr.BindToRegister(a, true, true); fpr.BindToRegister(s, true, false); - if (offset && gpr.R(a).IsSimpleReg()) + if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset)) { - LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset)); + if (indexed) + LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + else + LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset)); } else { MOV(32, R(RSCRATCH_EXTRA), gpr.R(a)); - if (offset) + if (indexed) + ADD(32, R(RSCRATCH_EXTRA), gpr.R(b)); + else if (offset) ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset)); } // In memcheck mode, don't update the address until the exception check - if (update && offset && !js.memcheck) + if (update && !js.memcheck) MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); // Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code. // Hence, we need to mask out the unused bits. The layout of the GQR register is @@ -67,56 +75,73 @@ void Jit64::psq_st(UGeckoInstruction inst) CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); } - if (update && offset && js.memcheck) + if (update && js.memcheck) { MEMCHECK_START(false) - ADD(32, gpr.R(a), Imm32((u32)offset)); + if (indexed) + ADD(32, gpr.R(a), gpr.R(b)); + else + ADD(32, gpr.R(a), Imm32((u32)offset)); MEMCHECK_END } gpr.UnlockAll(); gpr.UnlockAllX(); } -void Jit64::psq_l(UGeckoInstruction inst) +void Jit64::psq_lXX(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITLoadStorePairedOff); FALLBACK_IF(!inst.RA); s32 offset = inst.SIMM_12; - bool update = inst.OPCD == 57 && offset; + bool indexed = inst.OPCD == 4; + bool update = (inst.OPCD == 57 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32); int a = inst.RA; - int s = inst.RS; + int b = indexed ? 
inst.RB : a; + int s = inst.FS; + gpr.Lock(a, b); gpr.FlushLockX(RSCRATCH_EXTRA); - gpr.BindToRegister(a, true, update && offset); + gpr.BindToRegister(a, true, update); fpr.BindToRegister(s, false, true); - if (offset && gpr.R(a).IsSimpleReg()) + if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset)) { - LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset)); + if (indexed) + LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + else + LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset)); } else { MOV(32, R(RSCRATCH_EXTRA), gpr.R(a)); - if (offset) + if (indexed) + ADD(32, R(RSCRATCH_EXTRA), gpr.R(b)); + else if (offset) ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset)); } // In memcheck mode, don't update the address until the exception check - if (update && offset && !js.memcheck) + if (update && !js.memcheck) MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); MOV(32, R(RSCRATCH2), Imm32(0x3F07)); - AND(32, R(RSCRATCH2), M(((char *)&GQR(inst.I)) + 2)); - MOVZX(32, 8, RSCRATCH, R(RSCRATCH2)); - if (inst.W) - OR(32, R(RSCRATCH), Imm8(8)); - CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); + // Get the high part of the GQR register + OpArg gqr = PPCSTATE(spr[SPR_GQR0 + (indexed ? inst.Ix : inst.I)]); + gqr.offset += 2; + + AND(32, R(RSCRATCH2), gqr); + MOVZX(32, 8, RSCRATCH, R(RSCRATCH2)); + + CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[(indexed ? inst.Wx : inst.W) * 8]))); MEMCHECK_START(false) CVTPS2PD(fpr.RX(s), R(XMM0)); - if (update && offset && js.memcheck) + if (update && js.memcheck) { - ADD(32, gpr.R(a), Imm32((u32)offset)); + if (indexed) + ADD(32, gpr.R(a), gpr.R(b)); + else + ADD(32, gpr.R(a), Imm32((u32)offset)); } MEMCHECK_END