Merge pull request #1100 from FioraAeterna/psq_insts

JIT: implement remaining psq_l/st instruction variants
This commit is contained in:
Ryan Houdek 2014-09-19 15:16:44 -05:00
commit 7cc586d615
4 changed files with 61 additions and 35 deletions

View File

@ -267,7 +267,8 @@ union UGeckoInstruction
// paired single quantized load/store // paired single quantized load/store
struct struct
{ {
u32 : 7; u32 : 1;
u32 SUBOP6 : 6;
// Graphics quantization register to use // Graphics quantization register to use
u32 Ix : 3; u32 Ix : 3;
// 0: paired single, 1: scalar // 0: paired single, 1: scalar

View File

@ -220,8 +220,8 @@ public:
void lfXXX(UGeckoInstruction inst); void lfXXX(UGeckoInstruction inst);
void stfXXX(UGeckoInstruction inst); void stfXXX(UGeckoInstruction inst);
void stfiwx(UGeckoInstruction inst); void stfiwx(UGeckoInstruction inst);
void psq_l(UGeckoInstruction inst); void psq_lXX(UGeckoInstruction inst);
void psq_st(UGeckoInstruction inst); void psq_stXX(UGeckoInstruction inst);
void fmaddXX(UGeckoInstruction inst); void fmaddXX(UGeckoInstruction inst);
void fsign(UGeckoInstruction inst); void fsign(UGeckoInstruction inst);

View File

@ -92,10 +92,10 @@ static GekkoOPTemplate primarytable[] =
{54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, {54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, {55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{56, &Jit64::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}}, {56, &Jit64::psq_lXX}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &Jit64::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, {57, &Jit64::psq_lXX}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
{60, &Jit64::psq_st}, //"psq_st", OPTYPE_PS, FL_IN_A}}, {60, &Jit64::psq_stXX}, //"psq_st", OPTYPE_PS, FL_IN_A}},
{61, &Jit64::psq_st}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, {61, &Jit64::psq_stXX}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
//missing: 0, 5, 6, 9, 22, 30, 62, 58 //missing: 0, 5, 6, 9, 22, 30, 62, 58
{0, &Jit64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, {0, &Jit64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
@ -150,10 +150,10 @@ static GekkoOPTemplate table4_2[] =
static GekkoOPTemplate table4_3[] = static GekkoOPTemplate table4_3[] =
{ {
{6, &Jit64::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}}, {6, &Jit64::psq_lXX}, //"psq_lx", OPTYPE_PS, 0}},
{7, &Jit64::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}}, {7, &Jit64::psq_stXX}, //"psq_stx", OPTYPE_PS, 0}},
{38, &Jit64::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}}, {38, &Jit64::psq_lXX}, //"psq_lux", OPTYPE_PS, 0}},
{39, &Jit64::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}}, {39, &Jit64::psq_stXX}, //"psq_stux", OPTYPE_PS, 0}},
}; };
static GekkoOPTemplate table19[] = static GekkoOPTemplate table19[] =

View File

@ -16,33 +16,41 @@ using namespace Gen;
// The big problem is likely instructions that set the quantizers in the same block. // The big problem is likely instructions that set the quantizers in the same block.
// We will have to break the block after the quantizers are written to. // We will have to break the block after the quantizers are written to.
void Jit64::psq_st(UGeckoInstruction inst) void Jit64::psq_stXX(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff); JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(!inst.RA); FALLBACK_IF(!inst.RA);
s32 offset = inst.SIMM_12; s32 offset = inst.SIMM_12;
bool update = inst.OPCD == 61 && offset; bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 61 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int b = indexed ? inst.RB : a;
int s = inst.FS;
gpr.Lock(a, b);
gpr.FlushLockX(RSCRATCH_EXTRA); gpr.FlushLockX(RSCRATCH_EXTRA);
if (update) if (update)
gpr.BindToRegister(a, true, true); gpr.BindToRegister(a, true, true);
fpr.BindToRegister(s, true, false); fpr.BindToRegister(s, true, false);
if (offset && gpr.R(a).IsSimpleReg()) if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{ {
if (indexed)
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset)); LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
} }
else else
{ {
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a)); MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (offset) if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset)); ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
} }
// In memcheck mode, don't update the address until the exception check // In memcheck mode, don't update the address until the exception check
if (update && offset && !js.memcheck) if (update && !js.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code. // Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
// Hence, we need to mask out the unused bits. The layout of the GQR register is // Hence, we need to mask out the unused bits. The layout of the GQR register is
@ -67,9 +75,12 @@ void Jit64::psq_st(UGeckoInstruction inst)
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
} }
if (update && offset && js.memcheck) if (update && js.memcheck)
{ {
MEMCHECK_START(false) MEMCHECK_START(false)
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset)); ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END MEMCHECK_END
} }
@ -77,45 +88,59 @@ void Jit64::psq_st(UGeckoInstruction inst)
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
void Jit64::psq_l(UGeckoInstruction inst) void Jit64::psq_lXX(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff); JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(!inst.RA); FALLBACK_IF(!inst.RA);
s32 offset = inst.SIMM_12; s32 offset = inst.SIMM_12;
bool update = inst.OPCD == 57 && offset; bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 57 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int b = indexed ? inst.RB : a;
int s = inst.FS;
gpr.Lock(a, b);
gpr.FlushLockX(RSCRATCH_EXTRA); gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update && offset); gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true); fpr.BindToRegister(s, false, true);
if (offset && gpr.R(a).IsSimpleReg()) if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{ {
if (indexed)
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset)); LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
} }
else else
{ {
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a)); MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (offset) if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset)); ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
} }
// In memcheck mode, don't update the address until the exception check // In memcheck mode, don't update the address until the exception check
if (update && offset && !js.memcheck) if (update && !js.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
MOV(32, R(RSCRATCH2), Imm32(0x3F07)); MOV(32, R(RSCRATCH2), Imm32(0x3F07));
AND(32, R(RSCRATCH2), M(((char *)&GQR(inst.I)) + 2));
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
if (inst.W)
OR(32, R(RSCRATCH), Imm8(8));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); // Get the high part of the GQR register
OpArg gqr = PPCSTATE(spr[SPR_GQR0 + inst.I]);
gqr.offset += 2;
AND(32, R(RSCRATCH2), gqr);
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[inst.W * 8])));
MEMCHECK_START(false) MEMCHECK_START(false)
CVTPS2PD(fpr.RX(s), R(XMM0)); CVTPS2PD(fpr.RX(s), R(XMM0));
if (update && offset && js.memcheck) if (update && js.memcheck)
{ {
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset)); ADD(32, gpr.R(a), Imm32((u32)offset));
} }
MEMCHECK_END MEMCHECK_END