mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-11 08:39:13 +01:00
Merge pull request #1100 from FioraAeterna/psq_insts
JIT: implement remaining psq_l/st instruction variants
This commit is contained in:
commit
7cc586d615
@ -267,7 +267,8 @@ union UGeckoInstruction
|
|||||||
// paired single quantized load/store
|
// paired single quantized load/store
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
u32 : 7;
|
u32 : 1;
|
||||||
|
u32 SUBOP6 : 6;
|
||||||
// Graphics quantization register to use
|
// Graphics quantization register to use
|
||||||
u32 Ix : 3;
|
u32 Ix : 3;
|
||||||
// 0: paired single, 1: scalar
|
// 0: paired single, 1: scalar
|
||||||
|
@ -220,8 +220,8 @@ public:
|
|||||||
void lfXXX(UGeckoInstruction inst);
|
void lfXXX(UGeckoInstruction inst);
|
||||||
void stfXXX(UGeckoInstruction inst);
|
void stfXXX(UGeckoInstruction inst);
|
||||||
void stfiwx(UGeckoInstruction inst);
|
void stfiwx(UGeckoInstruction inst);
|
||||||
void psq_l(UGeckoInstruction inst);
|
void psq_lXX(UGeckoInstruction inst);
|
||||||
void psq_st(UGeckoInstruction inst);
|
void psq_stXX(UGeckoInstruction inst);
|
||||||
|
|
||||||
void fmaddXX(UGeckoInstruction inst);
|
void fmaddXX(UGeckoInstruction inst);
|
||||||
void fsign(UGeckoInstruction inst);
|
void fsign(UGeckoInstruction inst);
|
||||||
|
@ -92,10 +92,10 @@ static GekkoOPTemplate primarytable[] =
|
|||||||
{54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
{54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
||||||
{55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
{55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||||
|
|
||||||
{56, &Jit64::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}},
|
{56, &Jit64::psq_lXX}, //"psq_l", OPTYPE_PS, FL_IN_A}},
|
||||||
{57, &Jit64::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
{57, &Jit64::psq_lXX}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||||
{60, &Jit64::psq_st}, //"psq_st", OPTYPE_PS, FL_IN_A}},
|
{60, &Jit64::psq_stXX}, //"psq_st", OPTYPE_PS, FL_IN_A}},
|
||||||
{61, &Jit64::psq_st}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
{61, &Jit64::psq_stXX}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||||
|
|
||||||
//missing: 0, 5, 6, 9, 22, 30, 62, 58
|
//missing: 0, 5, 6, 9, 22, 30, 62, 58
|
||||||
{0, &Jit64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
|
{0, &Jit64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
|
||||||
@ -150,10 +150,10 @@ static GekkoOPTemplate table4_2[] =
|
|||||||
|
|
||||||
static GekkoOPTemplate table4_3[] =
|
static GekkoOPTemplate table4_3[] =
|
||||||
{
|
{
|
||||||
{6, &Jit64::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},
|
{6, &Jit64::psq_lXX}, //"psq_lx", OPTYPE_PS, 0}},
|
||||||
{7, &Jit64::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}},
|
{7, &Jit64::psq_stXX}, //"psq_stx", OPTYPE_PS, 0}},
|
||||||
{38, &Jit64::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}},
|
{38, &Jit64::psq_lXX}, //"psq_lux", OPTYPE_PS, 0}},
|
||||||
{39, &Jit64::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}},
|
{39, &Jit64::psq_stXX}, //"psq_stux", OPTYPE_PS, 0}},
|
||||||
};
|
};
|
||||||
|
|
||||||
static GekkoOPTemplate table19[] =
|
static GekkoOPTemplate table19[] =
|
||||||
|
@ -16,33 +16,41 @@ using namespace Gen;
|
|||||||
|
|
||||||
// The big problem is likely instructions that set the quantizers in the same block.
|
// The big problem is likely instructions that set the quantizers in the same block.
|
||||||
// We will have to break block after quantizers are written to.
|
// We will have to break block after quantizers are written to.
|
||||||
void Jit64::psq_st(UGeckoInstruction inst)
|
void Jit64::psq_stXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStorePairedOff);
|
JITDISABLE(bJITLoadStorePairedOff);
|
||||||
FALLBACK_IF(!inst.RA);
|
FALLBACK_IF(!inst.RA);
|
||||||
|
|
||||||
s32 offset = inst.SIMM_12;
|
s32 offset = inst.SIMM_12;
|
||||||
bool update = inst.OPCD == 61 && offset;
|
bool indexed = inst.OPCD == 4;
|
||||||
|
bool update = (inst.OPCD == 61 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
int s = inst.RS;
|
int b = indexed ? inst.RB : a;
|
||||||
|
int s = inst.FS;
|
||||||
|
|
||||||
|
gpr.Lock(a, b);
|
||||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||||
if (update)
|
if (update)
|
||||||
gpr.BindToRegister(a, true, true);
|
gpr.BindToRegister(a, true, true);
|
||||||
fpr.BindToRegister(s, true, false);
|
fpr.BindToRegister(s, true, false);
|
||||||
if (offset && gpr.R(a).IsSimpleReg())
|
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
|
||||||
{
|
{
|
||||||
|
if (indexed)
|
||||||
|
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||||
|
else
|
||||||
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
|
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
|
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
|
||||||
if (offset)
|
if (indexed)
|
||||||
|
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
|
||||||
|
else if (offset)
|
||||||
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
|
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
|
||||||
}
|
}
|
||||||
// In memcheck mode, don't update the address until the exception check
|
// In memcheck mode, don't update the address until the exception check
|
||||||
if (update && offset && !js.memcheck)
|
if (update && !js.memcheck)
|
||||||
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
||||||
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
|
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
|
||||||
// Hence, we need to mask out the unused bits. The layout of the GQR register is
|
// Hence, we need to mask out the unused bits. The layout of the GQR register is
|
||||||
@ -67,9 +75,12 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||||||
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
|
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (update && offset && js.memcheck)
|
if (update && js.memcheck)
|
||||||
{
|
{
|
||||||
MEMCHECK_START(false)
|
MEMCHECK_START(false)
|
||||||
|
if (indexed)
|
||||||
|
ADD(32, gpr.R(a), gpr.R(b));
|
||||||
|
else
|
||||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
}
|
}
|
||||||
@ -77,45 +88,59 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::psq_l(UGeckoInstruction inst)
|
void Jit64::psq_lXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStorePairedOff);
|
JITDISABLE(bJITLoadStorePairedOff);
|
||||||
FALLBACK_IF(!inst.RA);
|
FALLBACK_IF(!inst.RA);
|
||||||
|
|
||||||
s32 offset = inst.SIMM_12;
|
s32 offset = inst.SIMM_12;
|
||||||
bool update = inst.OPCD == 57 && offset;
|
bool indexed = inst.OPCD == 4;
|
||||||
|
bool update = (inst.OPCD == 57 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
int s = inst.RS;
|
int b = indexed ? inst.RB : a;
|
||||||
|
int s = inst.FS;
|
||||||
|
|
||||||
|
gpr.Lock(a, b);
|
||||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||||
gpr.BindToRegister(a, true, update && offset);
|
gpr.BindToRegister(a, true, update);
|
||||||
fpr.BindToRegister(s, false, true);
|
fpr.BindToRegister(s, false, true);
|
||||||
if (offset && gpr.R(a).IsSimpleReg())
|
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
|
||||||
{
|
{
|
||||||
|
if (indexed)
|
||||||
|
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||||
|
else
|
||||||
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
|
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
|
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
|
||||||
if (offset)
|
if (indexed)
|
||||||
|
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
|
||||||
|
else if (offset)
|
||||||
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
|
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
|
||||||
}
|
}
|
||||||
// In memcheck mode, don't update the address until the exception check
|
// In memcheck mode, don't update the address until the exception check
|
||||||
if (update && offset && !js.memcheck)
|
if (update && !js.memcheck)
|
||||||
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
||||||
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
|
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
|
||||||
AND(32, R(RSCRATCH2), M(((char *)&GQR(inst.I)) + 2));
|
|
||||||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
|
|
||||||
if (inst.W)
|
|
||||||
OR(32, R(RSCRATCH), Imm8(8));
|
|
||||||
|
|
||||||
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
|
// Get the high part of the GQR register
|
||||||
|
OpArg gqr = PPCSTATE(spr[SPR_GQR0 + inst.I]);
|
||||||
|
gqr.offset += 2;
|
||||||
|
|
||||||
|
AND(32, R(RSCRATCH2), gqr);
|
||||||
|
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
|
||||||
|
|
||||||
|
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[inst.W * 8])));
|
||||||
|
|
||||||
MEMCHECK_START(false)
|
MEMCHECK_START(false)
|
||||||
CVTPS2PD(fpr.RX(s), R(XMM0));
|
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||||
if (update && offset && js.memcheck)
|
if (update && js.memcheck)
|
||||||
{
|
{
|
||||||
|
if (indexed)
|
||||||
|
ADD(32, gpr.R(a), gpr.R(b));
|
||||||
|
else
|
||||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||||
}
|
}
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
|
Loading…
x
Reference in New Issue
Block a user