JIT for frsqrte; not really a large difference, but it drops frsqrte off a profile I'm looking at.



git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3671 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
magumagu9 2009-07-04 00:48:09 +00:00
parent c52e4d281e
commit 1b936a4f71
5 changed files with 32 additions and 15 deletions

View File

@ -80,6 +80,11 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
Default(inst); return; Default(inst); return;
} }
if (inst.SUBOP5 != 18 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21 &&
inst.SUBOP5 != 25) {
Default(inst); return;
}
// Only the interpreter has "proper" support for (some) FP flags // Only the interpreter has "proper" support for (some) FP flags
if (inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF) { if (inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF) {
Default(inst); return; Default(inst); return;
@ -91,12 +96,6 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
case 23: //sel
Default(inst);
break;
case 24: //res
Default(inst);
break;
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
default: default:
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");

View File

@ -124,6 +124,7 @@ enum Opcode {
FSAdd, FSAdd,
FSSub, FSSub,
FSNeg, FSNeg,
FSRSqrt,
FPAdd, FPAdd,
FPMul, FPMul,
FPSub, FPSub,
@ -444,6 +445,9 @@ public:
InstLoc EmitFSNeg(InstLoc op1) { InstLoc EmitFSNeg(InstLoc op1) {
return FoldUOp(FSNeg, op1); return FoldUOp(FSNeg, op1);
} }
InstLoc EmitFSRSqrt(InstLoc op1) {
return FoldUOp(FSRSqrt, op1);
}
InstLoc EmitFDMul(InstLoc op1, InstLoc op2) { InstLoc EmitFDMul(InstLoc op1, InstLoc op2) {
return FoldBiOp(FDMul, op1, op2); return FoldBiOp(FDMul, op1, op2);
} }

View File

@ -430,7 +430,8 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
#ifdef _M_IX86 #ifdef _M_IX86
return MDisp(baseReg, (u32)Memory::base + offset + ProfileOffset); return MDisp(baseReg, (u32)Memory::base + offset + ProfileOffset);
#else #else
return MComplex(RBX, baseReg, 1, offset + ProfileOffset); LEA(32, EAX, MDisp(baseReg, offset + ProfileOffset));
return MComplex(RBX, EAX, 1, 0);
#endif #endif
} }
return MDisp(baseReg, offset); return MDisp(baseReg, offset);
@ -734,6 +735,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak
case FSMul: case FSMul:
case FSAdd: case FSAdd:
case FSSub: case FSSub:
case FSRSqrt:
case FDMul: case FDMul:
case FDAdd: case FDAdd:
case FDSub: case FDSub:
@ -1370,6 +1372,14 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak
fregEmitBinInst(RI, I, &Jit64::SUBSS); fregEmitBinInst(RI, I, &Jit64::SUBSS);
break; break;
} }
case FSRSqrt: {
if (!thisUsed) break;
X64Reg reg = fregFindFreeReg(RI);
Jit->RSQRTSS(reg, fregLocForInst(RI, getOp1(I)));
RI.fregs[reg] = I;
fregNormalRegClear(RI, I);
break;
}
case FDMul: { case FDMul: {
if (!thisUsed) break; if (!thisUsed) break;
fregEmitBinInst(RI, I, &Jit64::MULSD); fregEmitBinInst(RI, I, &Jit64::MULSD);

View File

@ -31,24 +31,28 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(FloatingPoint) JITDISABLE(FloatingPoint)
if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 && inst.SUBOP5 != 21)) { if (inst.Rc || (inst.SUBOP5 != 25 && inst.SUBOP5 != 20 &&
inst.SUBOP5 != 21 && inst.SUBOP5 != 26)) {
Default(inst); return; Default(inst); return;
} }
IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA); IREmitter::InstLoc val = ibuild.EmitLoadFReg(inst.FA);
switch (inst.SUBOP5) switch (inst.SUBOP5)
{ {
case 25: //mul
val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
break;
case 18: //div
case 20: //sub case 20: //sub
val = ibuild.EmitFDSub(val, ibuild.EmitLoadFReg(inst.FB)); val = ibuild.EmitFDSub(val, ibuild.EmitLoadFReg(inst.FB));
break; break;
case 21: //add case 21: //add
val = ibuild.EmitFDAdd(val, ibuild.EmitLoadFReg(inst.FB)); val = ibuild.EmitFDAdd(val, ibuild.EmitLoadFReg(inst.FB));
break; break;
case 23: //sel case 25: //mul
case 24: //res val = ibuild.EmitFDMul(val, ibuild.EmitLoadFReg(inst.FC));
break;
case 26: //rsqrte
val = ibuild.EmitLoadFReg(inst.FB);
val = ibuild.EmitDoubleToSingle(val);
val = ibuild.EmitFSRSqrt(val);
val = ibuild.EmitDupSingleToMReg(val);
break;
default: default:
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!"); _assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
} }

View File

@ -484,7 +484,7 @@ static GekkoOPTemplate table63_2[] =
{22, Interpreter::fsqrtx, &Jit64::Default, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, {22, Interpreter::fsqrtx, &Jit64::Default, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
{23, Interpreter::fselx, &Jit64::Default, {"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, {23, Interpreter::fselx, &Jit64::Default, {"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
{25, Interpreter::fmulx, &Jit64::fp_arith_s, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, {25, Interpreter::fmulx, &Jit64::fp_arith_s, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
{26, Interpreter::frsqrtex,&Jit64::Default, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, {26, Interpreter::frsqrtex,&Jit64::fp_arith_s, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
{28, Interpreter::fmsubx, &Jit64::fmaddXX, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {28, Interpreter::fmsubx, &Jit64::fmaddXX, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
{29, Interpreter::fmaddx, &Jit64::fmaddXX, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, {29, Interpreter::fmaddx, &Jit64::fmaddXX, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
{30, Interpreter::fnmsubx, &Jit64::fmaddXX, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, {30, Interpreter::fnmsubx, &Jit64::fmaddXX, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},