mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-03-12 14:46:49 +01:00
JIT a few more instructions. Very minor speed boost in a few games.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@961 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
f77624147d
commit
5247f6661b
@ -148,6 +148,8 @@ fnegx
|
|||||||
frspx
|
frspx
|
||||||
frsqrtex
|
frsqrtex
|
||||||
ps_sum0
|
ps_sum0
|
||||||
|
ps_muls0
|
||||||
|
ps_adds1
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -96,6 +96,7 @@ namespace Jit64
|
|||||||
|
|
||||||
void addx(UGeckoInstruction inst);
|
void addx(UGeckoInstruction inst);
|
||||||
void orx(UGeckoInstruction inst);
|
void orx(UGeckoInstruction inst);
|
||||||
|
void xorx(UGeckoInstruction inst);
|
||||||
void andx(UGeckoInstruction inst);
|
void andx(UGeckoInstruction inst);
|
||||||
void mulli(UGeckoInstruction inst);
|
void mulli(UGeckoInstruction inst);
|
||||||
void mulhwux(UGeckoInstruction inst);
|
void mulhwux(UGeckoInstruction inst);
|
||||||
@ -120,6 +121,7 @@ namespace Jit64
|
|||||||
void mtmsr(UGeckoInstruction inst);
|
void mtmsr(UGeckoInstruction inst);
|
||||||
void mfmsr(UGeckoInstruction inst);
|
void mfmsr(UGeckoInstruction inst);
|
||||||
void mftb(UGeckoInstruction inst);
|
void mftb(UGeckoInstruction inst);
|
||||||
|
void mtcrf(UGeckoInstruction inst);
|
||||||
|
|
||||||
void reg_imm(UGeckoInstruction inst);
|
void reg_imm(UGeckoInstruction inst);
|
||||||
|
|
||||||
@ -130,6 +132,8 @@ namespace Jit64
|
|||||||
void ps_mergeXX(UGeckoInstruction inst);
|
void ps_mergeXX(UGeckoInstruction inst);
|
||||||
void ps_maddXX(UGeckoInstruction inst);
|
void ps_maddXX(UGeckoInstruction inst);
|
||||||
void ps_rsqrte(UGeckoInstruction inst);
|
void ps_rsqrte(UGeckoInstruction inst);
|
||||||
|
void ps_sum(UGeckoInstruction inst);
|
||||||
|
void ps_muls(UGeckoInstruction inst);
|
||||||
|
|
||||||
void fp_arith_s(UGeckoInstruction inst);
|
void fp_arith_s(UGeckoInstruction inst);
|
||||||
|
|
||||||
|
@ -54,6 +54,7 @@ const u8 *fifoDirectWrite8;
|
|||||||
const u8 *fifoDirectWrite16;
|
const u8 *fifoDirectWrite16;
|
||||||
const u8 *fifoDirectWrite32;
|
const u8 *fifoDirectWrite32;
|
||||||
const u8 *fifoDirectWriteFloat;
|
const u8 *fifoDirectWriteFloat;
|
||||||
|
const u8 *fifoDirectWriteXmm64;
|
||||||
|
|
||||||
bool compareEnabled = false;
|
bool compareEnabled = false;
|
||||||
|
|
||||||
@ -308,6 +309,19 @@ void GenFifoFloatWrite()
|
|||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GenFifoXmm64Write()
|
||||||
|
{
|
||||||
|
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
|
||||||
|
PUSH(ESI);
|
||||||
|
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||||
|
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||||
|
MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
|
||||||
|
ADD(32, R(ESI), Imm8(8));
|
||||||
|
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||||
|
POP(ESI);
|
||||||
|
RET();
|
||||||
|
}
|
||||||
|
|
||||||
void GenerateCommon()
|
void GenerateCommon()
|
||||||
{
|
{
|
||||||
computeRc = AlignCode16();
|
computeRc = AlignCode16();
|
||||||
@ -332,6 +346,8 @@ void GenerateCommon()
|
|||||||
GenFifoWrite(32);
|
GenFifoWrite(32);
|
||||||
fifoDirectWriteFloat = AlignCode4();
|
fifoDirectWriteFloat = AlignCode4();
|
||||||
GenFifoFloatWrite();
|
GenFifoFloatWrite();
|
||||||
|
fifoDirectWriteXmm64 = AlignCode4();
|
||||||
|
GenFifoXmm64Write();
|
||||||
|
|
||||||
computeRcFp = AlignCode16();
|
computeRcFp = AlignCode16();
|
||||||
//CMPSD(R(XMM0), M(&zero),
|
//CMPSD(R(XMM0), M(&zero),
|
||||||
|
@ -39,6 +39,7 @@ namespace Jit64
|
|||||||
extern const u8 *fifoDirectWrite16;
|
extern const u8 *fifoDirectWrite16;
|
||||||
extern const u8 *fifoDirectWrite32;
|
extern const u8 *fifoDirectWrite32;
|
||||||
extern const u8 *fifoDirectWriteFloat;
|
extern const u8 *fifoDirectWriteFloat;
|
||||||
|
extern const u8 *fifoDirectWriteXmm64;
|
||||||
|
|
||||||
extern bool compareEnabled;
|
extern bool compareEnabled;
|
||||||
void Generate();
|
void Generate();
|
||||||
|
@ -101,7 +101,19 @@ namespace Jit64
|
|||||||
int d = inst.RD, a = inst.RA, s = inst.RS;
|
int d = inst.RD, a = inst.RA, s = inst.RS;
|
||||||
switch (inst.OPCD)
|
switch (inst.OPCD)
|
||||||
{
|
{
|
||||||
case 14: regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, ADD); break; //addi
|
case 14: // addi
|
||||||
|
// occasionally used as MOV - emulate, with immediate propagation
|
||||||
|
if (gpr.R(a).IsImm() && d != a && a != 0) {
|
||||||
|
gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)(s32)(s16)inst.SIMM_16);
|
||||||
|
} else if (inst.SIMM_16 == 0 && d != a && a != 0) {
|
||||||
|
gpr.Lock(a);
|
||||||
|
gpr.LoadToX64(d, false, true);
|
||||||
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
|
gpr.UnlockAll();
|
||||||
|
} else {
|
||||||
|
regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, ADD); //addi
|
||||||
|
}
|
||||||
|
break;
|
||||||
case 15: regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, ADD); break; //addis
|
case 15: regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, ADD); break; //addis
|
||||||
case 24:
|
case 24:
|
||||||
if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop
|
if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop
|
||||||
@ -292,6 +304,39 @@ namespace Jit64
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB];
|
||||||
|
void xorx(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
#ifdef JIT_OFF_OPTIONS
|
||||||
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||||
|
{Default(inst); return;} // turn off from debugger
|
||||||
|
#endif
|
||||||
|
INSTRUCTION_START;
|
||||||
|
int a = inst.RA;
|
||||||
|
int s = inst.RS;
|
||||||
|
int b = inst.RB;
|
||||||
|
|
||||||
|
if (s == b) {
|
||||||
|
gpr.SetImmediate32(a, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gpr.LoadToX64(a, a == s || a == b, true);
|
||||||
|
gpr.Lock(a, s, b);
|
||||||
|
MOV(32, R(EAX), gpr.R(s));
|
||||||
|
XOR(32, R(EAX), gpr.R(b));
|
||||||
|
MOV(32, gpr.R(a), R(EAX));
|
||||||
|
gpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst.Rc)
|
||||||
|
{
|
||||||
|
MOV(32, R(EAX), gpr.R(a));
|
||||||
|
CALL((u8*)Asm::computeRc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void andx(UGeckoInstruction inst)
|
void andx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
#ifdef JIT_OFF_OPTIONS
|
#ifdef JIT_OFF_OPTIONS
|
||||||
|
@ -91,7 +91,7 @@ namespace Jit64
|
|||||||
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
||||||
// Will give nice boost to dual core mode
|
// Will give nice boost to dual core mode
|
||||||
// (mb2): I agree,
|
// (mb2): I agree,
|
||||||
// IMHO those Idles should be always skipped and replaced by a more controlable "native" Idle methode
|
// IMHO those Idles should always be skipped and replaced by a more controllable "native" Idle methode
|
||||||
// ... maybe the throttle one already do that :p
|
// ... maybe the throttle one already do that :p
|
||||||
// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
|
// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
|
||||||
if (Core::GetStartupParameter().bSkipIdle &&
|
if (Core::GetStartupParameter().bSkipIdle &&
|
||||||
@ -100,17 +100,16 @@ namespace Jit64
|
|||||||
Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 &&
|
Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 &&
|
||||||
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
|
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
|
||||||
{
|
{
|
||||||
|
gpr.Flush(FLUSH_ALL);
|
||||||
gpr.Flush(FLUSH_ALL);
|
fpr.Flush(FLUSH_ALL);
|
||||||
fpr.Flush(FLUSH_ALL);
|
if (Core::GetStartupParameter().bUseDualCore)
|
||||||
if (Core::GetStartupParameter().bUseDualCore)
|
CALL((void *)&PowerPC::OnIdleDC);
|
||||||
CALL((void *)&PowerPC::OnIdleDC);
|
else
|
||||||
else
|
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
|
||||||
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
|
MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC + 12));
|
||||||
MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC + 12));
|
JMP(Asm::testExceptions, true);
|
||||||
JMP(Asm::testExceptions, true);
|
js.compilerPC += 8;
|
||||||
js.compilerPC += 8;
|
return;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
@ -236,7 +235,7 @@ namespace Jit64
|
|||||||
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
|
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gpr.R(a).IsImm() && !update)
|
if (gpr.R(a).IsImm())
|
||||||
{
|
{
|
||||||
// If we already know the address through constant folding, we can do some
|
// If we already know the address through constant folding, we can do some
|
||||||
// fun tricks...
|
// fun tricks...
|
||||||
@ -244,6 +243,8 @@ namespace Jit64
|
|||||||
addr += offset;
|
addr += offset;
|
||||||
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
|
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
|
||||||
{
|
{
|
||||||
|
if (offset && update)
|
||||||
|
gpr.SetImmediate32(a, addr);
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
||||||
// INT3();
|
// INT3();
|
||||||
@ -261,6 +262,8 @@ namespace Jit64
|
|||||||
}
|
}
|
||||||
else if (Memory::IsRAMAddress(addr) && accessSize == 32)
|
else if (Memory::IsRAMAddress(addr) && accessSize == 32)
|
||||||
{
|
{
|
||||||
|
if (offset && update)
|
||||||
|
gpr.SetImmediate32(a, addr);
|
||||||
MOV(accessSize, R(EAX), gpr.R(s));
|
MOV(accessSize, R(EAX), gpr.R(s));
|
||||||
BSWAP(accessSize, EAX);
|
BSWAP(accessSize, EAX);
|
||||||
WriteToConstRamAddress(accessSize, R(EAX), addr);
|
WriteToConstRamAddress(accessSize, R(EAX), addr);
|
||||||
|
@ -52,8 +52,11 @@
|
|||||||
|
|
||||||
namespace Jit64 {
|
namespace Jit64 {
|
||||||
|
|
||||||
double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
|
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
u64 GC_ALIGNED16(temp64);
|
const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
|
|
||||||
|
static double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
|
||||||
|
static u64 GC_ALIGNED16(temp64);
|
||||||
|
|
||||||
// TODO(ector): Improve 64-bit version
|
// TODO(ector): Improve 64-bit version
|
||||||
void WriteDual32(u64 value, u32 address)
|
void WriteDual32(u64 value, u32 address)
|
||||||
@ -183,6 +186,20 @@ void psq_st(UGeckoInstruction inst)
|
|||||||
if (stType == QUANTIZE_FLOAT)
|
if (stType == QUANTIZE_FLOAT)
|
||||||
{
|
{
|
||||||
DISABLE_32BIT;
|
DISABLE_32BIT;
|
||||||
|
|
||||||
|
if (gpr.R(a).IsImm() && !update && cpu_info.bSSSE3)
|
||||||
|
{
|
||||||
|
u32 addr = gpr.R(a).offset + offset;
|
||||||
|
if (addr == 0xCC008000) {
|
||||||
|
// Writing to FIFO. Let's do fast method.
|
||||||
|
CVTPD2PS(XMM0, fpr.R(s));
|
||||||
|
PSHUFB(XMM0, M((void*)&pbswapShuffle2x4));
|
||||||
|
CALL((void*)Asm::fifoDirectWriteXmm64);
|
||||||
|
js.fifoBytesThisBlock += 8;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
||||||
gpr.Lock(a);
|
gpr.Lock(a);
|
||||||
fpr.Lock(s);
|
fpr.Lock(s);
|
||||||
@ -282,9 +299,6 @@ void psq_st(UGeckoInstruction inst)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
|
||||||
const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
|
||||||
|
|
||||||
void psq_l(UGeckoInstruction inst)
|
void psq_l(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
#ifdef JIT_OFF_OPTIONS
|
#ifdef JIT_OFF_OPTIONS
|
||||||
|
@ -247,6 +247,92 @@ namespace Jit64
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JIT for the paired-single sum instructions, selected by SUBOP5:
//   10 (ps_sum0): d.ps0 = a.ps0 + b.ps1, d.ps1 = c.ps1
//   11 (ps_sum1): d.ps0 = c.ps0,         d.ps1 = a.ps0 + b.ps1
// Each pair is handled as two doubles in one XMM register (ps0 in the low
// lane). Record forms (Rc set) are handed to Default().
void ps_sum(UGeckoInstruction inst)
{
#ifdef JIT_OFF_OPTIONS
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
		{Default(inst); return;} // turn off from debugger
#endif
	INSTRUCTION_START;
	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	const int d = inst.FD;
	const int a = inst.FA;
	const int b = inst.FB;
	const int c = inst.FC;

	fpr.Lock(a,b,c,d);
	// The destination's old contents are only needed when it doubles as an input.
	const bool destIsInput = (d == a) || (d == b) || (d == c);
	fpr.LoadToX64(d, destIsInput, true);

	const u32 subop = inst.SUBOP5;
	if (subop == 10)
	{
		// ps_sum0: form the sum in the high lane, then pair it with c's high lane.
		MOVDDUP(XMM0, fpr.R(a));    // XMM0 = (a.ps0, a.ps0)
		MOVAPD(XMM1, fpr.R(b));
		ADDPD(XMM0, R(XMM1));       // high lane = a.ps0 + b.ps1
		UNPCKHPD(XMM0, fpr.R(c));   // XMM0 = (sum, c.ps1)
		MOVAPD(fpr.R(d), XMM0);
	}
	else if (subop == 11)
	{
		// ps_sum1: form the sum in the low lane, then pair it with c's low lane.
		MOVAPD(XMM0, fpr.R(a));
		MOVAPD(XMM1, fpr.R(b));
		SHUFPD(XMM1, R(XMM1), 5);   // swap lanes so b.ps1 sits in the low lane
		ADDPD(XMM0, R(XMM1));       // low lane = a.ps0 + b.ps1
		MOVAPD(XMM1, fpr.R(c));
		UNPCKLPD(XMM1, R(XMM0));    // XMM1 = (c.ps0, sum)
		MOVAPD(fpr.R(d), XMM1);
	}
	else
	{
		PanicAlert("ps_sum WTF!!!");
	}

	ForceSinglePrecisionP(fpr.RX(d));
	fpr.UnlockAll();
}
|
||||||
|
|
||||||
|
|
||||||
|
// JIT for the paired-single "multiply by scalar" instructions, selected by SUBOP5:
//   12 (ps_muls0): d.ps0 = a.ps0 * c.ps0, d.ps1 = a.ps1 * c.ps0
//   13 (ps_muls1): d.ps0 = a.ps0 * c.ps1, d.ps1 = a.ps1 * c.ps1
// Each pair is handled as two doubles in one XMM register (ps0 in the low
// lane), so the multiply must be the packed-double form. Record forms (Rc set)
// are handed to Default().
//
// Fixes relative to the previous revision, which was short-circuited to
// Default() on its first line because the emitted code was wrong:
//   - MULPS was applied to packed-double data; it is now MULPD.
//   - the scalar was taken from a instead of c (operands were swapped).
//   - ps_muls1 swapped the lanes (SHUFPD imm 5) instead of broadcasting the
//     high lane (imm 3).
void ps_muls(UGeckoInstruction inst)
{
#ifdef JIT_OFF_OPTIONS
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
		{Default(inst); return;} // turn off from debugger
#endif
	INSTRUCTION_START;
	if (inst.Rc) {
		Default(inst); return;
	}
	int d = inst.FD;
	int a = inst.FA;
	int c = inst.FC;
	fpr.Lock(a, c, d);
	// The destination's old contents are only needed when it doubles as an input.
	fpr.LoadToX64(d, d == a || d == c, true);
	switch (inst.SUBOP5)
	{
	case 12:
		// ps_muls0: multiply both lanes of a by c.ps0.
		// TODO - faster version for when regs are different
		MOVAPD(XMM0, fpr.R(a));
		MOVDDUP(XMM1, fpr.R(c));    // XMM1 = (c.ps0, c.ps0)
		MULPD(XMM0, R(XMM1));
		MOVAPD(fpr.R(d), XMM0);
		break;
	case 13:
		// ps_muls1: multiply both lanes of a by c.ps1.
		// TODO - faster version for when regs are different
		MOVAPD(XMM0, fpr.R(a));
		MOVAPD(XMM1, fpr.R(c));
		SHUFPD(XMM1, R(XMM1), 3);   // XMM1 = (c.ps1, c.ps1)
		MULPD(XMM0, R(XMM1));
		MOVAPD(fpr.R(d), XMM0);
		break;
	default:
		PanicAlert("ps_muls WTF!!!");
	}
	ForceSinglePrecisionP(fpr.RX(d));
	fpr.UnlockAll();
}
|
||||||
|
|
||||||
|
|
||||||
//TODO: find easy cases and optimize them, do a breakout like ps_arith
|
//TODO: find easy cases and optimize them, do a breakout like ps_arith
|
||||||
void ps_mergeXX(UGeckoInstruction inst)
|
void ps_mergeXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
|
@ -156,5 +156,29 @@ namespace Jit64
|
|||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
mfspr(inst);
|
mfspr(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JIT for mtcrf: copies selected 4-bit fields of GPR[RS] into ppcState.cr.
// Bit i of the CRM field selects the nibble at bits [4*i+3 : 4*i] of the CR
// image; unselected nibbles keep their current value. CRM == 0xFF replaces
// the whole register and needs no masking.
// The generated code uses EAX as scratch, plus ECX on the masked path.
void mtcrf(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	const u32 crm = inst.CRM;
	gpr.FlushLockX(ECX);
	if (crm == 0xFF) {
		// Every field selected: plain copy.
		MOV(32, R(EAX), gpr.R(inst.RS));
		MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
	} else {
		// Expand the 8-bit field selector into a 32-bit nibble mask at compile
		// time. The shifts are done on unsigned values: 0xF << 28 does not fit
		// in a signed int.
		//TODO: use lookup table? probably not worth it
		u32 mask = 0;
		for (int i = 0; i < 8; i++) {
			if (crm & (1u << i))
				mask |= 0xFu << (i*4);
		}
		// cr = (RS & mask) | (cr & ~mask)
		MOV(32, R(EAX), gpr.R(inst.RS));
		MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
		AND(32, R(EAX), Imm32(mask));
		AND(32, R(ECX), Imm32(~mask));
		OR(32, R(EAX), R(ECX));
		MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
	}
	gpr.UnlockAllX();
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,7 +55,9 @@ void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 o
|
|||||||
SHR(32, R(EAX), Imm8(16));
|
SHR(32, R(EAX), Imm8(16));
|
||||||
}
|
}
|
||||||
if (signExtend && accessSize < 32) {
|
if (signExtend && accessSize < 32) {
|
||||||
MOVSX(32, accessSize, EAX, R(EAX));
|
// For 16-bit, this must be done AFTER the BSWAP.
|
||||||
|
// TODO: bake 8-bit into the original load.
|
||||||
|
MOVSX(32, accessSize, EAX, R(EAX));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -75,6 +77,7 @@ void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
|
|||||||
case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
|
case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
|
||||||
}
|
}
|
||||||
if (signExtend && accessSize < 32) {
|
if (signExtend && accessSize < 32) {
|
||||||
|
// Need to sign extend values coming from the Read_U* functions.
|
||||||
MOVSX(32, accessSize, EAX, R(EAX));
|
MOVSX(32, accessSize, EAX, R(EAX));
|
||||||
}
|
}
|
||||||
SetJumpTarget(arg2);
|
SetJumpTarget(arg2);
|
||||||
@ -111,7 +114,7 @@ void SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 of
|
|||||||
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
|
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
|
||||||
{
|
{
|
||||||
#ifdef _M_X64
|
#ifdef _M_X64
|
||||||
MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg);
|
MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg);
|
||||||
#else
|
#else
|
||||||
MOV(accessSize, M((void*)(Memory::base + (address & Memory::MEMVIEW32_MASK))), arg);
|
MOV(accessSize, M((void*)(Memory::base + (address & Memory::MEMVIEW32_MASK))), arg);
|
||||||
#endif
|
#endif
|
||||||
|
@ -33,4 +33,4 @@ void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
|||||||
void ForceSinglePrecisionS(X64Reg xmm);
|
void ForceSinglePrecisionS(X64Reg xmm);
|
||||||
void ForceSinglePrecisionP(X64Reg xmm);
|
void ForceSinglePrecisionP(X64Reg xmm);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -659,7 +659,8 @@ void FindFunctions(u32 startAddr, u32 endAddr, SymbolDB *func_db)
|
|||||||
//Step 2:
|
//Step 2:
|
||||||
func_db->FillInCallers();
|
func_db->FillInCallers();
|
||||||
|
|
||||||
int numLeafs = 0, numNice = 0, numUnNice = 0, numTimer=0, numRFI=0, numStraightLeaf=0;
|
int numLeafs = 0, numNice = 0, numUnNice = 0;
|
||||||
|
int numTimer = 0, numRFI = 0, numStraightLeaf = 0;
|
||||||
int leafSize = 0, niceSize = 0, unniceSize = 0;
|
int leafSize = 0, niceSize = 0, unniceSize = 0;
|
||||||
for (SymbolDB::XFuncMap::iterator iter = func_db->GetIterator(); iter != func_db->End(); iter++)
|
for (SymbolDB::XFuncMap::iterator iter = func_db->GetIterator(); iter != func_db->End(); iter++)
|
||||||
{
|
{
|
||||||
|
@ -224,10 +224,10 @@ GekkoOPTemplate table4[] =
|
|||||||
|
|
||||||
GekkoOPTemplate table4_2[] =
|
GekkoOPTemplate table4_2[] =
|
||||||
{
|
{
|
||||||
{10, Interpreter::ps_sum0, Jit64::Default, {"ps_sum0", OPTYPE_PS, 0}},
|
{10, Interpreter::ps_sum0, Jit64::ps_sum, {"ps_sum0", OPTYPE_PS, 0}},
|
||||||
{11, Interpreter::ps_sum1, Jit64::Default, {"ps_sum1", OPTYPE_PS, 0}},
|
{11, Interpreter::ps_sum1, Jit64::ps_sum, {"ps_sum1", OPTYPE_PS, 0}},
|
||||||
{12, Interpreter::ps_muls0, Jit64::Default, {"ps_muls0", OPTYPE_PS, 0}},
|
{12, Interpreter::ps_muls0, Jit64::ps_muls, {"ps_muls0", OPTYPE_PS, 0}},
|
||||||
{13, Interpreter::ps_muls1, Jit64::Default, {"ps_muls1", OPTYPE_PS, 0}},
|
{13, Interpreter::ps_muls1, Jit64::ps_muls, {"ps_muls1", OPTYPE_PS, 0}},
|
||||||
{14, Interpreter::ps_madds0, Jit64::Default, {"ps_madds0", OPTYPE_PS, 0}},
|
{14, Interpreter::ps_madds0, Jit64::Default, {"ps_madds0", OPTYPE_PS, 0}},
|
||||||
{15, Interpreter::ps_madds1, Jit64::Default, {"ps_madds1", OPTYPE_PS, 0}},
|
{15, Interpreter::ps_madds1, Jit64::Default, {"ps_madds1", OPTYPE_PS, 0}},
|
||||||
{18, Interpreter::ps_div, Jit64::ps_arith, {"ps_div", OPTYPE_PS, 0, 16}},
|
{18, Interpreter::ps_div, Jit64::ps_arith, {"ps_div", OPTYPE_PS, 0, 16}},
|
||||||
@ -278,7 +278,7 @@ GekkoOPTemplate table31[] =
|
|||||||
{60, Interpreter::andcx, Jit64::Default, {"andcx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
{60, Interpreter::andcx, Jit64::Default, {"andcx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
||||||
{444, Interpreter::orx, Jit64::orx, {"orx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
{444, Interpreter::orx, Jit64::orx, {"orx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
||||||
{124, Interpreter::norx, Jit64::Default, {"norx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
{124, Interpreter::norx, Jit64::Default, {"norx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
||||||
{316, Interpreter::xorx, Jit64::Default, {"xorx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
{316, Interpreter::xorx, Jit64::xorx, {"xorx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
||||||
{412, Interpreter::orcx, Jit64::Default, {"orcx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
{412, Interpreter::orcx, Jit64::Default, {"orcx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
||||||
{476, Interpreter::nandx, Jit64::Default, {"nandx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
{476, Interpreter::nandx, Jit64::Default, {"nandx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
||||||
{284, Interpreter::eqvx, Jit64::Default, {"eqvx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
{284, Interpreter::eqvx, Jit64::Default, {"eqvx", OPTYPE_INTEGER, FL_IN_AB | FL_OUT_S | FL_RC_BIT}},
|
||||||
@ -359,7 +359,7 @@ GekkoOPTemplate table31[] =
|
|||||||
|
|
||||||
{19, Interpreter::mfcr, Jit64::Default, {"mfcr", OPTYPE_SYSTEM, 0}},
|
{19, Interpreter::mfcr, Jit64::Default, {"mfcr", OPTYPE_SYSTEM, 0}},
|
||||||
{83, Interpreter::mfmsr, Jit64::mfmsr, {"mfmsr", OPTYPE_SYSTEM, 0}},
|
{83, Interpreter::mfmsr, Jit64::mfmsr, {"mfmsr", OPTYPE_SYSTEM, 0}},
|
||||||
{144, Interpreter::mtcrf, Jit64::Default, {"mtcrf", OPTYPE_SYSTEM, 0}},
|
{144, Interpreter::mtcrf, Jit64::mtcrf, {"mtcrf", OPTYPE_SYSTEM, 0}},
|
||||||
{146, Interpreter::mtmsr, Jit64::mtmsr, {"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}},
|
{146, Interpreter::mtmsr, Jit64::mtmsr, {"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}},
|
||||||
{210, Interpreter::mtsr, Jit64::Default, {"mtsr", OPTYPE_SYSTEM, 0}},
|
{210, Interpreter::mtsr, Jit64::Default, {"mtsr", OPTYPE_SYSTEM, 0}},
|
||||||
{242, Interpreter::mtsrin, Jit64::Default, {"mtsrin", OPTYPE_SYSTEM, 0}},
|
{242, Interpreter::mtsrin, Jit64::Default, {"mtsrin", OPTYPE_SYSTEM, 0}},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user