From ba684cabcd16fb97dbda12f83fca0980fe27c3dc Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sun, 17 Aug 2008 22:26:42 +0000 Subject: [PATCH] some jit updates git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@240 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 2 + .../Core/Src/PowerPC/Jit64/JitRegCache.cpp | 2 + .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 24 +++++++++- .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 12 ++--- .../Core/Src/PowerPC/Jit64/Jit_Paired.cpp | 44 ++++++++++++++++++- Source/Core/Core/Src/PowerPC/PPCTables.cpp | 4 +- Source/Core/Core/Src/PowerPC/PowerPC.cpp | 7 ++- 7 files changed, 82 insertions(+), 13 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index d83a15861d..8cae388832 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -114,6 +114,8 @@ namespace Jit64 void reg_imm(UGeckoInstruction inst); + void ps_sel(UGeckoInstruction inst); + void ps_mr(UGeckoInstruction inst); void ps_sign(UGeckoInstruction inst); //aggregate void ps_arith(UGeckoInstruction inst); //aggregate void ps_mergeXX(UGeckoInstruction inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp index 3b68c10a51..22aa2ac808 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.cpp @@ -149,6 +149,8 @@ namespace Jit64 void RegCache::FlushR(X64Reg reg) { + if (reg >= NUMXREGS) + PanicAlert("Flushing non existent reg"); if (!xregs[reg].free) { StoreFromX64(xregs[reg].ppcReg); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index e5440bf829..c4181c8ecf 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -101,7 +101,9 @@ void lfs(UGeckoInstruction inst) void lfd(UGeckoInstruction inst) { INSTRUCTION_START; - DISABLE_32BIT; + if (!cpu_info.bSSSE3) { + DISABLE_32BIT; + } int d = inst.RD; int a = inst.RA; if (!a) @@ -117,9 +119,18 @@ void lfd(UGeckoInstruction inst) fpr.Lock(d); if (cpu_info.bSSSE3) { X64Reg xd = fpr.RX(d); +#ifdef _M_X64 MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); +#else + MOV(32, R(EAX), R(ABI_PARAM1)); + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset)); +#endif PSHUFB(xd, M((void *)bswapShuffle1x8Dupe)); } else { +#ifndef _M_X64 + PanicAlert("lfd - wtf"); +#endif MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); BSWAP(64, EAX); MOV(64, M(&temp64), R(EAX)); @@ -153,7 +164,7 @@ void stfd(UGeckoInstruction inst) AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK)); #endif if (cpu_info.bSSSE3) { - MOVAPS(XMM0, fpr.R(s)); + MOVAPD(XMM0, fpr.R(s)); PSHUFB(XMM0, M((void *)bswapShuffle1x8)); #ifdef _M_X64 MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0); @@ -227,6 +238,15 @@ void stfs(UGeckoInstruction inst) } +void stfsx(UGeckoInstruction inst) +{ + // We can take a shortcut here - it's not likely that a hardware access would use this instruction. + INSTRUCTION_START; + // TODO + Default(inst); return; +} + + void lfsx(UGeckoInstruction inst) { INSTRUCTION_START; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 35c4379108..00ea2d2d80 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -214,7 +214,7 @@ void psq_st(UGeckoInstruction inst) ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); if (update && offset) MOV(32, gpr.R(a), R(ABI_PARAM2)); - MOVAPS(XMM0, fpr.R(s)); + MOVAPD(XMM0, fpr.R(s)); MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale])); MULPD(XMM0, R(XMM1)); CVTPD2DQ(XMM0, R(XMM0)); @@ -247,7 +247,7 @@ void psq_st(UGeckoInstruction inst) ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); if (update) MOV(32, gpr.R(a), R(ABI_PARAM2)); - MOVAPS(XMM0, fpr.R(s)); + MOVAPD(XMM0, fpr.R(s)); MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale])); MULPD(XMM0, R(XMM1)); SHUFPD(XMM0, R(XMM0), 1); @@ -317,7 +317,7 @@ void psq_l(UGeckoInstruction inst) CVTPS2PD(r, M(&psTemp[0])); SHUFPD(r, R(r), 1); } - if (update) + if (update && offset != 0) ADD(32, gpr.R(inst.RA), Imm32(offset)); break; #else @@ -347,7 +347,7 @@ void psq_l(UGeckoInstruction inst) CVTPS2PD(r, M(&psTemp[0])); gpr.UnlockAllX(); } - if (update) + if (update && offset != 0) ADD(32, gpr.R(inst.RA), Imm32(offset)); break; #endif @@ -373,7 +373,7 @@ void psq_l(UGeckoInstruction inst) X64Reg r = fpr.R(inst.RS).GetSimpleReg(); MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale])); MULPD(r, R(XMM0)); - if (update) + if (update && offset != 0) ADD(32, gpr.R(inst.RA), Imm32(offset)); } break; @@ -399,7 +399,7 @@ void psq_l(UGeckoInstruction inst) MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale])); MULPD(r, R(XMM0)); SHUFPD(r, R(r), 1); - if (update) + if (update && offset != 0) ADD(32, gpr.R(inst.RA), Imm32(offset)); } break; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp index d171079f41..dce84fb784 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp @@ -30,7 +30,9 @@ // ps_madds0 // ps_muls0 // ps_madds1 - +// ps_sel +// cmppd, andpd, andnpd, or +// lfsx, ps_merge01 etc // #define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START @@ -46,6 +48,46 @@ namespace Jit64 const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0}; + const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0}; + + void ps_mr(UGeckoInstruction inst) + { + INSTRUCTION_START; + int d = inst.FD; + int b = inst.FB; + if (d == b) + return; + fpr.LoadToX64(d, false); + MOVAPD(fpr.RX(d), fpr.R(b)); + } + + void ps_sel(UGeckoInstruction inst) + { + INSTRUCTION_START; + Default(inst); + return; + + // GRR can't get this to work 100%. Getting artifacts in D.O.N. intro. + int d = inst.FD; + int a = inst.FA; + int b = inst.FB; + int c = inst.FC; + fpr.FlushLockX(XMM7); + fpr.FlushLockX(XMM6); + fpr.Lock(a, b, c, d); + fpr.LoadToX64(a, true, false); + fpr.LoadToX64(d, false, true); + // BLENDPD would have been nice... + MOVAPD(XMM7, fpr.R(a)); + CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111 + MOVAPD(XMM6, R(XMM7)); + ANDPD(XMM7, fpr.R(d)); + ANDNPD(XMM6, fpr.R(c)); + MOVAPD(fpr.RX(d), R(XMM7)); + ORPD(fpr.RX(d), R(XMM6)); + fpr.UnlockAll(); + fpr.UnlockAllX(); + } void ps_sign(UGeckoInstruction inst) { diff --git a/Source/Core/Core/Src/PowerPC/PPCTables.cpp b/Source/Core/Core/Src/PowerPC/PPCTables.cpp index 6dd9ceb221..bd1549630e 100644 --- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp @@ -213,7 +213,7 @@ GekkoOPTemplate table4[] = {136, CInterpreter::ps_nabs, Jit64::ps_sign, {"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, {264, CInterpreter::ps_abs, Jit64::ps_sign, {"ps_abs", OPTYPE_PS, FL_RC_BIT}}, {64, CInterpreter::ps_cmpu1, Jit64::Default, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, - {72, CInterpreter::ps_mr, Jit64::Default, {"ps_mr", OPTYPE_PS, FL_RC_BIT}}, + {72, CInterpreter::ps_mr, Jit64::ps_mr, {"ps_mr", OPTYPE_PS, FL_RC_BIT}}, {96, CInterpreter::ps_cmpo1, Jit64::Default, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, {528, CInterpreter::ps_merge00, Jit64::ps_mergeXX, {"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, {560, CInterpreter::ps_merge01, Jit64::ps_mergeXX, {"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, @@ -234,7 +234,7 @@ GekkoOPTemplate table4_2[] = {18, CInterpreter::ps_div, Jit64::ps_arith, {"ps_div", OPTYPE_PS, 0, 16}}, {20, CInterpreter::ps_sub, Jit64::ps_arith, {"ps_sub", OPTYPE_PS, 0}}, {21, CInterpreter::ps_add, Jit64::ps_arith, {"ps_add", OPTYPE_PS, 0}}, - {23, CInterpreter::ps_sel, Jit64::Default, {"ps_sel", OPTYPE_PS, 0}}, + {23, CInterpreter::ps_sel, Jit64::ps_sel, {"ps_sel", OPTYPE_PS, 0}}, {24, CInterpreter::ps_res, Jit64::Default, {"ps_res", OPTYPE_PS, 0}}, {25, CInterpreter::ps_mul, Jit64::ps_arith, {"ps_mul", OPTYPE_PS, 0}}, {26, CInterpreter::ps_rsqrte, Jit64::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, 0}}, diff --git a/Source/Core/Core/Src/PowerPC/PowerPC.cpp b/Source/Core/Core/Src/PowerPC/PowerPC.cpp index 74611d6791..c1f7d3af6f 100644 --- a/Source/Core/Core/Src/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/Src/PowerPC/PowerPC.cpp @@ -29,14 +29,17 @@ namespace PowerPC { - // align to cache line - GC_ALIGNED64_DECL(PowerPCState ppcState); + PowerPCState GC_ALIGNED16(ppcState); ICPUCore* m_pCore = NULL; volatile CPUState state = CPU_STEPPING; void ResetRegisters() { + if (((u64)&ppcState & 0xf) != 0) { + PanicAlert("The compiler misaligned ppcState in memory. Likely to cause crashes."); + } + for (int i = 0; i < 32; i++) { ppcState.gpr[i] = 0;