some jit updates

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@240 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-17 22:26:42 +00:00
parent 4faa685def
commit ba684cabcd
7 changed files with 82 additions and 13 deletions

View File

@ -114,6 +114,8 @@ namespace Jit64
void reg_imm(UGeckoInstruction inst); void reg_imm(UGeckoInstruction inst);
void ps_sel(UGeckoInstruction inst);
void ps_mr(UGeckoInstruction inst);
void ps_sign(UGeckoInstruction inst); //aggregate void ps_sign(UGeckoInstruction inst); //aggregate
void ps_arith(UGeckoInstruction inst); //aggregate void ps_arith(UGeckoInstruction inst); //aggregate
void ps_mergeXX(UGeckoInstruction inst); void ps_mergeXX(UGeckoInstruction inst);

View File

@ -149,6 +149,8 @@ namespace Jit64
void RegCache::FlushR(X64Reg reg) void RegCache::FlushR(X64Reg reg)
{ {
if (reg >= NUMXREGS)
PanicAlert("Flushing non existent reg");
if (!xregs[reg].free) if (!xregs[reg].free)
{ {
StoreFromX64(xregs[reg].ppcReg); StoreFromX64(xregs[reg].ppcReg);

View File

@ -101,7 +101,9 @@ void lfs(UGeckoInstruction inst)
void lfd(UGeckoInstruction inst) void lfd(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
DISABLE_32BIT; if (!cpu_info.bSSSE3) {
DISABLE_32BIT;
}
int d = inst.RD; int d = inst.RD;
int a = inst.RA; int a = inst.RA;
if (!a) if (!a)
@ -117,9 +119,18 @@ void lfd(UGeckoInstruction inst)
fpr.Lock(d); fpr.Lock(d);
if (cpu_info.bSSSE3) { if (cpu_info.bSSSE3) {
X64Reg xd = fpr.RX(d); X64Reg xd = fpr.RX(d);
#ifdef _M_X64
MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
#else
MOV(32, R(EAX), R(ABI_PARAM1));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
#endif
PSHUFB(xd, M((void *)bswapShuffle1x8Dupe)); PSHUFB(xd, M((void *)bswapShuffle1x8Dupe));
} else { } else {
#ifndef _M_X64
PanicAlert("lfd - wtf");
#endif
MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
BSWAP(64, EAX); BSWAP(64, EAX);
MOV(64, M(&temp64), R(EAX)); MOV(64, M(&temp64), R(EAX));
@ -153,7 +164,7 @@ void stfd(UGeckoInstruction inst)
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
#endif #endif
if (cpu_info.bSSSE3) { if (cpu_info.bSSSE3) {
MOVAPS(XMM0, fpr.R(s)); MOVAPD(XMM0, fpr.R(s));
PSHUFB(XMM0, M((void *)bswapShuffle1x8)); PSHUFB(XMM0, M((void *)bswapShuffle1x8));
#ifdef _M_X64 #ifdef _M_X64
MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0); MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0);
@ -227,6 +238,15 @@ void stfs(UGeckoInstruction inst)
} }
// JIT handler for stfsx. No native code is emitted yet: after the
// INSTRUCTION_START hook the instruction is handed to Default(inst).
void stfsx(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	// TODO: emit native code for this store. The original author's note is that
	// a shortcut should be acceptable here, since a hardware access is not
	// likely to use this instruction.
	Default(inst);
}
void lfsx(UGeckoInstruction inst) void lfsx(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;

View File

@ -214,7 +214,7 @@ void psq_st(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update && offset) if (update && offset)
MOV(32, gpr.R(a), R(ABI_PARAM2)); MOV(32, gpr.R(a), R(ABI_PARAM2));
MOVAPS(XMM0, fpr.R(s)); MOVAPD(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale])); MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1)); MULPD(XMM0, R(XMM1));
CVTPD2DQ(XMM0, R(XMM0)); CVTPD2DQ(XMM0, R(XMM0));
@ -247,7 +247,7 @@ void psq_st(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update) if (update)
MOV(32, gpr.R(a), R(ABI_PARAM2)); MOV(32, gpr.R(a), R(ABI_PARAM2));
MOVAPS(XMM0, fpr.R(s)); MOVAPD(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale])); MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1)); MULPD(XMM0, R(XMM1));
SHUFPD(XMM0, R(XMM0), 1); SHUFPD(XMM0, R(XMM0), 1);
@ -317,7 +317,7 @@ void psq_l(UGeckoInstruction inst)
CVTPS2PD(r, M(&psTemp[0])); CVTPS2PD(r, M(&psTemp[0]));
SHUFPD(r, R(r), 1); SHUFPD(r, R(r), 1);
} }
if (update) if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset)); ADD(32, gpr.R(inst.RA), Imm32(offset));
break; break;
#else #else
@ -347,7 +347,7 @@ void psq_l(UGeckoInstruction inst)
CVTPS2PD(r, M(&psTemp[0])); CVTPS2PD(r, M(&psTemp[0]));
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
if (update) if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset)); ADD(32, gpr.R(inst.RA), Imm32(offset));
break; break;
#endif #endif
@ -373,7 +373,7 @@ void psq_l(UGeckoInstruction inst)
X64Reg r = fpr.R(inst.RS).GetSimpleReg(); X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale])); MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0)); MULPD(r, R(XMM0));
if (update) if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset)); ADD(32, gpr.R(inst.RA), Imm32(offset));
} }
break; break;
@ -399,7 +399,7 @@ void psq_l(UGeckoInstruction inst)
MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale])); MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0)); MULPD(r, R(XMM0));
SHUFPD(r, R(r), 1); SHUFPD(r, R(r), 1);
if (update) if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset)); ADD(32, gpr.R(inst.RA), Imm32(offset));
} }
break; break;

View File

@ -30,7 +30,9 @@
// ps_madds0 // ps_madds0
// ps_muls0 // ps_muls0
// ps_madds1 // ps_madds1
// ps_sel
// cmppd, andpd, andnpd, or
// lfsx, ps_merge01 etc
// #define INSTRUCTION_START Default(inst); return; // #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START #define INSTRUCTION_START
@ -46,6 +48,46 @@ namespace Jit64
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0}; const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0};
// JIT handler for ps_mr: copies paired-single register FB into FD with a
// single MOVAPD. Nothing is emitted when source and destination are the
// same register.
// NOTE(review): the opcode table marks ps_mr with FL_RC_BIT, but the record
// bit is not examined here - confirm whether that case needs a fallback.
void ps_mr(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	const int dst = inst.FD;
	const int src = inst.FB;
	if (dst != src)
	{
		// Bind dst to a host register (second argument false, as in the
		// original call), then overwrite the whole register from src.
		fpr.LoadToX64(dst, false);
		MOVAPD(fpr.RX(dst), fpr.R(src));
	}
}
// JIT handler for ps_sel (per lane: FD = (FA >= 0.0) ? FC : FB).
// The handler currently always defers to Default(inst); the native sequence
// after the early return is unreachable and is kept as work in progress.
void ps_sel(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	Default(inst);
	return;

	// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
	// NOTE(review): two defects in the disabled sequence are corrected below,
	// but it has not been re-validated, so the fallback above stays in place:
	//  - the "a < 0" half of the blend was ANDed with fpr.R(d) although the
	//    instruction selects operand b in that case (b was locked, never read);
	//  - d was bound via LoadToX64(d, false, true) before b and c were read;
	//    presumably the 'false' means "do not load the current value", so a d
	//    that aliases b or c could be read uninitialised - confirm against
	//    RegCache::LoadToX64.
	int d = inst.FD;
	int a = inst.FA;
	int b = inst.FB;
	int c = inst.FC;
	fpr.FlushLockX(XMM7);
	fpr.FlushLockX(XMM6);
	fpr.Lock(a, b, c, d);
	fpr.LoadToX64(a, true, false);
	// BLENDPD would have been nice...
	MOVAPD(XMM7, fpr.R(a));
	// NOTE(review): an unordered compare (NaN in a) yields an all-zero mask
	// with the less-than predicate, i.e. selects c - verify this matches
	// CInterpreter::ps_sel before enabling.
	CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
	MOVAPD(XMM6, R(XMM7));
	ANDPD(XMM7, fpr.R(b));   // lanes where a < 0 take b
	ANDNPD(XMM6, fpr.R(c));  // remaining lanes take c
	// Bind the destination only after every source operand has been consumed.
	fpr.LoadToX64(d, false, true);
	MOVAPD(fpr.RX(d), R(XMM7));
	ORPD(fpr.RX(d), R(XMM6));
	fpr.UnlockAll();
	fpr.UnlockAllX();
}
void ps_sign(UGeckoInstruction inst) void ps_sign(UGeckoInstruction inst)
{ {

View File

@ -213,7 +213,7 @@ GekkoOPTemplate table4[] =
{136, CInterpreter::ps_nabs, Jit64::ps_sign, {"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, {136, CInterpreter::ps_nabs, Jit64::ps_sign, {"ps_nabs", OPTYPE_PS, FL_RC_BIT}},
{264, CInterpreter::ps_abs, Jit64::ps_sign, {"ps_abs", OPTYPE_PS, FL_RC_BIT}}, {264, CInterpreter::ps_abs, Jit64::ps_sign, {"ps_abs", OPTYPE_PS, FL_RC_BIT}},
{64, CInterpreter::ps_cmpu1, Jit64::Default, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, {64, CInterpreter::ps_cmpu1, Jit64::Default, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}},
{72, CInterpreter::ps_mr, Jit64::Default, {"ps_mr", OPTYPE_PS, FL_RC_BIT}}, {72, CInterpreter::ps_mr, Jit64::ps_mr, {"ps_mr", OPTYPE_PS, FL_RC_BIT}},
{96, CInterpreter::ps_cmpo1, Jit64::Default, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, {96, CInterpreter::ps_cmpo1, Jit64::Default, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}},
{528, CInterpreter::ps_merge00, Jit64::ps_mergeXX, {"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, {528, CInterpreter::ps_merge00, Jit64::ps_mergeXX, {"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
{560, CInterpreter::ps_merge01, Jit64::ps_mergeXX, {"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, {560, CInterpreter::ps_merge01, Jit64::ps_mergeXX, {"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
@ -234,7 +234,7 @@ GekkoOPTemplate table4_2[] =
{18, CInterpreter::ps_div, Jit64::ps_arith, {"ps_div", OPTYPE_PS, 0, 16}}, {18, CInterpreter::ps_div, Jit64::ps_arith, {"ps_div", OPTYPE_PS, 0, 16}},
{20, CInterpreter::ps_sub, Jit64::ps_arith, {"ps_sub", OPTYPE_PS, 0}}, {20, CInterpreter::ps_sub, Jit64::ps_arith, {"ps_sub", OPTYPE_PS, 0}},
{21, CInterpreter::ps_add, Jit64::ps_arith, {"ps_add", OPTYPE_PS, 0}}, {21, CInterpreter::ps_add, Jit64::ps_arith, {"ps_add", OPTYPE_PS, 0}},
{23, CInterpreter::ps_sel, Jit64::Default, {"ps_sel", OPTYPE_PS, 0}}, {23, CInterpreter::ps_sel, Jit64::ps_sel, {"ps_sel", OPTYPE_PS, 0}},
{24, CInterpreter::ps_res, Jit64::Default, {"ps_res", OPTYPE_PS, 0}}, {24, CInterpreter::ps_res, Jit64::Default, {"ps_res", OPTYPE_PS, 0}},
{25, CInterpreter::ps_mul, Jit64::ps_arith, {"ps_mul", OPTYPE_PS, 0}}, {25, CInterpreter::ps_mul, Jit64::ps_arith, {"ps_mul", OPTYPE_PS, 0}},
{26, CInterpreter::ps_rsqrte, Jit64::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, 0}}, {26, CInterpreter::ps_rsqrte, Jit64::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, 0}},

View File

@ -29,14 +29,17 @@
namespace PowerPC namespace PowerPC
{ {
// align to cache line PowerPCState GC_ALIGNED16(ppcState);
GC_ALIGNED64_DECL(PowerPCState ppcState);
ICPUCore* m_pCore = NULL; ICPUCore* m_pCore = NULL;
volatile CPUState state = CPU_STEPPING; volatile CPUState state = CPU_STEPPING;
void ResetRegisters() void ResetRegisters()
{ {
if (((u64)&ppcState & 0xf) != 0) {
PanicAlert("The compiler misaligned ppcState in memory. Likely to cause crashes.");
}
for (int i = 0; i < 32; i++) for (int i = 0; i < 32; i++)
{ {
ppcState.gpr[i] = 0; ppcState.gpr[i] = 0;