Latest round of JIT changes. Probably broke something as usual.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@170 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-10 18:24:01 +00:00
parent f9019d0ad3
commit 8cfd8aa309
9 changed files with 83 additions and 35 deletions

View File

@ -83,7 +83,7 @@ void CInterpreter::lfdx(UGeckoInstruction _inst)
void CInterpreter::lfs(UGeckoInstruction _inst) void CInterpreter::lfs(UGeckoInstruction _inst)
{ {
u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst)); u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst));
rPS0(_inst.FD) = *(float*)&uTemp; rPS0(_inst.FD) = *(float*)&uTemp;
rPS1(_inst.FD) = rPS0(_inst.FD); rPS1(_inst.FD) = rPS0(_inst.FD);
} }
@ -629,6 +629,8 @@ void CInterpreter::sync(UGeckoInstruction _inst)
void CInterpreter::tlbia(UGeckoInstruction _inst) void CInterpreter::tlbia(UGeckoInstruction _inst)
{ {
// Gekko does not support this instruction.
PanicAlert("The GC CPU does not support tlbia");
// invalidate the whole TLB // invalidate the whole TLB
//MessageBox(0,"TLBIA","TLBIA",0); //MessageBox(0,"TLBIA","TLBIA",0);
} }

View File

@ -642,6 +642,8 @@ void CInterpreter::ps_merge11(UGeckoInstruction _inst)
void void
CInterpreter::dcbz_l(UGeckoInstruction _inst) CInterpreter::dcbz_l(UGeckoInstruction _inst)
{ {
// This is supposed to allocate a cache line in the locked cache. Not entirely sure how
// this is visible to the rest of the world. For now, we ignore it.
/* /*
addr_t ea = Helper_Get_EA(_inst); addr_t ea = Helper_Get_EA(_inst);

View File

@ -221,10 +221,18 @@ void CInterpreter::mtsrin(UGeckoInstruction _inst)
PowerPC::ppcState.sr[index] = m_GPR[_inst.RS]; PowerPC::ppcState.sr[index] = m_GPR[_inst.RS];
} }
// mftb: writes a time-base value into GPR[RD].
// The two 5-bit halves of the instruction's TBR field are exchanged to form
// the register index; index 268 selects TL and index 269 selects TU
// (presumably the lower/upper time-base words -- confirm against the TL/TU
// definitions). Any other index only hits the debug assertion and leaves
// GPR[RD] unmodified.
void CInterpreter::mftb(UGeckoInstruction _inst)
{
// Swap the upper and lower 5-bit groups of TBR.
int iIndex = (_inst.TBR >> 5) | ((_inst.TBR&0x1F) << 5);
if (iIndex == 268) m_GPR[_inst.RD] = TL;
else if (iIndex == 269) m_GPR[_inst.RD] = TU;
// Unexpected index: flagged via _dbg_assert_ only.
else _dbg_assert_(GEKKO,0);
}
void CInterpreter::mfspr(UGeckoInstruction _inst) void CInterpreter::mfspr(UGeckoInstruction _inst)
{ {
u32 iIndex = ((_inst.SPR & 0x1F) << 5) + ((_inst.SPR >> 5)&0x1F); u32 iIndex = ((_inst.SPR & 0x1F) << 5) + ((_inst.SPR >> 5)&0x1F);
m_GPR[_inst.RD] = rSPR(iIndex);
//TODO - check processor privilege level - many of these require privilege //TODO - check processor privilege level - many of these require privilege
//XER LR CTR are the only ones available in user mode, time base can be read too. //XER LR CTR are the only ones available in user mode, time base can be read too.
@ -241,20 +249,17 @@ void CInterpreter::mfspr(UGeckoInstruction _inst)
//(or if it's full, not sure) //(or if it's full, not sure)
//MessageBox(NULL, "Read from SPR_WPAR", "????", MB_OK); //MessageBox(NULL, "Read from SPR_WPAR", "????", MB_OK);
//Paper Mario reads here, this should be investigated ... TODO(ector) //Paper Mario reads here, this should be investigated ... TODO(ector)
bool wpar_empty = false;
if (!wpar_empty)
rSPR(iIndex) |= 1; // BNE = buffer not empty
else
rSPR(iIndex) &= ~1;
} }
break; break;
} }
m_GPR[_inst.RD] = rSPR(iIndex);
} }
// mftb: writes a time-base value into GPR[RD].
// The two 5-bit halves of the instruction's TBR field are exchanged to form
// the register index; index 268 selects TL and index 269 selects TU
// (presumably the lower/upper time-base words -- confirm against the TL/TU
// definitions). Any other index only hits the debug assertion and leaves
// GPR[RD] unmodified.
void CInterpreter::mftb(UGeckoInstruction _inst)
{
// Swap the upper and lower 5-bit groups of TBR.
int iIndex = (_inst.TBR >> 5) | ((_inst.TBR&0x1F) << 5);
if (iIndex == 268) m_GPR[_inst.RD] = TL;
else if (iIndex == 269) m_GPR[_inst.RD] = TU;
// Unexpected index: flagged via _dbg_assert_ only.
else _dbg_assert_(GEKKO,0);
}
void CInterpreter::mtspr(UGeckoInstruction _inst) void CInterpreter::mtspr(UGeckoInstruction _inst)
{ {
u32 iIndex = (_inst.SPRU << 5) | (_inst.SPRL & 0x1F); u32 iIndex = (_inst.SPRU << 5) | (_inst.SPRL & 0x1F);
@ -285,14 +290,20 @@ void CInterpreter::mtspr(UGeckoInstruction _inst)
case SPR_HID2: // HID2 case SPR_HID2: // HID2
{ {
UReg_HID2 old_hid2;
old_hid2.Hex = oldValue;
if (HID2.PSE == 0) if (HID2.PSE == 0)
PanicAlert("WARNING: PSE in HID2 isnt set"); PanicAlert("WARNING: PSE in HID2 isnt set");
bool WriteGatherPipeEnable = (bool)HID2.WPE; //TODO? bool WriteGatherPipeEnable = (bool)HID2.WPE; //TODO?
bool LockedCacheEnable = (bool)HID2.LCE; bool LockedCacheEnable = (bool)HID2.LCE;
int DMAQueueLength = HID2.DMAQL; // Ignore - our DMA:s are instantaneous int DMAQueueLength = HID2.DMAQL; // Ignore - our DMA:s are instantaneous
bool PairedSingleEnable = HID2.PSE;
bool QuantizeEnable = HID2.LSQE;
//TODO(ector): Protect LC memory if LCE is false. //TODO(ector): Protect LC memory if LCE is false.
//TODO(ector): Honor PSE. //TODO(ector): Honor PSE.
// //
//_assert_msg_(GEKKO, WriteGatherPipeEnable, "Write gather pipe not enabled!"); //_assert_msg_(GEKKO, WriteGatherPipeEnable, "Write gather pipe not enabled!");
//if ((HID2.PSE == 0)) //if ((HID2.PSE == 0))
@ -301,11 +312,13 @@ void CInterpreter::mtspr(UGeckoInstruction _inst)
break; break;
case SPR_WPAR: case SPR_WPAR:
_assert_msg_(GEKKO, m_GPR[_inst.RD] == 0x0C008000,"Gather pipe @ %08x", ); _assert_msg_(GEKKO, m_GPR[_inst.RD] == 0x0C008000, "Gather pipe @ %08x");
GPFifo::ResetGatherPipe(); GPFifo::ResetGatherPipe();
break; break;
case SPR_DMAL: //locked cache DMA case SPR_DMAL:
// Locked cache <-> memory DMA.
// Completely faked: we ignore the fact that DMAs take time.
if (DMAL.DMA_T) if (DMAL.DMA_T)
{ {
u32 dwMemAddress = DMAU.MEM_ADDR << 5; u32 dwMemAddress = DMAU.MEM_ADDR << 5;

View File

@ -14,8 +14,10 @@
// Official SVN repository and contact information can be found at // Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include <map> #include <map>
#include "Common.h"
#include "x64Emitter.h" #include "x64Emitter.h"
#include "ABI.h" #include "ABI.h"
#include "../../HLE/HLE.h" #include "../../HLE/HLE.h"
@ -235,7 +237,7 @@ namespace Jit64
if (js.isLastInstruction) if (js.isLastInstruction)
{ {
MOV(32, M(&PC), Imm32(js.compilerPC)); MOV(32, M(&PC), Imm32(js.compilerPC));
MOV(32, M(&NPC), Imm32(js.compilerPC+4)); MOV(32, M(&NPC), Imm32(js.compilerPC + 4));
} }
CInterpreter::_interpreterInstruction instr = GetInterpreterOp(_inst); CInterpreter::_interpreterInstruction instr = GetInterpreterOp(_inst);
ABI_CallFunctionC((void*)instr, _inst.hex); ABI_CallFunctionC((void*)instr, _inst.hex);

View File

@ -234,6 +234,7 @@ namespace Jit64
bool FPURegCache::IsXRegVolatile(X64Reg reg) const bool FPURegCache::IsXRegVolatile(X64Reg reg) const
{ {
#ifdef _WIN32 #ifdef _WIN32
// return true;
if (reg < 6) if (reg < 6)
return true; return true;
else else
@ -263,7 +264,7 @@ namespace Jit64
R12, R13, R14, R8, R9, R10, R11, RSI, RDI //, RCX R12, R13, R14, R8, R9, R10, R11, RSI, RDI //, RCX
#endif #endif
#elif _M_IX86 #elif _M_IX86
ESI, EDI, EBX, EBP, EDX //, RCX ESI, EDI, EBX, EBP, EDX
#endif #endif
}; };
count = sizeof(allocationOrder) / sizeof(const int); count = sizeof(allocationOrder) / sizeof(const int);

View File

@ -78,6 +78,14 @@ namespace Jit64
virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0; virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0;
void FlushR(X64Reg reg); void FlushR(X64Reg reg);
void FlushR(X64Reg reg, X64Reg reg2) {FlushR(reg); FlushR(reg2);} void FlushR(X64Reg reg, X64Reg reg2) {FlushR(reg); FlushR(reg2);}
// Convenience helper: calls FlushR() on the given host register and then
// LockX() on it, in that order.
void FlushLockX(X64Reg reg) {
FlushR(reg);
LockX(reg);
}
// Two-register variant: both registers are passed to FlushR() first, and only
// afterwards are both passed to LockX().
void FlushLockX(X64Reg reg1, X64Reg reg2) {
FlushR(reg1); FlushR(reg2);
LockX(reg1); LockX(reg2);
}
virtual void Flush(FlushMode mode); virtual void Flush(FlushMode mode);
virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);} virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);}
void End() {Flush(FLUSH_ALL);} void End() {Flush(FLUSH_ALL);}

View File

@ -120,6 +120,8 @@ namespace Jit64
void lbzx(UGeckoInstruction inst) void lbzx(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (b == d || a == d) if (b == d || a == d)
@ -183,12 +185,14 @@ namespace Jit64
// Safe and boring // Safe and boring
gpr.Flush(FLUSH_VOLATILE); gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE); fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a); gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
gpr.LoadToX64(d, false, true); gpr.LoadToX64(d, false, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
return; return;
} }
@ -219,12 +223,14 @@ namespace Jit64
// Safe and boring // Safe and boring
gpr.Flush(FLUSH_VOLATILE); gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE); fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a); gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true); SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
gpr.LoadToX64(d, false, true); gpr.LoadToX64(d, d == a, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
return; return;
} }
@ -232,14 +238,19 @@ namespace Jit64
void dcbz(UGeckoInstruction inst) void dcbz(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
DISABLE_32BIT;
MOV(32, R(EAX), gpr.R(inst.RB)); MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA) if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA)); ADD(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(~31)); AND(32, R(EAX), Imm32(~31));
XORPD(XMM0, R(XMM0)); XORPD(XMM0, R(XMM0));
#ifdef _M_X64
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
#else
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
#endif
} }
#ifndef _WIN32 #ifndef _WIN32
@ -262,7 +273,7 @@ namespace Jit64
if (a || update) if (a || update)
{ {
gpr.Flush(FLUSH_VOLATILE); gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
int accessSize; int accessSize;
switch (inst.OPCD & ~1) switch (inst.OPCD & ~1)
{ {
@ -340,6 +351,7 @@ namespace Jit64
*/ */
//Still here? Do regular path. //Still here? Do regular path.
gpr.Lock(s, a); gpr.Lock(s, a);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
MOV(32, R(ABI_PARAM2), gpr.R(a)); MOV(32, R(ABI_PARAM2), gpr.R(a));
MOV(32, R(ABI_PARAM1), gpr.R(s)); MOV(32, R(ABI_PARAM1), gpr.R(s));
if (offset) if (offset)
@ -370,10 +382,11 @@ namespace Jit64
{ {
case 32: ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2); break; case 32: ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2); break;
case 16: ABI_CallFunctionRR((void *)&Memory::Write_U16, ABI_PARAM1, ABI_PARAM2); break; case 16: ABI_CallFunctionRR((void *)&Memory::Write_U16, ABI_PARAM1, ABI_PARAM2); break;
case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, ABI_PARAM1, ABI_PARAM2); break; case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, ABI_PARAM1, ABI_PARAM2); break;
} }
SetJumpTarget(arg2); SetJumpTarget(arg2);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
} }
else else
{ {

View File

@ -47,6 +47,12 @@
namespace Jit64 namespace Jit64
{ {
// pshufb todo: MOVQ
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
static u64 GC_ALIGNED16(temp64); static u64 GC_ALIGNED16(temp64);
static u32 GC_ALIGNED16(temp32); static u32 GC_ALIGNED16(temp32);
@ -67,7 +73,12 @@ void lfs(UGeckoInstruction inst)
} }
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.Flush(FLUSH_VOLATILE); if (jo.noAssumeFPLoadFromMem) {
// We might call a function.
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1);
}
gpr.Lock(d, a); gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
@ -86,6 +97,7 @@ void lfs(UGeckoInstruction inst)
CVTSS2SD(fpr.RX(d), M(&temp32)); CVTSS2SD(fpr.RX(d), M(&temp32));
MOVDDUP(fpr.RX(d), fpr.R(d)); MOVDDUP(fpr.RX(d), fpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -127,6 +139,7 @@ void stfd(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
gpr.FlushLockX(ABI_PARAM1);
fpr.LoadToX64(s, true, false); fpr.LoadToX64(s, true, false);
MOVSD(M(&temp64), fpr.RX(s)); MOVSD(M(&temp64), fpr.RX(s));
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
@ -134,25 +147,24 @@ void stfd(UGeckoInstruction inst)
BSWAP(64, EAX); BSWAP(64, EAX);
MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX)); MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll(); fpr.UnlockAll();
} }
void stfs(UGeckoInstruction inst) void stfs(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
DISABLE_32BIT;
bool update = inst.OPCD & 1; bool update = inst.OPCD & 1;
int s = inst.RS; int s = inst.RS;
int a = inst.RA; int a = inst.RA;
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
if (a && !update) if (a && !update)
{ {
gpr.Flush(FLUSH_VOLATILE); gpr.Flush(FLUSH_VOLATILE);
// fpr.Flush(FLUSH_VOLATILE); fpr.Flush(FLUSH_VOLATILE);
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
gpr.LockX(ABI_PARAM1, ABI_PARAM2); gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
MOV(32, R(ABI_PARAM2), gpr.R(a)); MOV(32, R(ABI_PARAM2), gpr.R(a));
if (update && offset) if (update && offset)
{ {
@ -161,7 +173,6 @@ void stfs(UGeckoInstruction inst)
CVTSD2SS(XMM0, fpr.R(s)); CVTSD2SS(XMM0, fpr.R(s));
MOVSS(M(&temp32), XMM0); MOVSS(M(&temp32), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp32)); MOV(32, R(ABI_PARAM1), M(&temp32));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, offset); SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, offset);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -176,7 +187,6 @@ void stfs(UGeckoInstruction inst)
void lfsx(UGeckoInstruction inst) void lfsx(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
DISABLE_32BIT;
fpr.Lock(inst.RS); fpr.Lock(inst.RS);
fpr.LoadToX64(inst.RS, false, true); fpr.LoadToX64(inst.RS, false, true);
MOV(32, R(EAX), gpr.R(inst.RB)); MOV(32, R(EAX), gpr.R(inst.RB));

View File

@ -125,6 +125,8 @@ void psq_st(UGeckoInstruction inst)
{ {
DISABLE_32BIT; DISABLE_32BIT;
gpr.Flush(FLUSH_VOLATILE); gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
if (update) if (update)
@ -147,14 +149,12 @@ void psq_st(UGeckoInstruction inst)
CALL((void *)&WriteDual32); CALL((void *)&WriteDual32);
SetJumpTarget(arg2); SetJumpTarget(arg2);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll(); fpr.UnlockAll();
} }
else if (stType == QUANTIZE_U8) else if (stType == QUANTIZE_U8)
{ {
gpr.FlushR(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.FlushR(ABI_PARAM2);
gpr.LockX(ABI_PARAM1);
gpr.LockX(ABI_PARAM2);
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
if (update) if (update)
@ -187,10 +187,7 @@ void psq_st(UGeckoInstruction inst)
} }
else if (stType == QUANTIZE_S16) else if (stType == QUANTIZE_S16)
{ {
gpr.FlushR(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.FlushR(ABI_PARAM2);
gpr.LockX(ABI_PARAM1);
gpr.LockX(ABI_PARAM2);
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
if (update) if (update)