First steps towards 64-bit support in JITIL. Not yet working. Yes, those crazy (u32)(u64)ptr casts make sense when we know the pointer is below 4 GB - then it's OK.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2164 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-02-08 22:20:35 +00:00
parent f5f99e8f04
commit 935eb226ca
3 changed files with 2135 additions and 2112 deletions

View File

@ -180,8 +180,7 @@ InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned
}
#if 0
InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2,
InstLoc Op3) {
InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, InstLoc Op3) {
InstLoc curIndex = &InstList[InstList.size()];
unsigned backOp1 = curIndex - 1 - Op1;
if (backOp1 >= 254) {
@ -204,8 +203,7 @@ InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2,
backOp1++;
curIndex++;
}
InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) |
(backOp3 << 24));
InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) | (backOp3 << 24));
return curIndex;
}
#endif
@ -624,7 +622,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned
// Emits an integer constant into the IR: pushes onto InstList a word that ORs
// the CInt32 opcode with the current ConstList size shifted left by 8 (i.e. the
// index `value` is about to receive), then appends `value` to ConstList.
// Returns the InstList slot address that was computed before the push.
// This listing shows the InstList push in two forms: without and with an
// explicit (unsigned int) cast on ConstList.size().
InstLoc IRBuilder::EmitIntConst(unsigned value) {
// Address of the slot the new instruction is about to occupy (taken before the push).
InstLoc curIndex = &InstList[InstList.size()];
InstList.push_back(CInt32 | (ConstList.size() << 8));
// NOTE(review): the cast presumably makes the size_t -> 32-bit narrowing explicit
// for 64-bit builds — confirm against the container's size type.
InstList.push_back(CInt32 | ((unsigned int)ConstList.size() << 8));
ConstList.push_back(value);
return curIndex;
}
@ -751,10 +749,27 @@ static void fregSpill(RegInfo& RI, X64Reg reg) {
}
// ECX is scratch, so we don't allocate it
static X64Reg RegAllocOrder[] = {EDI, ESI, EBP, EBX, EDX, EAX};
static unsigned RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg);
static X64Reg FRegAllocOrder[] = {XMM2, XMM3, XMM4, XMM5, XMM6, XMM7};
static unsigned FRegAllocSize = sizeof(FRegAllocOrder) / sizeof(X64Reg);
#ifdef _M_X64
// 64-bit - calling conventions differ between linux & windows, so...
#ifdef _WIN32
static const X64Reg RegAllocOrder[] = {RSI, RDI, R12, R13, R14, R8, R9, R10, R11};
#else
static const X64Reg RegAllocOrder[] = {RBP, R12, R13, R14, R8, R9, R10, R11};
#endif
static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg);
static const X64Reg FRegAllocOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
static const int FRegAllocSize = sizeof(FRegAllocOrder) / sizeof(X64Reg);
#else
// 32-bit
static const X64Reg RegAllocOrder[] = {EDI, ESI, EBP, EBX, EDX, EAX};
static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg);
static const X64Reg FRegAllocOrder[] = {XMM2, XMM3, XMM4, XMM5, XMM6, XMM7};
static const int FRegAllocSize = sizeof(FRegAllocOrder) / sizeof(X64Reg);
#endif
static X64Reg regFindFreeReg(RegInfo& RI) {
for (unsigned i = 0; i < RegAllocSize; i++)
@ -957,12 +972,12 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
if (dest)
*dest = regFindFreeReg(RI);
if (Profiled)
return M((void*)((u32)Memory::base + (addr & Memory::MEMVIEW32_MASK)));
return M((void*)((u8*)Memory::base + (addr & Memory::MEMVIEW32_MASK)));
return M((void*)addr);
#else
// 64-bit
if (Profiled)
return M((void*)((u32)Memory::base + addr));
return M((void*)((u8*)Memory::base + addr));
return M((void*)addr);
#endif
}
@ -1094,7 +1109,7 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
RI.Jit->BSWAP(Size, ECX);
}
RI.Jit->MOV(32, R(EAX), M(&GPFifo::m_gatherPipeCount));
RI.Jit->MOV(Size, MDisp(EAX, (u32)GPFifo::m_gatherPipe), R(ECX));
RI.Jit->MOV(Size, MDisp(EAX, (u32)(u64)GPFifo::m_gatherPipe), R(ECX));
RI.Jit->ADD(32, R(EAX), Imm8(Size >> 3));
RI.Jit->MOV(32, M(&GPFifo::m_gatherPipeCount), R(EAX));
RI.Jit->js.fifoBytesThisBlock += Size >> 3;
@ -1120,8 +1135,7 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
regClearInst(RI, getOp1(I));
}
static void regEmitShiftInst(RegInfo& RI, InstLoc I,
void (Jit64::*op)(int, OpArg, OpArg))
static void regEmitShiftInst(RegInfo& RI, InstLoc I, void (Jit64::*op)(int, OpArg, OpArg))
{
X64Reg reg = regBinLHSReg(RI, I);
if (isImm(*getOp2(I))) {
@ -1136,8 +1150,7 @@ static void regEmitShiftInst(RegInfo& RI, InstLoc I,
regNormalRegClear(RI, I);
}
static void regStoreInstToConstLoc(RegInfo& RI, unsigned width, InstLoc I,
void* loc) {
static void regStoreInstToConstLoc(RegInfo& RI, unsigned width, InstLoc I, void* loc) {
if (width != 32) {
PanicAlert("Not implemented!");
return;
@ -1198,7 +1211,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
RI.MakeProfile = false;//!RI.UseProfile;
// Pass to compute liveness
ibuild->StartBackPass();
for (unsigned int index = RI.IInfo.size() - 1; index != -1U; --index) {
for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index) {
InstLoc I = ibuild->ReadBackward();
unsigned int op = getOpcode(*I);
bool thisUsed = regReadUse(RI, I) ? true : false;
@ -1678,13 +1691,17 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
regSpill(RI, EAX);
regSpill(RI, EDX);
X64Reg reg = fregFindFreeReg(RI);
unsigned quantreg = *I >> 16;
unsigned int quantreg = *I >> 16;
Jit->MOVZX(32, 16, EAX, M(((char *)&PowerPC::ppcState.spr[SPR_GQR0 + quantreg]) + 2));
Jit->MOVZX(32, 8, EDX, R(AL));
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! (MComplex can do this, no?)
#ifdef _M_IX86
Jit->SHL(32, R(EDX), Imm8(2));
#else
Jit->SHL(32, R(EDX), Imm8(3));
#endif
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
Jit->CALLptr(MDisp(EDX, (u32)asm_routines.pairedLoadQuantized));
Jit->CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized));
Jit->MOVAPD(reg, R(XMM0));
RI.fregs[reg] = I;
regNormalRegClear(RI, I);
@ -1736,10 +1753,14 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
Jit->MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg]));
Jit->MOVZX(32, 8, EDX, R(AL));
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
#ifdef _M_IX86
Jit->SHL(32, R(EDX), Imm8(2));
#else
Jit->SHL(32, R(EDX), Imm8(3));
#endif
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
Jit->CALLptr(MDisp(EDX, (u32)asm_routines.pairedStoreQuantized));
Jit->CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized));
if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8)
@ -2102,6 +2123,6 @@ void ProfiledReJit() {
u8* x = (u8*)jit.GetCodePtr();
jit.SetCodePtr(jit.js.rewriteStart);
DoWriteCode(&jit.ibuild, &jit, true);
jit.js.curBlock->codeSize = jit.GetCodePtr() - jit.js.rewriteStart;
jit.js.curBlock->codeSize = (int)(jit.GetCodePtr() - jit.js.rewriteStart);
jit.SetCodePtr(x);
}

View File

@ -201,6 +201,7 @@ namespace IREmitter {
}
class IRBuilder {
private:
InstLoc EmitZeroOp(unsigned Opcode, unsigned extra);
InstLoc EmitUOp(unsigned OpCode, InstLoc Op1,
unsigned extra = 0);

View File

@ -75,6 +75,7 @@ void AsmRoutineManager::Generate()
MOV(64, R(RBX), Imm64((u64)Memory::base));
MOV(64, R(R15), Imm64((u64)jit.GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough
#endif
// INT3();
const u8 *outerLoop = GetCodePtr();
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
@ -389,7 +390,7 @@ void AsmRoutineManager::GenQuantizedLoads() {
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)m_dequantizeTableS));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
@ -407,7 +408,7 @@ void AsmRoutineManager::GenQuantizedLoads() {
PSRAD(XMM0, 24);
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)m_dequantizeTableS));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
@ -426,7 +427,7 @@ void AsmRoutineManager::GenQuantizedLoads() {
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)m_dequantizeTableS));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
@ -446,7 +447,7 @@ void AsmRoutineManager::GenQuantizedLoads() {
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
AND(32, R(EAX), Imm32(0xFC));
MOVSS(XMM1, MDisp(EAX, (u32)m_dequantizeTableS));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();