Merge pull request #303 from Tilka/movbe

Add remaining possible uses of MOVBE
This commit is contained in:
Pierre Bourdon 2014-04-24 17:23:34 +02:00
commit 47373af9d9
6 changed files with 167 additions and 144 deletions

View File

@ -8,8 +8,8 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
{ {
unsigned const char *startCodePtr = codePtr; unsigned const char *startCodePtr = codePtr;
u8 rex = 0; u8 rex = 0;
u8 codeByte = 0; u32 opcode;
u8 codeByte2 = 0; int opcode_length;
//Check for regular prefix //Check for regular prefix
info->operandSize = 4; info->operandSize = 4;
@ -17,6 +17,7 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
info->signExtend = false; info->signExtend = false;
info->hasImmediate = false; info->hasImmediate = false;
info->isMemoryWrite = false; info->isMemoryWrite = false;
info->byteSwap = false;
u8 modRMbyte = 0; u8 modRMbyte = 0;
u8 sibByte = 0; u8 sibByte = 0;
@ -45,41 +46,53 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
codePtr++; codePtr++;
} }
codeByte = *codePtr++; opcode = *codePtr++;
opcode_length = 1;
// Skip two-byte opcode byte if (opcode == 0x0F)
bool twoByte = false;
if (codeByte == 0x0F)
{ {
twoByte = true; opcode = (opcode << 8) | *codePtr++;
codeByte2 = *codePtr++; opcode_length = 2;
} if ((opcode & 0xFB) == 0x38)
if (!twoByte)
{
if ((codeByte & 0xF0) == 0x80 ||
((codeByte & 0xF8) == 0xC0 && (codeByte & 0x0E) != 0x02))
{ {
modRMbyte = *codePtr++; opcode = (opcode << 8) | *codePtr++;
hasModRM = true; opcode_length = 3;
} }
} }
else
switch (opcode_length)
{ {
if (((codeByte2 & 0xF0) == 0x00 && (codeByte2 & 0x0F) >= 0x04 && (codeByte2 & 0x0D) != 0x0D) || case 1:
(codeByte2 & 0xF0) == 0x30 || if ((opcode & 0xF0) == 0x80 ||
codeByte2 == 0x77 || ((opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02))
(codeByte2 & 0xF0) == 0x80 || {
((codeByte2 & 0xF0) == 0xA0 && (codeByte2 & 0x07) <= 0x02) || modRMbyte = *codePtr++;
(codeByte2 & 0xF8) == 0xC8) hasModRM = true;
{ }
// No mod R/M byte break;
} case 2:
else if (((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D) ||
{ ((opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02) ||
modRMbyte = *codePtr++; (opcode & 0xF0) == 0x30 ||
hasModRM = true; (opcode & 0xFF) == 0x77 ||
} (opcode & 0xF0) == 0x80 ||
(opcode & 0xF8) == 0xC8)
{
// No mod R/M byte
}
else
{
modRMbyte = *codePtr++;
hasModRM = true;
}
break;
case 3:
// TODO: support more 3-byte opcode instructions
if ((opcode & 0xFE) == 0xF0)
{
modRMbyte = *codePtr++;
hasModRM = true;
}
break;
} }
if (hasModRM) if (hasModRM)
@ -114,109 +127,92 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
if (displacementSize == 1) if (displacementSize == 1)
info->displacement = (s32)(s8)*codePtr; info->displacement = (s32)(s8)*codePtr;
else else
info->displacement = *((s32 *)codePtr); info->displacement = *((s32*)codePtr);
codePtr += displacementSize; codePtr += displacementSize;
switch (opcode)
switch (codeByte)
{ {
// writes
case 0xC6: // mem <- imm8 case 0xC6: // mem <- imm8
{ info->isMemoryWrite = true;
info->isMemoryWrite = true; info->hasImmediate = true;
info->hasImmediate = true; info->immediate = *codePtr;
info->immediate = *codePtr; codePtr++;
codePtr++; //move past immediate
}
break; break;
case 0xC7: // mem <- imm16/32 case 0xC7: // mem <- imm16/32
info->isMemoryWrite = true;
switch (info->operandSize)
{ {
info->isMemoryWrite = true; case 2:
if (info->operandSize == 2) info->hasImmediate = true;
{ info->immediate = *(u16*)codePtr;
info->hasImmediate = true; codePtr += 2;
info->immediate = *(u16*)codePtr; break;
codePtr += 2;
} case 4:
else if (info->operandSize == 4) info->hasImmediate = true;
{ info->immediate = *(u32*)codePtr;
info->hasImmediate = true; codePtr += 4;
info->immediate = *(u32*)codePtr; break;
codePtr += 4; case 8:
} info->zeroExtend = true;
else if (info->operandSize == 8) info->immediate = *(u32*)codePtr;
{ codePtr += 4;
info->zeroExtend = true; break;
info->immediate = *(u32*)codePtr;
codePtr += 4;
}
} }
break;
case 0x88: // mem <- r8 case 0x88: // mem <- r8
info->isMemoryWrite = true;
if (info->operandSize != 4)
{ {
info->isMemoryWrite = true; return false;
if (info->operandSize == 4)
{
info->operandSize = 1;
break;
}
else
return false;
break;
} }
info->operandSize = 1;
break;
case 0x89: // mem <- r16/32/64 case 0x89: // mem <- r16/32/64
{ info->isMemoryWrite = true;
info->isMemoryWrite = true; break;
break;
}
case 0x0F: // two-byte escape
{
info->isMemoryWrite = false;
switch (codeByte2)
{
case 0xB6: // movzx on byte
info->zeroExtend = true;
info->operandSize = 1;
break;
case 0xB7: // movzx on short
info->zeroExtend = true;
info->operandSize = 2;
break;
case 0xBE: // movsx on byte
info->signExtend = true;
info->operandSize = 1;
break;
case 0xBF: // movsx on short
info->signExtend = true;
info->operandSize = 2;
break;
default:
return false;
}
break;
}
case 0x8A: // r8 <- mem case 0x8A: // r8 <- mem
if (info->operandSize != 4)
{ {
info->isMemoryWrite = false; return false;
if (info->operandSize == 4)
{
info->operandSize = 1;
break;
}
else
return false;
} }
info->operandSize = 1;
break;
case 0x8B: // r16/32/64 <- mem case 0x8B: // r16/32/64 <- mem
{ break;
info->isMemoryWrite = false;
break;
}
case 0x0FB6: // movzx on byte
info->zeroExtend = true;
info->operandSize = 1;
break;
case 0x0FB7: // movzx on short
info->zeroExtend = true;
info->operandSize = 2;
break;
case 0x0FBE: // movsx on byte
info->signExtend = true;
info->operandSize = 1;
break;
case 0x0FBF: // movsx on short
info->signExtend = true;
info->operandSize = 2;
break;
case 0x0F38F0: // movbe read
info->byteSwap = true;
break;
case 0x0F38F1: // movbe write
info->byteSwap = true;
info->isMemoryWrite = true;
break; break;
default: default:

View File

@ -17,6 +17,7 @@ struct InstructionInfo
bool signExtend; bool signExtend;
bool hasImmediate; bool hasImmediate;
bool isMemoryWrite; bool isMemoryWrite;
bool byteSwap;
u64 immediate; u64 immediate;
s32 displacement; s32 displacement;
}; };

View File

@ -368,8 +368,7 @@ void Jit64::stX(UGeckoInstruction inst)
// Fast and daring - requires 64-bit // Fast and daring - requires 64-bit
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
gpr.BindToRegister(a, true, false); gpr.BindToRegister(a, true, false);
BSWAP(32, EAX); SwapAndStore(32, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), EAX);
MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
return; return;
} }
#endif*/ #endif*/

View File

@ -404,8 +404,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
} else { } else {
#if _M_X86_64 #if _M_X86_64
MOV(32, R(RCX), MComplex(RBX, RCX, 1, 0)); LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0));
BSWAP(32, RCX);
MOVD_xmm(XMM0, R(RCX)); MOVD_xmm(XMM0, R(RCX));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
#else #else

View File

@ -187,6 +187,12 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
return nullptr; return nullptr;
} }
if (info.byteSwap && info.instructionSize < 5)
{
PanicAlert("BackPatch: MOVBE is too small");
return nullptr;
}
auto it = registersInUseAtLoc.find(codePtr); auto it = registersInUseAtLoc.find(codePtr);
if (it == registersInUseAtLoc.end()) if (it == registersInUseAtLoc.end())
{ {
@ -200,8 +206,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{ {
XEmitter emitter(codePtr); XEmitter emitter(codePtr);
int bswapNopCount; int bswapNopCount;
if (info.byteSwap)
// MOVBE -> no BSWAP following
bswapNopCount = 0;
// Check the following BSWAP for REX byte // Check the following BSWAP for REX byte
if ((codePtr[info.instructionSize] & 0xF0) == 0x40) else if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
bswapNopCount = 3; bswapNopCount = 3;
else else
bswapNopCount = 2; bswapNopCount = 2;
@ -214,29 +223,38 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
else else
{ {
// TODO: special case FIFO writes. Also, support 32-bit mode. // TODO: special case FIFO writes. Also, support 32-bit mode.
// We entered here with a BSWAP-ed register. We'll have to swap it back.
u64 *ptr = ContextRN(ctx, info.regOperandReg);
int bswapSize = 0;
switch (info.operandSize)
{
case 1:
bswapSize = 0;
break;
case 2:
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap16((u16) *ptr);
break;
case 4:
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap32((u32) *ptr);
break;
case 8:
bswapSize = 3;
*ptr = Common::swap64(*ptr);
break;
}
u8 *start = codePtr - bswapSize; u8 *start;
if (info.byteSwap)
{
// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
start = codePtr;
}
else
{
// We entered here with a BSWAP-ed register. We'll have to swap it back.
u64 *ptr = ContextRN(ctx, info.regOperandReg);
int bswapSize = 0;
switch (info.operandSize)
{
case 1:
bswapSize = 0;
break;
case 2:
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap16((u16) *ptr);
break;
case 4:
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap32((u32) *ptr);
break;
case 8:
bswapSize = 3;
*ptr = Common::swap64(*ptr);
break;
}
start = codePtr - bswapSize;
}
XEmitter emitter(start); XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse); const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse);
emitter.CALL((void *)trampoline); emitter.CALL((void *)trampoline);

View File

@ -426,11 +426,21 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
if (accessSize == 8 && reg_value >= 4) { if (accessSize == 8 && reg_value >= 4) {
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
} }
if (swap) BSWAP(accessSize, reg_value);
#if _M_X86_64 #if _M_X86_64
result = GetWritableCodePtr(); result = GetWritableCodePtr();
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value)); if (swap)
{
SwapAndStore(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), reg_value);
}
else
{
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
}
#else #else
if (swap)
{
BSWAP(accessSize, reg_value);
}
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
result = GetWritableCodePtr(); result = GetWritableCodePtr();
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value)); MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
@ -502,6 +512,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags) void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
{ {
// FIXME
if (false && cpu_info.bSSSE3) { if (false && cpu_info.bSSSE3) {
// This path should be faster but for some reason it causes errors so I've disabled it. // This path should be faster but for some reason it causes errors so I've disabled it.
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
@ -516,8 +527,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 re
TEST(32, R(reg_addr), Imm32(mem_mask)); TEST(32, R(reg_addr), Imm32(mem_mask));
FixupBranch argh = J_CC(CC_Z); FixupBranch argh = J_CC(CC_Z);
MOVSS(M(&float_buffer), xmm_value); MOVSS(M(&float_buffer), xmm_value);
MOV(32, R(EAX), M(&float_buffer)); LoadAndSwap(32, EAX, M(&float_buffer));
BSWAP(32, EAX);
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr); ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr);