From e659f5ac58e704295f2459b77030536707415298 Mon Sep 17 00:00:00 2001
From: Tillmann Karras
Date: Wed, 30 Apr 2014 10:12:15 +0200
Subject: [PATCH 1/2] JitBackpatch: fix NOP padding

The new NOP emitter breaks when called with a negative count. As it
turns out, it did happen when deoptimizing 8 bit MOVs because they are
only 4 bytes long and need no BSWAP.
---
 Source/Core/Common/x64Emitter.cpp                 |  4 +++-
 Source/Core/Common/x64Emitter.h                   |  2 +-
 .../Core/Core/PowerPC/JitCommon/JitBackpatch.cpp  | 15 +++++++++++----
 Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h |  3 +++
 4 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 23c3d2647e..89d5ddf564 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -6,6 +6,7 @@
 
 #include "Common/Common.h"
 #include "Common/CPUDetect.h"
+#include "Common/Log.h"
 #include "Common/x64Emitter.h"
 
 namespace Gen
@@ -516,8 +517,9 @@ void XEmitter::RET() {Write8(0xC3);}
 void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret
 
 // The first sign of decadence: optimized NOPs.
-void XEmitter::NOP(int size)
+void XEmitter::NOP(size_t size)
 {
+	_dbg_assert_(DYNA_REC, (int)size > 0);
 	while (true)
 	{
 		switch (size)
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 528a4495d8..35c45f6c20 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -290,7 +290,7 @@ public:
 	void INT3();
 
 	// Do nothing
-	void NOP(int count = 1);
+	void NOP(size_t count = 1);
 
 	// Save energy in wait-loops on P4 only. Probably not too useful.
 	void PAUSE();
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
index 7f557734e4..91eb304b79 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
@@ -187,7 +187,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
 		return nullptr;
 	}
-	if (info.byteSwap && info.instructionSize < 5)
+	if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
 	{
 		PanicAlert("BackPatch: MOVBE is too small");
 		return nullptr;
 	}
@@ -217,7 +217,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
 
 		const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse);
 		emitter.CALL((void *)trampoline);
-		emitter.NOP((int)info.instructionSize + bswapNopCount - 5);
+		int padding = info.instructionSize + bswapNopCount - BACKPATCH_SIZE;
+		if (padding > 0)
+		{
+			emitter.NOP(padding);
+		}
 		return codePtr;
 	}
 	else
@@ -258,11 +262,14 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
 		XEmitter emitter(start);
 		const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse);
 		emitter.CALL((void *)trampoline);
-		emitter.NOP((int)(codePtr + info.instructionSize - emitter.GetCodePtr()));
+		int padding = codePtr + info.instructionSize - emitter.GetCodePtr();
+		if (padding > 0)
+		{
+			emitter.NOP(padding);
+		}
 		return start;
 	}
 #else
 	return 0;
 #endif
 }
-
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
index 8f285fe88c..dd5ad8628b 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
@@ -8,6 +8,9 @@
 #include "Common/x64Analyzer.h"
 #include "Common/x64Emitter.h"
 
+// We need at least this many bytes for backpatching.
+const int BACKPATCH_SIZE = 5;
+
 // meh.
 #if defined(_WIN32)
 	#include <windows.h>

From 36dbde0f3c1471caba972931fbaa80ed786f87bc Mon Sep 17 00:00:00 2001
From: Tillmann Karras
Date: Wed, 30 Apr 2014 15:15:45 +0200
Subject: [PATCH 2/2] Jit_Util: reduce NOP padding of 8 bit loads and use MOVSX directly if needed.

---
 .../Core/PowerPC/JitCommon/JitBackpatch.cpp  |  3 +-
 .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 55 +++++++++++--------
 2 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
index 91eb304b79..9220646702 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp
@@ -206,8 +206,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
 	{
 		XEmitter emitter(codePtr);
 		int bswapNopCount;
-		if (info.byteSwap)
-			// MOVBE -> no BSWAP following
+		if (info.byteSwap || info.operandSize == 1)
 			bswapNopCount = 0;
 		// Check the following BSWAP for REX byte
 		else if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
index f7020482f4..43c757d160 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
@@ -98,19 +98,28 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
 		}
 
 		result = GetWritableCodePtr();
-		MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
+		if (accessSize == 8 && signExtend)
+			MOVSX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
+		else
+			MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
 	}
 	else
 	{
 		MOV(32, R(reg_value), opAddress);
 		result = GetWritableCodePtr();
-		MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
+		if (accessSize == 8 && signExtend)
+			MOVSX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
+		else
+			MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
 	}
 #else
 	if (opAddress.IsImm())
 	{
 		result = GetWritableCodePtr();
-		MOVZX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
+		if (accessSize == 8 && signExtend)
+			MOVSX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
+		else
+			MOVZX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
 	}
 	else
 	{
@@ -118,31 +127,32 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
 		MOV(32, R(reg_value), opAddress);
 		AND(32, R(reg_value), Imm32(Memory::MEMVIEW32_MASK));
 		result = GetWritableCodePtr();
-		MOVZX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
+		if (accessSize == 8 && signExtend)
+			MOVSX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
+		else
+			MOVZX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
 	}
 #endif
 
-	// Add a 2 bytes NOP to have some space for the backpatching
-	if (accessSize == 8)
-		NOP(2);
+	switch (accessSize)
+	{
+	case 8:
+		_dbg_assert_(DYNA_REC, BACKPATCH_SIZE - (GetCodePtr() - result) <= 0);
+		break;
 
-	if (accessSize == 32)
-	{
-		BSWAP(32, reg_value);
-	}
-	else if (accessSize == 16)
-	{
+	case 16:
 		BSWAP(32, reg_value);
 		if (signExtend)
 			SAR(32, R(reg_value), Imm8(16));
 		else
 			SHR(32, R(reg_value), Imm8(16));
+		break;
+
+	case 32:
+		BSWAP(32, reg_value);
+		break;
 	}
-	else if (signExtend)
-	{
-		// TODO: bake 8-bit into the original load.
-		MOVSX(32, accessSize, reg_value, R(reg_value));
-	}
+
 	return result;
 }
 
@@ -472,11 +482,12 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
 	   )
 	{
 		MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
-		u8 *mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_LOADSTORE_NO_SWAP));
-		if (accessSize == 8)
+		const u8* backpatchStart = GetCodePtr();
+		u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_LOADSTORE_NO_SWAP));
+		int padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
+		if (padding > 0)
 		{
-			NOP(1);
-			NOP(1);
+			NOP(padding);
 		}
 		registersInUseAtLoc[mov] = registersInUse;
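
For reference, the invariant both patches are built around: a backpatched memory access is overwritten by a 5-byte rel32 CALL to a trampoline, so the fast-path load/store sequence must occupy at least BACKPATCH_SIZE bytes, and whichever side falls short is padded with NOPs; the count handed to NOP() must never be negative. A minimal standalone sketch of that arithmetic (not Dolphin code; the constant and helper names are illustrative only):

#include <cassert>
#include <cstddef>

// Size of the rel32 CALL that replaces the original access when backpatching.
constexpr std::ptrdiff_t kBackpatchSize = 5;

// Emit-time side (cf. SafeWriteRegToReg): pad the fast-path sequence up to
// kBackpatchSize so a later CALL cannot clobber the following instruction.
std::ptrdiff_t PaddingAfterEmit(std::ptrdiff_t emittedBytes)
{
	std::ptrdiff_t padding = kBackpatchSize - emittedBytes;
	return padding > 0 ? padding : 0;
}

// Backpatch-time side (cf. BackPatch): after writing the CALL, fill whatever
// remains of the original sequence with NOPs; the count must not go negative.
std::ptrdiff_t PaddingAfterCall(std::ptrdiff_t originalBytes)
{
	assert(originalBytes >= kBackpatchSize && "sequence too small to backpatch");
	return originalBytes - kBackpatchSize;
}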