From 2a8936312e37cc787270972671b01f5a64b2d7ed Mon Sep 17 00:00:00 2001 From: Fiora Date: Fri, 2 Jan 2015 14:47:44 -0800 Subject: [PATCH] Fastmem: jump to trampolines instead of calling them Should be slightly faster, and also lets us skip the nops on the way back. Remove the trampoline cache, since it isn't really useful anymore with this. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 2 - .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 14 ++-- .../PowerPC/JitCommon/TrampolineCache.cpp | 73 ++----------------- .../Core/PowerPC/JitCommon/TrampolineCache.h | 25 +------ 4 files changed, 18 insertions(+), 96 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 77905ca8ae..436364d0de 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -785,8 +785,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc else { exceptionHandlerAtLoc[js.fastmemLoadStore] = GetWritableCodePtr(); - // the fastmem trampoline is jumping here, so we need to pop the return stack - ADD(64, R(RSP), Imm8(8)); } gpr.Flush(FLUSH_MAINTAIN_STATE); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index d165915c99..566ce38109 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -83,7 +83,6 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) if (!info.isMemoryWrite) { - XEmitter emitter(codePtr); int bswapNopCount; if (info.byteSwap || info.operandSize == 1) bswapNopCount = 0; @@ -109,9 +108,11 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) totalSize += 3; } - const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse, exceptionHandler); - emitter.CALL((void *)trampoline); + XEmitter emitter(codePtr); int padding = totalSize - BACKPATCH_SIZE; + u8* returnPtr = codePtr + 5 + padding; + const u8* trampoline = trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr); + emitter.JMP(trampoline, true); if (padding > 0) { emitter.NOP(padding); @@ -162,9 +163,10 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) start = codePtr - bswapSize; } XEmitter emitter(start); - const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse, exceptionHandler, pc); - emitter.CALL((void *)trampoline); - ptrdiff_t padding = (codePtr - emitter.GetCodePtr()) + info.instructionSize; + ptrdiff_t padding = (codePtr - (start + 5)) + info.instructionSize; + u8* returnPtr = start + 5 + padding; + const u8* trampoline = trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc); + emitter.JMP(trampoline, true); if (padding > 0) { emitter.NOP(padding); diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp index 4f900621b2..7283e51ed8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp @@ -27,29 +27,14 @@ void TrampolineCache::Init() void TrampolineCache::ClearCodeSpace() { X64CodeBlock::ClearCodeSpace(); - cachedTrampolines.clear(); } void TrampolineCache::Shutdown() { FreeCodeSpace(); - cachedTrampolines.clear(); } -const u8* TrampolineCache::GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler) -{ - TrampolineCacheKey key = { registersInUse, exceptionHandler, 0, info }; - - auto it = cachedTrampolines.find(key); - if (it != cachedTrampolines.end()) - return it->second; - - const u8* trampoline = GenerateReadTrampoline(info, registersInUse, exceptionHandler); - cachedTrampolines[key] = trampoline; - return trampoline; -} - -const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler) +const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr) { if (GetSpaceLeft() < 1024) PanicAlert("Trampoline cache full"); @@ -60,9 +45,7 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B registersInUse[addrReg] = true; registersInUse[dataReg] = false; - // It's a read. Easy. - // RSP alignment here is 8 due to the call. - ABI_PushRegistersAndAdjustStack(registersInUse, 8); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); int dataRegSize = info.operandSize == 8 ? 64 : 32; MOVTwo(dataRegSize, ABI_PARAM1, addrReg, ABI_PARAM2, dataReg); @@ -89,30 +72,17 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B if (dataReg != ABI_RETURN) MOV(dataRegSize, R(dataReg), R(ABI_RETURN)); - ABI_PopRegistersAndAdjustStack(registersInUse, 8); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (exceptionHandler) { TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); J_CC(CC_NZ, exceptionHandler); } - RET(); + JMP(returnPtr, true); return trampoline; } -const u8* TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc) -{ - TrampolineCacheKey key = { registersInUse, exceptionHandler, pc, info }; - - auto it = cachedTrampolines.find(key); - if (it != cachedTrampolines.end()) - return it->second; - - const u8* trampoline = GenerateWriteTrampoline(info, registersInUse, exceptionHandler, pc); - cachedTrampolines[key] = trampoline; - return trampoline; -} - -const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc) +const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr, u32 pc) { if (GetSpaceLeft() < 1024) PanicAlert("Trampoline cache full"); @@ -122,15 +92,13 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, X64Reg dataReg = (X64Reg)info.regOperandReg; X64Reg addrReg = (X64Reg)info.scaledReg; - // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a - // hardware access - we can take shortcuts. // Don't treat FIFO writes specially for now because they require a burst // check anyway. // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(pc)); - ABI_PushRegistersAndAdjustStack(registersInUse, 8); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); if (info.hasImmediate) { @@ -178,38 +146,13 @@ const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo &info, break; } - ABI_PopRegistersAndAdjustStack(registersInUse, 8); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (exceptionHandler) { TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); J_CC(CC_NZ, exceptionHandler); } - RET(); + JMP(returnPtr, true); return trampoline; } - -size_t TrampolineCacheKeyHasher::operator()(const TrampolineCacheKey& k) const -{ - size_t res = std::hash()(k.registersInUse.m_val); - res ^= std::hash()(k.info.operandSize) >> 1; - res ^= std::hash()(k.info.regOperandReg) >> 2; - res ^= std::hash()(k.info.scaledReg) >> 3; - res ^= std::hash()(k.info.immediate) >> 4; - res ^= std::hash()(k.pc) >> 5; - res ^= std::hash()(k.info.displacement) << 1; - res ^= std::hash()(k.info.signExtend) << 2; - res ^= std::hash()(k.info.hasImmediate) << 3; - res ^= std::hash()(k.info.isMemoryWrite) << 4; - res ^= std::hash()(k.exceptionHandler) << 5; - - return res; -} - -bool TrampolineCacheKey::operator==(const TrampolineCacheKey &other) const -{ - return pc == other.pc && - registersInUse == other.registersInUse && - exceptionHandler == other.exceptionHandler && - info == other.info; -} diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h index f2cdb2ba92..9b35950b66 100644 --- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h @@ -14,34 +14,13 @@ // We need at least this many bytes for backpatching. const int BACKPATCH_SIZE = 5; -struct TrampolineCacheKey -{ - BitSet32 registersInUse; - u8* exceptionHandler; - u32 pc; - InstructionInfo info; - - bool operator==(const TrampolineCacheKey &other) const; -}; - -struct TrampolineCacheKeyHasher -{ - size_t operator()(const TrampolineCacheKey& k) const; -}; - class TrampolineCache : public Gen::X64CodeBlock { public: void Init(); void Shutdown(); - const u8* GetReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler); - const u8* GetWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc); + const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr); + const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u8* returnPtr, u32 pc); void ClearCodeSpace(); - -private: - const u8* GenerateReadTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler); - const u8* GenerateWriteTrampoline(const InstructionInfo &info, BitSet32 registersInUse, u8* exceptionHandler, u32 pc); - - std::unordered_map cachedTrampolines; };