From e3d21c0b11e124dee453825f27f463f442c39b72 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sat, 9 Aug 2008 16:56:24 +0000 Subject: [PATCH] 32-bit speedup (videos mostly affected). Lots of various cleanup and future proofing. A small debugger feature. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@162 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/ABI.cpp | 46 +++- Source/Core/Common/Src/ABI.h | 9 + Source/Core/Common/Src/x64Analyzer.cpp | 17 +- Source/Core/Common/Src/x64Analyzer.h | 2 + Source/Core/Core/Core.vcproj | 4 + Source/Core/Core/Src/Boot/Boot.cpp | 3 - Source/Core/Core/Src/Core.cpp | 13 +- Source/Core/Core/Src/HW/Memmap.cpp | 12 +- Source/Core/Core/Src/HW/SystemTimers.cpp | 12 +- Source/Core/Core/Src/MemTools.cpp | 246 +++++------------- Source/Core/Core/Src/MemTools.h | 1 - Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 2 + Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp | 2 + .../Core/Src/PowerPC/Jit64/JitBackpatch.cpp | 51 ++-- .../Core/Src/PowerPC/Jit64/JitBackpatch.h | 17 ++ .../Core/Core/Src/PowerPC/Jit64/JitCache.cpp | 3 +- .../Core/Src/PowerPC/Jit64/Jit_Branch.cpp | 28 +- .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 150 +---------- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 204 +++++++++++++++ .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 82 ++++-- Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp | 66 +++-- Source/Core/DebuggerWX/src/CodeWindow.cpp | 44 +++- Source/Core/DebuggerWX/src/CodeWindow.h | 2 + Source/Core/DolphinWX/src/BootManager.cpp | 7 +- Source/Core/DolphinWX/src/Frame.cpp | 27 +- Source/Core/DolphinWX/src/Frame.h | 1 + Source/Core/DolphinWX/src/Globals.h | 1 + 27 files changed, 604 insertions(+), 448 deletions(-) create mode 100644 Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/ABI.cpp index 04aafa0bf2..034c2b77e3 100644 --- a/Source/Core/Common/Src/ABI.cpp +++ b/Source/Core/Common/Src/ABI.cpp @@ -100,6 +100,7 @@ void 
ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) } #ifdef _WIN32 + // Win64 Specific Code // ==================================== void ABI_PushAllCalleeSavedRegsAndAdjustStack() { @@ -107,27 +108,54 @@ void ABI_PushAllCalleeSavedRegsAndAdjustStack() { PUSH(RBX); PUSH(RSI); PUSH(RDI); - //PUSH(RBP); + PUSH(RBP); PUSH(R12); PUSH(R13); PUSH(R14); PUSH(R15); //TODO: Also preserve XMM0-3? - SUB(64, R(RSP), Imm8(0x20)); + SUB(64, R(RSP), Imm8(0x28)); } void ABI_PopAllCalleeSavedRegsAndAdjustStack() { - ADD(64, R(RSP), Imm8(0x20)); + ADD(64, R(RSP), Imm8(0x28)); POP(R15); POP(R14); POP(R13); POP(R12); - //POP(RBP); + POP(RBP); POP(RDI); POP(RSI); POP(RBX); } +// Win64 Specific Code +// ==================================== +void ABI_PushAllCallerSavedRegsAndAdjustStack() { + PUSH(RCX); + PUSH(RDX); + PUSH(RSI); + PUSH(RDI); + PUSH(R8); + PUSH(R9); + PUSH(R10); + PUSH(R11); + //TODO: Also preserve XMM0-15? + SUB(64, R(RSP), Imm8(0x28)); +} + +void ABI_PopAllCallerSavedRegsAndAdjustStack() { + ADD(64, R(RSP), Imm8(0x28)); + POP(R11); + POP(R10); + POP(R9); + POP(R8); + POP(RDI); + POP(RSI); + POP(RDX); + POP(RCX); +} + #else // Unix64 Specific Code // ==================================== @@ -151,6 +179,16 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack() { POP(RBX); } +void ABI_PushAllCallerSavedRegsAndAdjustStack() { + INT3(); + //not yet supported +} + +void ABI_PopAllCallerSavedRegsAndAdjustStack() { + INT3(); + //not yet supported +} + #endif #endif diff --git a/Source/Core/Common/Src/ABI.h b/Source/Core/Common/Src/ABI.h index 632df334ef..feb4431531 100644 --- a/Source/Core/Common/Src/ABI.h +++ b/Source/Core/Common/Src/ABI.h @@ -92,8 +92,17 @@ void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2); void ABI_CallFunctionR(void *func, Gen::X64Reg reg1); void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2); +// A function that doesn't have any control over what it will do to regs, +// such as the dispatcher, should be 
surrounded by these. void ABI_PushAllCalleeSavedRegsAndAdjustStack(); void ABI_PopAllCalleeSavedRegsAndAdjustStack(); +// A function that doesn't know anything about its surroundings should +// be surrounded by these to establish a safe environment, where it can roam free. +// An example is a backpatch injected function. +void ABI_PushAllCallerSavedRegsAndAdjustStack(); +void ABI_PopAllCallerSavedRegsAndAdjustStack(); + + #endif // _JIT_ABI_H diff --git a/Source/Core/Common/Src/x64Analyzer.cpp b/Source/Core/Common/Src/x64Analyzer.cpp index c1e0b5f2dd..07434757b8 100644 --- a/Source/Core/Common/Src/x64Analyzer.cpp +++ b/Source/Core/Common/Src/x64Analyzer.cpp @@ -26,6 +26,9 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc //Check for regular prefix info.operandSize = 4; info.zeroExtend = false; + info.signExtend = false; + info.hasImmediate = false; + info.isMemoryWrite = false; int addressSize = 8; u8 modRMbyte = 0; @@ -33,7 +36,6 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc bool hasModRM = false; bool hasSIBbyte = false; bool hasDisplacement = false; - info.hasImmediate = false; int displacementSize = 0; @@ -136,6 +138,7 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc if (accessType == 1) { + info.isMemoryWrite = true; //Write access switch (codeByte) { @@ -179,7 +182,9 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc } else { - //mov eax,dword ptr [rax] == 8b 00 + // Memory read + + //mov eax, dword ptr [rax] == 8b 00 switch (codeByte) { case 0x0F: @@ -193,6 +198,14 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc info.zeroExtend = true; info.operandSize = 2; break; + case 0xBE: //movsx on byte + info.signExtend = true; + info.operandSize = 1; + break; + case 0xBF: + info.signExtend = true; + info.operandSize = 2; + break; default: return false; } diff --git 
a/Source/Core/Common/Src/x64Analyzer.h b/Source/Core/Common/Src/x64Analyzer.h index 3dd00f6c7c..68c52ca932 100644 --- a/Source/Core/Common/Src/x64Analyzer.h +++ b/Source/Core/Common/Src/x64Analyzer.h @@ -27,7 +27,9 @@ struct InstructionInfo int otherReg; int scaledReg; bool zeroExtend; + bool signExtend; bool hasImmediate; + bool isMemoryWrite; u64 immediate; s32 displacement; }; diff --git a/Source/Core/Core/Core.vcproj b/Source/Core/Core/Core.vcproj index 582d1e93d1..ef32602182 100644 --- a/Source/Core/Core/Core.vcproj +++ b/Source/Core/Core/Core.vcproj @@ -879,6 +879,10 @@ RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStore.cpp" > + + diff --git a/Source/Core/Core/Src/Boot/Boot.cpp b/Source/Core/Core/Src/Boot/Boot.cpp index c17d76fe66..507b31da8d 100644 --- a/Source/Core/Core/Src/Boot/Boot.cpp +++ b/Source/Core/Core/Src/Boot/Boot.cpp @@ -185,10 +185,7 @@ void CBoot::EmulatedBIOS(bool _bDebug) // return PC = PowerPC::ppcState.gpr[3]; - // // --- preinit some stuff from bios --- - // - // Bus Clock Speed Memory::Write_U32(0x09a7ec80, 0x800000F8); diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp index 716237a17d..01cf7f3e35 100644 --- a/Source/Core/Core/Src/Core.cpp +++ b/Source/Core/Core/Src/Core.cpp @@ -186,9 +186,16 @@ THREAD_RETURN CpuThread(void *pArg) if (_CoreParameter.bLockThreads) Common::Thread::SetCurrentThreadAffinity(1); //Force to first core - // Let's run under memory watch - EMM::InstallExceptionHandler(); - // StartConsoleThread(); + if (_CoreParameter.bUseFastMem) + { +#ifdef _M_X64 + // Let's run under memory watch + EMM::InstallExceptionHandler(); +#else + PanicAlert("32-bit platforms do not support fastmem yet. 
Report this bug."); +#endif + } + CCPU::Run(); if (_CoreParameter.bRunCompareServer || _CoreParameter.bRunCompareClient) diff --git a/Source/Core/Core/Src/HW/Memmap.cpp b/Source/Core/Core/Src/HW/Memmap.cpp index 31d2d92d30..3d67e37da9 100644 --- a/Source/Core/Core/Src/HW/Memmap.cpp +++ b/Source/Core/Core/Src/HW/Memmap.cpp @@ -144,7 +144,7 @@ template void HWCALL HW_Write_Memory(T _Data, const u32 _Addres void InitHWMemFuncs() { - for (int i=0; i; hwWrite16[i] = HW_Default_Write; @@ -156,7 +156,7 @@ void InitHWMemFuncs() hwRead64 [i] = HW_Default_Read; } - for (int i=0; i - #include #include "Common.h" @@ -32,179 +31,6 @@ namespace EMM { -/* DESIGN - -THIS IS NOT THE CURRENT STATE OF THIS FILE - IT'S UNFINISHED - -We grab 4GB of virtual address space, and locate memories in there. The memories are either -VirtualAlloc or mapped swapfile. - -I/O areas are mapped into the virtual memspace, and VirtualProtected where necessary. - -Every chunk is mapped twice into memory, once into the virtual memspace, and once elsewhere. -This second mapping is used when a "read+writable" pointer is requested for a region. This -would generally be for internal use by IO functions, and for actually performing the writes -and reads after detecting them. - -There is individual read and write protection for each chunk of memory. - -Every region has a default read-write handler. If an exception is caught, this is executed. - -The default read-write handlers use the "writable" pointers. - -There should be a method to mark a region for "write notification". Dynarecs can use this -to flush their code caches if a region is written to. - -At this moment, there can only be one wrapped memspace at a time. 
-*/ - -DWORD_PTR memspaceBottom = 0; -DWORD_PTR memspaceTop = 0; - -enum MSFlags -{ - MEMSPACE_MIRROR_FIRST_PART = 1, - MEMSPACE_MIRROR_OF_PREVIOUS = 2, - MEMSPACE_MAPPED_HARDWARE = 4, -}; - -struct MemSpaceEntry -{ - u64 emulatedBase; - u64 emulatedSize; - u32 flags; -}; - -#define MEGABYTE 1024*1024 - -const MemSpaceEntry GCMemSpace[] = -{ - {0x80000000, 24*MEGABYTE, MEMSPACE_MIRROR_FIRST_PART}, - {0xC0000000, 24*MEGABYTE, MEMSPACE_MIRROR_OF_PREVIOUS}, - {0xCC000000, 0x10000, MEMSPACE_MAPPED_HARDWARE}, - {0xE0000000, 0x4000, 0}, //cache -}; - -struct Watch -{ - int ID; - EAddr startAddr; - EAddr endAddr; - WR watchFor; - WatchCallback callback; - WatchType type; - u64 userData; -}; - -std::vector watches; - -void UpdateProtection(EAddr startAddr, EAddr endAddr) -{ - -} - -int AddWatchRegion(EAddr startAddr, EAddr endAddr, WR watchFor, WatchType type, WatchCallback callback, u64 userData) -{ - static int watchIDGen = 0; - - Watch watch; - watch.ID = watchIDGen++; - watch.startAddr = startAddr; - watch.endAddr = endAddr; - watch.watchFor = watchFor; - watch.callback = callback; - watch.userData = userData; - watch.type = type; - watches.push_back(watch); - UpdateProtection(startAddr, endAddr); - - return watch.ID; -} - -void Notify(EAddr address, WR action) -{ - for (std::vector::iterator iter = watches.begin(); iter != watches.end(); ++iter) - { - if (action & iter->type) - { - if (address >= iter->startAddr && address < iter->endAddr) - { - //Alright! 
- iter->callback(address, Access32 /*TODO*/, action, iter->ID); - } - } - } -} - - -class MemSpace -{ - MemSpaceEntry *entries; - - u64 emulatedBottom; - u64 emulatedTop; - u64 emulatedSize; - - void *virtualBase; - -public: - - void Init(const MemSpaceEntry *e, int count) - { - /* - //first pass: figure out minimum address, and total amount of allocated memory - emulatedBase = 0xFFFFFFFFFFFFFFFFL; - emulatedTop = 0; - - u64 mappedTotal = 0; - for (int i=0; i emulatedTop) - emulatedTop = e[i].emulatedBase+e[i].emulatedSize; - if (e[i].flags & MEMSPACE_MIRROR_FIRST_PART) - { - mappedTotal += e[i].emulatedSize; - } - } - emulatedSize = emulatedTop - emulatedBase; - - // The above stuff is not used atm - we just grab 4G - - //second pass: grab 4G of virtual address space - virtualBase = VirtualAlloc(0, 0x100000000L, MEM_RESERVE, PAGE_READWRITE); - - //also grab a bunch of virtual memory while we're at it - - - //Release the 4G space! - //Let's hope no weirdo thread klomps in here and grabs it - VirtualFree(base, 0, MEM_RELEASE); - - for (int i=0; i %s\n", strings[i]); + free(strings); +} + +void sigsegv_handler(int signal, int siginfo_t *info, void *raw_context) +{ + if (signal != SIGSEGV) + { + // We are not interested in other signals - handle it as usual. + return; + } + ucontext_t *context = (ucontext_t)raw_context; + int si_code = info->si_code; + if (si_code != SEGV_MAPERR) + { + // Huh? Return. + return; + } + mcontext_t *ctx = &context->uc_mcontext; + void *fault_memory_ptr = (void *)info->si_addr; + void *fault_instruction_ptr = (void *)ctx->mc_rip; + + if (!Jit64::IsInJitCode(fault_instruction_ptr)) { + // Let's not prevent debugging. + return; + } + + u64 memspaceBottom = (u64)Memory::base; + if (badAddress < memspaceBottom) { + PanicAlert("Exception handler - access below memory space. %08x%08x", + badAddress >> 32, badAddress); + } + u32 emAddress = (u32)(badAddress - memspaceBottom); + + // Backpatch time. 
+ Jit64::BackPatch(fault_instruction_ptr, accessType, emAddress); +} + +#endif + void InstallExceptionHandler() { -/* +#ifdef _M_IX86 + PanicAlert("InstallExceptionHandler called, but this platform does not yet support it."); + return; +#endif + +#if 0 + sighandler_t old_signal_handler = signal(SIGSEGV , sigsegv_handler); + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = sigsegv_handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(SIGSEGV, &sa, NULL); +#endif + + /* * signal(xyz); */ } diff --git a/Source/Core/Core/Src/MemTools.h b/Source/Core/Core/Src/MemTools.h index 903e9c3aa7..c3d847bbcf 100644 --- a/Source/Core/Core/Src/MemTools.h +++ b/Source/Core/Core/Src/MemTools.h @@ -69,4 +69,3 @@ void WriteHandler32(EAddr address, u32 value); void WriteHandler64(EAddr address, u64 value); #endif - diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 01cfb6fe2c..ed5f7bc49a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -24,6 +24,7 @@ #include "../PPCAnalyst.h" #include "JitCache.h" +#include "x64Emitter.h" namespace Jit64 { @@ -85,6 +86,7 @@ namespace Jit64 void FlushRegCaches(); + void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset); void addx(UGeckoInstruction inst); void orx(UGeckoInstruction inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index 204188e658..166864f3ee 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -14,6 +14,8 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ + +#include "ABI.h" #include "x64Emitter.h" #include "../../HW/Memmap.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.cpp index c6b5b76b56..48c3ff56bb 100644 --- 
a/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.cpp @@ -1,3 +1,20 @@ +// Copyright (C) 2003-2008 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + #include #include "Common.h" @@ -36,6 +53,10 @@ void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) { return; } +// This generates some fairly heavy trampolines, but: +// 1) It's really necessary. We don't know anything about the context. +// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be +// that many of them in a typical program/game. 
void BackPatch(u8 *codePtr, int accessType, u32 emAddress) { if (!IsInJitCode(codePtr)) @@ -48,6 +69,10 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress) if (!DisassembleMov(codePtr, info, accessType)) { BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress); } + if (info.isMemoryWrite) { + BackPatchError("BackPatch - determined that MOV is write, not yet supported and should have been caught before", + codePtr, emAddress); + } if (info.operandSize != 4) { BackPatchError(StringFromFormat("BackPatch - no support for operand size %i", info.operandSize), codePtr, emAddress); } @@ -70,19 +95,10 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress) u8 *trampoline = trampolineCodePtr; SetCodePtr(trampolineCodePtr); // * Save all volatile regs - PUSH(RCX); - PUSH(RDX); - PUSH(RSI); - PUSH(RDI); - PUSH(R8); - PUSH(R9); - PUSH(R10); - PUSH(R11); - //TODO: Also preserve XMM0-3? - SUB(64, R(RSP), Imm8(0x20)); + ABI_PushAllCallerSavedRegsAndAdjustStack(); // * Set up stack frame. // * Call ReadMemory32 - //LEA(32, ECX, MDisp((X64Reg)addrReg, info.displacement)); + //LEA(32, ABI_PARAM1, MDisp((X64Reg)addrReg, info.displacement)); MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); if (info.displacement) { ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); @@ -91,7 +107,8 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress) //case 1: // CALL((void *)&Memory::Read_U8); // break; - case 4: + case 4: + // THIS FUNCTION CANNOT TOUCH FLOATING POINT REGISTERS. CALL((void *)&Memory::Read_U32); break; default: @@ -99,15 +116,7 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress) break; } // * Tear down stack frame. 
- ADD(64, R(RSP), Imm8(0x20)); - POP(R11); - POP(R10); - POP(R9); - POP(R8); - POP(RDI); - POP(RSI); - POP(RDX); - POP(RCX); + ABI_PopAllCallerSavedRegsAndAdjustStack(); MOV(32, R(dataReg), R(EAX)); RET(); trampolineCodePtr = GetWritableCodePtr(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.h b/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.h index 1cf07196ab..e64439983c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitBackpatch.h @@ -1,3 +1,20 @@ +// Copyright (C) 2003-2008 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + #ifndef _JITBACKPATCH_H #define _JITBACKPATCH_H diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp index 709ce123d3..b058a79316 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitCache.cpp @@ -17,6 +17,7 @@ #include #include "Common.h" +#include "../../Core.h" #include "MemoryUtil.h" #include "../../HW/Memmap.h" @@ -78,7 +79,7 @@ namespace Jit64 jo.optimizeStack = true; jo.enableBlocklink = true; // Speed boost, but not 100% safe #ifdef _M_X64 - jo.enableFastMem = true; + jo.enableFastMem = Core::GetStartupParameter().bUseFastMem; #else jo.enableFastMem = false; #endif diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp index 5cd622ee66..416f147c7f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp @@ -28,6 +28,14 @@ // The branches are known good, or at least reasonably good. // No need for a disable-mechanism. +// If defined, clears CR0 at blr and bl-s. If the assumption that +// flags never carry over between functions holds, then the task for +// an optimizer becomes much easier. + +// #define ACID_TEST + +// Zelda and many more games seem to pass the Acid Test. 
+ using namespace Gen; namespace Jit64 { @@ -70,11 +78,16 @@ namespace Jit64 destination = SignExt26(inst.LI << 2); else destination = js.compilerPC + SignExt26(inst.LI << 2); +#ifdef ACID_TEST + if (inst.LK) + AND(32, M(&CR), Imm32(~(0xFF000000))); +#endif WriteExit(destination, 0); - } //else we were merged with the next block, we only need the link above, if that + } else { - PanicAlert("bx not last instruction of block"); // this should not happen atm + // TODO: investigate the good old method of merging blocks here. + PanicAlert("bx not last instruction of block"); // this should not happen } } @@ -171,18 +184,21 @@ namespace Jit64 if((inst.BO & 16) == 0) { + PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex); _assert_msg_(DYNA_REC, 0, "Bizarro bcctrx"); + /* fastway = false; MOV(32, M(&PC), Imm32(js.compilerPC+4)); MOV(32, R(EAX), M(&CR)); XOR(32, R(ECX), R(ECX)); - AND(32, R(EAX), Imm32(0x80000000>>inst.BI)); + AND(32, R(EAX), Imm32(0x80000000 >> inst.BI)); CCFlags branch; if(inst.BO & 8) branch = CC_NZ; else branch = CC_Z; + */ // TODO(ector): Why is this commented out? //SETcc(branch, R(ECX)); // check for EBX @@ -205,11 +221,17 @@ namespace Jit64 if (inst.hex == 0x4e800020) { //CDynaRegCache::Flush(); + // This below line can be used to prove that blr "eats flags" in practice. + // This observation will let us do a lot of fun observations. +#ifdef ACID_TEST + AND(32, M(&CR), Imm32(~(0xFF000000))); +#endif MOV(32, R(EAX), M(&LR)); MOV(32, M(&PC), R(EAX)); WriteExitDestInEAX(0); return; } + // Call interpreter Default(inst); MOV(32, R(EAX), M(&NPC)); WriteExitDestInEAX(0); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index d1dcba61c2..d5681638ca 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -139,7 +139,7 @@ namespace Jit64 } //Still here? Do regular path. 
-#if defined(_M_X64) && defined(_WIN32) +#if defined(_M_X64) if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) { #else if (true) { @@ -173,154 +173,6 @@ namespace Jit64 gpr.UnlockAll(); } - void lfs(UGeckoInstruction inst) - { - INSTRUCTION_START; - int d = inst.RD; - int a = inst.RA; - if (!a) - { - Default(inst); - return; - } - s32 offset = (s32)(s16)inst.SIMM_16; - - gpr.Flush(FLUSH_VOLATILE); - gpr.Lock(d, a); - - MOV(32, R(ABI_PARAM1), gpr.R(a)); -#ifdef _M_X64 - if (!jo.noAssumeFPLoadFromMem) - { - MOV(32, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); -//#else -// MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::GetMainRAMPtr() + (u32)offset)); -//#endif - BSWAP(32, EAX); - } - else -#endif - { - SafeLoadRegToEAX(ABI_PARAM1, 32, offset); - } - - MOV(32, M(&temp32), R(EAX)); - fpr.Lock(d); - fpr.LoadToX64(d, false); - CVTSS2SD(fpr.RX(d), M(&temp32)); - MOVDDUP(fpr.RX(d), fpr.R(d)); - gpr.UnlockAll(); - fpr.UnlockAll(); - } - - void lfd(UGeckoInstruction inst) - { - INSTRUCTION_START; - DISABLE_32BIT; - int d = inst.RD; - int a = inst.RA; - if (!a) - { - Default(inst); - return; - } - s32 offset = (s32)(s16)inst.SIMM_16; - gpr.Lock(a); - MOV(32, R(ABI_PARAM1), gpr.R(a)); - MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); - BSWAP(64,EAX); - MOV(64, M(&temp64), R(EAX)); - fpr.Lock(d); - fpr.LoadToX64(d, false); - MOVSD(fpr.RX(d), M(&temp64)); - MOVDDUP(fpr.RX(d), fpr.R(d)); - gpr.UnlockAll(); - fpr.UnlockAll(); - } - - void stfd(UGeckoInstruction inst) - { - INSTRUCTION_START; - DISABLE_32BIT; - int s = inst.RS; - int a = inst.RA; - if (!a) - { - Default(inst); - return; - } - s32 offset = (s32)(s16)inst.SIMM_16; - gpr.Lock(a); - fpr.Lock(s); - fpr.LoadToX64(s, true, false); - MOVSD(M(&temp64), fpr.RX(s)); - MOV(32, R(ABI_PARAM1), gpr.R(a)); - MOV(64, R(EAX), M(&temp64)); - BSWAP(64, EAX); - MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX)); - gpr.UnlockAll(); - fpr.UnlockAll(); - } - - void stfs(UGeckoInstruction inst) - { 
- INSTRUCTION_START; - DISABLE_32BIT; - bool update = inst.OPCD & 1; - int s = inst.RS; - int a = inst.RA; - s32 offset = (s32)(s16)inst.SIMM_16; - - if (a && !update) - { - gpr.Flush(FLUSH_VOLATILE); - gpr.Lock(a); - fpr.Lock(s); - MOV(32, R(ABI_PARAM2), gpr.R(a)); - if (offset) - ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); - if (update && offset) - { - MOV(32, gpr.R(a), R(ABI_PARAM2)); - } - CVTSD2SS(XMM0, fpr.R(s)); - MOVSS(M(&temp32), XMM0); - MOV(32, R(ABI_PARAM1), M(&temp32)); - - TEST(32, R(ABI_PARAM2), Imm32(0x0C000000)); - FixupBranch argh = J_CC(CC_NZ); - BSWAP(32, ABI_PARAM1); - MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); - FixupBranch arg2 = J(); - SetJumpTarget(argh); - CALL((void *)&Memory::Write_U32); - SetJumpTarget(arg2); - gpr.UnlockAll(); - fpr.UnlockAll(); - } - else - { - Default(inst); - } - } - - void lfsx(UGeckoInstruction inst) - { - INSTRUCTION_START; - DISABLE_32BIT; - fpr.Lock(inst.RS); - fpr.LoadToX64(inst.RS, false, true); - MOV(32, R(EAX), gpr.R(inst.RB)); - if (inst.RA) - ADD(32, R(EAX), gpr.R(inst.RA)); - MOV(32, R(EAX), MComplex(RBX, EAX, SCALE_1, 0)); - BSWAP(32, EAX); - MOV(32, M(&temp32), R(EAX)); - CVTSS2SD(XMM0, M(&temp32)); - MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0)); - fpr.UnlockAll(); - } - // Zero cache line. void dcbz(UGeckoInstruction inst) { diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp new file mode 100644 index 0000000000..56a776831f --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -0,0 +1,204 @@ +// Copyright (C) 2003-2008 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only. +// Should give a very noticeable speed boost to paired-single-heavy code. + +#include "Common.h" + +#include "../PowerPC.h" +#include "../../Core.h" +#include "../../HW/GPFifo.h" +#include "../../HW/CommandProcessor.h" +#include "../../HW/PixelEngine.h" +#include "../../HW/Memmap.h" +#include "../PPCTables.h" +#include "x64Emitter.h" +#include "ABI.h" + +#include "Jit.h" +#include "JitCache.h" +#include "JitAsm.h" +#include "JitRegCache.h" + +// #define INSTRUCTION_START Default(inst); return; +#define INSTRUCTION_START + +#ifdef _M_IX86 +#define DISABLE_32BIT Default(inst); return; +#else +#define DISABLE_32BIT ; +#endif + +namespace Jit64 +{ + +static u64 GC_ALIGNED16(temp64); +static u32 GC_ALIGNED16(temp32); + +// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common, +// and pshufb could help a lot. +// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves. 
+ +void lfs(UGeckoInstruction inst) +{ + INSTRUCTION_START; + int d = inst.RD; + int a = inst.RA; + if (!a) + { + Default(inst); + return; + } + s32 offset = (s32)(s16)inst.SIMM_16; + + gpr.Flush(FLUSH_VOLATILE); + gpr.Lock(d, a); + + MOV(32, R(ABI_PARAM1), gpr.R(a)); +#ifdef _M_X64 + if (!jo.noAssumeFPLoadFromMem) + { + MOV(32, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); +//#else +// MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::GetMainRAMPtr() + (u32)offset)); +//#endif + BSWAP(32, EAX); + } + else +#endif + { + SafeLoadRegToEAX(ABI_PARAM1, 32, offset); + } + + MOV(32, M(&temp32), R(EAX)); + fpr.Lock(d); + fpr.LoadToX64(d, false); + CVTSS2SD(fpr.RX(d), M(&temp32)); + MOVDDUP(fpr.RX(d), fpr.R(d)); + gpr.UnlockAll(); + fpr.UnlockAll(); +} +void lfd(UGeckoInstruction inst) +{ + INSTRUCTION_START; + DISABLE_32BIT; + int d = inst.RD; + int a = inst.RA; + if (!a) + { + Default(inst); + return; + } + s32 offset = (s32)(s16)inst.SIMM_16; + gpr.Lock(a); + MOV(32, R(ABI_PARAM1), gpr.R(a)); + MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); + BSWAP(64, EAX); + MOV(64, M(&temp64), R(EAX)); + fpr.Lock(d); + fpr.LoadToX64(d, false); + MOVSD(fpr.RX(d), M(&temp64)); + MOVDDUP(fpr.RX(d), fpr.R(d)); + gpr.UnlockAll(); + fpr.UnlockAll(); +} + +void stfd(UGeckoInstruction inst) +{ + INSTRUCTION_START; + DISABLE_32BIT; + int s = inst.RS; + int a = inst.RA; + if (!a) + { + Default(inst); + return; + } + s32 offset = (s32)(s16)inst.SIMM_16; + gpr.Lock(a); + fpr.Lock(s); + fpr.LoadToX64(s, true, false); + MOVSD(M(&temp64), fpr.RX(s)); + MOV(32, R(ABI_PARAM1), gpr.R(a)); + MOV(64, R(EAX), M(&temp64)); + BSWAP(64, EAX); + MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX)); + gpr.UnlockAll(); + fpr.UnlockAll(); +} + +void stfs(UGeckoInstruction inst) +{ + INSTRUCTION_START; + DISABLE_32BIT; + bool update = inst.OPCD & 1; + int s = inst.RS; + int a = inst.RA; + s32 offset = (s32)(s16)inst.SIMM_16; + + if (a && !update) + { + gpr.Flush(FLUSH_VOLATILE); + 
gpr.Lock(a); + fpr.Lock(s); + MOV(32, R(ABI_PARAM2), gpr.R(a)); + if (offset) + ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); + if (update && offset) + { + MOV(32, gpr.R(a), R(ABI_PARAM2)); + } + CVTSD2SS(XMM0, fpr.R(s)); + MOVSS(M(&temp32), XMM0); + MOV(32, R(ABI_PARAM1), M(&temp32)); + + TEST(32, R(ABI_PARAM2), Imm32(0x0C000000)); + FixupBranch argh = J_CC(CC_NZ); + BSWAP(32, ABI_PARAM1); + MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); + FixupBranch arg2 = J(); + SetJumpTarget(argh); + CALL((void *)&Memory::Write_U32); + SetJumpTarget(arg2); + gpr.UnlockAll(); + fpr.UnlockAll(); + } + else + { + Default(inst); + } +} + +void lfsx(UGeckoInstruction inst) +{ + INSTRUCTION_START; + DISABLE_32BIT; + fpr.Lock(inst.RS); + fpr.LoadToX64(inst.RS, false, true); + MOV(32, R(EAX), gpr.R(inst.RB)); + if (inst.RA) + ADD(32, R(EAX), gpr.R(inst.RA)); + MOV(32, R(EAX), MComplex(RBX, EAX, SCALE_1, 0)); + BSWAP(32, EAX); + MOV(32, M(&temp32), R(EAX)); + CVTSS2SD(XMM0, M(&temp32)); + MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0)); + fpr.UnlockAll(); +} + +} // namespace diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 515c2962b4..88e2ff381b 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -35,8 +35,8 @@ #include "JitAsm.h" #include "JitRegCache.h" -// #define INSTRUCTION_START -#define INSTRUCTION_START Default(inst); return; +// #define INSTRUCTION_START Default(inst); return; +#define INSTRUCTION_START #ifdef _M_IX86 #define DISABLE_32BIT Default(inst); return; @@ -56,7 +56,7 @@ void WriteDual32(u64 value, u32 address) Memory::Write_U32((u32)value, address + 4); } -static const double GC_ALIGNED16(m_quantizeTableD[]) = +const double GC_ALIGNED16(m_quantizeTableD[]) = { (1 << 0), (1 << 1), (1 << 2), (1 << 3), (1 << 4), (1 << 5), (1 << 6), (1 << 7), @@ -76,7 +76,7 @@ static const double 
GC_ALIGNED16(m_quantizeTableD[]) = 1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1), }; -static const double GC_ALIGNED16(m_dequantizeTableD[]) = +const double GC_ALIGNED16(m_dequantizeTableD[]) = { 1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3), 1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7), @@ -101,7 +101,6 @@ static const double GC_ALIGNED16(m_dequantizeTableD[]) = void psq_st(UGeckoInstruction inst) { INSTRUCTION_START; - DISABLE_32BIT; if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers) { Default(inst); @@ -124,6 +123,7 @@ void psq_st(UGeckoInstruction inst) if (stType == QUANTIZE_FLOAT) { + DISABLE_32BIT; gpr.Flush(FLUSH_VOLATILE); gpr.Lock(a); fpr.Lock(s); @@ -151,7 +151,10 @@ void psq_st(UGeckoInstruction inst) } else if (stType == QUANTIZE_U8) { - gpr.Flush(FLUSH_VOLATILE); + gpr.FlushR(ABI_PARAM1); + gpr.FlushR(ABI_PARAM2); + gpr.LockX(ABI_PARAM1); + gpr.LockX(ABI_PARAM2); gpr.Lock(a); fpr.Lock(s); if (update) @@ -172,17 +175,22 @@ void psq_st(UGeckoInstruction inst) #ifdef _M_X64 MOV(16, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); #else - BSWAP(32, ABI_PARAM1); - SHR(32, R(ABI_PARAM1), Imm8(16)); - CALL(&Memory::Write_U16); + MOV(32, R(EAX), R(ABI_PARAM2)); + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOV(16, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1)); #endif if (update) MOV(32, gpr.R(a), R(ABI_PARAM2)); gpr.UnlockAll(); + gpr.UnlockAllX(); fpr.UnlockAll(); } else if (stType == QUANTIZE_S16) { + gpr.FlushR(ABI_PARAM1); + gpr.FlushR(ABI_PARAM2); + gpr.LockX(ABI_PARAM1); + gpr.LockX(ABI_PARAM2); gpr.Lock(a); fpr.Lock(s); if (update) @@ -200,15 +208,16 @@ void psq_st(UGeckoInstruction inst) PACKSSDW(XMM0, R(XMM0)); MOVD_xmm(M(&temp64), XMM0); MOV(32, R(ABI_PARAM1), M(&temp64)); -#ifdef _M_X64 BSWAP(32, ABI_PARAM1); +#ifdef _M_X64 MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); #else - BSWAP(32, ABI_PARAM1); - PUSH(32, R(ABI_PARAM1)); - 
CALL(&Memory::Write_U32); + MOV(32, R(EAX), R(ABI_PARAM2)); + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOV(32, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1)); #endif gpr.UnlockAll(); + gpr.UnlockAllX(); fpr.UnlockAll(); } else { @@ -223,7 +232,6 @@ void psq_st(UGeckoInstruction inst) void psq_l(UGeckoInstruction inst) { INSTRUCTION_START; - DISABLE_32BIT; if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers) { Default(inst); @@ -241,11 +249,10 @@ void psq_l(UGeckoInstruction inst) return; } int offset = inst.SIMM_12; - //INT3(); switch (ldType) { -#ifdef _M_X64 case QUANTIZE_FLOAT: { +#ifdef _M_X64 gpr.LoadToX64(inst.RA); MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); BSWAP(64, RAX); @@ -253,17 +260,42 @@ void psq_l(UGeckoInstruction inst) fpr.LoadToX64(inst.RS, false); X64Reg r = fpr.R(inst.RS).GetSimpleReg(); CVTPS2PD(r, M(&psTemp[0])); - SHUFPD(r, R(r),1); + SHUFPD(r, R(r), 1); if (update) ADD(32, gpr.R(inst.RA), Imm32(offset)); break; +#else + gpr.FlushR(ECX); + gpr.LockX(ECX); + gpr.LoadToX64(inst.RA); + // This can probably be optimized somewhat. 
+ LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); + AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); + MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base)); + BSWAP(32, RAX); + MOV(32, M(&psTemp[0]), R(RAX)); + MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4)); + BSWAP(32, RAX); + MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX)); + fpr.LoadToX64(inst.RS, false); + X64Reg r = fpr.R(inst.RS).GetSimpleReg(); + CVTPS2PD(r, M(&psTemp[0])); + if (update) + ADD(32, gpr.R(inst.RA), Imm32(offset)); + gpr.UnlockAllX(); + break; +#endif } - case QUANTIZE_U8: { gpr.LoadToX64(inst.RA); - XOR(32, R(EAX), R(EAX)); - MOV(16, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); +#ifdef _M_X64 + MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); +#else + LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOVZX(32, 16, EAX, MDisp(EAX, (u32)Memory::base)); +#endif MOV(32, M(&temp64), R(EAX)); MOVD_xmm(XMM0, M(&temp64)); // SSE4 optimization opportunity here. 
@@ -279,11 +311,16 @@ void psq_l(UGeckoInstruction inst) ADD(32, gpr.R(inst.RA), Imm32(offset)); } break; - case QUANTIZE_S16: { gpr.LoadToX64(inst.RA); +#ifdef _M_X64 MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); +#else + LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOV(32, R(EAX), MDisp(EAX, (u32)Memory::base)); +#endif BSWAP(32, EAX); MOV(32, M(&temp64), R(EAX)); //INT3(); @@ -308,12 +345,11 @@ void psq_l(UGeckoInstruction inst) MOV(32, R(ECX), Imm32((u32)&m_dequantizeTableD)); MOVDDUP(r, MComplex(RCX, EAX, 8, 0)); */ -#endif default: // 4 0 // 6 0 //power tennis // 5 0 - //PanicAlert("ld:%i %i", ldType, (int)inst.W); + // PanicAlert("ld:%i %i", ldType, (int)inst.W); Default(inst); return; } diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp index 2bc0a432a6..2b4e8d3120 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp @@ -292,8 +292,10 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats & { st.isFirstBlockOfFunction = true; } + gpa.any = true; fpa.any = false; + enum Todo { JustCopy = 0, Flatten = 1, Nothing = 2 @@ -307,7 +309,6 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats & if (iter != functions.end()) { SFunction &f = iter->second; - if (f.flags & FFLAG_LEAF) { //no reason to flatten @@ -394,12 +395,14 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats & return 0; } else + { return 0; + } // Do analysis of the code, look for dependencies etc int numSystemInstructions = 0; - for (int i=0; i<32; i++) + for (int i = 0; i < 32; i++) { gpa.firstRead[i] = -1; gpa.firstWrite[i] = -1; @@ -408,7 +411,7 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats & } gpa.any = true; - for (size_t i=0; iflags; if (flags & FL_TIMER) @@ -424,7 +427,7 @@ 
PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats & // Does the instruction output CR0? if (flags & FL_RC_BIT) - code[i].outputCR0 = inst.hex&1; //todo fix + code[i].outputCR0 = inst.hex & 1; //todo fix else if ((flags & FL_SET_CRn) && inst.CRFD == 0) code[i].outputCR0 = true; else @@ -432,18 +435,18 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats & // Does the instruction output CR1? if (flags & FL_RC_BIT_F) - code[i].outputCR1 = inst.hex&1; //todo fix + code[i].outputCR1 = inst.hex & 1; //todo fix else if ((flags & FL_SET_CRn) && inst.CRFD == 1) code[i].outputCR1 = true; else code[i].outputCR1 = (flags & FL_SET_CR1) ? true : false; - for (int j=0; j<3; j++) + for (int j = 0; j < 3; j++) { code[i].fregsIn[j] = -1; code[i].regsIn[j] = -1; } - for (int j=0; j<2; j++) + for (int j = 0; j < 2; j++) code[i].regsOut[j] = -1; code[i].fregOut=-1; @@ -485,14 +488,21 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats & break; case OPTYPE_LOADFP: break; - + case OPTYPE_BRANCH: + if (code[i].inst.hex == 0x4e800020) + { + // For analysis purposes, we can assume that blr eats flags. + code[i].outputCR0 = true; + code[i].outputCR1 = true; + } + break; case OPTYPE_SYSTEM: case OPTYPE_SYSTEMFP: numSystemInstructions++; break; } - for (int j=0; jsize * 4; - } } else break; @@ -656,7 +667,7 @@ void PPCAnalyst::FindFunctionsAfterBLR() void PPCAnalyst::FindFunctions(u32 startAddr, u32 endAddr) { //Step 1: Find all functions - FindFunctionsFromBranches(startAddr,endAddr); + FindFunctionsFromBranches(startAddr, endAddr); LOG(HLE,"Memory scan done. 
Found %i functions.",functions.size()); @@ -861,30 +872,29 @@ bool PPCAnalyst::SaveFuncDB(const TCHAR *filename) bool PPCAnalyst::LoadFuncDB(const TCHAR *filename) { - FILE *f = fopen(filename,"rb"); + FILE *f = fopen(filename, "rb"); if (!f) { - LOG(HLE,"Database load failed"); + LOG(HLE, "Database load failed"); return false; } - u32 fcount=0; - fread(&fcount,4,1,f); - for (size_t i=0; isecond)); iter++; diff --git a/Source/Core/DebuggerWX/src/CodeWindow.cpp b/Source/Core/DebuggerWX/src/CodeWindow.cpp index b878dff18d..a2f77dbc4a 100644 --- a/Source/Core/DebuggerWX/src/CodeWindow.cpp +++ b/Source/Core/DebuggerWX/src/CodeWindow.cpp @@ -39,6 +39,7 @@ #include "Debugger/PPCDebugInterface.h" #include "Debugger/Debugger_SymbolMap.h" +#include "PowerPC/PPCAnalyst.h" #include "Core.h" #include "LogManager.h" @@ -64,6 +65,8 @@ BEGIN_EVENT_TABLE(CCodeWindow, wxFrame) EVT_MENU(IDM_REGISTERWINDOW, CCodeWindow::OnToggleRegisterWindow) EVT_MENU(IDM_BREAKPOINTWINDOW, CCodeWindow::OnToggleBreakPointWindow) EVT_MENU(IDM_MEMORYWINDOW, CCodeWindow::OnToggleMemoryWindow) + + EVT_MENU(IDM_SCANFUNCTIONS, CCodeWindow::OnSymbolsMenu) // toolbar EVT_MENU(IDM_DEBUG_GO, CCodeWindow::OnCodeStep) EVT_MENU(IDM_STEP, CCodeWindow::OnCodeStep) @@ -195,14 +198,14 @@ void CCodeWindow::CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParam wxMenuBar* pMenuBar = new wxMenuBar(wxMB_DOCKABLE); { - wxMenu* pDebugMenu = new wxMenu; - wxMenuItem* interpreter = pDebugMenu->Append(IDM_INTERPRETER, _T("&Interpreter"), wxEmptyString, wxITEM_CHECK); + wxMenu* pCoreMenu = new wxMenu; + wxMenuItem* interpreter = pCoreMenu->Append(IDM_INTERPRETER, _T("&Interpreter"), wxEmptyString, wxITEM_CHECK); interpreter->Check(!_LocalCoreStartupParameter.bUseDynarec); // wxMenuItem* dualcore = pDebugMenu->Append(IDM_DUALCORE, _T("&DualCore"), wxEmptyString, wxITEM_CHECK); // dualcore->Check(_LocalCoreStartupParameter.bUseDualCore); - pMenuBar->Append(pDebugMenu, _T("&Core Startup")); + pMenuBar->Append(pCoreMenu, 
_T("&Core Startup")); } { @@ -225,6 +228,11 @@ void CCodeWindow::CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParam pMenuBar->Append(pDebugDialogs, _T("&Views")); } + { + wxMenu *pSymbolsMenu = new wxMenu; + pSymbolsMenu->Append(IDM_SCANFUNCTIONS, _T("&Scan for functions")); + pMenuBar->Append(pSymbolsMenu, _T("&Symbols")); + } SetMenuBar(pMenuBar); } @@ -246,6 +254,23 @@ void CCodeWindow::JumpToAddress(u32 _Address) codeview->Center(_Address); } +void CCodeWindow::OnSymbolsMenu(wxCommandEvent& event) +{ + if (Core::GetState() == Core::CORE_UNINITIALIZED) + { + // TODO: disable menu items instead :P + return; + } + switch (event.GetId()) + { + case IDM_SCANFUNCTIONS: + PPCAnalyst::FindFunctions(0x80003100, 0x80400000); + PPCAnalyst::LoadFuncDB("data/totaldb.dsy"); + Debugger::GetFromAnalyzer(); + NotifyMapLoaded(); + break; + } +} void CCodeWindow::OnCodeStep(wxCommandEvent& event) { @@ -537,6 +562,7 @@ void CCodeWindow::OnToggleMemoryWindow(wxCommandEvent& event) } } } + void CCodeWindow::OnHostMessage(wxCommandEvent& event) { switch (event.GetId()) @@ -582,13 +608,13 @@ void CCodeWindow::PopulateToolbar(wxToolBar* toolBar) h = m_Bitmaps[Toolbar_DebugGo].GetHeight(); toolBar->SetToolBitmapSize(wxSize(w, h)); - toolBar->AddTool(IDM_DEBUG_GO, _T("Play"), m_Bitmaps[Toolbar_DebugGo], _T("Delete the selected BreakPoint or MemoryCheck")); - toolBar->AddTool(IDM_STEP, _T("Step"), m_Bitmaps[Toolbar_Step], _T("Add BreakPoint...")); - toolBar->AddTool(IDM_STEPOVER, _T("Step Over"), m_Bitmaps[Toolbar_StepOver], _T("Add BreakPoint...")); - toolBar->AddTool(IDM_SKIP, _T("Skip"), m_Bitmaps[Toolbar_Skip], _T("Add BreakPoint...")); + toolBar->AddTool(IDM_DEBUG_GO, _T("Play"), m_Bitmaps[Toolbar_DebugGo]); + toolBar->AddTool(IDM_STEP, _T("Step"), m_Bitmaps[Toolbar_Step]); + toolBar->AddTool(IDM_STEPOVER, _T("Step Over"), m_Bitmaps[Toolbar_StepOver]); + toolBar->AddTool(IDM_SKIP, _T("Skip"), m_Bitmaps[Toolbar_Skip]); toolBar->AddSeparator(); - 
toolBar->AddTool(IDM_GOTOPC, _T("Goto PC"), m_Bitmaps[Toolbar_GotoPC], _T("Add BreakPoint...")); - toolBar->AddTool(IDM_SETPC, _T("Set PC"), m_Bitmaps[Toolbar_SetPC], _T("Add BreakPoint...")); + toolBar->AddTool(IDM_GOTOPC, _T("Goto PC"), m_Bitmaps[Toolbar_GotoPC]); + toolBar->AddTool(IDM_SETPC, _T("Set PC"), m_Bitmaps[Toolbar_SetPC]); toolBar->AddSeparator(); toolBar->AddControl(new wxTextCtrl(toolBar, IDM_ADDRBOX, _T(""))); diff --git a/Source/Core/DebuggerWX/src/CodeWindow.h b/Source/Core/DebuggerWX/src/CodeWindow.h index e068769ed5..e63fb2eff9 100644 --- a/Source/Core/DebuggerWX/src/CodeWindow.h +++ b/Source/Core/DebuggerWX/src/CodeWindow.h @@ -78,6 +78,7 @@ class CCodeWindow IDM_REGISTERWINDOW, IDM_BREAKPOINTWINDOW, IDM_MEMORYWINDOW, + IDM_SCANFUNCTIONS, }; enum @@ -120,6 +121,7 @@ class CCodeWindow void OnToggleLogWindow(wxCommandEvent& event); void OnToggleMemoryWindow(wxCommandEvent& event); void OnHostMessage(wxCommandEvent& event); + void OnSymbolsMenu(wxCommandEvent& event); void CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParameter); diff --git a/Source/Core/DolphinWX/src/BootManager.cpp b/Source/Core/DolphinWX/src/BootManager.cpp index 727f053321..b9e07edf24 100644 --- a/Source/Core/DolphinWX/src/BootManager.cpp +++ b/Source/Core/DolphinWX/src/BootManager.cpp @@ -63,9 +63,12 @@ bool BootCore(const std::string& _rFilename) StartUp.bRunCompareServer = false; StartUp.bEnableDebugging = g_pCodeWindow ? 
true : false; // RUNNING_DEBUG std::string BaseDataPath; - #ifdef _WIN32 +#ifdef _WIN32 StartUp.hInstance = wxGetInstance(); - #endif +#ifdef _M_X64 + StartUp.bUseFastMem = true; +#endif +#endif StartUp.AutoSetup(SCoreStartupParameter::BOOT_DEFAULT); diff --git a/Source/Core/DolphinWX/src/Frame.cpp b/Source/Core/DolphinWX/src/Frame.cpp index b932c8e594..d620ba47f6 100644 --- a/Source/Core/DolphinWX/src/Frame.cpp +++ b/Source/Core/DolphinWX/src/Frame.cpp @@ -90,6 +90,7 @@ EVT_MENU(IDM_CONFIG_PAD_PLUGIN, CFrame::OnPluginPAD) EVT_MENU(IDM_BROWSE, CFrame::OnBrowse) EVT_MENU(IDM_TOGGLE_FULLSCREEN, CFrame::OnToggleFullscreen) EVT_MENU(IDM_TOGGLE_DUALCORE, CFrame::OnToggleDualCore) +EVT_MENU(IDM_TOGGLE_THROTTLE, CFrame::OnToggleThrottle) EVT_HOST_COMMAND(wxID_ANY, CFrame::OnHostMessage) END_EVENT_TABLE() @@ -230,6 +231,12 @@ CFrame::CreateMenu() pEmulationMenu->Append(pItem); pItem->Check(SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore); } + { + // throttling + wxMenuItem* pItem = new wxMenuItem(pEmulationMenu, IDM_TOGGLE_THROTTLE, _T("&Speed throttle"), wxEmptyString, wxITEM_CHECK); + pEmulationMenu->Append(pItem); + pItem->Check(SConfig::GetInstance().m_LocalCoreStartupParameter.bThrottle); + } m_pMenuBar->Append(pEmulationMenu, _T("&Emulation")); } @@ -518,7 +525,6 @@ CFrame::OnHostMessage(wxCommandEvent& event) break; case IDM_BOOTING_STARTED: - if (m_pBootProcessDialog == NULL) { /* m_pBootProcessDialog = new wxProgressDialog @@ -537,7 +543,6 @@ CFrame::OnHostMessage(wxCommandEvent& event) break; case IDM_BOOTING_ENDED: - if (m_pBootProcessDialog != NULL) { // m_pBootProcessDialog->Destroy(); @@ -547,7 +552,6 @@ CFrame::OnHostMessage(wxCommandEvent& event) break; case IDM_UPDATESTATUSBAR: - if (m_pStatusBar != NULL) { m_pStatusBar->SetStatusText(event.GetString()); @@ -557,24 +561,26 @@ CFrame::OnHostMessage(wxCommandEvent& event) } -void -CFrame::OnToggleFullscreen(wxCommandEvent& WXUNUSED (event)) +void CFrame::OnToggleFullscreen(wxCommandEvent& 
WXUNUSED (event)) { ShowFullScreen(true); UpdateGUI(); } -void -CFrame::OnToggleDualCore(wxCommandEvent& WXUNUSED (event)) +void CFrame::OnToggleDualCore(wxCommandEvent& WXUNUSED (event)) { SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore = !SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore; SConfig::GetInstance().SaveSettings(); } +void CFrame::OnToggleThrottle(wxCommandEvent& WXUNUSED (event)) +{ + SConfig::GetInstance().m_LocalCoreStartupParameter.bThrottle = !SConfig::GetInstance().m_LocalCoreStartupParameter.bThrottle; + SConfig::GetInstance().SaveSettings(); +} -void -CFrame::OnKeyDown(wxKeyEvent& event) +void CFrame::OnKeyDown(wxKeyEvent& event) { if (((event.GetKeyCode() == WXK_RETURN) && (event.GetModifiers() == wxMOD_ALT)) || (event.GetKeyCode() == WXK_ESCAPE)) @@ -589,8 +595,7 @@ CFrame::OnKeyDown(wxKeyEvent& event) } -void -CFrame::UpdateGUI() +void CFrame::UpdateGUI() { // buttons { diff --git a/Source/Core/DolphinWX/src/Frame.h b/Source/Core/DolphinWX/src/Frame.h index 1f25f9daf1..e8b9447175 100644 --- a/Source/Core/DolphinWX/src/Frame.h +++ b/Source/Core/DolphinWX/src/Frame.h @@ -66,6 +66,7 @@ class CFrame void OnBrowse(wxCommandEvent& event); void OnToggleFullscreen(wxCommandEvent& event); void OnToggleDualCore(wxCommandEvent& event); + void OnToggleThrottle(wxCommandEvent& event); void OnKeyDown(wxKeyEvent& event); void OnHostMessage(wxCommandEvent& event); diff --git a/Source/Core/DolphinWX/src/Globals.h b/Source/Core/DolphinWX/src/Globals.h index 2f1209117e..86506ddcba 100644 --- a/Source/Core/DolphinWX/src/Globals.h +++ b/Source/Core/DolphinWX/src/Globals.h @@ -33,6 +33,7 @@ enum IDM_CONFIG_PAD_PLUGIN, IDM_TOGGLE_FULLSCREEN, IDM_TOGGLE_DUALCORE, + IDM_TOGGLE_THROTTLE, IDM_NOTIFYMAPLOADED, IDM_UPDATELOGDISPLAY, IDM_UPDATEDISASMDIALOG,