From ccb96be9b3735e30934128190ad71c071ba3e9e4 Mon Sep 17 00:00:00 2001 From: nodchip Date: Thu, 9 Sep 2010 02:14:03 +0000 Subject: [PATCH] Jit64/JitIL: Enabled block merging to improve performance. This improves the fps by 4-5 in some games. However, it decreases the fps by 10 in other games, such as MP2. In this commit, the actual block merging is disabled. If you want to try block merging, please set FUNCTION_FOLLOWING_THRESHOLD to a positive integer. Increased the size of the code buffer to prevent cache clearing with block merging. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6193 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 2 +- .../Core/Src/PowerPC/Jit64/Jit_Branch.cpp | 43 ++++++++++--------- .../Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp | 2 +- .../Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp | 9 ++++ Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp | 21 ++++++--- 5 files changed, 48 insertions(+), 29 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index f402f3542d..8db6e94bbf 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -158,7 +158,7 @@ ps_adds1 */ -static int CODE_SIZE = 1024*1024*16; +static int CODE_SIZE = 1024*1024*32; namespace CPUCompare { diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp index 949fca58c8..aef25e9779 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp @@ -81,36 +81,39 @@ void Jit64::bx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(Branch) + // We must always execute the following statement + // even if the blocks are merged by PPCAnalyst::Flatten(). if (inst.LK) MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + + // If this is not the last instruction of a block, + // we skip the rest of the processing. 
+ // Because PPCAnalyst::Flatten() merged the blocks. + if (!js.isLastInstruction) { + return; + } + gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL); - if (js.isLastInstruction) - { - u32 destination; - if (inst.AA) - destination = SignExt26(inst.LI << 2); - else - destination = js.compilerPC + SignExt26(inst.LI << 2); + u32 destination; + if (inst.AA) + destination = SignExt26(inst.LI << 2); + else + destination = js.compilerPC + SignExt26(inst.LI << 2); #ifdef ACID_TEST - if (inst.LK) - AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); + if (inst.LK) + AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); #endif - if (destination == js.compilerPC) - { - //PanicAlert("Idle loop detected at %08x", destination); + if (destination == js.compilerPC) + { + //PanicAlert("Idle loop detected at %08x", destination); // CALL(ProtectFunction(&CoreTiming::Idle, 0)); // JMP(Asm::testExceptions, true); - // make idle loops go faster - js.downcountAmount += 8; - } - WriteExit(destination, 0); - } - else { - // TODO: investigate the good old method of merging blocks here. 
- PanicAlert("bx not last instruction of block"); // this should not happen + // make idle loops go faster + js.downcountAmount += 8; } + WriteExit(destination, 0); } // TODO - optimize to hell and beyond diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index 71c96c3a6a..42b82a532e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -152,7 +152,7 @@ ps_adds1 */ -static int CODE_SIZE = 1024*1024*16; +static int CODE_SIZE = 1024*1024*32; namespace CPUCompare { diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp index e022489f9c..608d362032 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp @@ -59,9 +59,18 @@ void JitIL::bx(UGeckoInstruction inst) NORMALBRANCH_START INSTRUCTION_START; + // We must always execute the following statement + // even if the blocks are merged by PPCAnalyst::Flatten(). if (inst.LK) ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); + // If this is not the last instruction of a block, + // we skip the rest of the processing + // because PPCAnalyst::Flatten() merged the blocks. 
+ if (!js.isLastInstruction) { + return; + } + u32 destination; if (inst.AA) destination = SignExt26(inst.LI << 2); diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp index 0f5744b516..55a322a5e0 100644 --- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp @@ -40,10 +40,9 @@ namespace PPCAnalyst { using namespace std; -enum -{ - CODEBUFFER_SIZE = 32000, -}; +static const int CODEBUFFER_SIZE = 32000; +// A value of 0 disables block merging +static const int FUNCTION_FOLLOWING_THRESHOLD = 0; CodeBuffer::CodeBuffer(int size) { @@ -446,9 +445,15 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc } if (follow) numFollows++; - if (numFollows > 1) + // TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD. + // If it is small, performance will suffer. + // If it is big, the size of generated code will be big and + // cache clearing will happen many times. + // TODO: Investigate the reason why + // "0" is fastest in some games, MP2 for example. + if (numFollows > FUNCTION_FOLLOWING_THRESHOLD) follow = false; - follow = false; + if (!follow) { if (opinfo->flags & FL_ENDBLOCK) //right now we stop early @@ -460,7 +465,9 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc } else { - code[i].skip = true; + // We don't set "code[i].skip = true" here + // because bx may store a certain value to the link register. + // Instead, we skip a part of bx in Jit**::bx(). address = destination; } }