From ccb96be9b3735e30934128190ad71c071ba3e9e4 Mon Sep 17 00:00:00 2001
From: nodchip <nodchip@gmail.com>
Date: Thu, 9 Sep 2010 02:14:03 +0000
Subject: [PATCH] Jit64/JitIL: Enabled block merging to improve
 performance. This raises the fps by 4-5 in some games. However, it
 lowers the fps by 10 in other games, such as MP2. In this commit, the
 actual block merging is disabled. If you want to try block merging, please
 set FUNCTION_FOLLOWING_THRESHOLD to a positive integer.
 Increased the size of the code buffer to prevent cache clearing with block
 merging.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6193 8ced0084-cf51-0410-be5f-012b33b47a6e
---
 Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp    |  2 +-
 .../Core/Src/PowerPC/Jit64/Jit_Branch.cpp     | 43 ++++++++++---------
 .../Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp   |  2 +-
 .../Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp |  9 ++++
 Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp   | 21 ++++++---
 5 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
index f402f3542d..8db6e94bbf 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
@@ -158,7 +158,7 @@ ps_adds1
 
 */
 
-static int CODE_SIZE = 1024*1024*16;
+static int CODE_SIZE = 1024*1024*32;
 
 namespace CPUCompare
 {
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
index 949fca58c8..aef25e9779 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
@@ -81,36 +81,39 @@ void Jit64::bx(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(Branch)
 
+	// We must always execute the following statement,
+	// even if the blocks are merged by PPCAnalyst::Flatten().
 	if (inst.LK)
 		MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+
+	// If this is not the last instruction of a block,
+	// skip the rest of the processing,
+	// because PPCAnalyst::Flatten() merged the blocks.
+	if (!js.isLastInstruction) {
+		return;
+	}
+
 	gpr.Flush(FLUSH_ALL);
 	fpr.Flush(FLUSH_ALL);
 
-	if (js.isLastInstruction)
-	{
-		u32 destination;
-		if (inst.AA)
-			destination = SignExt26(inst.LI << 2);
-		else
-			destination = js.compilerPC + SignExt26(inst.LI << 2);
+	u32 destination;
+	if (inst.AA)
+		destination = SignExt26(inst.LI << 2);
+	else
+		destination = js.compilerPC + SignExt26(inst.LI << 2);
 #ifdef ACID_TEST
-		if (inst.LK)
-			AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000)));
+	if (inst.LK)
+		AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000)));
 #endif
-		if (destination == js.compilerPC)
-		{
-			//PanicAlert("Idle loop detected at %08x", destination);
+	if (destination == js.compilerPC)
+	{
+		//PanicAlert("Idle loop detected at %08x", destination);
 		//	CALL(ProtectFunction(&CoreTiming::Idle, 0));
 		//	JMP(Asm::testExceptions, true);
-			// make idle loops go faster
-			js.downcountAmount += 8;
-		}
-		WriteExit(destination, 0);
-	}
-	else {
-		// TODO: investigate the good old method of merging blocks here.
-		PanicAlert("bx not last instruction of block"); // this should not happen
+		// make idle loops go faster
+		js.downcountAmount += 8;
 	}
+	WriteExit(destination, 0);
 }
 
 // TODO - optimize to hell and beyond
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp
index 71c96c3a6a..42b82a532e 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp
@@ -152,7 +152,7 @@ ps_adds1
 
 */
 
-static int CODE_SIZE = 1024*1024*16;
+static int CODE_SIZE = 1024*1024*32;
 
 namespace CPUCompare
 {
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp
index e022489f9c..608d362032 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp
@@ -59,9 +59,18 @@ void JitIL::bx(UGeckoInstruction inst)
 	NORMALBRANCH_START
 	INSTRUCTION_START;
 
+	// We must always execute the following statement,
+	// even if the blocks are merged by PPCAnalyst::Flatten().
 	if (inst.LK)
 		ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));
 
+	// If this is not the last instruction of a block,
+	// skip the rest of the processing,
+	// because PPCAnalyst::Flatten() merged the blocks.
+	if (!js.isLastInstruction) {
+		return;
+	}
+
 	u32 destination;
 	if (inst.AA)
 		destination = SignExt26(inst.LI << 2);
diff --git a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp
index 0f5744b516..55a322a5e0 100644
--- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp
+++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp
@@ -40,10 +40,9 @@ namespace PPCAnalyst {
 
 using namespace std;
 
-enum
-{
-	CODEBUFFER_SIZE = 32000,
-};
+static const int CODEBUFFER_SIZE = 32000;
+// A value of 0 disables block merging.
+static const int FUNCTION_FOLLOWING_THRESHOLD = 0;
 
 CodeBuffer::CodeBuffer(int size)
 {
@@ -446,9 +445,15 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc
 			}
 			if (follow)
 				numFollows++;
-			if (numFollows > 1)
+			// TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD.
+			//       If it is too small, performance will suffer.
+			//       If it is too big, the generated code will be large and
+			//       cache clearing will happen many times.
+			// TODO: Investigate the reason why
+			//       "0" is fastest in some games, MP2 for example.
+			if (numFollows > FUNCTION_FOLLOWING_THRESHOLD)
 				follow = false;
-			follow = false;
+
 			if (!follow)
 			{
 				if (opinfo->flags & FL_ENDBLOCK) //right now we stop early
@@ -460,7 +465,9 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc
 			}
 			else
 			{
-				code[i].skip = true;
+				// We don't set "code[i].skip = true" here
+				// because bx may store a value to the link register.
+				// Instead, we skip part of bx in Jit**::bx().
 				address = destination;
 			}
 		}