Jit64/JitIL: Enabled block merging to improve performance. This improves the fps by 4-5 in some games. However, it decreases the fps by 10 in other games, such as MP2. In this commit, the actual block merging is disabled. If you want to try block merging, please set FUNCTION_FOLLOWING_THRESHOLD to a positive integer.

Increased the size of the code buffer to prevent cache clearing when block merging is enabled.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6193 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
nodchip 2010-09-09 02:14:03 +00:00
parent bf4a18e08c
commit ccb96be9b3
5 changed files with 48 additions and 29 deletions

View File

@ -158,7 +158,7 @@ ps_adds1
*/ */
static int CODE_SIZE = 1024*1024*16; static int CODE_SIZE = 1024*1024*32;
namespace CPUCompare namespace CPUCompare
{ {

View File

@ -81,13 +81,21 @@ void Jit64::bx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(Branch) JITDISABLE(Branch)
// We must always execute the following statement,
// even if the blocks are merged by PPCAnalyst::Flatten().
if (inst.LK) if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); MOV(32, M(&LR), Imm32(js.compilerPC + 4));
// If this is not the last instruction of a block,
// we skip the rest of the processing,
// because PPCAnalyst::Flatten() merged the blocks.
if (!js.isLastInstruction) {
return;
}
gpr.Flush(FLUSH_ALL); gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL);
if (js.isLastInstruction)
{
u32 destination; u32 destination;
if (inst.AA) if (inst.AA)
destination = SignExt26(inst.LI << 2); destination = SignExt26(inst.LI << 2);
@ -106,11 +114,6 @@ void Jit64::bx(UGeckoInstruction inst)
js.downcountAmount += 8; js.downcountAmount += 8;
} }
WriteExit(destination, 0); WriteExit(destination, 0);
}
else {
// TODO: investigate the good old method of merging blocks here.
PanicAlert("bx not last instruction of block"); // this should not happen
}
} }
// TODO - optimize to hell and beyond // TODO - optimize to hell and beyond

View File

@ -152,7 +152,7 @@ ps_adds1
*/ */
static int CODE_SIZE = 1024*1024*16; static int CODE_SIZE = 1024*1024*32;
namespace CPUCompare namespace CPUCompare
{ {

View File

@ -59,9 +59,18 @@ void JitIL::bx(UGeckoInstruction inst)
NORMALBRANCH_START NORMALBRANCH_START
INSTRUCTION_START; INSTRUCTION_START;
// We must always execute the following statement,
// even if the blocks are merged by PPCAnalyst::Flatten().
if (inst.LK) if (inst.LK)
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4)); ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));
// If this is not the last instruction of a block,
// we skip the rest of the processing,
// because PPCAnalyst::Flatten() merged the blocks.
if (!js.isLastInstruction) {
return;
}
u32 destination; u32 destination;
if (inst.AA) if (inst.AA)
destination = SignExt26(inst.LI << 2); destination = SignExt26(inst.LI << 2);

View File

@ -40,10 +40,9 @@ namespace PPCAnalyst {
using namespace std; using namespace std;
enum static const int CODEBUFFER_SIZE = 32000;
{ // 0 does not perform block merging
CODEBUFFER_SIZE = 32000, static const int FUNCTION_FOLLOWING_THRESHOLD = 0;
};
CodeBuffer::CodeBuffer(int size) CodeBuffer::CodeBuffer(int size)
{ {
@ -446,9 +445,15 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc
} }
if (follow) if (follow)
numFollows++; numFollows++;
if (numFollows > 1) // TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD.
follow = false; // If it is small, the performance will be down.
// If it is big, the size of generated code will be big and
// cache clearing will happen many times.
// TODO: Investigate the reason why
// "0" is fastest in some games, MP2 for example.
if (numFollows > FUNCTION_FOLLOWING_THRESHOLD)
follow = false; follow = false;
if (!follow) if (!follow)
{ {
if (opinfo->flags & FL_ENDBLOCK) //right now we stop early if (opinfo->flags & FL_ENDBLOCK) //right now we stop early
@ -460,7 +465,9 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc
} }
else else
{ {
code[i].skip = true; // We don't "code[i].skip = true" here
// because bx may store a certain value to the link register.
// Instead, we skip a part of bx in Jit**::bx().
address = destination; address = destination;
} }
} }