PPCRec: Use vector for segment list + deduplicate RA file

Commit f523b2152d (parent 4c16397cf4), from the mirror of https://github.com/cemu-project/Cemu.git
@@ -74,7 +74,7 @@ add_library(CemuCafe
HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp
HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp
HW/Espresso/Recompiler/PPCRecompilerImlRanges.h
HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp
HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp
HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp
HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp
@@ -173,9 +173,8 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
// collect list of PPC-->x64 entry points
entryPointsOut.clear();
for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++)
for(PPCRecImlSegment_t* imlSegment : ppcImlGenContext.segmentList2)
{
PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
if (imlSegment->isEnterable == false)
continue;
@@ -321,9 +321,10 @@ struct ppcImlGenContext_t
sint32 imlListSize;
sint32 imlListCount;
// list of segments
PPCRecImlSegment_t** segmentList;
sint32 segmentListSize;
sint32 segmentListCount;
//PPCRecImlSegment_t** segmentList;
//sint32 segmentListSize;
//sint32 segmentListCount;
std::vector<PPCRecImlSegment_t*> segmentList2;
// code generation control
bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode
// register allocator info
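The struct change above is the core of the commit: the manually managed segmentList pointer array and its segmentListSize/segmentListCount bookkeeping are replaced by a single std::vector named segmentList2. A minimal sketch of that general pattern, using simplified placeholder types rather than the actual Cemu declarations:

#include <vector>

struct Segment { /* payload omitted */ };

struct GenContext
{
	// before: Segment** segmentList; sint32 segmentListSize; sint32 segmentListCount;
	// after: the vector tracks capacity and element count itself
	std::vector<Segment*> segmentList2;
};

void example(GenContext& ctx)
{
	ctx.segmentList2.push_back(new Segment()); // no manual realloc/count updates
	for (Segment* seg : ctx.segmentList2)      // index loops become range-for
	{
		(void)seg; // ... per-segment work ...
	}
}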
@@ -3450,10 +3450,9 @@ void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segment

void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext)
{
for(sint32 f=0; f<ppcImlGenContext->segmentListCount; f++)
for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++)
{
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[f];
PPCRecompiler_dumpIMLSegment(imlSegment, f);
PPCRecompiler_dumpIMLSegment(ppcImlGenContext->segmentList2[i], i);
debug_printf("\n");
}
}
@@ -3548,43 +3547,18 @@ PPCRecImlInstruction_t* PPCRecompiler_appendInstruction(PPCRecImlSegment_t* imlS
return imlSegment->imlList + index;
}

void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count)
PPCRecImlSegment_t* PPCRecompilerIml_appendSegment(ppcImlGenContext_t* ppcImlGenContext)
{
if( (ppcImlGenContext->segmentListCount+count) > ppcImlGenContext->segmentListSize )
{
// allocate space for more segments
ppcImlGenContext->segmentListSize += count;
ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*));
}
for(sint32 i=(sint32)ppcImlGenContext->segmentListCount-1; i>=index; i--)
{
memcpy(ppcImlGenContext->segmentList+(i+count), ppcImlGenContext->segmentList+i, sizeof(PPCRecImlSegment_t*));
}
ppcImlGenContext->segmentListCount += count;
for(sint32 i=0; i<count; i++)
{
//memset(ppcImlGenContext->segmentList+index+i, 0x00, sizeof(PPCRecImlSegment_t*));
ppcImlGenContext->segmentList[index+i] = (PPCRecImlSegment_t*)malloc(sizeof(PPCRecImlSegment_t));
memset(ppcImlGenContext->segmentList[index+i], 0x00, sizeof(PPCRecImlSegment_t));
ppcImlGenContext->segmentList[index + i]->list_prevSegments = std::vector<PPCRecImlSegment_t*>();
}
PPCRecImlSegment_t* segment = new PPCRecImlSegment_t();
ppcImlGenContext->segmentList2.emplace_back(segment);
return segment;
}

/*
* Allocate and init a new iml instruction segment
*/
PPCRecImlSegment_t* PPCRecompiler_generateImlSegment(ppcImlGenContext_t* ppcImlGenContext)
void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count)
{
if( ppcImlGenContext->segmentListCount >= ppcImlGenContext->segmentListSize )
{
// allocate space for more segments
ppcImlGenContext->segmentListSize *= 2;
ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*));
}
PPCRecImlSegment_t* ppcRecSegment = new PPCRecImlSegment_t();
ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount] = ppcRecSegment;
ppcImlGenContext->segmentListCount++;
return ppcRecSegment;
ppcImlGenContext->segmentList2.insert(ppcImlGenContext->segmentList2.begin() + index, count, nullptr);
for (sint32 i = 0; i < count; i++)
ppcImlGenContext->segmentList2[index + i] = new PPCRecImlSegment_t();
}
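With the vector in place, the two helpers above shrink considerably: PPCRecompilerIml_appendSegment replaces the realloc-and-grow path of PPCRecompiler_generateImlSegment, and PPCRecompilerIml_insertSegments drops the backwards memcpy shuffle. A rough sketch of the same two operations on the simplified GenContext/Segment types from the previous example (not the exact Cemu signatures):

Segment* appendSegment(GenContext& ctx)
{
	Segment* seg = new Segment();
	ctx.segmentList2.emplace_back(seg); // vector growth replaces realloc + count increment
	return seg;
}

void insertSegments(GenContext& ctx, size_t index, size_t count)
{
	// open a gap of 'count' slots at 'index', then fill each slot with a fresh segment;
	// this replaces the old shift-by-memcpy loop over the raw pointer array
	ctx.segmentList2.insert(ctx.segmentList2.begin() + index, count, nullptr);
	for (size_t i = 0; i < count; i++)
		ctx.segmentList2[index + i] = new Segment();
}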

void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext)
@@ -3594,17 +3568,25 @@ void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext)
free(ppcImlGenContext->imlList);
ppcImlGenContext->imlList = nullptr;
}
for(sint32 i=0; i<ppcImlGenContext->segmentListCount; i++)
for (PPCRecImlSegment_t* imlSegment : ppcImlGenContext->segmentList2)
{
free(ppcImlGenContext->segmentList[i]->imlList);
delete ppcImlGenContext->segmentList[i];
}
ppcImlGenContext->segmentListCount = 0;
if (ppcImlGenContext->segmentList)
{
free(ppcImlGenContext->segmentList);
ppcImlGenContext->segmentList = nullptr;
free(imlSegment->imlList);
delete imlSegment;
}
ppcImlGenContext->segmentList2.clear();

//for(sint32 i=0; i<ppcImlGenContext->segmentListCount; i++)
//{
// free(ppcImlGenContext->segmentList[i]->imlList);
// delete ppcImlGenContext->segmentList[i];
//}
//ppcImlGenContext->segmentListCount = 0;
//if (ppcImlGenContext->segmentList)
//{
// free(ppcImlGenContext->segmentList);
// ppcImlGenContext->segmentList = nullptr;
//}
}

bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml)
@@ -4598,9 +4580,8 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
// divide iml instructions into segments
// each segment is defined by one or more instructions with no branches or jump destinations in between
// a branch instruction may only be the very last instruction of a segment
ppcImlGenContext.segmentListCount = 0;
ppcImlGenContext.segmentListSize = 2;
ppcImlGenContext.segmentList = (PPCRecImlSegment_t**)malloc(ppcImlGenContext.segmentListSize*sizeof(PPCRecImlSegment_t*));
cemu_assert_debug(ppcImlGenContext.segmentList2.empty());

sint32 segmentStart = 0;
sint32 segmentImlIndex = 0;
while( segmentImlIndex < ppcImlGenContext.imlListCount )
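The comments above state the segmentation invariant: a segment is a straight-line run of IML instructions, a branch may only appear as the very last instruction, and jump destinations start a new segment. The hunks that follow adapt the existing splitting loop to the new PPCRecompilerIml_appendSegment helper; as orientation, here is a condensed, self-contained sketch of just the branch-terminates-segment rule (jump destinations and the empty-segment case are omitted, and the types are simplified stand-ins, not the Cemu code):

#include <cstddef>
#include <vector>

struct SegRange { size_t startOffset; size_t count; };

std::vector<SegRange> splitAtBranches(const std::vector<bool>& isBranchAt)
{
	std::vector<SegRange> segments;
	size_t segmentStart = 0;
	for (size_t i = 0; i < isBranchAt.size(); i++)
	{
		if (!isBranchAt[i])
			continue;
		// a branch closes the current segment and becomes its last instruction
		segments.push_back({ segmentStart, i - segmentStart + 1 });
		segmentStart = i + 1;
	}
	if (segmentStart < isBranchAt.size())
		segments.push_back({ segmentStart, isBranchAt.size() - segmentStart }); // trailing straight-line code
	return segments;
}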
@@ -4619,7 +4600,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
(ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) )
{
// segment ends after current instruction
PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext);
PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext);
ppcRecSegment->startOffset = segmentStart;
ppcRecSegment->count = segmentImlIndex-segmentStart+1;
ppcRecSegment->ppcAddress = 0xFFFFFFFF;
@@ -4631,7 +4612,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
// segment ends before current instruction
if( segmentImlIndex > segmentStart )
{
PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext);
PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext);
ppcRecSegment->startOffset = segmentStart;
ppcRecSegment->count = segmentImlIndex-segmentStart;
ppcRecSegment->ppcAddress = 0xFFFFFFFF;
@ -4643,123 +4624,122 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
||||
if( segmentImlIndex != segmentStart )
|
||||
{
|
||||
// final segment
|
||||
PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext);
|
||||
PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext);
|
||||
ppcRecSegment->startOffset = segmentStart;
|
||||
ppcRecSegment->count = segmentImlIndex-segmentStart;
|
||||
ppcRecSegment->ppcAddress = 0xFFFFFFFF;
|
||||
segmentStart = segmentImlIndex;
|
||||
}
|
||||
// move iml instructions into the segments
|
||||
for(sint32 s=0; s<ppcImlGenContext.segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2)
|
||||
{
|
||||
uint32 imlStartIndex = ppcImlGenContext.segmentList[s]->startOffset;
|
||||
uint32 imlCount = ppcImlGenContext.segmentList[s]->count;
|
||||
uint32 imlStartIndex = segIt->startOffset;
|
||||
uint32 imlCount = segIt->count;
|
||||
if( imlCount > 0 )
|
||||
{
|
||||
ppcImlGenContext.segmentList[s]->imlListSize = imlCount + 4;
|
||||
ppcImlGenContext.segmentList[s]->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t)*ppcImlGenContext.segmentList[s]->imlListSize);
|
||||
ppcImlGenContext.segmentList[s]->imlListCount = imlCount;
|
||||
memcpy(ppcImlGenContext.segmentList[s]->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount);
|
||||
segIt->imlListSize = imlCount + 4;
|
||||
segIt->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t)* segIt->imlListSize);
|
||||
segIt->imlListCount = imlCount;
|
||||
memcpy(segIt->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
// empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code
|
||||
ppcImlGenContext.segmentList[s]->imlList = NULL;
|
||||
ppcImlGenContext.segmentList[s]->imlListSize = 0;
|
||||
ppcImlGenContext.segmentList[s]->imlListCount = 0;
|
||||
segIt->imlList = nullptr;
|
||||
segIt->imlListSize = 0;
|
||||
segIt->imlListCount = 0;
|
||||
}
|
||||
ppcImlGenContext.segmentList[s]->startOffset = 9999999;
|
||||
ppcImlGenContext.segmentList[s]->count = 9999999;
|
||||
segIt->startOffset = 9999999;
|
||||
segIt->count = 9999999;
|
||||
}
|
||||
// clear segment-independent iml list
|
||||
free(ppcImlGenContext.imlList);
|
||||
ppcImlGenContext.imlList = NULL;
|
||||
ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList
|
||||
// calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes)
|
||||
for(sint32 s=0; s<ppcImlGenContext.segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2)
|
||||
{
|
||||
uint32 segmentPPCAddrMin = 0xFFFFFFFF;
|
||||
uint32 segmentPPCAddrMax = 0x00000000;
|
||||
for(sint32 i=0; i<ppcImlGenContext.segmentList[s]->imlListCount; i++)
|
||||
for(sint32 i=0; i< segIt->imlListCount; i++)
|
||||
{
|
||||
if( ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress == 0 )
|
||||
if(segIt->imlList[i].associatedPPCAddress == 0 )
|
||||
continue;
|
||||
//if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP )
|
||||
// continue; // jumpmarks and no-op instructions must not affect segment ppc address range
|
||||
segmentPPCAddrMin = std::min(ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress, segmentPPCAddrMin);
|
||||
segmentPPCAddrMax = std::max(ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress, segmentPPCAddrMax);
|
||||
segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin);
|
||||
segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax);
|
||||
}
|
||||
if( segmentPPCAddrMin != 0xFFFFFFFF )
|
||||
{
|
||||
ppcImlGenContext.segmentList[s]->ppcAddrMin = segmentPPCAddrMin;
|
||||
ppcImlGenContext.segmentList[s]->ppcAddrMax = segmentPPCAddrMax;
|
||||
segIt->ppcAddrMin = segmentPPCAddrMin;
|
||||
segIt->ppcAddrMax = segmentPPCAddrMax;
|
||||
}
|
||||
else
|
||||
{
|
||||
ppcImlGenContext.segmentList[s]->ppcAddrMin = 0;
|
||||
ppcImlGenContext.segmentList[s]->ppcAddrMax = 0;
|
||||
segIt->ppcAddrMin = 0;
|
||||
segIt->ppcAddrMax = 0;
|
||||
}
|
||||
}
|
||||
// certain instructions can change the segment state
|
||||
// ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside)
|
||||
// jumpmarks mark the segment as a jump destination (within the same function)
|
||||
for(sint32 s=0; s<ppcImlGenContext.segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2)
|
||||
{
|
||||
while( ppcImlGenContext.segmentList[s]->imlListCount > 0 )
|
||||
while (segIt->imlListCount > 0)
|
||||
{
|
||||
if( ppcImlGenContext.segmentList[s]->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER )
|
||||
if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER)
|
||||
{
|
||||
// mark segment as enterable
|
||||
if( ppcImlGenContext.segmentList[s]->isEnterable )
|
||||
if (segIt->isEnterable)
|
||||
assert_dbg(); // should not happen?
|
||||
ppcImlGenContext.segmentList[s]->isEnterable = true;
|
||||
ppcImlGenContext.segmentList[s]->enterPPCAddress = ppcImlGenContext.segmentList[s]->imlList[0].op_ppcEnter.ppcAddress;
|
||||
segIt->isEnterable = true;
|
||||
segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress;
|
||||
// remove ppc_enter instruction
|
||||
ppcImlGenContext.segmentList[s]->imlList[0].type = PPCREC_IML_TYPE_NO_OP;
|
||||
ppcImlGenContext.segmentList[s]->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
|
||||
ppcImlGenContext.segmentList[s]->imlList[0].associatedPPCAddress = 0;
|
||||
segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP;
|
||||
segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
|
||||
segIt->imlList[0].associatedPPCAddress = 0;
|
||||
}
|
||||
else if( ppcImlGenContext.segmentList[s]->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK )
|
||||
else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK )
|
||||
{
|
||||
// mark segment as jump destination
|
||||
if( ppcImlGenContext.segmentList[s]->isJumpDestination )
|
||||
if(segIt->isJumpDestination )
|
||||
assert_dbg(); // should not happen?
|
||||
ppcImlGenContext.segmentList[s]->isJumpDestination = true;
|
||||
ppcImlGenContext.segmentList[s]->jumpDestinationPPCAddress = ppcImlGenContext.segmentList[s]->imlList[0].op_jumpmark.address;
|
||||
segIt->isJumpDestination = true;
|
||||
segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address;
|
||||
// remove jumpmark instruction
|
||||
ppcImlGenContext.segmentList[s]->imlList[0].type = PPCREC_IML_TYPE_NO_OP;
|
||||
ppcImlGenContext.segmentList[s]->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
|
||||
ppcImlGenContext.segmentList[s]->imlList[0].associatedPPCAddress = 0;
|
||||
segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP;
|
||||
segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
|
||||
segIt->imlList[0].associatedPPCAddress = 0;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
// the first segment is always enterable as the recompiled functions entrypoint
|
||||
ppcImlGenContext.segmentList[0]->isEnterable = true;
|
||||
ppcImlGenContext.segmentList[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress;
|
||||
ppcImlGenContext.segmentList2[0]->isEnterable = true;
|
||||
ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress;
|
||||
|
||||
// link segments for further inter-segment optimization
|
||||
PPCRecompilerIML_linkSegments(&ppcImlGenContext);
|
||||
|
||||
// optimization pass - replace segments with conditional MOVs if possible
|
||||
for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
|
||||
if (imlSegment->nextSegmentBranchNotTaken == NULL || imlSegment->nextSegmentBranchTaken == NULL)
|
||||
if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr)
|
||||
continue; // not a branching segment
|
||||
PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(imlSegment);
|
||||
PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(segIt);
|
||||
if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0)
|
||||
continue;
|
||||
PPCRecImlSegment_t* conditionalSegment = imlSegment->nextSegmentBranchNotTaken;
|
||||
PPCRecImlSegment_t* finalSegment = imlSegment->nextSegmentBranchTaken;
|
||||
if(imlSegment->nextSegmentBranchTaken != imlSegment->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken)
|
||||
PPCRecImlSegment_t* conditionalSegment = segIt->nextSegmentBranchNotTaken;
|
||||
PPCRecImlSegment_t* finalSegment = segIt->nextSegmentBranchTaken;
|
||||
if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken)
|
||||
continue;
|
||||
if (imlSegment->nextSegmentBranchNotTaken->imlListCount > 4)
|
||||
if (segIt->nextSegmentBranchNotTaken->imlListCount > 4)
|
||||
continue;
|
||||
if(conditionalSegment->list_prevSegments.size() != 1)
|
||||
if (conditionalSegment->list_prevSegments.size() != 1)
|
||||
continue; // the reduced segment must not be the target of any other branch
|
||||
if(conditionalSegment->isEnterable)
|
||||
if (conditionalSegment->isEnterable)
|
||||
continue;
|
||||
// check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment)
|
||||
bool canReduceSegment = true;
|
||||
@ -4788,16 +4768,16 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
||||
{
|
||||
PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList + f;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
|
||||
PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(imlSegment), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet);
|
||||
PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet);
|
||||
else
|
||||
assert_dbg();
|
||||
}
|
||||
// update segment links
|
||||
// source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment
|
||||
PPCRecompilerIML_removeLink(imlSegment, conditionalSegment);
|
||||
PPCRecompilerIML_removeLink(imlSegment, finalSegment);
|
||||
PPCRecompilerIML_removeLink(segIt, conditionalSegment);
|
||||
PPCRecompilerIML_removeLink(segIt, finalSegment);
|
||||
PPCRecompilerIML_removeLink(conditionalSegment, finalSegment);
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, finalSegment);
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(segIt, finalSegment);
|
||||
// remove all instructions from conditional segment
|
||||
conditionalSegment->imlListCount = 0;
|
||||
|
||||
@ -4805,23 +4785,23 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
||||
if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1)
|
||||
{
|
||||
// todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments()
|
||||
PPCRecompilerIML_removeLink(imlSegment, finalSegment);
|
||||
PPCRecompilerIML_removeLink(segIt, finalSegment);
|
||||
if (finalSegment->nextSegmentBranchNotTaken)
|
||||
{
|
||||
PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchNotTaken;
|
||||
PPCRecompilerIML_removeLink(finalSegment, tempSegment);
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, tempSegment);
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(segIt, tempSegment);
|
||||
}
|
||||
if (finalSegment->nextSegmentBranchTaken)
|
||||
{
|
||||
PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchTaken;
|
||||
PPCRecompilerIML_removeLink(finalSegment, tempSegment);
|
||||
PPCRecompilerIml_setLinkBranchTaken(imlSegment, tempSegment);
|
||||
PPCRecompilerIml_setLinkBranchTaken(segIt, tempSegment);
|
||||
}
|
||||
// copy IML instructions
|
||||
for (sint32 f = 0; f < finalSegment->imlListCount; f++)
|
||||
{
|
||||
memcpy(PPCRecompiler_appendInstruction(imlSegment), finalSegment->imlList + f, sizeof(PPCRecImlInstruction_t));
|
||||
memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList + f, sizeof(PPCRecImlInstruction_t));
|
||||
}
|
||||
finalSegment->imlListCount = 0;
|
||||
|
||||
@ -4832,33 +4812,32 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
||||
}
|
||||
|
||||
// insert cycle counter instruction in every segment that has a cycle count greater zero
|
||||
for(sint32 s=0; s<ppcImlGenContext.segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
|
||||
if( imlSegment->ppcAddrMin == 0 )
|
||||
if( segIt->ppcAddrMin == 0 )
|
||||
continue;
|
||||
// count number of PPC instructions in segment
|
||||
// note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC
|
||||
// note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions
|
||||
uint32 lastPPCInstAddr = 0;
|
||||
uint32 ppcCount2 = 0;
|
||||
for (sint32 i = 0; i < imlSegment->imlListCount; i++)
|
||||
for (sint32 i = 0; i < segIt->imlListCount; i++)
|
||||
{
|
||||
if (imlSegment->imlList[i].associatedPPCAddress == 0)
|
||||
if (segIt->imlList[i].associatedPPCAddress == 0)
|
||||
continue;
|
||||
if (imlSegment->imlList[i].associatedPPCAddress == lastPPCInstAddr)
|
||||
if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr)
|
||||
continue;
|
||||
lastPPCInstAddr = imlSegment->imlList[i].associatedPPCAddress;
|
||||
lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress;
|
||||
ppcCount2++;
|
||||
}
|
||||
//uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions
|
||||
uint32 cycleCount = ppcCount2;// ppcCount / 4;
|
||||
if( cycleCount > 0 )
|
||||
{
|
||||
PPCRecompiler_pushBackIMLInstructions(imlSegment, 0, 1);
|
||||
imlSegment->imlList[0].type = PPCREC_IML_TYPE_MACRO;
|
||||
imlSegment->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
|
||||
imlSegment->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES;
|
||||
imlSegment->imlList[0].op_macro.param = cycleCount;
|
||||
PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1);
|
||||
segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO;
|
||||
segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
|
||||
segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES;
|
||||
segIt->imlList[0].op_macro.param = cycleCount;
|
||||
}
|
||||
}
|
||||
|
||||
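The cycle-counter pass above derives a segment's cycle estimate by counting distinct PPC instruction addresses: IML instructions with no associated address are skipped, and consecutive IML instructions expanded from the same PPC instruction are counted once. A standalone sketch of that counting step, with a plain address array standing in for imlList[i].associatedPPCAddress:

#include <cstdint>
#include <vector>

uint32_t countPpcInstructions(const std::vector<uint32_t>& associatedPpcAddresses)
{
	uint32_t lastCountedAddr = 0;
	uint32_t count = 0;
	for (uint32_t addr : associatedPpcAddresses)
	{
		if (addr == 0)                 // no associated PPC instruction
			continue;
		if (addr == lastCountedAddr)   // still the same PPC instruction
			continue;
		lastCountedAddr = addr;
		count++;
	}
	return count; // used as the COUNT_CYCLES macro parameter when greater than zero
}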
@ -4866,10 +4845,10 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
||||
// for these segments there is a risk that the recompiler could get trapped in an infinite busy loop.
|
||||
// todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example)
|
||||
uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located
|
||||
for(sint32 s=0; s<ppcImlGenContext.segmentListCount; s++)
|
||||
for(size_t s=0; s<ppcImlGenContext.segmentList2.size(); s++)
|
||||
{
|
||||
// todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?)
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList2[s];
|
||||
if( imlSegment->imlListCount == 0 )
|
||||
continue;
|
||||
if (imlSegment->imlList[imlSegment->imlListCount - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlListCount - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin)
|
||||
@ -4891,12 +4870,12 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
||||
|
||||
PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2);
|
||||
imlSegment = NULL;
|
||||
PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext.segmentList[s+0];
|
||||
PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext.segmentList[s+1];
|
||||
PPCRecImlSegment_t* imlSegmentP2 = ppcImlGenContext.segmentList[s+2];
|
||||
PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext.segmentList2[s+0];
|
||||
PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext.segmentList2[s+1];
|
||||
PPCRecImlSegment_t* imlSegmentP2 = ppcImlGenContext.segmentList2[s+2];
|
||||
// create entry point segment
|
||||
PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentListCount, 1);
|
||||
PPCRecImlSegment_t* imlSegmentPEntry = ppcImlGenContext.segmentList[ppcImlGenContext.segmentListCount-1];
|
||||
PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1);
|
||||
PPCRecImlSegment_t* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size()-1];
|
||||
// relink segments
|
||||
PPCRecompilerIML_relinkInputSegment(imlSegmentP2, imlSegmentP0);
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
|
||||
@ -4972,16 +4951,15 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
||||
}
|
||||
|
||||
// insert name store instructions at the end of each segment but before branch instructions
|
||||
for(sint32 s=0; s<ppcImlGenContext.segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s];
|
||||
if( ppcImlGenContext.segmentList[s]->imlListCount == 0 )
|
||||
if(segIt->imlListCount == 0 )
|
||||
continue; // ignore empty segments
|
||||
// analyze segment for register usage
|
||||
PPCImlOptimizerUsedRegisters_t registersUsed;
|
||||
for(sint32 i=0; i<imlSegment->imlListCount; i++)
|
||||
for(sint32 i=0; i<segIt->imlListCount; i++)
|
||||
{
|
||||
PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, imlSegment->imlList+i, ®istersUsed);
|
||||
PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, segIt->imlList+i, ®istersUsed);
|
||||
//PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, registersUsed.readGPR1);
|
||||
sint32 accessedTempReg[5];
|
||||
// intermediate FPRs
|
||||
@ -4997,7 +4975,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext
|
||||
uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]];
|
||||
if( regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0+32 )
|
||||
{
|
||||
imlSegment->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true;
|
||||
segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1019,13 +1019,12 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
|
||||
// inefficient algorithm for optimizing away excess registers
|
||||
// we simply load, use and store excess registers into other unused registers when we need to
|
||||
// first we remove all name load and store instructions that involve out-of-bounds registers
|
||||
for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
sint32 imlIndex = 0;
|
||||
while( imlIndex < imlSegment->imlListCount )
|
||||
while( imlIndex < segIt->imlListCount )
|
||||
{
|
||||
PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+imlIndex;
|
||||
PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList+imlIndex;
|
||||
if( imlInstructionItr->type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr->type == PPCREC_IML_TYPE_FPR_NAME_R )
|
||||
{
|
||||
if( imlInstructionItr->op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS )
|
||||
@ -1039,16 +1038,15 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
|
||||
}
|
||||
}
|
||||
// replace registers
|
||||
for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
sint32 imlIndex = 0;
|
||||
while( imlIndex < imlSegment->imlListCount )
|
||||
while( imlIndex < segIt->imlListCount )
|
||||
{
|
||||
PPCImlOptimizerUsedRegisters_t registersUsed;
|
||||
while( true )
|
||||
{
|
||||
PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndex, ®istersUsed);
|
||||
PPCRecompiler_checkRegisterUsage(ppcImlGenContext, segIt->imlList+imlIndex, ®istersUsed);
|
||||
if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS )
|
||||
{
|
||||
// get index of register to replace
|
||||
@ -1091,16 +1089,16 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
|
||||
bool replacedRegisterIsUsed = true;
|
||||
if( unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0+32) )
|
||||
{
|
||||
replacedRegisterIsUsed = imlSegment->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0];
|
||||
replacedRegisterIsUsed = segIt->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0];
|
||||
}
|
||||
// replace registers that are out of range
|
||||
PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndex, fprToReplace, unusedRegisterIndex);
|
||||
PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, segIt->imlList+imlIndex, fprToReplace, unusedRegisterIndex);
|
||||
// add load/store name after instruction
|
||||
PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndex+1, 2);
|
||||
PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex+1, 2);
|
||||
// add load/store before current instruction
|
||||
PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndex, 2);
|
||||
PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2);
|
||||
// name_unusedRegister = unusedRegister
|
||||
PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+(imlIndex+0);
|
||||
PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList+(imlIndex+0);
|
||||
memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t));
|
||||
if( replacedRegisterIsUsed )
|
||||
{
|
||||
@ -1113,7 +1111,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
|
||||
}
|
||||
else
|
||||
imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP;
|
||||
imlInstructionItr = imlSegment->imlList+(imlIndex+1);
|
||||
imlInstructionItr = segIt->imlList+(imlIndex+1);
|
||||
memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t));
|
||||
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME;
|
||||
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
|
||||
@ -1122,7 +1120,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
|
||||
imlInstructionItr->op_r_name.copyWidth = 32;
|
||||
imlInstructionItr->op_r_name.flags = 0;
|
||||
// name_gprToReplace = unusedRegister
|
||||
imlInstructionItr = imlSegment->imlList+(imlIndex+3);
|
||||
imlInstructionItr = segIt->imlList+(imlIndex+3);
|
||||
memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t));
|
||||
imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R;
|
||||
imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN;
|
||||
@ -1131,7 +1129,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte
|
||||
imlInstructionItr->op_r_name.copyWidth = 32;
|
||||
imlInstructionItr->op_r_name.flags = 0;
|
||||
// unusedRegister = name_unusedRegister
|
||||
imlInstructionItr = imlSegment->imlList+(imlIndex+4);
|
||||
imlInstructionItr = segIt->imlList+(imlIndex+4);
|
||||
memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t));
|
||||
if( replacedRegisterIsUsed )
|
||||
{
|
||||
@ -1223,7 +1221,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
|
||||
ppcRecManageRegisters_t rCtx = { 0 };
|
||||
for (sint32 i = 0; i < 64; i++)
|
||||
rCtx.ppcRegToMapping[i] = -1;
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex];
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
|
||||
sint32 idx = 0;
|
||||
sint32 currentUseIndex = 0;
|
||||
PPCImlOptimizerUsedRegisters_t registersUsed;
|
||||
@ -1374,7 +1372,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon
|
||||
|
||||
bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
|
||||
{
|
||||
if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false)
|
||||
return false;
|
||||
@ -1530,9 +1528,9 @@ uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, P
|
||||
}
|
||||
else if (imlSegment->nextSegmentIsUncertain)
|
||||
{
|
||||
if (ppcImlGenContext->segmentListCount >= 5)
|
||||
if (ppcImlGenContext->segmentList2.size() >= 5)
|
||||
{
|
||||
return 7; // for more complex functions we assume that CR is not passed on
|
||||
return 7; // for more complex functions we assume that CR is not passed on (hack)
|
||||
}
|
||||
}
|
||||
return currentOverwriteMask;
|
||||
@ -1568,35 +1566,33 @@ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, PP
|
||||
|
||||
void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
|
||||
for(sint32 i=0; i<imlSegment->imlListCount; i++)
|
||||
for(sint32 i=0; i<segIt->imlListCount; i++)
|
||||
{
|
||||
PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i;
|
||||
PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP)
|
||||
{
|
||||
if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE)
|
||||
{
|
||||
uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex);
|
||||
imlSegment->crBitsInput |= (crBitFlag&~imlSegment->crBitsWritten); // flag bits that have not already been written
|
||||
imlSegment->crBitsRead |= (crBitFlag);
|
||||
segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written
|
||||
segIt->crBitsRead |= (crBitFlag);
|
||||
}
|
||||
}
|
||||
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
|
||||
{
|
||||
uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex);
|
||||
imlSegment->crBitsInput |= (crBitFlag&~imlSegment->crBitsWritten); // flag bits that have not already been written
|
||||
imlSegment->crBitsRead |= (crBitFlag);
|
||||
segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written
|
||||
segIt->crBitsRead |= (crBitFlag);
|
||||
}
|
||||
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR)
|
||||
{
|
||||
imlSegment->crBitsRead |= 0xFFFFFFFF;
|
||||
segIt->crBitsRead |= 0xFFFFFFFF;
|
||||
}
|
||||
else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF)
|
||||
{
|
||||
imlSegment->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
|
||||
segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
|
||||
}
|
||||
else if( imlInstruction->type == PPCREC_IML_TYPE_CR )
|
||||
{
|
||||
@ -1604,7 +1600,7 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext
|
||||
imlInstruction->operation == PPCREC_IML_OP_CR_SET)
|
||||
{
|
||||
uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
|
||||
imlSegment->crBitsWritten |= (crBitFlag & ~imlSegment->crBitsWritten);
|
||||
segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten);
|
||||
}
|
||||
else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR ||
|
||||
imlInstruction->operation == PPCREC_IML_OP_CR_ORC ||
|
||||
@ -1612,38 +1608,37 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext
|
||||
imlInstruction->operation == PPCREC_IML_OP_CR_ANDC)
|
||||
{
|
||||
uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD);
|
||||
imlSegment->crBitsWritten |= (crBitFlag & ~imlSegment->crBitsWritten);
|
||||
segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten);
|
||||
crBitFlag = 1 << (imlInstruction->op_cr.crA);
|
||||
imlSegment->crBitsRead |= (crBitFlag & ~imlSegment->crBitsRead);
|
||||
segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead);
|
||||
crBitFlag = 1 << (imlInstruction->op_cr.crB);
|
||||
imlSegment->crBitsRead |= (crBitFlag & ~imlSegment->crBitsRead);
|
||||
segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead);
|
||||
}
|
||||
else
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
else if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 )
|
||||
{
|
||||
imlSegment->crBitsWritten |= (0xF<<(imlInstruction->crRegister*4));
|
||||
segIt->crBitsWritten |= (0xF<<(imlInstruction->crRegister*4));
|
||||
}
|
||||
else if( (imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER )
|
||||
{
|
||||
// overwrites CR0
|
||||
imlSegment->crBitsWritten |= (0xF<<0);
|
||||
segIt->crBitsWritten |= (0xF<<0);
|
||||
}
|
||||
}
|
||||
}
|
||||
// flag instructions that write to CR where we can ignore individual CR bits
|
||||
for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
for(sint32 i=0; i<imlSegment->imlListCount; i++)
|
||||
for(sint32 i=0; i<segIt->imlListCount; i++)
|
||||
{
|
||||
PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i;
|
||||
PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i;
|
||||
if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 )
|
||||
{
|
||||
uint32 crBitFlags = 0xF<<((uint32)imlInstruction->crRegister*4);
|
||||
uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment);
|
||||
uint32 crIgnoreMask = crOverwriteMask & ~imlSegment->crBitsRead;
|
||||
uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt);
|
||||
uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead;
|
||||
imlInstruction->crIgnoreMask = crIgnoreMask;
|
||||
}
|
||||
}
|
||||
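PPCRecompiler_removeRedundantCRUpdates above works in two passes: the first records, per segment, which CR bits are read and written (crBitsRead/crBitsWritten, plus crBitsInput for bits read before the segment itself writes them), and the second gives every CR-writing instruction a crIgnoreMask of bits whose exact values nothing will observe. A small illustration of the two bitmask steps with hypothetical names; crOverwriteMask is assumed to come from a helper like PPCRecompiler_getCROverwriteMask, i.e. the bits that successor segments overwrite before reading:

#include <cstdint>

struct CrSegmentInfo
{
	uint32_t crBitsInput = 0;   // bits read before this segment writes them
	uint32_t crBitsRead = 0;
	uint32_t crBitsWritten = 0;
};

// record a read of one CR bit (crRegisterIndex 0..7, crBitIndex 0..3),
// as the pass does for conditional jumps and conditional moves
void trackCrBitRead(CrSegmentInfo& seg, uint32_t crRegisterIndex, uint32_t crBitIndex)
{
	uint32_t crBitFlag = 1u << (crRegisterIndex * 4 + crBitIndex);
	seg.crBitsInput |= (crBitFlag & ~seg.crBitsWritten); // only flag bits not already written locally
	seg.crBitsRead |= crBitFlag;
}

// bits that later segments overwrite anyway and that this segment never reads
// do not need their precise value computed by a CR-writing instruction
uint32_t computeCrIgnoreMask(uint32_t crOverwriteMask, const CrSegmentInfo& seg)
{
	return crOverwriteMask & ~seg.crBitsRead;
}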
@ -1805,20 +1800,18 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
|
||||
*/
|
||||
void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
|
||||
for (sint32 i = 0; i < imlSegment->imlListCount; i++)
|
||||
for (sint32 i = 0; i < segIt->imlListCount; i++)
|
||||
{
|
||||
PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i;
|
||||
PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
{
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData);
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1)
|
||||
{
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData);
|
||||
PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1891,16 +1884,14 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp
|
||||
*/
|
||||
void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
|
||||
for (sint32 i = 0; i < imlSegment->imlListCount; i++)
|
||||
for (sint32 i = 0; i < segIt->imlListCount; i++)
|
||||
{
|
||||
PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i;
|
||||
PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian )
|
||||
{
|
||||
PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData);
|
||||
PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1940,12 +1931,11 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32
|
||||
*/
|
||||
void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
for (sint32 i = 0; i < imlSegment->imlListCount; i++)
|
||||
for (sint32 i = 0; i < segIt->imlListCount; i++)
|
||||
{
|
||||
PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i;
|
||||
PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i;
|
||||
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD || imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED)
|
||||
{
|
||||
if(imlInstruction->op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 &&
|
||||
@ -2167,9 +2157,8 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment)
|
||||
void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
// check if this segment has a conditional branch
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
_reorderConditionModifyInstructions(imlSegment);
|
||||
_reorderConditionModifyInstructions(segIt);
|
||||
}
|
||||
}
|
||||
|
@ -628,21 +628,20 @@ void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
// start with frequently executed segments first
|
||||
sint32 maxLoopDepth = 0;
|
||||
for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
maxLoopDepth = std::max(maxLoopDepth, ppcImlGenContext->segmentList[i]->loopDepth);
|
||||
maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth);
|
||||
}
|
||||
while (true)
|
||||
{
|
||||
bool done = false;
|
||||
for (sint32 d = maxLoopDepth; d >= 0; d--)
|
||||
{
|
||||
for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i];
|
||||
if (imlSegment->loopDepth != d)
|
||||
if (segIt->loopDepth != d)
|
||||
continue;
|
||||
done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, imlSegment);
|
||||
done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, segIt);
|
||||
if (done == false)
|
||||
break;
|
||||
}
|
||||
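PPCRecRA_assignRegisters above orders its work by loop depth: it first finds the maximum loopDepth over all segments, then repeatedly walks the segment list from the deepest nesting level downwards so the most frequently executed segments get registers assigned first. A condensed sketch of that ordering with a hypothetical per-segment callback; the while(true) retry handling visible in the hunk is omitted:

#include <algorithm>
#include <functional>
#include <vector>

struct RaSegment { int loopDepth = 0; /* ... */ };

// returns false as soon as a segment could not be fully assigned; the surrounding
// while(true) in the hunk retries in that case (not reproduced here)
bool assignByLoopDepth(std::vector<RaSegment*>& segments,
                       const std::function<bool(RaSegment*)>& assignSegment)
{
	int maxLoopDepth = 0;
	for (RaSegment* seg : segments)
		maxLoopDepth = std::max(maxLoopDepth, seg->loopDepth);
	// deepest loop nesting first: the hottest code gets first pick of the registers
	for (int d = maxLoopDepth; d >= 0; d--)
		for (RaSegment* seg : segments)
			if (seg->loopDepth == d && !assignSegment(seg))
				return false;
	return true;
}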
@ -932,9 +931,9 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext,
|
||||
|
||||
void PPCRecRA_generateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s];
|
||||
PPCRecRA_generateSegmentInstructions(ppcImlGenContext, imlSegment);
|
||||
}
|
||||
}
|
||||
@ -947,10 +946,10 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen
|
||||
{
|
||||
// insert empty segments after every non-taken branch if the linked segment has more than one input
|
||||
// this gives the register allocator more room to create efficient spill code
|
||||
sint32 segmentIndex = 0;
|
||||
while (segmentIndex < ppcImlGenContext->segmentListCount)
|
||||
size_t segmentIndex = 0;
|
||||
while (segmentIndex < ppcImlGenContext->segmentList2.size())
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex];
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex];
|
||||
if (imlSegment->nextSegmentIsUncertain)
|
||||
{
|
||||
segmentIndex++;
|
||||
@ -972,8 +971,8 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen
|
||||
continue;
|
||||
}
|
||||
PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1);
|
||||
PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList[segmentIndex + 0];
|
||||
PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext->segmentList[segmentIndex + 1];
|
||||
PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0];
|
||||
PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1];
|
||||
PPCRecImlSegment_t* nextSegment = imlSegment->nextSegmentBranchNotTaken;
|
||||
PPCRecompilerIML_removeLink(imlSegmentP0, nextSegment);
|
||||
PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP1, nextSegment);
|
||||
@ -981,14 +980,14 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen
|
||||
segmentIndex++;
|
||||
}
|
||||
// detect loops
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s];
|
||||
imlSegment->momentaryIndex = s;
|
||||
}
|
||||
for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
|
||||
for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
|
||||
{
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
|
||||
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s];
|
||||
PPCRecRA_identifyLoop(ppcImlGenContext, imlSegment);
|
||||
}
|
||||
}
|
||||
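The preparation pass above does two things: it splits every branch-not-taken edge whose target has more than one input by inserting an empty segment (giving the register allocator a private spot for spill code), and it then numbers the segments (momentaryIndex) and runs loop detection. A self-contained sketch of the edge-splitting step; the link helpers are hypothetical stand-ins for PPCRecompilerIML_removeLink / PPCRecompilerIml_setLinkBranchNotTaken, and the final relink of the original segment to the new padding segment is an assumption, since that part of the function is cut off in this hunk:

#include <cstddef>
#include <vector>

struct LinkedSegment
{
	LinkedSegment* nextSegmentBranchNotTaken = nullptr;
	// ... other links and payload omitted ...
};

// hypothetical link-manipulation helpers
void removeLink(LinkedSegment* from, LinkedSegment* to);
void setLinkBranchNotTaken(LinkedSegment* from, LinkedSegment* to);

// split the branch-not-taken edge A -> B by inserting an empty segment P, so spill
// code for values live across that edge has a home no other predecessor of B shares
void splitNotTakenEdge(std::vector<LinkedSegment*>& segmentList2, size_t indexOfA)
{
	segmentList2.insert(segmentList2.begin() + indexOfA + 1, new LinkedSegment()); // empty segment P after A
	LinkedSegment* a = segmentList2[indexOfA + 0];
	LinkedSegment* p = segmentList2[indexOfA + 1];
	LinkedSegment* b = a->nextSegmentBranchNotTaken;
	removeLink(a, b);             // A no longer links straight to B
	setLinkBranchNotTaken(p, b);  // P falls through to B instead
	setLinkBranchNotTaken(a, p);  // assumed final relink; this line is not visible in the hunk
}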
@ -1009,4 +1008,411 @@ void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext)
|
||||
PPCRecRA_generateMoveInstructions(ppcImlGenContext);
|
||||
|
||||
PPCRecRA_deleteAllRanges(ppcImlGenContext);
|
||||
}
|
||||
|
||||
|
||||
bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR)
|
||||
{
|
||||
return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX);
|
||||
}
|
||||
|
||||
void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
|
||||
{
|
||||
imlSegment->raDistances.reg[i].usageStart = INT_MAX;
|
||||
imlSegment->raDistances.reg[i].usageEnd = INT_MIN;
|
||||
}
|
||||
// scan instructions for usage range
|
||||
sint32 index = 0;
|
||||
PPCImlOptimizerUsedRegisters_t gprTracking;
|
||||
while (index < imlSegment->imlListCount)
|
||||
{
|
||||
// end loop at suffix instruction
|
||||
if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index))
|
||||
break;
|
||||
// get accessed GPRs
|
||||
PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);
|
||||
for (sint32 t = 0; t < 4; t++)
|
||||
{
|
||||
sint32 virtualRegister = gprTracking.gpr[t];
|
||||
if (virtualRegister < 0)
|
||||
continue;
|
||||
cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR);
|
||||
imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction
|
||||
imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction
|
||||
}
|
||||
// next instruction
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
|
||||
{
|
||||
// for each register calculate min/max index of usage range within each segment
|
||||
for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
|
||||
{
|
||||
PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, segIt);
|
||||
}
|
||||
}
|
||||
|
||||
raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range)
|
||||
{
|
||||
if (imlSegment->raDistances.isProcessed[vGPR])
|
||||
{
|
||||
// return already existing segment
|
||||
return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR];
|
||||
}
|
||||
imlSegment->raDistances.isProcessed[vGPR] = true;
|
||||
if (_isRangeDefined(imlSegment, vGPR) == false)
|
||||
return nullptr;
|
||||
// create subrange
|
||||
cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr);
|
||||
raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd);
|
||||
// traverse forward
|
||||
if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
|
||||
{
|
||||
if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
|
||||
{
|
||||
subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range);
|
||||
cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START);
|
||||
}
|
||||
if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
|
||||
{
|
||||
subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range);
|
||||
cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START);
|
||||
}
|
||||
}
|
||||
// traverse backward
|
||||
if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
|
||||
{
|
||||
for (auto& it : imlSegment->list_prevSegments)
|
||||
{
|
||||
if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
|
||||
PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range);
|
||||
}
|
||||
}
|
||||
return subrange;
|
||||
}
|
||||
|
||||
void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
|
||||
{
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
|
||||
{
|
||||
if (_isRangeDefined(imlSegment, i) == false)
|
||||
continue;
|
||||
if (imlSegment->raDistances.isProcessed[i])
|
||||
continue;
|
||||
raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]);
|
||||
PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range);
|
||||
}
|
||||
// create lookup table of ranges
|
||||
raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR];
|
||||
for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
|
||||
{
|
||||
vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i];
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr)
|
||||
assert_dbg();
|
||||
#endif
|
||||
}
|
||||
// parse instructions and convert to locations
|
||||
sint32 index = 0;
|
||||
PPCImlOptimizerUsedRegisters_t gprTracking;
|
||||
while (index < imlSegment->imlListCount)
|
||||
{
|
||||
// end loop at suffix instruction
|
||||
if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index))
|
||||
break;
|
||||
// get accessed GPRs
|
||||
PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);
|
||||
// handle accessed GPR
|
||||
for (sint32 t = 0; t < 4; t++)
|
||||
{
|
||||
sint32 virtualRegister = gprTracking.gpr[t];
|
||||
if (virtualRegister < 0)
|
||||
continue;
|
||||
bool isWrite = (t == 3);
|
||||
// add location
|
||||
PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite);
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
if (index < vGPR2Subrange[virtualRegister]->start.index)
|
||||
assert_dbg();
|
||||
if (index + 1 > vGPR2Subrange[virtualRegister]->end.index)
|
||||
assert_dbg();
|
||||
#endif
|
||||
}
|
||||
// next instruction
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
|
||||
{
|
||||
if (_isRangeDefined(imlSegment, vGPR) == false)
|
||||
{
|
||||
imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END;
|
||||
imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
|
||||
return;
|
||||
}
|
||||
imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
|
||||
}
|
||||
|
||||
void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
|
||||
{
|
||||
if (_isRangeDefined(imlSegment, vGPR) == false)
|
||||
{
|
||||
imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START;
|
||||
imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START;
|
||||
}
|
||||
else
|
||||
{
|
||||
imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START;
|
||||
}
|
||||
// propagate backwards
|
||||
for (auto& it : imlSegment->list_prevSegments)
|
||||
{
|
||||
PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR);
|
||||
}
|
||||
}
|
||||
|
void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth)
{
#ifdef CEMU_DEBUG_ASSERT
    if (routeDepth < 2)
        assert_dbg();
#endif
    // extend starting range to end of segment
    PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR);
    // extend all the connecting segments in both directions
    for (sint32 i = 1; i < (routeDepth - 1); i++)
    {
        PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR);
        PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR);
    }
    // extend the final segment towards the beginning
    PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth - 1], vGPR);
}

void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth)
{
    if (routeDepth >= 64)
    {
        forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress);
        return;
    }
    route[routeDepth] = currentSegment;
    if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX)
    {
        // measure distance to end of segment
        distanceLeft -= currentSegment->imlListCount;
        if (distanceLeft > 0)
        {
            if (currentSegment->nextSegmentBranchNotTaken)
                _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1);
            if (currentSegment->nextSegmentBranchTaken)
                _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1);
        }
        return;
    }
    else
    {
        // measure distance to range
        if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END)
        {
            if (distanceLeft < currentSegment->imlListCount)
                return; // range too far away
        }
        else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft)
            return; // out of range
        // found close range -> connect ranges
        _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1);
    }
}

void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR)
{
#ifdef CEMU_DEBUG_ASSERT
    if (currentSegment->raDistances.reg[vGPR].usageEnd < 0)
        assert_dbg();
#endif
    // count instructions to end of initial segment
    if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START)
        assert_dbg();
    sint32 instructionsUntilEndOfSeg;
    if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
        instructionsUntilEndOfSeg = 0;
    else
        instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd;

#ifdef CEMU_DEBUG_ASSERT
    if (instructionsUntilEndOfSeg < 0)
        assert_dbg();
#endif
    sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg;
    if (remainingScanDist <= 0)
        return; // can't reach end

    // also don't forget: Extending is easier if we allow 'non symmetric' branches. E.g. register range one enters one branch
    PPCRecImlSegment_t* route[64];
    route[0] = currentSegment;
    if (currentSegment->nextSegmentBranchNotTaken)
    {
        _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1);
    }
    if (currentSegment->nextSegmentBranchTaken)
    {
        _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1);
    }
}
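// Note: the pair of functions above is a depth-limited DFS over successor segments with an
// instruction budget (45 above): if the register is used again within the budget, every segment
// on the discovered route is extended so the value can stay in a host register across the gap.
// A rough sketch of the same search shape, using hypothetical toy types (not Cemu's):
//
// #include <cstdio>
// #include <vector>
//
// struct ToySeg
// {
//     int instrCount = 0;
//     int nextUseIndex = -1;        // -1 -> register not used in this segment
//     ToySeg* branchTaken = nullptr;
//     ToySeg* branchNotTaken = nullptr;
// };
//
// // returns true if a use of the register is reachable within 'budget' instructions
// static bool findCloseUse(ToySeg* seg, int budget, std::vector<ToySeg*>& route, int maxDepth = 64)
// {
//     if (!seg || (int)route.size() >= maxDepth || budget <= 0)
//         return false;
//     route.push_back(seg);
//     if (seg->nextUseIndex >= 0 && seg->nextUseIndex <= budget)
//         return true;                  // found a nearby use; the caller would now connect the route
//     budget -= seg->instrCount;        // no use here, pay for crossing the whole segment
//     if (findCloseUse(seg->branchNotTaken, budget, route, maxDepth))
//         return true;
//     if (findCloseUse(seg->branchTaken, budget, route, maxDepth))
//         return true;
//     route.pop_back();                 // dead end, backtrack
//     return false;
// }
//
// int main()
// {
//     ToySeg a{ 10, -1 }, b{ 5, -1 }, c{ 3, 2 }; // next use is 2 instructions into c
//     a.branchNotTaken = &b;
//     b.branchNotTaken = &c;
//     std::vector<ToySeg*> route;
//     printf("close use found: %d (route length %zu)\n", (int)findCloseUse(&a, 45, route), route.size());
//     return 0;
// }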
void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
    for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
    {
        if (imlSegment->raDistances.reg[i].usageStart == INT_MAX)
            continue; // not used
        // check and extend if possible
        PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i);
    }
#ifdef CEMU_DEBUG_ASSERT
    if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
        assert_dbg();
    if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain)
        assert_dbg();
#endif
}

void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
    std::vector<PPCRecImlSegment_t*> list_segments;
    list_segments.reserve(1000);
    sint32 index = 0;
    imlSegment->raRangeExtendProcessed = true;
    list_segments.push_back(imlSegment);
    while (index < list_segments.size())
    {
        PPCRecImlSegment_t* currentSegment = list_segments[index];
        PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment);
        // follow flow
        if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false)
        {
            currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true;
            list_segments.push_back(currentSegment->nextSegmentBranchNotTaken);
        }
        if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false)
        {
            currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true;
            list_segments.push_back(currentSegment->nextSegmentBranchTaken);
        }
        index++;
    }
}
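// Note: PPCRecRA_followFlowAndExtendRanges() above is a plain worklist traversal: a vector doubles
// as the queue (with 'index' as the read cursor) and a per-segment flag prevents revisits, so every
// reachable segment is processed exactly once. The same pattern in isolation, with hypothetical toy
// types (not Cemu's):
//
// #include <cstdio>
// #include <vector>
//
// struct Node
// {
//     bool visited = false;
//     std::vector<Node*> successors;
// };
//
// template<typename Fn>
// void forEachReachable(Node* entry, Fn&& process)
// {
//     std::vector<Node*> worklist;
//     entry->visited = true;
//     worklist.push_back(entry);
//     for (size_t index = 0; index < worklist.size(); index++) // worklist grows while we iterate
//     {
//         Node* current = worklist[index];
//         process(current);
//         for (Node* next : current->successors)
//         {
//             if (next->visited)
//                 continue;
//             next->visited = true;
//             worklist.push_back(next);
//         }
//     }
// }
//
// int main()
// {
//     Node a, b, c;
//     a.successors = { &b, &c };
//     b.successors = { &c }; // c is reachable twice but processed once
//     int count = 0;
//     forEachReachable(&a, [&](Node*) { count++; });
//     printf("processed %d nodes\n", count); // prints 3
//     return 0;
// }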
void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
    for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s];
        if (imlSegment->list_prevSegments.empty())
        {
            if (imlSegment->raRangeExtendProcessed)
                assert_dbg(); // should not happen
            PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment);
        }
    }
}

void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext)
{
    for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s];
        auto localLoopDepth = imlSegment->loopDepth;
        if (localLoopDepth <= 0)
            continue; // not inside a loop
        // look for loop exit
        bool hasLoopExit = false;
        if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth)
        {
            hasLoopExit = true;
        }
        if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth)
        {
            hasLoopExit = true;
        }
        if (hasLoopExit == false)
            continue;

        // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop)
        for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
        {
            if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END)
                continue; // range not set or does not reach end of segment
            if (imlSegment->nextSegmentBranchTaken)
                PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i);
            if (imlSegment->nextSegmentBranchNotTaken)
                PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i);
        }
    }
}
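// Note: the loop pass above treats an edge as a loop exit when the successor's loopDepth is smaller
// than the current segment's; ranges that reach the segment end are then stretched into those exits
// so the later data-flow pass can sink register stores out of the loop body. Exit detection on its
// own, with hypothetical toy types (not Cemu's):
//
// #include <cstdio>
// #include <vector>
//
// struct LoopSeg
// {
//     int loopDepth = 0;
//     LoopSeg* branchTaken = nullptr;
//     LoopSeg* branchNotTaken = nullptr;
// };
//
// static std::vector<LoopSeg*> collectLoopExits(LoopSeg& seg)
// {
//     std::vector<LoopSeg*> exits;
//     if (seg.branchTaken && seg.branchTaken->loopDepth < seg.loopDepth)
//         exits.push_back(seg.branchTaken);
//     if (seg.branchNotTaken && seg.branchNotTaken->loopDepth < seg.loopDepth)
//         exits.push_back(seg.branchNotTaken);
//     return exits;
// }
//
// int main()
// {
//     LoopSeg loopBody{ 1 }, loopHead{ 1 }, afterLoop{ 0 };
//     loopBody.branchTaken = &loopHead;      // back edge stays at depth 1
//     loopBody.branchNotTaken = &afterLoop;  // leaving the loop drops to depth 0
//     printf("exit edges: %zu\n", collectLoopExits(loopBody).size()); // prints 1
//     return 0;
// }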
void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
    // merge close ranges
    PPCRecRA_mergeCloseRangesV2(ppcImlGenContext);
    // extra pass to move register stores out of loops
    PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext);
    // calculate liveness ranges
    for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s];
        PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment);
    }
}

void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange)
{
    bool isRead = false;
    bool isWritten = false;
    bool isOverwritten = false;
    for (auto& location : subrange->list_locations)
    {
        if (location.isRead)
        {
            isRead = true;
        }
        if (location.isWrite)
        {
            if (isRead == false)
                isOverwritten = true;
            isWritten = true;
        }
    }
    subrange->_noLoad = isOverwritten;
    subrange->hasStore = isWritten;

    if (subrange->start.index == RA_INTER_RANGE_START)
        subrange->_noLoad = true;
}
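// Note: PPCRecRA_analyzeSubrangeDataDependencyV2() above derives two per-range facts from the
// location list: if the first access is a write, the old value never has to be loaded (_noLoad),
// and if any access writes, the value has to be stored back eventually (hasStore). The same
// decision in isolation, with hypothetical toy types (not Cemu's):
//
// #include <cstdio>
// #include <vector>
//
// struct Access
// {
//     bool isRead;
//     bool isWrite;
// };
//
// struct RangeFlags
// {
//     bool noLoad = false;   // first access overwrites the value -> skip the initial load
//     bool hasStore = false; // value is modified -> must be written back to the register file
// };
//
// static RangeFlags analyze(const std::vector<Access>& accesses)
// {
//     RangeFlags flags;
//     bool seenRead = false;
//     for (const Access& a : accesses)
//     {
//         if (a.isRead)
//             seenRead = true;
//         if (a.isWrite)
//         {
//             if (!seenRead)
//                 flags.noLoad = true; // overwritten before ever being read
//             flags.hasStore = true;
//         }
//     }
//     return flags;
// }
//
// int main()
// {
//     RangeFlags f = analyze({ { false, true }, { true, false } }); // write first, then read
//     printf("noLoad=%d hasStore=%d\n", (int)f.noLoad, (int)f.hasStore); // prints noLoad=1 hasStore=1
//     return 0;
// }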
void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext)
{
    // this function is called after _assignRegisters(), which means that all ranges are already final and won't change anymore
    // first do a per-subrange pass
    for (auto& range : ppcImlGenContext->raInfo.list_ranges)
    {
        for (auto& subrange : range->list_subranges)
        {
            PPCRecRA_analyzeSubrangeDataDependencyV2(subrange);
        }
    }
    // then do a second pass where we scan along subrange flow
    for (auto& range : ppcImlGenContext->raInfo.list_ranges)
    {
        for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm
        {
            _analyzeRangeDataFlow(subrange);
        }
    }
}
@ -1,414 +0,0 @@
#include "PPCRecompiler.h"
#include "PPCRecompilerIml.h"
#include "PPCRecompilerX64.h"
#include "PPCRecompilerImlRanges.h"
#include <queue>

bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
    return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX);
}

void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
    for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
    {
        imlSegment->raDistances.reg[i].usageStart = INT_MAX;
        imlSegment->raDistances.reg[i].usageEnd = INT_MIN;
    }
    // scan instructions for usage range
    sint32 index = 0;
    PPCImlOptimizerUsedRegisters_t gprTracking;
    while (index < imlSegment->imlListCount)
    {
        // end loop at suffix instruction
        if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index))
            break;
        // get accessed GPRs
        PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);
        for (sint32 t = 0; t < 4; t++)
        {
            sint32 virtualRegister = gprTracking.gpr[t];
            if (virtualRegister < 0)
                continue;
            cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR);
            imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction
            imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index+1); // index after instruction
        }
        // next instruction
        index++;
    }
}

void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
    // for each register calculate min/max index of usage range within each segment
    for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
    {
        PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, ppcImlGenContext->segmentList[s]);
    }
}

raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range)
{
    if (imlSegment->raDistances.isProcessed[vGPR])
    {
        // return already existing segment
        return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR];
    }
    imlSegment->raDistances.isProcessed[vGPR] = true;
    if (_isRangeDefined(imlSegment, vGPR) == false)
        return nullptr;
    // create subrange
    cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr);
    raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd);
    // traverse forward
    if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
    {
        if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
        {
            subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range);
            cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START);
        }
        if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
        {
            subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range);
            cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START);
        }
    }
    // traverse backward
    if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START)
    {
        for (auto& it : imlSegment->list_prevSegments)
        {
            if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
                PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range);
        }
    }
    // return subrange
    return subrange;
}

void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
    for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
    {
        if( _isRangeDefined(imlSegment, i) == false )
            continue;
        if( imlSegment->raDistances.isProcessed[i])
            continue;
        raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]);
        PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range);
    }
    // create lookup table of ranges
    raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR];
    for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++)
    {
        vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i];
#ifdef CEMU_DEBUG_ASSERT
        if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr)
            assert_dbg();
#endif
    }
    // parse instructions and convert to locations
    sint32 index = 0;
    PPCImlOptimizerUsedRegisters_t gprTracking;
    while (index < imlSegment->imlListCount)
    {
        // end loop at suffix instruction
        if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index))
            break;
        // get accessed GPRs
        PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking);
        // handle accessed GPR
        for (sint32 t = 0; t < 4; t++)
        {
            sint32 virtualRegister = gprTracking.gpr[t];
            if (virtualRegister < 0)
                continue;
            bool isWrite = (t == 3);
            // add location
            PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite);
#ifdef CEMU_DEBUG_ASSERT
            if (index < vGPR2Subrange[virtualRegister]->start.index)
                assert_dbg();
            if (index+1 > vGPR2Subrange[virtualRegister]->end.index)
                assert_dbg();
#endif
        }
        // next instruction
        index++;
    }
}

void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
    if (_isRangeDefined(imlSegment, vGPR) == false)
    {
        imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END;
        imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
        return;
    }
    imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END;
}

void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR)
{
    if (_isRangeDefined(imlSegment, vGPR) == false)
    {
        imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START;
        imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START;
    }
    else
    {
        imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START;
    }
    // propagate backwards
    for (auto& it : imlSegment->list_prevSegments)
    {
        PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR);
    }
}

void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth)
{
#ifdef CEMU_DEBUG_ASSERT
    if (routeDepth < 2)
        assert_dbg();
#endif
    // extend starting range to end of segment
    PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR);
    // extend all the connecting segments in both directions
    for (sint32 i = 1; i < (routeDepth - 1); i++)
    {
        PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR);
        PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR);
    }
    // extend the final segment towards the beginning
    PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth-1], vGPR);
}

void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth)
{
    if (routeDepth >= 64)
    {
        cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded for function 0x{:08x}", ppcImlGenContext->functionRef->ppcAddress);
        return;
    }
    route[routeDepth] = currentSegment;
    if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX)
    {
        // measure distance to end of segment
        distanceLeft -= currentSegment->imlListCount;
        if (distanceLeft > 0)
        {
            if (currentSegment->nextSegmentBranchNotTaken)
                _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1);
            if (currentSegment->nextSegmentBranchTaken)
                _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1);
        }
        return;
    }
    else
    {
        // measure distance to range
        if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END)
        {
            if (distanceLeft < currentSegment->imlListCount)
                return; // range too far away
        }
        else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft)
            return; // out of range
        // found close range -> connect ranges
        _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1);
    }
}

void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR)
{
#ifdef CEMU_DEBUG_ASSERT
    if (currentSegment->raDistances.reg[vGPR].usageEnd < 0)
        assert_dbg();
#endif
    // count instructions to end of initial segment
    if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START)
        assert_dbg();
    sint32 instructionsUntilEndOfSeg;
    if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END)
        instructionsUntilEndOfSeg = 0;
    else
        instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd;

#ifdef CEMU_DEBUG_ASSERT
    if (instructionsUntilEndOfSeg < 0)
        assert_dbg();
#endif
    sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg;
    if (remainingScanDist <= 0)
        return; // can't reach end

    // also dont forget: Extending is easier if we allow 'non symetric' branches. E.g. register range one enters one branch
    PPCRecImlSegment_t* route[64];
    route[0] = currentSegment;
    if (currentSegment->nextSegmentBranchNotTaken)
    {
        _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1);
    }
    if (currentSegment->nextSegmentBranchTaken)
    {
        _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1);
    }
}

void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
    for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
    {
        if(imlSegment->raDistances.reg[i].usageStart == INT_MAX)
            continue; // not used
        // check and extend if possible
        PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i);
    }
#ifdef CEMU_DEBUG_ASSERT
    if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
        assert_dbg();
    if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain)
        assert_dbg();
#endif
}

void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment)
{
    std::vector<PPCRecImlSegment_t*> list_segments;
    list_segments.reserve(1000);
    sint32 index = 0;
    imlSegment->raRangeExtendProcessed = true;
    list_segments.push_back(imlSegment);
    while (index < list_segments.size())
    {
        PPCRecImlSegment_t* currentSegment = list_segments[index];
        PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment);
        // follow flow
        if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false)
        {
            currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true;
            list_segments.push_back(currentSegment->nextSegmentBranchNotTaken);
        }
        if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false)
        {
            currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true;
            list_segments.push_back(currentSegment->nextSegmentBranchTaken);
        }
        index++;
    }
}

void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
    for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
        if (imlSegment->list_prevSegments.empty())
        {
            if (imlSegment->raRangeExtendProcessed)
                assert_dbg(); // should not happen
            PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment);
        }
    }
}

void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext)
{
    for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
        auto localLoopDepth = imlSegment->loopDepth;
        if( localLoopDepth <= 0 )
            continue; // not inside a loop
        // look for loop exit
        bool hasLoopExit = false;
        if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth)
        {
            hasLoopExit = true;
        }
        if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth)
        {
            hasLoopExit = true;
        }
        if(hasLoopExit == false)
            continue;

        // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop)
        for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries
        {
            if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END)
                continue; // range not set or does not reach end of segment
            if(imlSegment->nextSegmentBranchTaken)
                PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i);
            if(imlSegment->nextSegmentBranchNotTaken)
                PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i);
        }
    }
}

void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext)
{
    // merge close ranges
    PPCRecRA_mergeCloseRangesV2(ppcImlGenContext);
    // extra pass to move register stores out of loops
    PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext);
    // calculate liveness ranges
    for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
        PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment);
    }
}

void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange)
{
    bool isRead = false;
    bool isWritten = false;
    bool isOverwritten = false;
    for (auto& location : subrange->list_locations)
    {
        if (location.isRead)
        {
            isRead = true;
        }
        if (location.isWrite)
        {
            if (isRead == false)
                isOverwritten = true;
            isWritten = true;
        }
    }
    subrange->_noLoad = isOverwritten;
    subrange->hasStore = isWritten;

    if (subrange->start.index == RA_INTER_RANGE_START)
        subrange->_noLoad = true;
}

void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange);

void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext)
{
    // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore
    // first do a per-subrange pass
    for (auto& range : ppcImlGenContext->raInfo.list_ranges)
    {
        for (auto& subrange : range->list_subranges)
        {
            PPCRecRA_analyzeSubrangeDataDependencyV2(subrange);
        }
    }
    // then do a second pass where we scan along subrange flow
    for (auto& range : ppcImlGenContext->raInfo.list_ranges)
    {
        for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm
        {
            _analyzeRangeDataFlow(subrange);
        }
    }
}
@ -3,11 +3,11 @@

PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset)
{
    for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
    for(PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
    {
        if( ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset )
        if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset )
        {
            return ppcImlGenContext->segmentList[s];
            return segIt;
        }
    }
    debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset);
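// Note: the change in this file is mechanical: the raw segmentList pointer array plus its
// count/size fields becomes a std::vector (segmentList2), so lookups turn into range-for loops
// like the one above. A minimal sketch of that container shape, with a hypothetical stand-in
// segment type (not Cemu's real context struct):
//
// #include <cstdio>
// #include <vector>
//
// struct StubSegment
// {
//     bool isJumpDestination = false;
//     unsigned int jumpDestinationPPCAddress = 0;
// };
//
// struct StubContext
// {
//     std::vector<StubSegment*> segmentList2; // replaces segmentList / segmentListSize / segmentListCount
// };
//
// static StubSegment* findJumpTarget(StubContext& ctx, unsigned int ppcOffset)
// {
//     for (StubSegment* segIt : ctx.segmentList2) // no manual index/count bookkeeping
//     {
//         if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset)
//             return segIt;
//     }
//     return nullptr;
// }
//
// int main()
// {
//     StubContext ctx;
//     StubSegment seg{ true, 0x02000100 };
//     ctx.segmentList2.push_back(&seg); // growth is handled by the vector, no realloc()
//     printf("found: %d\n", findJumpTarget(ctx, 0x02000100) != nullptr); // prints 1
//     return 0;
// }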
@ -94,17 +94,18 @@ void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPC

void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext)
{
    for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
    size_t segCount = ppcImlGenContext->segmentList2.size();
    for(size_t s=0; s<segCount; s++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s];

        bool isLastSegment = (s+1)>=ppcImlGenContext->segmentListCount;
        PPCRecImlSegment_t* nextSegment = isLastSegment?NULL:ppcImlGenContext->segmentList[s+1];
        bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size();
        PPCRecImlSegment_t* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1];
        // handle empty segment
        if( imlSegment->imlListCount == 0 )
        {
            if (isLastSegment == false)
                PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList[s+1]); // continue execution to next segment
                PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment
            else
                imlSegment->nextSegmentIsUncertain = true;
            continue;
@ -143,15 +144,15 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext)

void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext)
{
    sint32 initialSegmentCount = ppcImlGenContext->segmentListCount;
    for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++)
    size_t initialSegmentCount = ppcImlGenContext->segmentList2.size();
    for (size_t i = 0; i < initialSegmentCount; i++)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i];
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[i];
        if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable)
        {
            // spawn new segment at end
            PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentListCount, 1);
            PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount-1];
            PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentList2.size(), 1);
            PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList2[ppcImlGenContext->segmentList2.size()-1];
            entrySegment->isEnterable = true;
            entrySegment->enterPPCAddress = imlSegment->enterPPCAddress;
            // create jump instruction
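// Note: with the segment list held in a std::vector, growing the list (as the enterable-segment
// isolation above does when it spawns an entry segment at the end) is just an insert or push_back;
// the vector handles reallocation and element shifting. A hedged sketch of such a helper, using a
// hypothetical stand-in segment type (not Cemu's):
//
// #include <cstdio>
// #include <vector>
//
// struct StubSegment
// {
//     int id = 0;
// };
//
// // inserts 'count' fresh segments before position 'index' and returns the first new one
// static StubSegment* insertSegments(std::vector<StubSegment*>& list, size_t index, size_t count)
// {
//     std::vector<StubSegment*> fresh;
//     for (size_t i = 0; i < count; i++)
//         fresh.push_back(new StubSegment());
//     list.insert(list.begin() + index, fresh.begin(), fresh.end());
//     return list[index];
// }
//
// int main()
// {
//     std::vector<StubSegment*> segments{ new StubSegment{ 1 }, new StubSegment{ 2 } };
//     insertSegments(segments, segments.size(), 1); // append one segment at the end
//     printf("segment count: %zu\n", segments.size()); // prints 3
//     for (StubSegment* s : segments)
//         delete s; // toy cleanup; the real recompiler owns its segments elsewhere
//     return 0;
// }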
@ -2299,13 +2299,12 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo

    // generate iml instruction code
    bool codeGenerationFailed = false;
    for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++)
    for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
    {
        PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s];
        ppcImlGenContext->segmentList[s]->x64Offset = x64GenContext.codeBufferIndex;
        for(sint32 i=0; i<imlSegment->imlListCount; i++)
        segIt->x64Offset = x64GenContext.codeBufferIndex;
        for(sint32 i=0; i<segIt->imlListCount; i++)
        {
            PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i;
            PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i;

            if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME )
            {
@ -2352,7 +2351,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
            }
            else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP )
            {
                if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlSegment, imlInstruction) == false )
                if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false )
                {
                    codeGenerationFailed = true;
                }
@ -2503,11 +2502,11 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
        uint32 x64Offset = 0xFFFFFFFF;
        if (x64GenContext.relocateOffsetTable[i].type == X64_RELOC_LINK_TO_PPC)
        {
            for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++)
            for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2)
            {
                if (ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset)
                if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset)
                {
                    x64Offset = ppcImlGenContext->segmentList[s]->x64Offset;
                    x64Offset = segIt->x64Offset;
                    break;
                }
            }
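// Note: the relocation fix-up above resolves a PPC jump target by scanning all segments for the one
// flagged as that jump destination and taking its recorded x64Offset. The same lookup reduced to a
// toy mapping table (hypothetical values, not Cemu's); a hash map is shown only to illustrate the
// idea, the code above keeps the linear scan:
//
// #include <cstdint>
// #include <cstdio>
// #include <unordered_map>
//
// int main()
// {
//     // ppc jump destination address -> offset of the generated x64 code for that segment
//     std::unordered_map<uint32_t, uint32_t> ppcToX64;
//     ppcToX64[0x02000100] = 0x40;
//     ppcToX64[0x02000140] = 0x9c;
//
//     uint32_t ppcOffset = 0x02000140;
//     auto it = ppcToX64.find(ppcOffset);
//     uint32_t x64Offset = (it != ppcToX64.end()) ? it->second : 0xFFFFFFFF; // 0xFFFFFFFF = unresolved
//     printf("x64 offset for 0x%08x: 0x%x\n", ppcOffset, x64Offset);
//     return 0;
// }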