diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 91d257b2..58b4ba8b 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -74,7 +74,7 @@ add_library(CemuCafe HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp HW/Espresso/Recompiler/PPCRecompilerImlRanges.h - HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp + HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 24e87bd1..78d8fad9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -173,9 +173,8 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // collect list of PPC-->x64 entry points entryPointsOut.clear(); - for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++) + for(PPCRecImlSegment_t* imlSegment : ppcImlGenContext.segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; if (imlSegment->isEnterable == false) continue; diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index 2e40f19d..4c07cfaa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -321,9 +321,10 @@ struct ppcImlGenContext_t sint32 imlListSize; sint32 imlListCount; // list of segments - PPCRecImlSegment_t** segmentList; - sint32 segmentListSize; - sint32 segmentListCount; + //PPCRecImlSegment_t** segmentList; + //sint32 segmentListSize; + //sint32 segmentListCount; + std::vector segmentList2; // code generation control bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode // register allocator info diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index b9685488..4fb45f50 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -3450,10 +3450,9 @@ void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segment void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) { - for(sint32 f=0; fsegmentListCount; f++) + for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[f]; - PPCRecompiler_dumpIMLSegment(imlSegment, f); + PPCRecompiler_dumpIMLSegment(ppcImlGenContext->segmentList2[i], i); debug_printf("\n"); } } @@ -3548,43 +3547,18 @@ PPCRecImlInstruction_t* PPCRecompiler_appendInstruction(PPCRecImlSegment_t* imlS return imlSegment->imlList + index; } -void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count) +PPCRecImlSegment_t* PPCRecompilerIml_appendSegment(ppcImlGenContext_t* ppcImlGenContext) { - if( (ppcImlGenContext->segmentListCount+count) > ppcImlGenContext->segmentListSize ) - { - // allocate space for more segments - ppcImlGenContext->segmentListSize += count; - ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*)); - } - for(sint32 i=(sint32)ppcImlGenContext->segmentListCount-1; i>=index; i--) - { - 
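
Aside (not part of the patch): the ppcImlGenContext_t change above swaps the manually managed segmentList pointer array and its separate size/count fields for a single std::vector of PPCRecImlSegment_t* (segmentList2). A minimal stand-alone sketch of that ownership model, using simplified stand-in types — the vector tracks its own size, but the owned segments still have to be deleted explicitly, as the reworked PPCRecompiler_freeContext further below does:

```cpp
#include <vector>
#include <cstdint>

struct Segment        // stand-in for PPCRecImlSegment_t
{
	bool isEnterable = false;
	uint32_t enterPPCAddress = 0;
};

struct GenContext     // stand-in for ppcImlGenContext_t
{
	// old style (removed by this patch):
	//   Segment** segmentList; sint32 segmentListSize; sint32 segmentListCount;
	std::vector<Segment*> segmentList2; // new style: size/capacity handled by the vector
};

int main()
{
	GenContext ctx;
	ctx.segmentList2.emplace_back(new Segment()); // append replaces the realloc-and-count bookkeeping
	for (Segment* seg : ctx.segmentList2)         // range-for replaces the index loops
	{
		if (seg->isEnterable)
		{ /* collect entry point */ }
	}
	for (Segment* seg : ctx.segmentList2)         // the vector only stores pointers, so delete the segments
		delete seg;
	ctx.segmentList2.clear();
	return 0;
}
```
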
memcpy(ppcImlGenContext->segmentList+(i+count), ppcImlGenContext->segmentList+i, sizeof(PPCRecImlSegment_t*)); - } - ppcImlGenContext->segmentListCount += count; - for(sint32 i=0; isegmentList+index+i, 0x00, sizeof(PPCRecImlSegment_t*)); - ppcImlGenContext->segmentList[index+i] = (PPCRecImlSegment_t*)malloc(sizeof(PPCRecImlSegment_t)); - memset(ppcImlGenContext->segmentList[index+i], 0x00, sizeof(PPCRecImlSegment_t)); - ppcImlGenContext->segmentList[index + i]->list_prevSegments = std::vector(); - } + PPCRecImlSegment_t* segment = new PPCRecImlSegment_t(); + ppcImlGenContext->segmentList2.emplace_back(segment); + return segment; } -/* - * Allocate and init a new iml instruction segment - */ -PPCRecImlSegment_t* PPCRecompiler_generateImlSegment(ppcImlGenContext_t* ppcImlGenContext) +void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count) { - if( ppcImlGenContext->segmentListCount >= ppcImlGenContext->segmentListSize ) - { - // allocate space for more segments - ppcImlGenContext->segmentListSize *= 2; - ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*)); - } - PPCRecImlSegment_t* ppcRecSegment = new PPCRecImlSegment_t(); - ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount] = ppcRecSegment; - ppcImlGenContext->segmentListCount++; - return ppcRecSegment; + ppcImlGenContext->segmentList2.insert(ppcImlGenContext->segmentList2.begin() + index, count, nullptr); + for (sint32 i = 0; i < count; i++) + ppcImlGenContext->segmentList2[index + i] = new PPCRecImlSegment_t(); } void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) @@ -3594,17 +3568,25 @@ void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) free(ppcImlGenContext->imlList); ppcImlGenContext->imlList = nullptr; } - for(sint32 i=0; isegmentListCount; i++) + + for (PPCRecImlSegment_t* imlSegment : ppcImlGenContext->segmentList2) { - free(ppcImlGenContext->segmentList[i]->imlList); - delete ppcImlGenContext->segmentList[i]; - } - ppcImlGenContext->segmentListCount = 0; - if (ppcImlGenContext->segmentList) - { - free(ppcImlGenContext->segmentList); - ppcImlGenContext->segmentList = nullptr; + free(imlSegment->imlList); + delete imlSegment; } + ppcImlGenContext->segmentList2.clear(); + + //for(sint32 i=0; isegmentListCount; i++) + //{ + // free(ppcImlGenContext->segmentList[i]->imlList); + // delete ppcImlGenContext->segmentList[i]; + //} + //ppcImlGenContext->segmentListCount = 0; + //if (ppcImlGenContext->segmentList) + //{ + // free(ppcImlGenContext->segmentList); + // ppcImlGenContext->segmentList = nullptr; + //} } bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml) @@ -4598,9 +4580,8 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // divide iml instructions into segments // each segment is defined by one or more instructions with no branches or jump destinations in between // a branch instruction may only be the very last instruction of a segment - ppcImlGenContext.segmentListCount = 0; - ppcImlGenContext.segmentListSize = 2; - ppcImlGenContext.segmentList = (PPCRecImlSegment_t**)malloc(ppcImlGenContext.segmentListSize*sizeof(PPCRecImlSegment_t*)); + cemu_assert_debug(ppcImlGenContext.segmentList2.empty()); + sint32 segmentStart = 0; sint32 segmentImlIndex = 0; while( segmentImlIndex < ppcImlGenContext.imlListCount ) @@ -4619,7 +4600,7 @@ bool 
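
The rewritten helpers above boil down to a push-back and a positional insert. A compilable sketch of the same pattern (open `count` null slots at `index`, then construct a segment into each), with a stand-in Segment type:

```cpp
#include <vector>
#include <cassert>

struct Segment { int id = 0; }; // stand-in for PPCRecImlSegment_t

Segment* appendSegment(std::vector<Segment*>& list)
{
	Segment* seg = new Segment();
	list.emplace_back(seg);
	return seg;
}

// Same shape as the new PPCRecompilerIml_insertSegments: insert `count` nullptr
// placeholders at `index`, then allocate a fresh segment into each slot.
void insertSegments(std::vector<Segment*>& list, int index, int count)
{
	list.insert(list.begin() + index, count, nullptr);
	for (int i = 0; i < count; i++)
		list[index + i] = new Segment();
}

int main()
{
	std::vector<Segment*> list;
	appendSegment(list)->id = 1;
	appendSegment(list)->id = 3;
	insertSegments(list, 1, 1); // make room between the two existing segments
	list[1]->id = 2;
	assert(list.size() == 3 && list[0]->id == 1 && list[1]->id == 2 && list[2]->id == 3);
	for (Segment* s : list)
		delete s;
	return 0;
}
```
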
PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) { // segment ends after current instruction - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); + PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); ppcRecSegment->startOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart+1; ppcRecSegment->ppcAddress = 0xFFFFFFFF; @@ -4631,7 +4612,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // segment ends before current instruction if( segmentImlIndex > segmentStart ) { - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); + PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); ppcRecSegment->startOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart; ppcRecSegment->ppcAddress = 0xFFFFFFFF; @@ -4643,123 +4624,122 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if( segmentImlIndex != segmentStart ) { // final segment - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); + PPCRecImlSegment_t* ppcRecSegment = PPCRecompilerIml_appendSegment(&ppcImlGenContext); ppcRecSegment->startOffset = segmentStart; ppcRecSegment->count = segmentImlIndex-segmentStart; ppcRecSegment->ppcAddress = 0xFFFFFFFF; segmentStart = segmentImlIndex; } // move iml instructions into the segments - for(sint32 s=0; sstartOffset; - uint32 imlCount = ppcImlGenContext.segmentList[s]->count; + uint32 imlStartIndex = segIt->startOffset; + uint32 imlCount = segIt->count; if( imlCount > 0 ) { - ppcImlGenContext.segmentList[s]->imlListSize = imlCount + 4; - ppcImlGenContext.segmentList[s]->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t)*ppcImlGenContext.segmentList[s]->imlListSize); - ppcImlGenContext.segmentList[s]->imlListCount = imlCount; - memcpy(ppcImlGenContext.segmentList[s]->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount); + segIt->imlListSize = imlCount + 4; + segIt->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t)* segIt->imlListSize); + segIt->imlListCount = imlCount; + memcpy(segIt->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount); } else { // empty segments are allowed so we can handle multiple PPC entry addresses pointing to the same code - ppcImlGenContext.segmentList[s]->imlList = NULL; - ppcImlGenContext.segmentList[s]->imlListSize = 0; - ppcImlGenContext.segmentList[s]->imlListCount = 0; + segIt->imlList = nullptr; + segIt->imlListSize = 0; + segIt->imlListCount = 0; } - ppcImlGenContext.segmentList[s]->startOffset = 9999999; - ppcImlGenContext.segmentList[s]->count = 9999999; + segIt->startOffset = 9999999; + segIt->count = 9999999; } // clear segment-independent iml list free(ppcImlGenContext.imlList); ppcImlGenContext.imlList = NULL; ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) - for(sint32 s=0; simlListCount; i++) + for(sint32 i=0; i< segIt->imlListCount; i++) { - if( 
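
The "move iml instructions into the segments" loop above copies each segment's slice out of the flat instruction list and then poisons the flat offsets. A simplified sketch of that step; note the patch keeps malloc'd imlList buffers, a vector is used here only to keep the example self-contained:

```cpp
#include <vector>
#include <cstdint>

struct Instr { uint32_t associatedPPCAddress = 0; }; // stand-in for PPCRecImlInstruction_t

struct Segment                                       // stand-in for PPCRecImlSegment_t
{
	int startOffset = 0;                             // slice into the flat instruction list
	int count = 0;
	std::vector<Instr> imlList;                      // per-segment copy of the instructions
};

void moveInstructionsIntoSegments(const std::vector<Instr>& flatList, std::vector<Segment*>& segments)
{
	for (Segment* seg : segments)
	{
		seg->imlList.assign(flatList.begin() + seg->startOffset,
		                    flatList.begin() + seg->startOffset + seg->count);
		// after the copy the flat offsets are meaningless; the patch poisons them with 9999999
		seg->startOffset = 9999999;
		seg->count = 9999999;
	}
}
```
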
ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress == 0 ) + if(segIt->imlList[i].associatedPPCAddress == 0 ) continue; //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) // continue; // jumpmarks and no-op instructions must not affect segment ppc address range - segmentPPCAddrMin = std::min(ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress, segmentPPCAddrMin); - segmentPPCAddrMax = std::max(ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress, segmentPPCAddrMax); + segmentPPCAddrMin = std::min(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMin); + segmentPPCAddrMax = std::max(segIt->imlList[i].associatedPPCAddress, segmentPPCAddrMax); } if( segmentPPCAddrMin != 0xFFFFFFFF ) { - ppcImlGenContext.segmentList[s]->ppcAddrMin = segmentPPCAddrMin; - ppcImlGenContext.segmentList[s]->ppcAddrMax = segmentPPCAddrMax; + segIt->ppcAddrMin = segmentPPCAddrMin; + segIt->ppcAddrMax = segmentPPCAddrMax; } else { - ppcImlGenContext.segmentList[s]->ppcAddrMin = 0; - ppcImlGenContext.segmentList[s]->ppcAddrMax = 0; + segIt->ppcAddrMin = 0; + segIt->ppcAddrMax = 0; } } // certain instructions can change the segment state // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) // jumpmarks mark the segment as a jump destination (within the same function) - for(sint32 s=0; simlListCount > 0 ) + while (segIt->imlListCount > 0) { - if( ppcImlGenContext.segmentList[s]->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER ) + if (segIt->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER) { // mark segment as enterable - if( ppcImlGenContext.segmentList[s]->isEnterable ) + if (segIt->isEnterable) assert_dbg(); // should not happen? - ppcImlGenContext.segmentList[s]->isEnterable = true; - ppcImlGenContext.segmentList[s]->enterPPCAddress = ppcImlGenContext.segmentList[s]->imlList[0].op_ppcEnter.ppcAddress; + segIt->isEnterable = true; + segIt->enterPPCAddress = segIt->imlList[0].op_ppcEnter.ppcAddress; // remove ppc_enter instruction - ppcImlGenContext.segmentList[s]->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - ppcImlGenContext.segmentList[s]->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - ppcImlGenContext.segmentList[s]->imlList[0].associatedPPCAddress = 0; + segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; + segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + segIt->imlList[0].associatedPPCAddress = 0; } - else if( ppcImlGenContext.segmentList[s]->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) + else if(segIt->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) { // mark segment as jump destination - if( ppcImlGenContext.segmentList[s]->isJumpDestination ) + if(segIt->isJumpDestination ) assert_dbg(); // should not happen? 
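
The address-range pass above takes the min/max associatedPPCAddress over a segment's instructions, skipping instructions that carry no PPC address. A stand-alone sketch of the same calculation:

```cpp
#include <vector>
#include <algorithm>
#include <cstdint>

struct Instr { uint32_t associatedPPCAddress = 0; };

// Min/max PPC address of a segment; instructions with address 0 are not associated
// with a PPC instruction and are ignored. A segment with no addressed instructions
// gets the range 0/0, matching the diff above.
void calcSegmentAddressRange(const std::vector<Instr>& imlList, uint32_t& ppcAddrMin, uint32_t& ppcAddrMax)
{
	ppcAddrMin = 0xFFFFFFFF;
	ppcAddrMax = 0;
	for (const Instr& ins : imlList)
	{
		if (ins.associatedPPCAddress == 0)
			continue;
		ppcAddrMin = std::min(ppcAddrMin, ins.associatedPPCAddress);
		ppcAddrMax = std::max(ppcAddrMax, ins.associatedPPCAddress);
	}
	if (ppcAddrMin == 0xFFFFFFFF)
		ppcAddrMin = ppcAddrMax = 0;
}
```
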
- ppcImlGenContext.segmentList[s]->isJumpDestination = true; - ppcImlGenContext.segmentList[s]->jumpDestinationPPCAddress = ppcImlGenContext.segmentList[s]->imlList[0].op_jumpmark.address; + segIt->isJumpDestination = true; + segIt->jumpDestinationPPCAddress = segIt->imlList[0].op_jumpmark.address; // remove jumpmark instruction - ppcImlGenContext.segmentList[s]->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - ppcImlGenContext.segmentList[s]->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - ppcImlGenContext.segmentList[s]->imlList[0].associatedPPCAddress = 0; + segIt->imlList[0].type = PPCREC_IML_TYPE_NO_OP; + segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + segIt->imlList[0].associatedPPCAddress = 0; } else break; } } // the first segment is always enterable as the recompiled functions entrypoint - ppcImlGenContext.segmentList[0]->isEnterable = true; - ppcImlGenContext.segmentList[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; + ppcImlGenContext.segmentList2[0]->isEnterable = true; + ppcImlGenContext.segmentList2[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; // link segments for further inter-segment optimization PPCRecompilerIML_linkSegments(&ppcImlGenContext); // optimization pass - replace segments with conditional MOVs if possible - for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext.segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; - if (imlSegment->nextSegmentBranchNotTaken == NULL || imlSegment->nextSegmentBranchTaken == NULL) + if (segIt->nextSegmentBranchNotTaken == nullptr || segIt->nextSegmentBranchTaken == nullptr) continue; // not a branching segment - PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(imlSegment); + PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(segIt); if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) continue; - PPCRecImlSegment_t* conditionalSegment = imlSegment->nextSegmentBranchNotTaken; - PPCRecImlSegment_t* finalSegment = imlSegment->nextSegmentBranchTaken; - if(imlSegment->nextSegmentBranchTaken != imlSegment->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) + PPCRecImlSegment_t* conditionalSegment = segIt->nextSegmentBranchNotTaken; + PPCRecImlSegment_t* finalSegment = segIt->nextSegmentBranchTaken; + if (segIt->nextSegmentBranchTaken != segIt->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) continue; - if (imlSegment->nextSegmentBranchNotTaken->imlListCount > 4) + if (segIt->nextSegmentBranchNotTaken->imlListCount > 4) continue; - if(conditionalSegment->list_prevSegments.size() != 1) + if (conditionalSegment->list_prevSegments.size() != 1) continue; // the reduced segment must not be the target of any other branch - if(conditionalSegment->isEnterable) + if (conditionalSegment->isEnterable) continue; // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) bool canReduceSegment = true; @@ -4788,16 +4768,16 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext { PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList + f; if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(imlSegment), 
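
The conditional-move optimization that begins above only fires on a specific control-flow shape: a branching segment whose fall-through body is tiny, has exactly one predecessor, is not externally enterable, and falls through into the branch target. A sketch of just that shape check, with a stand-in segment type whose fields mirror the ones used in the diff:

```cpp
// Sketch of the if-pattern this pass looks for before converting the body into
// conditional moves. Stand-in struct; field names follow the diff.
struct Segment
{
	Segment* nextSegmentBranchTaken = nullptr;    // target when the conditional jump is taken
	Segment* nextSegmentBranchNotTaken = nullptr; // fall-through successor
	int predecessorCount = 0;                     // stands in for list_prevSegments.size()
	int instructionCount = 0;                     // stands in for imlListCount
	bool isEnterable = false;
};

// A --cond--> C          (branch taken skips the body)
// A --------> B ----> C  (fall-through executes the small body B, then joins at C)
bool isConditionalMoveCandidate(const Segment* a)
{
	const Segment* b = a->nextSegmentBranchNotTaken; // conditional body
	const Segment* c = a->nextSegmentBranchTaken;    // join point
	if (!b || !c)
		return false;                    // not a branching segment
	if (c != b->nextSegmentBranchNotTaken)
		return false;                    // body must fall through into the join point
	if (b->instructionCount > 4)
		return false;                    // keep the converted body small
	if (b->predecessorCount != 1)
		return false;                    // body may only be reached from A
	if (b->isEnterable)
		return false;                    // and must not be an external entry point
	return true;
}
```
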
PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); + PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(segIt), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); else assert_dbg(); } // update segment links // source segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment - PPCRecompilerIML_removeLink(imlSegment, conditionalSegment); - PPCRecompilerIML_removeLink(imlSegment, finalSegment); + PPCRecompilerIML_removeLink(segIt, conditionalSegment); + PPCRecompilerIML_removeLink(segIt, finalSegment); PPCRecompilerIML_removeLink(conditionalSegment, finalSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, finalSegment); + PPCRecompilerIml_setLinkBranchNotTaken(segIt, finalSegment); // remove all instructions from conditional segment conditionalSegment->imlListCount = 0; @@ -4805,23 +4785,23 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) { // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() - PPCRecompilerIML_removeLink(imlSegment, finalSegment); + PPCRecompilerIML_removeLink(segIt, finalSegment); if (finalSegment->nextSegmentBranchNotTaken) { PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchNotTaken; PPCRecompilerIML_removeLink(finalSegment, tempSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, tempSegment); + PPCRecompilerIml_setLinkBranchNotTaken(segIt, tempSegment); } if (finalSegment->nextSegmentBranchTaken) { PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchTaken; PPCRecompilerIML_removeLink(finalSegment, tempSegment); - PPCRecompilerIml_setLinkBranchTaken(imlSegment, tempSegment); + PPCRecompilerIml_setLinkBranchTaken(segIt, tempSegment); } // copy IML instructions for (sint32 f = 0; f < finalSegment->imlListCount; f++) { - memcpy(PPCRecompiler_appendInstruction(imlSegment), finalSegment->imlList + f, sizeof(PPCRecImlInstruction_t)); + memcpy(PPCRecompiler_appendInstruction(segIt), finalSegment->imlList + f, sizeof(PPCRecImlInstruction_t)); } finalSegment->imlListCount = 0; @@ -4832,33 +4812,32 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert cycle counter instruction in every segment that has a cycle count greater zero - for(sint32 s=0; sppcAddrMin == 0 ) + if( segIt->ppcAddrMin == 0 ) continue; // count number of PPC instructions in segment - // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC + // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC since they generate no IML instructions uint32 lastPPCInstAddr = 0; uint32 ppcCount2 = 0; - for (sint32 i = 0; i < imlSegment->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlListCount; i++) { - if (imlSegment->imlList[i].associatedPPCAddress == 0) + if (segIt->imlList[i].associatedPPCAddress == 0) continue; - if (imlSegment->imlList[i].associatedPPCAddress == lastPPCInstAddr) + if (segIt->imlList[i].associatedPPCAddress == lastPPCInstAddr) continue; - lastPPCInstAddr = imlSegment->imlList[i].associatedPPCAddress; 
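
The cycle-counter pass above estimates how many PPC instructions a segment covers by counting changes of associatedPPCAddress, since one PPC instruction can expand into several IML instructions and the old addrMax-addrMin estimate breaks with inlined functions. A sketch of that count:

```cpp
#include <vector>
#include <cstdint>

struct Instr { uint32_t associatedPPCAddress = 0; };

// Count distinct consecutive PPC addresses; address 0 means "no associated PPC
// instruction" and repeated addresses belong to the same PPC instruction.
uint32_t countPpcInstructions(const std::vector<Instr>& imlList)
{
	uint32_t lastAddr = 0;
	uint32_t count = 0;
	for (const Instr& ins : imlList)
	{
		if (ins.associatedPPCAddress == 0 || ins.associatedPPCAddress == lastAddr)
			continue;
		lastAddr = ins.associatedPPCAddress;
		count++;
	}
	return count; // used as the COUNT_CYCLES parameter when greater than zero
}
```
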
+ lastPPCInstAddr = segIt->imlList[i].associatedPPCAddress; ppcCount2++; } //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions uint32 cycleCount = ppcCount2;// ppcCount / 4; if( cycleCount > 0 ) { - PPCRecompiler_pushBackIMLInstructions(imlSegment, 0, 1); - imlSegment->imlList[0].type = PPCREC_IML_TYPE_MACRO; - imlSegment->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegment->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; - imlSegment->imlList[0].op_macro.param = cycleCount; + PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1); + segIt->imlList[0].type = PPCREC_IML_TYPE_MACRO; + segIt->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; + segIt->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + segIt->imlList[0].op_macro.param = cycleCount; } } @@ -4866,10 +4845,10 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext // for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. // todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located - for(sint32 s=0; sppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList2[s]; if( imlSegment->imlListCount == 0 ) continue; if (imlSegment->imlList[imlSegment->imlListCount - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlListCount - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) @@ -4891,12 +4870,12 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); imlSegment = NULL; - PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext.segmentList[s+0]; - PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext.segmentList[s+1]; - PPCRecImlSegment_t* imlSegmentP2 = ppcImlGenContext.segmentList[s+2]; + PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext.segmentList2[s+0]; + PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext.segmentList2[s+1]; + PPCRecImlSegment_t* imlSegmentP2 = ppcImlGenContext.segmentList2[s+2]; // create entry point segment - PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentListCount, 1); - PPCRecImlSegment_t* imlSegmentPEntry = ppcImlGenContext.segmentList[ppcImlGenContext.segmentListCount-1]; + PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1); + PPCRecImlSegment_t* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size()-1]; // relink segments PPCRecompilerIML_relinkInputSegment(imlSegmentP2, imlSegmentP0); PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); @@ -4972,16 +4951,15 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext } // insert name store instructions at the end of each segment but before branch instructions - for(sint32 s=0; simlListCount == 0 ) + if(segIt->imlListCount 
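
When the estimated count is non-zero, a COUNT_CYCLES macro is placed at index 0 of the segment, as the code above does via PPCRecompiler_pushBackIMLInstructions(segIt, 0, 1). That helper is not part of this diff; the sketch below merely models its observable effect with a vector insert:

```cpp
#include <vector>
#include <cstdint>

enum class Op { CountCycles, Other };
struct Instr { Op op = Op::Other; uint32_t param = 0; };
struct Segment { std::vector<Instr> code; };

// Make room at the front of the segment and write the cycle-counter macro there.
void insertCycleCounter(Segment& seg, uint32_t cycleCount)
{
	if (cycleCount == 0)
		return;
	Instr counter;
	counter.op = Op::CountCycles;
	counter.param = cycleCount;
	seg.code.insert(seg.code.begin(), counter);
}
```
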
== 0 ) continue; // ignore empty segments // analyze segment for register usage PPCImlOptimizerUsedRegisters_t registersUsed; - for(sint32 i=0; iimlListCount; i++) + for(sint32 i=0; iimlListCount; i++) { - PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); + PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, segIt->imlList+i, ®istersUsed); //PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, registersUsed.readGPR1); sint32 accessedTempReg[5]; // intermediate FPRs @@ -4997,7 +4975,7 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]]; if( regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0+32 ) { - imlSegment->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; + segIt->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; } } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp index 45e27664..d14c6e00 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp @@ -1019,13 +1019,12 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte // inefficient algorithm for optimizing away excess registers // we simply load, use and store excess registers into other unused registers when we need to // first we remove all name load and store instructions that involve out-of-bounds registers - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; sint32 imlIndex = 0; - while( imlIndex < imlSegment->imlListCount ) + while( imlIndex < segIt->imlListCount ) { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+imlIndex; + PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList+imlIndex; if( imlInstructionItr->type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr->type == PPCREC_IML_TYPE_FPR_NAME_R ) { if( imlInstructionItr->op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS ) @@ -1039,16 +1038,15 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte } } // replace registers - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; sint32 imlIndex = 0; - while( imlIndex < imlSegment->imlListCount ) + while( imlIndex < segIt->imlListCount ) { PPCImlOptimizerUsedRegisters_t registersUsed; while( true ) { - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndex, ®istersUsed); + PPCRecompiler_checkRegisterUsage(ppcImlGenContext, segIt->imlList+imlIndex, ®istersUsed); if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) { // get index of register to replace @@ -1091,16 +1089,16 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte bool replacedRegisterIsUsed = true; if( unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0+32) ) { - replacedRegisterIsUsed = imlSegment->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; + replacedRegisterIsUsed = 
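
The register-usage analysis above maps each virtual FPR accessed in a segment back to its PPC register name and flags it in ppcFPRUsed, which the FPR-reduction pass in PPCRecompilerImlOptimizer.cpp later consults. A simplified sketch; the array sizes and the PPCREC_NAME_FPR0 value here are placeholders, not the real constants:

```cpp
#include <array>
#include <vector>
#include <cstdint>

constexpr uint32_t NAME_FPR0 = 0x100; // placeholder for PPCREC_NAME_FPR0
constexpr int MAX_VREG = 256;         // placeholder bound for the virtual register space

// Mark which architectural FPRs (0..31) the segment touches, given the list of
// virtual FPRs an instruction accessed and the virtual-register -> name mapping.
void markUsedFprs(const std::vector<int>& accessedVirtualFprs,
                  const std::array<uint32_t, MAX_VREG>& mappedFPRRegister,
                  std::array<bool, 32>& ppcFPRUsed)
{
	for (int vreg : accessedVirtualFprs)
	{
		if (vreg < 0 || vreg >= MAX_VREG)
			continue; // slot unused by this instruction
		uint32_t name = mappedFPRRegister[vreg];
		if (name >= NAME_FPR0 && name < NAME_FPR0 + 32)
			ppcFPRUsed[name - NAME_FPR0] = true;
	}
}
```
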
segIt->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; } // replace registers that are out of range - PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndex, fprToReplace, unusedRegisterIndex); + PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, segIt->imlList+imlIndex, fprToReplace, unusedRegisterIndex); // add load/store name after instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndex+1, 2); + PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex+1, 2); // add load/store before current instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndex, 2); + PPCRecompiler_pushBackIMLInstructions(segIt, imlIndex, 2); // name_unusedRegister = unusedRegister - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+(imlIndex+0); + PPCRecImlInstruction_t* imlInstructionItr = segIt->imlList+(imlIndex+0); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); if( replacedRegisterIsUsed ) { @@ -1113,7 +1111,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte } else imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; - imlInstructionItr = imlSegment->imlList+(imlIndex+1); + imlInstructionItr = segIt->imlList+(imlIndex+1); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -1122,7 +1120,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte imlInstructionItr->op_r_name.copyWidth = 32; imlInstructionItr->op_r_name.flags = 0; // name_gprToReplace = unusedRegister - imlInstructionItr = imlSegment->imlList+(imlIndex+3); + imlInstructionItr = segIt->imlList+(imlIndex+3); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; @@ -1131,7 +1129,7 @@ bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenConte imlInstructionItr->op_r_name.copyWidth = 32; imlInstructionItr->op_r_name.flags = 0; // unusedRegister = name_unusedRegister - imlInstructionItr = imlSegment->imlList+(imlIndex+4); + imlInstructionItr = segIt->imlList+(imlIndex+4); memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); if( replacedRegisterIsUsed ) { @@ -1223,7 +1221,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon ppcRecManageRegisters_t rCtx = { 0 }; for (sint32 i = 0; i < 64; i++) rCtx.ppcRegToMapping[i] = -1; - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; sint32 idx = 0; sint32 currentUseIndex = 0; PPCImlOptimizerUsedRegisters_t registersUsed; @@ -1374,7 +1372,7 @@ bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenCon bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (sint32 s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) return false; @@ -1530,9 +1528,9 @@ uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, P } else if (imlSegment->nextSegmentIsUncertain) { - if (ppcImlGenContext->segmentListCount >= 5) + if (ppcImlGenContext->segmentList2.size() >= 5) { - return 7; // for more complex functions we assume that CR is not passed on + return 
7; // for more complex functions we assume that CR is not passed on (hack) } } return currentOverwriteMask; @@ -1568,35 +1566,33 @@ uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, PP void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext) { - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for(sint32 i=0; iimlListCount; i++) + for(sint32 i=0; iimlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) { if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) { uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); - imlSegment->crBitsInput |= (crBitFlag&~imlSegment->crBitsWritten); // flag bits that have not already been written - imlSegment->crBitsRead |= (crBitFlag); + segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + segIt->crBitsRead |= (crBitFlag); } } else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) { uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); - imlSegment->crBitsInput |= (crBitFlag&~imlSegment->crBitsWritten); // flag bits that have not already been written - imlSegment->crBitsRead |= (crBitFlag); + segIt->crBitsInput |= (crBitFlag&~segIt->crBitsWritten); // flag bits that have not already been written + segIt->crBitsRead |= (crBitFlag); } else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) { - imlSegment->crBitsRead |= 0xFFFFFFFF; + segIt->crBitsRead |= 0xFFFFFFFF; } else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) { - imlSegment->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); + segIt->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); } else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) { @@ -1604,7 +1600,7 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext imlInstruction->operation == PPCREC_IML_OP_CR_SET) { uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - imlSegment->crBitsWritten |= (crBitFlag & ~imlSegment->crBitsWritten); + segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); } else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC || @@ -1612,38 +1608,37 @@ void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) { uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - imlSegment->crBitsWritten |= (crBitFlag & ~imlSegment->crBitsWritten); + segIt->crBitsWritten |= (crBitFlag & ~segIt->crBitsWritten); crBitFlag = 1 << (imlInstruction->op_cr.crA); - imlSegment->crBitsRead |= (crBitFlag & ~imlSegment->crBitsRead); + segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); crBitFlag = 1 << (imlInstruction->op_cr.crB); - imlSegment->crBitsRead |= (crBitFlag & ~imlSegment->crBitsRead); + segIt->crBitsRead |= (crBitFlag & ~segIt->crBitsRead); } else cemu_assert_unimplemented(); } else if( 
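
The CR-tracking pass above records, per segment, which CR bits are read, written, and read before being written (crBitsInput). The bit numbering is crRegisterIndex * 4 + crBitIndex, i.e. four bits per CR field. A compact sketch of that bookkeeping:

```cpp
#include <cstdint>

// Per-segment CR usage masks: a read only counts as a segment *input* if the
// segment has not already written that bit earlier.
struct CrUsage
{
	uint32_t crBitsInput = 0;   // bits read before being written in this segment
	uint32_t crBitsRead = 0;    // bits read anywhere in this segment
	uint32_t crBitsWritten = 0; // bits written in this segment

	void read(uint32_t crField, uint32_t crBit)
	{
		uint32_t flag = 1u << (crField * 4 + crBit);
		crBitsInput |= (flag & ~crBitsWritten);
		crBitsRead |= flag;
	}

	void write(uint32_t crField, uint32_t crBit)
	{
		crBitsWritten |= 1u << (crField * 4 + crBit);
	}

	void writeWholeField(uint32_t crField) // e.g. compare results writing all 4 bits
	{
		crBitsWritten |= 0xFu << (crField * 4);
	}
};
```
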
PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 ) { - imlSegment->crBitsWritten |= (0xF<<(imlInstruction->crRegister*4)); + segIt->crBitsWritten |= (0xF<<(imlInstruction->crRegister*4)); } else if( (imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) { // overwrites CR0 - imlSegment->crBitsWritten |= (0xF<<0); + segIt->crBitsWritten |= (0xF<<0); } } } // flag instructions that write to CR where we can ignore individual CR bits - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - for(sint32 i=0; iimlListCount; i++) + for(sint32 i=0; iimlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 ) { uint32 crBitFlags = 0xF<<((uint32)imlInstruction->crRegister*4); - uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment); - uint32 crIgnoreMask = crOverwriteMask & ~imlSegment->crBitsRead; + uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, segIt); + uint32 crIgnoreMask = crOverwriteMask & ~segIt->crBitsRead; imlInstruction->crIgnoreMask = crIgnoreMask; } } @@ -1805,20 +1800,18 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI */ void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for (sint32 i = 0; i < imlSegment->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); + PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); } else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); + PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); } } } @@ -1891,16 +1884,14 @@ void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* pp */ void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for (sint32 i = 0; i < imlSegment->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlListCount; i++) { - PPCRecImlInstruction_t* 
imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian ) { - PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); + PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); } } } @@ -1940,12 +1931,11 @@ bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 */ void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - for (sint32 i = 0; i < imlSegment->imlListCount; i++) + for (sint32 i = 0; i < segIt->imlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList + i; if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD || imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) { if(imlInstruction->op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 && @@ -2167,9 +2157,8 @@ void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext) { // check if this segment has a conditional branch - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - _reorderConditionModifyInstructions(imlSegment); + _reorderConditionModifyInstructions(segIt); } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp index 88d387e6..3158303a 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp @@ -628,21 +628,20 @@ void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext) { // start with frequently executed segments first sint32 maxLoopDepth = 0; - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - maxLoopDepth = std::max(maxLoopDepth, ppcImlGenContext->segmentList[i]->loopDepth); + maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth); } while (true) { bool done = false; for (sint32 d = maxLoopDepth; d >= 0; d--) { - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i]; - if (imlSegment->loopDepth != d) + if (segIt->loopDepth != d) continue; - done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, imlSegment); + done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, segIt); if (done == false) break; } @@ -932,9 +931,9 @@ void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, void PPCRecRA_generateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = 
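
PPCRecRA_assignRegisters above orders its work by loop depth: it finds the maximum loopDepth and then processes segments from the innermost loops outward, so frequently executed code gets registers assigned first. The sketch below shows only that ordering; the surrounding while(true) retry loop and its exit handling are omitted:

```cpp
#include <vector>
#include <algorithm>

struct Segment { int loopDepth = 0; };

// Visit segments grouped by loop depth, deepest first.
template <typename Fn>
void forEachSegmentByLoopDepth(std::vector<Segment*>& segments, Fn&& visit)
{
	int maxLoopDepth = 0;
	for (Segment* seg : segments)
		maxLoopDepth = std::max(maxLoopDepth, seg->loopDepth);
	for (int d = maxLoopDepth; d >= 0; d--)
	{
		for (Segment* seg : segments)
		{
			if (seg->loopDepth != d)
				continue;
			visit(seg);
		}
	}
}
```
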
ppcImlGenContext->segmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; PPCRecRA_generateSegmentInstructions(ppcImlGenContext, imlSegment); } } @@ -947,10 +946,10 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen { // insert empty segments after every non-taken branch if the linked segment has more than one input // this gives the register allocator more room to create efficient spill code - sint32 segmentIndex = 0; - while (segmentIndex < ppcImlGenContext->segmentListCount) + size_t segmentIndex = 0; + while (segmentIndex < ppcImlGenContext->segmentList2.size()) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; if (imlSegment->nextSegmentIsUncertain) { segmentIndex++; @@ -972,8 +971,8 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen continue; } PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1); - PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList[segmentIndex + 0]; - PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext->segmentList[segmentIndex + 1]; + PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0]; + PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1]; PPCRecImlSegment_t* nextSegment = imlSegment->nextSegmentBranchNotTaken; PPCRecompilerIML_removeLink(imlSegmentP0, nextSegment); PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP1, nextSegment); @@ -981,14 +980,14 @@ void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGen segmentIndex++; } // detect loops - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; imlSegment->momentaryIndex = s; } - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; PPCRecRA_identifyLoop(ppcImlGenContext, imlSegment); } } @@ -1009,4 +1008,411 @@ void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext) PPCRecRA_generateMoveInstructions(ppcImlGenContext); PPCRecRA_deleteAllRanges(ppcImlGenContext); +} + + +bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR) +{ + return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX); +} + +void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +{ + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + { + imlSegment->raDistances.reg[i].usageStart = INT_MAX; + imlSegment->raDistances.reg[i].usageEnd = INT_MIN; + } + // scan instructions for usage range + sint32 index = 0; + PPCImlOptimizerUsedRegisters_t gprTracking; + while (index < imlSegment->imlListCount) + { + // end loop at suffix instruction + if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) + break; + // get accessed GPRs + PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); + for (sint32 t = 0; t < 4; t++) + { + sint32 virtualRegister = gprTracking.gpr[t]; + if (virtualRegister < 0) + continue; + cemu_assert_debug(virtualRegister < 
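
The register-allocator code appended above (moved in from PPCRecompilerImlRegisterAllocator2.cpp) starts by computing, per segment and per virtual register, the first and last instruction index at which the register is used; INT_MAX as usageStart is the "not used in this segment" sentinel that _isRangeDefined checks. A stand-alone sketch of that bookkeeping, with PPC_REC_MAX_VIRTUAL_GPR replaced by a placeholder:

```cpp
#include <climits>
#include <algorithm>

constexpr int MAX_VGPR = 64; // placeholder for PPC_REC_MAX_VIRTUAL_GPR

struct RegDistances
{
	int usageStart[MAX_VGPR];
	int usageEnd[MAX_VGPR];

	void reset()
	{
		for (int i = 0; i < MAX_VGPR; i++)
		{
			usageStart[i] = INT_MAX; // sentinel: register not used in this segment
			usageEnd[i] = INT_MIN;
		}
	}

	void recordUse(int vreg, int instructionIndex)
	{
		usageStart[vreg] = std::min(usageStart[vreg], instructionIndex);  // index before/at instruction
		usageEnd[vreg] = std::max(usageEnd[vreg], instructionIndex + 1);  // index after instruction
	}

	bool isDefined(int vreg) const { return usageStart[vreg] != INT_MAX; }
};
```
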
PPC_REC_MAX_VIRTUAL_GPR); + imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction + imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index + 1); // index after instruction + } + // next instruction + index++; + } +} + +void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) +{ + // for each register calculate min/max index of usage range within each segment + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) + { + PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, segIt); + } +} + +raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range) +{ + if (imlSegment->raDistances.isProcessed[vGPR]) + { + // return already existing segment + return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; + } + imlSegment->raDistances.isProcessed[vGPR] = true; + if (_isRangeDefined(imlSegment, vGPR) == false) + return nullptr; + // create subrange + cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr); + raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd); + // traverse forward + if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + { + if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range); + cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); + } + if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range); + cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); + } + } + // traverse backward + if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + { + for (auto& it : imlSegment->list_prevSegments) + { + if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); + } + } + return subrange; +} + +void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +{ + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + { + if (_isRangeDefined(imlSegment, i) == false) + continue; + if (imlSegment->raDistances.isProcessed[i]) + continue; + raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]); + PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range); + } + // create lookup table of ranges + raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR]; + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) + { + vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i]; +#ifdef CEMU_DEBUG_ASSERT + if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr) + assert_dbg(); +#endif + } + // parse instructions and convert to locations + sint32 index = 0; + 
PPCImlOptimizerUsedRegisters_t gprTracking; + while (index < imlSegment->imlListCount) + { + // end loop at suffix instruction + if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) + break; + // get accessed GPRs + PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); + // handle accessed GPR + for (sint32 t = 0; t < 4; t++) + { + sint32 virtualRegister = gprTracking.gpr[t]; + if (virtualRegister < 0) + continue; + bool isWrite = (t == 3); + // add location + PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite); +#ifdef CEMU_DEBUG_ASSERT + if (index < vGPR2Subrange[virtualRegister]->start.index) + assert_dbg(); + if (index + 1 > vGPR2Subrange[virtualRegister]->end.index) + assert_dbg(); +#endif + } + // next instruction + index++; + } +} + +void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) +{ + if (_isRangeDefined(imlSegment, vGPR) == false) + { + imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; + imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; + return; + } + imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; +} + +void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) +{ + if (_isRangeDefined(imlSegment, vGPR) == false) + { + imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; + imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START; + } + else + { + imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; + } + // propagate backwards + for (auto& it : imlSegment->list_prevSegments) + { + PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR); + } +} + +void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth) +{ +#ifdef CEMU_DEBUG_ASSERT + if (routeDepth < 2) + assert_dbg(); +#endif + // extend starting range to end of segment + PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR); + // extend all the connecting segments in both directions + for (sint32 i = 1; i < (routeDepth - 1); i++) + { + PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR); + PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR); + } + // extend the final segment towards the beginning + PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth - 1], vGPR); +} + +void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth) +{ + if (routeDepth >= 64) + { + forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress); + return; + } + route[routeDepth] = currentSegment; + if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX) + { + // measure distance to end of segment + distanceLeft -= currentSegment->imlListCount; + if (distanceLeft > 0) + { + if (currentSegment->nextSegmentBranchNotTaken) + _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1); + if (currentSegment->nextSegmentBranchTaken) + _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1); + } + return; + } + else + { + 
// measure distance to range + if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END) + { + if (distanceLeft < currentSegment->imlListCount) + return; // range too far away + } + else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft) + return; // out of range + // found close range -> connect ranges + _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1); + } +} + +void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR) +{ +#ifdef CEMU_DEBUG_ASSERT + if (currentSegment->raDistances.reg[vGPR].usageEnd < 0) + assert_dbg(); +#endif + // count instructions to end of initial segment + if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START) + assert_dbg(); + sint32 instructionsUntilEndOfSeg; + if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + instructionsUntilEndOfSeg = 0; + else + instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd; + +#ifdef CEMU_DEBUG_ASSERT + if (instructionsUntilEndOfSeg < 0) + assert_dbg(); +#endif + sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; + if (remainingScanDist <= 0) + return; // can't reach end + + // also dont forget: Extending is easier if we allow 'non symmetric' branches. E.g. register range one enters one branch + PPCRecImlSegment_t* route[64]; + route[0] = currentSegment; + if (currentSegment->nextSegmentBranchNotTaken) + { + _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1); + } + if (currentSegment->nextSegmentBranchTaken) + { + _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1); + } +} + +void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +{ + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + { + if (imlSegment->raDistances.reg[i].usageStart == INT_MAX) + continue; // not used + // check and extend if possible + PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i); + } +#ifdef CEMU_DEBUG_ASSERT + if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) + assert_dbg(); + if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain) + assert_dbg(); +#endif +} + +void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) +{ + std::vector list_segments; + list_segments.reserve(1000); + sint32 index = 0; + imlSegment->raRangeExtendProcessed = true; + list_segments.push_back(imlSegment); + while (index < list_segments.size()) + { + PPCRecImlSegment_t* currentSegment = list_segments[index]; + PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment); + // follow flow + if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) + { + currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true; + list_segments.push_back(currentSegment->nextSegmentBranchNotTaken); + } + if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false) + { + 
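
PPCRecRA_followFlowAndExtendRanges above is a worklist walk over the segment graph: start from an entry segment, process each reachable segment exactly once (raRangeExtendProcessed guards against revisiting), and queue both successors. A sketch of the traversal with the merge step abstracted into a callback:

```cpp
#include <vector>

struct Segment
{
	Segment* nextSegmentBranchTaken = nullptr;
	Segment* nextSegmentBranchNotTaken = nullptr;
	bool raRangeExtendProcessed = false;
};

// processSegment stands in for PPCRecRA_mergeCloseRangesForSegmentV2.
template <typename Fn>
void followFlow(Segment* entry, Fn&& processSegment)
{
	std::vector<Segment*> worklist;
	worklist.reserve(1000);
	entry->raRangeExtendProcessed = true;
	worklist.push_back(entry);
	for (size_t index = 0; index < worklist.size(); index++)
	{
		Segment* current = worklist[index];
		processSegment(current);
		if (current->nextSegmentBranchNotTaken && !current->nextSegmentBranchNotTaken->raRangeExtendProcessed)
		{
			current->nextSegmentBranchNotTaken->raRangeExtendProcessed = true;
			worklist.push_back(current->nextSegmentBranchNotTaken);
		}
		if (current->nextSegmentBranchTaken && !current->nextSegmentBranchTaken->raRangeExtendProcessed)
		{
			current->nextSegmentBranchTaken->raRangeExtendProcessed = true;
			worklist.push_back(current->nextSegmentBranchTaken);
		}
	}
}
```
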
currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true; + list_segments.push_back(currentSegment->nextSegmentBranchTaken); + } + index++; + } +} + +void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext) +{ + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + { + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + if (imlSegment->list_prevSegments.empty()) + { + if (imlSegment->raRangeExtendProcessed) + assert_dbg(); // should not happen + PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment); + } + } +} + +void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) +{ + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + { + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + auto localLoopDepth = imlSegment->loopDepth; + if (localLoopDepth <= 0) + continue; // not inside a loop + // look for loop exit + bool hasLoopExit = false; + if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth) + { + hasLoopExit = true; + } + if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth) + { + hasLoopExit = true; + } + if (hasLoopExit == false) + continue; + + // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) + for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + { + if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END) + continue; // range not set or does not reach end of segment + if (imlSegment->nextSegmentBranchTaken) + PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i); + if (imlSegment->nextSegmentBranchNotTaken) + PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i); + } + } +} + +void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) +{ + // merge close ranges + PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); + // extra pass to move register stores out of loops + PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext); + // calculate liveness ranges + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + { + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; + PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment); + } +} + +void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) +{ + bool isRead = false; + bool isWritten = false; + bool isOverwritten = false; + for (auto& location : subrange->list_locations) + { + if (location.isRead) + { + isRead = true; + } + if (location.isWrite) + { + if (isRead == false) + isOverwritten = true; + isWritten = true; + } + } + subrange->_noLoad = isOverwritten; + subrange->hasStore = isWritten; + + if (subrange->start.index == RA_INTER_RANGE_START) + subrange->_noLoad = true; +} + +void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext) +{ + // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore + // first do a per-subrange pass + for (auto& range : ppcImlGenContext->raInfo.list_ranges) + { + for (auto& subrange : range->list_subranges) + { + PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); + } + } + // then do a second pass where we scan along subrange flow + for (auto& range : 
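
PPCRecRA_analyzeSubrangeDataDependencyV2 above derives two flags per liveness subrange: _noLoad when the value is written before it is ever read within the subrange (and also when the subrange starts at RA_INTER_RANGE_START, since the diff sets _noLoad in that case as well), and hasStore when the value is written at all. A sketch of that scan:

```cpp
#include <vector>

struct Location { bool isRead; bool isWrite; };

// noLoad: the previous register value never needs to be loaded.
// hasStore: the value must be written back when the range ends.
void analyzeSubrange(const std::vector<Location>& locations, bool startsAtInterRangeStart,
                     bool& noLoad, bool& hasStore)
{
	bool isRead = false;
	bool isWritten = false;
	bool isOverwritten = false;
	for (const Location& loc : locations)
	{
		if (loc.isRead)
			isRead = true;
		if (loc.isWrite)
		{
			if (!isRead)
				isOverwritten = true; // written before ever being read
			isWritten = true;
		}
	}
	noLoad = isOverwritten || startsAtInterRangeStart;
	hasStore = isWritten;
}
```
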
ppcImlGenContext->raInfo.list_ranges) + { + for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm + { + _analyzeRangeDataFlow(subrange); + } + } } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp deleted file mode 100644 index abb47e92..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp +++ /dev/null @@ -1,414 +0,0 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "PPCRecompilerImlRanges.h" -#include - -bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX); -} - -void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - { - imlSegment->raDistances.reg[i].usageStart = INT_MAX; - imlSegment->raDistances.reg[i].usageEnd = INT_MIN; - } - // scan instructions for usage range - sint32 index = 0; - PPCImlOptimizerUsedRegisters_t gprTracking; - while (index < imlSegment->imlListCount) - { - // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) - break; - // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR); - imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction - imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index+1); // index after instruction - } - // next instruction - index++; - } -} - -void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // for each register calculate min/max index of usage range within each segment - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, ppcImlGenContext->segmentList[s]); - } -} - -raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range) -{ - if (imlSegment->raDistances.isProcessed[vGPR]) - { - // return already existing segment - return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; - } - imlSegment->raDistances.isProcessed[vGPR] = true; - if (_isRangeDefined(imlSegment, vGPR) == false) - return nullptr; - // create subrange - cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr); - raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd); - // traverse forward - if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - { - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range); - 
cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); - } - if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); - } - } - // traverse backward - if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - for (auto& it : imlSegment->list_prevSegments) - { - if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); - } - } - // return subrange - return subrange; -} - -void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - { - if( _isRangeDefined(imlSegment, i) == false ) - continue; - if( imlSegment->raDistances.isProcessed[i]) - continue; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]); - PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range); - } - // create lookup table of ranges - raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR]; - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - { - vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i]; -#ifdef CEMU_DEBUG_ASSERT - if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr) - assert_dbg(); -#endif - } - // parse instructions and convert to locations - sint32 index = 0; - PPCImlOptimizerUsedRegisters_t gprTracking; - while (index < imlSegment->imlListCount) - { - // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) - break; - // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); - // handle accessed GPR - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - bool isWrite = (t == 3); - // add location - PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite); -#ifdef CEMU_DEBUG_ASSERT - if (index < vGPR2Subrange[virtualRegister]->start.index) - assert_dbg(); - if (index+1 > vGPR2Subrange[virtualRegister]->end.index) - assert_dbg(); -#endif - } - // next instruction - index++; - } -} - -void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - if (_isRangeDefined(imlSegment, vGPR) == false) - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; - return; - } - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; -} - -void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - if (_isRangeDefined(imlSegment, vGPR) == false) - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START; - } - else - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; - } - // propagate backwards - for (auto& it : imlSegment->list_prevSegments) - { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR); - 
} -} - -void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth) -{ -#ifdef CEMU_DEBUG_ASSERT - if (routeDepth < 2) - assert_dbg(); -#endif - // extend starting range to end of segment - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR); - // extend all the connecting segments in both directions - for (sint32 i = 1; i < (routeDepth - 1); i++) - { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR); - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR); - } - // extend the final segment towards the beginning - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth-1], vGPR); -} - -void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth) -{ - if (routeDepth >= 64) - { - cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded for function 0x{:08x}", ppcImlGenContext->functionRef->ppcAddress); - return; - } - route[routeDepth] = currentSegment; - if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX) - { - // measure distance to end of segment - distanceLeft -= currentSegment->imlListCount; - if (distanceLeft > 0) - { - if (currentSegment->nextSegmentBranchNotTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1); - if (currentSegment->nextSegmentBranchTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1); - } - return; - } - else - { - // measure distance to range - if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END) - { - if (distanceLeft < currentSegment->imlListCount) - return; // range too far away - } - else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft) - return; // out of range - // found close range -> connect ranges - _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1); - } -} - -void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR) -{ -#ifdef CEMU_DEBUG_ASSERT - if (currentSegment->raDistances.reg[vGPR].usageEnd < 0) - assert_dbg(); -#endif - // count instructions to end of initial segment - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START) - assert_dbg(); - sint32 instructionsUntilEndOfSeg; - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - instructionsUntilEndOfSeg = 0; - else - instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd; - -#ifdef CEMU_DEBUG_ASSERT - if (instructionsUntilEndOfSeg < 0) - assert_dbg(); -#endif - sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; - if (remainingScanDist <= 0) - return; // can't reach end - - // also dont forget: Extending is easier if we allow 'non symetric' branches. E.g. 
register range one enters one branch - PPCRecImlSegment_t* route[64]; - route[0] = currentSegment; - if (currentSegment->nextSegmentBranchNotTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1); - } - if (currentSegment->nextSegmentBranchTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1); - } -} - -void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries - { - if(imlSegment->raDistances.reg[i].usageStart == INT_MAX) - continue; // not used - // check and extend if possible - PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i); - } -#ifdef CEMU_DEBUG_ASSERT - if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) - assert_dbg(); - if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain) - assert_dbg(); -#endif -} - -void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - std::vector list_segments; - list_segments.reserve(1000); - sint32 index = 0; - imlSegment->raRangeExtendProcessed = true; - list_segments.push_back(imlSegment); - while (index < list_segments.size()) - { - PPCRecImlSegment_t* currentSegment = list_segments[index]; - PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment); - // follow flow - if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) - { - currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true; - list_segments.push_back(currentSegment->nextSegmentBranchNotTaken); - } - if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false) - { - currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true; - list_segments.push_back(currentSegment->nextSegmentBranchTaken); - } - index++; - } -} - -void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - if (imlSegment->list_prevSegments.empty()) - { - if (imlSegment->raRangeExtendProcessed) - assert_dbg(); // should not happen - PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment); - } - } -} - -void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - auto localLoopDepth = imlSegment->loopDepth; - if( localLoopDepth <= 0 ) - continue; // not inside a loop - // look for loop exit - bool hasLoopExit = false; - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth) - { - hasLoopExit = true; - } - if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth) - { - hasLoopExit = true; - } - if(hasLoopExit == false) - continue; - - // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) - for (sint32 i = 0; i < 
PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries - { - if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END) - continue; // range not set or does not reach end of segment - if(imlSegment->nextSegmentBranchTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i); - if(imlSegment->nextSegmentBranchNotTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i); - } - } -} - -void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // merge close ranges - PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); - // extra pass to move register stores out of loops - PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext); - // calculate liveness ranges - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment); - } -} - -void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) -{ - bool isRead = false; - bool isWritten = false; - bool isOverwritten = false; - for (auto& location : subrange->list_locations) - { - if (location.isRead) - { - isRead = true; - } - if (location.isWrite) - { - if (isRead == false) - isOverwritten = true; - isWritten = true; - } - } - subrange->_noLoad = isOverwritten; - subrange->hasStore = isWritten; - - if (subrange->start.index == RA_INTER_RANGE_START) - subrange->_noLoad = true; -} - -void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange); - -void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore - // first do a per-subrange pass - for (auto& range : ppcImlGenContext->raInfo.list_ranges) - { - for (auto& subrange : range->list_subranges) - { - PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); - } - } - // then do a second pass where we scan along subrange flow - for (auto& range : ppcImlGenContext->raInfo.list_ranges) - { - for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm - { - _analyzeRangeDataFlow(subrange); - } - } -} diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index fcbe64be..05fd93e7 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -3,11 +3,11 @@ PPCRecImlSegment_t* PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset) { - for(sint32 s=0; ssegmentListCount; s++) + for(PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - if( ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset ) + if(segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset ) { - return ppcImlGenContext->segmentList[s]; + return segIt; } } debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); @@ -94,17 +94,18 @@ void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPC void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) { - 
for(sint32 s=0; ssegmentListCount; s++) + size_t segCount = ppcImlGenContext->segmentList2.size(); + for(size_t s=0; ssegmentList[s]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[s]; - bool isLastSegment = (s+1)>=ppcImlGenContext->segmentListCount; - PPCRecImlSegment_t* nextSegment = isLastSegment?NULL:ppcImlGenContext->segmentList[s+1]; + bool isLastSegment = (s+1)>=ppcImlGenContext->segmentList2.size(); + PPCRecImlSegment_t* nextSegment = isLastSegment?nullptr:ppcImlGenContext->segmentList2[s+1]; // handle empty segment if( imlSegment->imlListCount == 0 ) { if (isLastSegment == false) - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList[s+1]); // continue execution to next segment + PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList2[s+1]); // continue execution to next segment else imlSegment->nextSegmentIsUncertain = true; continue; @@ -143,15 +144,15 @@ void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext) { - sint32 initialSegmentCount = ppcImlGenContext->segmentListCount; - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) + size_t initialSegmentCount = ppcImlGenContext->segmentList2.size(); + for (size_t i = 0; i < initialSegmentCount; i++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i]; + PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList2[i]; if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) { // spawn new segment at end - PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentListCount, 1); - PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount-1]; + PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentList2.size(), 1); + PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList2[ppcImlGenContext->segmentList2.size()-1]; entrySegment->isEnterable = true; entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp index a30295b5..b2d934c8 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp @@ -2299,13 +2299,12 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo // generate iml instruction code bool codeGenerationFailed = false; - for(sint32 s=0; ssegmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - ppcImlGenContext->segmentList[s]->x64Offset = x64GenContext.codeBufferIndex; - for(sint32 i=0; iimlListCount; i++) + segIt->x64Offset = x64GenContext.codeBufferIndex; + for(sint32 i=0; iimlListCount; i++) { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; + PPCRecImlInstruction_t* imlInstruction = segIt->imlList+i; if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) { @@ -2352,7 +2351,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo } else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP ) { - if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlSegment, imlInstruction) == false ) + if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, 
ppcImlGenContext, &x64GenContext, segIt, imlInstruction) == false ) { codeGenerationFailed = true; } @@ -2503,11 +2502,11 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo uint32 x64Offset = 0xFFFFFFFF; if (x64GenContext.relocateOffsetTable[i].type == X64_RELOC_LINK_TO_PPC) { - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) + for (PPCRecImlSegment_t* segIt : ppcImlGenContext->segmentList2) { - if (ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset) + if (segIt->isJumpDestination && segIt->jumpDestinationPPCAddress == ppcOffset) { - x64Offset = ppcImlGenContext->segmentList[s]->x64Offset; + x64Offset = segIt->x64Offset; break; } }