diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h index 9dec696d..e4e6252f 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -17,7 +17,8 @@ enum class IMLRegFormat : uint8 I8, // I1 ? F64, - F32 + F32, + TYPE_COUNT, }; class IMLReg @@ -86,10 +87,9 @@ public: return IsValid() && GetRegID() == regId; } - // risky + // compare all fields bool operator==(const IMLReg& other) const { - //__debugbreak(); return m_raw == other.m_raw; } diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp index 351306ec..e540518e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -9,9 +9,45 @@ #include +struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment +{ + IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd) : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {}; + + void TrackInstruction(sint32 index) + { + usageStart = std::min(usageStart, index); + usageEnd = std::max(usageEnd, index + 1); // exclusive index + } + + sint32 usageStart; + sint32 usageEnd; + bool isProcessed{false}; + IMLRegFormat regBaseFormat; +}; + struct IMLRegisterAllocatorContext { IMLRegisterAllocatorParameters* raParam; + ppcImlGenContext_t* deprGenContext; // deprecated. Try to decouple IMLRA from other parts of IML/PPCRec + + std::unordered_map regIdToBaseFormat; // a vector would be more efficient but it also means that reg ids have to be continuous and not completely arbitrary + // first pass + std::vector> perSegmentAbstractRanges; + // second pass + + // helper methods + inline std::unordered_map& GetSegmentAbstractRangeMap(IMLSegment* imlSegment) + { + return perSegmentAbstractRanges[imlSegment->momentaryIndex]; + } + + inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const + { + auto it = regIdToBaseFormat.find(regId); + cemu_assert_debug(it != regIdToBaseFormat.cend()); + return it->second; + } + }; uint32 recRACurrentIterationIndex = 0; @@ -455,7 +491,9 @@ bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenCon continue; } // find free register for current subrangeItr and segment - IMLPhysRegisterSet physRegSet = ctx.raParam->physicalRegisterPool; + IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->range->virtualRegister); + IMLPhysRegisterSet physRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat); + cemu_assert_debug(physRegSet.HasAnyAvailable()); // register uses type with no valid pool for (auto& liverangeItr : livenessTimeline.activeRanges) { cemu_assert_debug(liverangeItr->range->physicalRegister >= 0); @@ -976,10 +1014,6 @@ void IMLRA_GenerateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) } } -void IMLRA_CalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext); -void IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext); -void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext); - void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) { // insert empty segments after every non-taken branch if the linked segment has more than one input @@ -1030,97 +1064,100 @@ void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) } } -void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam) +IMLRARegAbstractLiveness* _GetAbstractRange(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId) { - IMLRegisterAllocatorContext ctx; - ctx.raParam = &raParam; - - IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext); - - ppcImlGenContext->raInfo.list_ranges = std::vector(); - - IMLRA_CalculateLivenessRanges(ppcImlGenContext); - IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext); - IMLRA_AssignRegisters(ctx, ppcImlGenContext); - - IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); - IMLRA_GenerateMoveInstructions(ppcImlGenContext); - - PPCRecRA_deleteAllRanges(ppcImlGenContext); + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + auto it = segMap.find(regId); + return it != segMap.end() ? &it->second : nullptr; } - -bool _isRangeDefined(IMLSegment* imlSegment, sint32 vGPR) +// scan instructions and establish register usage range for segment +void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { - return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX); -} - -void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) -{ - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) - { - imlSegment->raDistances.reg[i].usageStart = INT_MAX; - imlSegment->raDistances.reg[i].usageEnd = INT_MIN; - } - // scan instructions for usage range - size_t index = 0; + size_t instructionIndex = 0; IMLUsedRegisters gprTracking; - while (index < imlSegment->imlList.size()) + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + while (instructionIndex < imlSegment->imlList.size()) { - imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); + imlSegment->imlList[instructionIndex].CheckRegisterUsage(&gprTracking); gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { IMLRegID gprId = gprReg.GetRegID(); - cemu_assert_debug(gprId < IML_RA_VIRT_REG_COUNT_MAX); - imlSegment->raDistances.reg[gprId].usageStart = std::min(imlSegment->raDistances.reg[gprId].usageStart, index); // index before/at instruction - imlSegment->raDistances.reg[gprId].usageEnd = std::max(imlSegment->raDistances.reg[gprId].usageEnd, index + 1); // index after instruction + auto it = segDistMap.find(gprId); + if (it == segDistMap.end()) + { + segDistMap.try_emplace(gprId, gprReg.GetBaseFormat(), (sint32)instructionIndex, (sint32)instructionIndex + 1); + ctx.regIdToBaseFormat.try_emplace(gprId, gprReg.GetBaseFormat()); + } + else + { + it->second.TrackInstruction(instructionIndex); +#ifdef CEMU_DEBUG_ASSERT + cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same +#endif + } }); - index++; + instructionIndex++; } } -void IMLRA_CalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) { // for each register calculate min/max index of usage range within each segment - for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + size_t dbgIndex = 0; + for (IMLSegment* segIt : ctx.deprGenContext->segmentList2) { - PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, segIt); + cemu_assert_debug(segIt->momentaryIndex == dbgIndex); + IMLRA_CalculateSegmentMinMaxAbstractRanges(ctx, segIt); + dbgIndex++; } } -raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR, raLivenessRange_t* range) +raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, sint32 vGPR, raLivenessRange_t* range) { - if (imlSegment->raDistances.isProcessed[vGPR]) + IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR); + if (!abstractRange) + return nullptr; + if (abstractRange->isProcessed) { // return already existing segment return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; } - imlSegment->raDistances.isProcessed[vGPR] = true; - if (_isRangeDefined(imlSegment, vGPR) == false) - return nullptr; + abstractRange->isProcessed = true; // create subrange cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr); - raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd); + raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ctx.deprGenContext, range, imlSegment, abstractRange->usageStart, abstractRange->usageEnd); // traverse forward - if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + if (abstractRange->usageEnd == RA_INTER_RANGE_END) { - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + if (imlSegment->nextSegmentBranchTaken) { - subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); + IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR); + if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, range); + cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); + } } - if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + if (imlSegment->nextSegmentBranchNotTaken) { - subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); + IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR); + if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, range); + cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); + } } } // traverse backward - if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) + if (abstractRange->usageStart == RA_INTER_RANGE_START) { for (auto& it : imlSegment->list_prevSegments) { - if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); + IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR); + if(!prevRange) + continue; + if (prevRange->usageEnd == RA_INTER_RANGE_END) + PPCRecRA_convertToMappedRanges(ctx, it, vGPR, range); } } // for subranges which exit the segment at the end there is a hard requirement that they cover the suffix instruction @@ -1135,17 +1172,19 @@ raLivenessSubrange_t* PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlG return subrange; } -void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +// take abstract range data and create LivenessRanges +void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) { - if (_isRangeDefined(imlSegment, i) == false) + if(it.second.isProcessed) continue; - if (imlSegment->raDistances.isProcessed[i]) - continue; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]); - PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range); + IMLRegID regId = it.first; + raLivenessRange_t* range = PPCRecRA_createRangeBase(ctx.deprGenContext, regId, ctx.deprGenContext->mappedRegister[regId]); + PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, range); } + // create lookup table of ranges raLivenessSubrange_t* vGPR2Subrange[IML_RA_VIRT_REG_COUNT_MAX]; for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) @@ -1168,7 +1207,10 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[gprId], index, !isWritten, isWritten); #ifdef CEMU_DEBUG_ASSERT if ((sint32)index < vGPR2Subrange[gprId]->start.index) + { + IMLRARegAbstractLiveness* dbgAbstractRange = _GetAbstractRange(ctx, imlSegment, gprId); assert_dbg(); + } if ((sint32)index + 1 > vGPR2Subrange[gprId]->end.index) assert_dbg(); #endif @@ -1177,57 +1219,63 @@ void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, } } -void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR) +void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId) { - if (_isRangeDefined(imlSegment, vGPR) == false) + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + auto it = segDistMap.find(regId); + if (it == segDistMap.end()) { + sint32 startIndex; if(imlSegment->HasSuffixInstruction()) - imlSegment->raDistances.reg[vGPR].usageStart = imlSegment->GetSuffixInstructionIndex(); + startIndex = imlSegment->GetSuffixInstructionIndex(); else - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; - return; - } - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; -} - -void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 vGPR) -{ - if (_isRangeDefined(imlSegment, vGPR) == false) - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START; + startIndex = RA_INTER_RANGE_END; + segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, startIndex, RA_INTER_RANGE_END); } else { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; + it->second.usageEnd = RA_INTER_RANGE_END; + } +} + +void IMLRA_extendAbstractRangeToBeginningOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId) +{ + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + auto it = segDistMap.find(regId); + if (it == segDistMap.end()) + { + segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, RA_INTER_RANGE_START, RA_INTER_RANGE_START); + } + else + { + it->second.usageStart = RA_INTER_RANGE_START; } // propagate backwards for (auto& it : imlSegment->list_prevSegments) { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR); + IMLRA_extendAbstractRangeToEndOfSegment(ctx, it, regId); } } -void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, IMLSegment** route, sint32 routeDepth) +void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regId, IMLSegment** route, sint32 routeDepth) { #ifdef CEMU_DEBUG_ASSERT if (routeDepth < 2) assert_dbg(); #endif // extend starting range to end of segment - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR); + IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[0], regId); // extend all the connecting segments in both directions for (sint32 i = 1; i < (routeDepth - 1); i++) { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR); - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR); + IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[i], regId); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[i], regId); } // extend the final segment towards the beginning - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth - 1], vGPR); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[routeDepth - 1], regId); } -void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* currentSegment, sint32 vGPR, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth) +void _PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth) { if (routeDepth >= 64) { @@ -1235,53 +1283,47 @@ void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLS return; } route[routeDepth] = currentSegment; - if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX) + + IMLRARegAbstractLiveness* range = _GetAbstractRange(ctx, currentSegment, regID); + + if (!range) { - // measure distance to end of segment + // measure distance over entire segment distanceLeft -= (sint32)currentSegment->imlList.size(); if (distanceLeft > 0) { if (currentSegment->nextSegmentBranchNotTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1); + _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1); if (currentSegment->nextSegmentBranchTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1); + _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1); } return; } else { // measure distance to range - if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END) + if (range->usageStart == RA_INTER_RANGE_END) { if (distanceLeft < (sint32)currentSegment->imlList.size()) return; // range too far away } - else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft) + else if (range->usageStart != RA_INTER_RANGE_START && range->usageStart > distanceLeft) return; // out of range // found close range -> connect ranges - _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1); + IMLRA_connectAbstractRanges(ctx, regID, route, routeDepth + 1); } } -void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* currentSegment, sint32 vGPR) +void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRARegAbstractLiveness* range, IMLRegID regID) { -#ifdef CEMU_DEBUG_ASSERT - if (currentSegment->raDistances.reg[vGPR].usageEnd < 0) - assert_dbg(); -#endif + cemu_assert_debug(range->usageEnd >= 0); // count instructions to end of initial segment - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START) - assert_dbg(); sint32 instructionsUntilEndOfSeg; - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) + if (range->usageEnd == RA_INTER_RANGE_END) instructionsUntilEndOfSeg = 0; else - instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - currentSegment->raDistances.reg[vGPR].usageEnd; - -#ifdef CEMU_DEBUG_ASSERT - if (instructionsUntilEndOfSeg < 0) - assert_dbg(); -#endif + instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - range->usageEnd; + cemu_assert_debug(instructionsUntilEndOfSeg >= 0); sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; if (remainingScanDist <= 0) return; // can't reach end @@ -1289,23 +1331,17 @@ void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, IMLSe IMLSegment* route[64]; route[0] = currentSegment; if (currentSegment->nextSegmentBranchNotTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1); - } + _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1); if (currentSegment->nextSegmentBranchTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1); - } + _PPCRecRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1); } -void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) { - if (imlSegment->raDistances.reg[i].usageStart == INT_MAX) - continue; // not used - // check and extend if possible - PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i); + PPCRecRA_checkAndTryExtendRange(ctx, imlSegment, &(it.second), it.first); } #ifdef CEMU_DEBUG_ASSERT if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) @@ -1315,7 +1351,7 @@ void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, #endif } -void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) { std::vector list_segments; list_segments.reserve(1000); @@ -1325,7 +1361,7 @@ void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, IM while (index < list_segments.size()) { IMLSegment* currentSegment = list_segments[index]; - PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment); + PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment); // follow flow if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) { @@ -1341,25 +1377,24 @@ void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, IM } } -void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_mergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx) { - for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; if (imlSegment->list_prevSegments.empty()) { - if (imlSegment->raRangeExtendProcessed) - assert_dbg(); // should not happen - PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment); + cemu_assert_debug(!imlSegment->raRangeExtendProcessed); // should not be processed yet + PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment); } } } -void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_extendAbstracRangesOutOfLoops(IMLRegisterAllocatorContext& ctx) { - for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) { - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; auto localLoopDepth = imlSegment->loopDepth; if (localLoopDepth <= 0) continue; // not inside a loop @@ -1376,31 +1411,28 @@ void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) if (hasLoopExit == false) continue; - // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) - for (sint32 i = 0; i < IML_RA_VIRT_REG_COUNT_MAX; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries + // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) { - if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END) - continue; // range not set or does not reach end of segment + if(it.second.usageEnd != RA_INTER_RANGE_END) + continue; if (imlSegment->nextSegmentBranchTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchTaken, it.first); if (imlSegment->nextSegmentBranchNotTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchNotTaken, it.first); } } } -void IMLRA_ProcessFlowAndCalculateLivenessRanges(ppcImlGenContext_t* ppcImlGenContext) +void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) { - // merge close ranges - PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); + IMLRA_mergeCloseAbstractRanges(ctx); // extra pass to move register stores out of loops - PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext); + IMLRA_extendAbstracRangesOutOfLoops(ctx); // calculate liveness ranges - for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) - { - IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; - PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment); - } + for (auto& segIt : ctx.deprGenContext->segmentList2) + IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt); } void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) @@ -1447,4 +1479,28 @@ void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) _analyzeRangeDataFlow(subrange); } } -} \ No newline at end of file +} + +void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam) +{ + IMLRegisterAllocatorContext ctx; + ctx.raParam = &raParam; + ctx.deprGenContext = ppcImlGenContext; + + IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext); + + ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment + + ppcImlGenContext->raInfo.list_ranges = std::vector(); + + ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size()); + + IMLRA_CalculateLivenessRanges(ctx); + IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx); + IMLRA_AssignRegisters(ctx, ppcImlGenContext); + + IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); + IMLRA_GenerateMoveInstructions(ppcImlGenContext); + + PPCRecRA_deleteAllRanges(ppcImlGenContext); +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h index 87e36b00..5e0d0f04 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -88,7 +88,12 @@ private: struct IMLRegisterAllocatorParameters { - IMLPhysRegisterSet physicalRegisterPool; + inline IMLPhysRegisterSet& GetPhysRegPool(IMLRegFormat regFormat) + { + return perTypePhysPool[stdx::to_underlying(regFormat)]; + } + + IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];// physicalRegisterPool; }; void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h index 8ef0669e..4e90d529 100644 --- a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -54,9 +54,9 @@ struct raLivenessSubrange_t struct raLivenessRange_t { - sint32 virtualRegister; + IMLRegID virtualRegister; sint32 physicalRegister; - sint32 name; + IMLName name; std::vector list_subranges; }; @@ -70,16 +70,6 @@ struct PPCSegmentRegisterAllocatorInfo_t raLivenessSubrange_t* linkedList_perVirtualGPR[IML_RA_VIRT_REG_COUNT_MAX]{}; }; -struct PPCRecVGPRDistances_t -{ - struct _RegArrayEntry - { - sint32 usageStart{}; - sint32 usageEnd{}; - }reg[IML_RA_VIRT_REG_COUNT_MAX]; - bool isProcessed[IML_RA_VIRT_REG_COUNT_MAX]{}; -}; - struct IMLSegment { sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) @@ -113,7 +103,7 @@ struct IMLSegment uint32 crBitsWritten{}; // bits that are written in this segment // register allocator info PPCSegmentRegisterAllocatorInfo_t raInfo{}; - PPCRecVGPRDistances_t raDistances{}; + //PPCRecVGPRDistances_t raDistances{}; bool raRangeExtendProcessed{}; // segment state API diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 7ca247ba..b040275e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -218,7 +218,6 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP // collect list of PPC-->x64 entry points cemuLog_log(LogType::Force, "[Recompiler] Successfully compiled {:08x} - {:08x} Segments: {}", ppcRecFunc->ppcAddress, ppcRecFunc->ppcAddress + ppcRecFunc->ppcSize, ppcImlGenContext.segmentList2.size()); - cemu_assert_debug(ppcImlGenContext.imlListCount == 0); entryPointsOut.clear(); for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) @@ -295,18 +294,20 @@ bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) } IMLRegisterAllocatorParameters raParam; - raParam.physicalRegisterPool.SetAvailable(X86_REG_RAX); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RDX); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RBX); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RBP); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RSI); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RDI); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R8); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R9); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R10); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R11); - raParam.physicalRegisterPool.SetAvailable(X86_REG_R12); - raParam.physicalRegisterPool.SetAvailable(X86_REG_RCX); + + auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64); + gprPhysPool.SetAvailable(X86_REG_RAX); + gprPhysPool.SetAvailable(X86_REG_RDX); + gprPhysPool.SetAvailable(X86_REG_RBX); + gprPhysPool.SetAvailable(X86_REG_RBP); + gprPhysPool.SetAvailable(X86_REG_RSI); + gprPhysPool.SetAvailable(X86_REG_RDI); + gprPhysPool.SetAvailable(X86_REG_R8); + gprPhysPool.SetAvailable(X86_REG_R9); + gprPhysPool.SetAvailable(X86_REG_R10); + gprPhysPool.SetAvailable(X86_REG_R11); + gprPhysPool.SetAvailable(X86_REG_R12); + gprPhysPool.SetAvailable(X86_REG_RCX); IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index c80fad8d..7f9817aa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -45,10 +45,6 @@ struct ppcImlGenContext_t uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; // temporary floating point registers (single and double precision) uint32 mappedFPRRegister[256]; - // list of intermediate instructions - IMLInstruction* imlList; - sint32 imlListSize; - sint32 imlListCount; // list of segments std::vector segmentList2; // code generation control @@ -66,16 +62,8 @@ struct ppcImlGenContext_t ~ppcImlGenContext_t() { - if (imlList) - { - free(imlList); - imlList = nullptr; - } - for (IMLSegment* imlSegment : segmentList2) - { delete imlSegment; - } segmentList2.clear(); } @@ -117,6 +105,12 @@ struct ppcImlGenContext_t segmentList2[i] = new IMLSegment(); return { segmentList2.data() + index, count}; } + + void UpdateSegmentIndices() + { + for (size_t i = 0; i < segmentList2.size(); i++) + segmentList2[i]->momentaryIndex = (sint32)i; + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)();