mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-02-19 19:42:48 +01:00
PPCRec: Update spill cost calculation
This commit is contained in:
parent
70c99fd626
commit
96d7c754f9
@ -3,9 +3,6 @@
|
||||
#include "IMLInstruction.h"
|
||||
#include "IMLSegment.h"
|
||||
|
||||
// analyzer
|
||||
bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment);
|
||||
|
||||
// optimizer passes
|
||||
void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext);
|
||||
void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext);
|
||||
|
@ -3,53 +3,3 @@
|
||||
#include "util/helpers/fixedSizeList.h"
|
||||
|
||||
#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h"
|
||||
|
||||
/*
|
||||
* Analyzes a single segment and returns true if it is a finite loop
|
||||
*/
|
||||
bool IMLAnalyzer_IsTightFiniteLoop(IMLSegment* imlSegment)
|
||||
{
|
||||
return false; // !!! DISABLED !!!
|
||||
|
||||
bool isTightFiniteLoop = false;
|
||||
// base criteria, must jump to beginning of same segment
|
||||
if (imlSegment->nextSegmentBranchTaken != imlSegment)
|
||||
return false;
|
||||
// loops using BDNZ are assumed to always be finite
|
||||
for(const IMLInstruction& instIt : imlSegment->imlList)
|
||||
{
|
||||
if (instIt.type == PPCREC_IML_TYPE_R_S32 && instIt.operation == PPCREC_IML_OP_SUB)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// for non-BDNZ loops, check for common patterns
|
||||
// risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB)
|
||||
// this catches most loops with load-update and store-update instructions, but also those with decrementing counters
|
||||
FixedSizeList<IMLReg, 64, true> list_modifiedRegisters;
|
||||
for (const IMLInstruction& instIt : imlSegment->imlList)
|
||||
{
|
||||
if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB) )
|
||||
{
|
||||
list_modifiedRegisters.addUnique(instIt.op_r_immS32.regR);
|
||||
}
|
||||
}
|
||||
if (list_modifiedRegisters.count > 0)
|
||||
{
|
||||
// remove all registers from the list that are modified by non-ADD/SUB instructions
|
||||
// todo: We should also cover the case where ADD+SUB on the same register cancel the effect out
|
||||
IMLUsedRegisters registersUsed;
|
||||
for (const IMLInstruction& instIt : imlSegment->imlList)
|
||||
{
|
||||
if (instIt.type == PPCREC_IML_TYPE_R_S32 && (instIt.operation == PPCREC_IML_OP_ADD || instIt.operation == PPCREC_IML_OP_SUB))
|
||||
continue;
|
||||
instIt.CheckRegisterUsage(®istersUsed);
|
||||
registersUsed.ForEachWrittenGPR([&](IMLReg r) { list_modifiedRegisters.remove(r); });
|
||||
}
|
||||
if (list_modifiedRegisters.count > 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
@ -15,7 +15,6 @@
|
||||
#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed
|
||||
#define DEBUG_RA_INSTRUCTION_GEN 0
|
||||
|
||||
|
||||
struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment
|
||||
{
|
||||
IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd)
|
||||
@ -38,7 +37,7 @@ struct IMLRegisterAllocatorContext
|
||||
IMLRegisterAllocatorParameters* raParam;
|
||||
ppcImlGenContext_t* deprGenContext; // deprecated. Try to decouple IMLRA from other parts of IML/PPCRec
|
||||
|
||||
std::unordered_map<IMLRegID, IMLRegFormat> regIdToBaseFormat; // a vector would be more efficient but it also means that reg ids have to be continuous and not completely arbitrary
|
||||
std::unordered_map<IMLRegID, IMLRegFormat> regIdToBaseFormat;
|
||||
// first pass
|
||||
std::vector<std::unordered_map<IMLRegID, IMLRARegAbstractLiveness>> perSegmentAbstractRanges;
|
||||
|
||||
@ -781,11 +780,11 @@ class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy
|
||||
cemu_assert_debug(currentRangeStart.IsInstructionIndex());
|
||||
distance2 = std::min<sint32>(distance2, imlSegment->imlList.size() * 2 - currentRangeStart.GetRaw()); // limit distance to end of segment
|
||||
// calculate split cost of candidate
|
||||
sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2);
|
||||
sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2);
|
||||
// calculate additional split cost of currentRange if hole is not large enough
|
||||
if (distance2 < requiredSize2)
|
||||
{
|
||||
cost += PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2);
|
||||
cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2);
|
||||
// we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes)
|
||||
cost += (requiredSize2 - distance2) / 10;
|
||||
}
|
||||
@ -889,7 +888,7 @@ class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy
|
||||
continue;
|
||||
// calculate additional cost due to split
|
||||
cemu_assert_debug(distance < requiredSize2); // should always be true otherwise previous step would have selected this register?
|
||||
sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
|
||||
sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
|
||||
// add small additional cost for the remaining range (prefer larger holes)
|
||||
cost += ((requiredSize2 - distance) / 2) / 10;
|
||||
if (cost < strategyCost)
|
||||
@ -959,11 +958,11 @@ class RASpillStrategy_ExplodeRange : public RASpillStrategy
|
||||
IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance);
|
||||
if (distance < 2)
|
||||
continue;
|
||||
sint32 cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate);
|
||||
sint32 cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
|
||||
// if the hole is not large enough, add cost of splitting current subrange
|
||||
if (distance < requiredSize2)
|
||||
{
|
||||
cost += PPCRecRARange_estimateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
|
||||
cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance);
|
||||
// add small additional cost for the remaining range (prefer larger holes)
|
||||
cost += ((requiredSize2 - distance) / 2) / 10;
|
||||
}
|
||||
@ -1032,7 +1031,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy
|
||||
if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister()))
|
||||
continue;
|
||||
sint32 cost;
|
||||
cost = PPCRecRARange_estimateCostAfterRangeExplode(candidate);
|
||||
cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate);
|
||||
// compare with current best candidate for this strategy
|
||||
if (cost < strategyCost)
|
||||
{
|
||||
@ -1043,7 +1042,7 @@ class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy
|
||||
}
|
||||
// add current range as a candidate too
|
||||
sint32 ownCost;
|
||||
ownCost = PPCRecRARange_estimateCostAfterRangeExplode(currentRange);
|
||||
ownCost = IMLRA_CalculateAdditionalCostOfRangeExplode(currentRange);
|
||||
if (ownCost < strategyCost)
|
||||
{
|
||||
strategyCost = ownCost;
|
||||
@ -1859,7 +1858,7 @@ static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange)
|
||||
if (subrangeItr->hasStore)
|
||||
continue; // this ending already stores, no extra cost
|
||||
alreadyStoredInAllEndings = false;
|
||||
sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment);
|
||||
sint32 storeCost = IMLRA_GetSegmentReadWriteCost(subrangeItr->imlSegment);
|
||||
delayStoreCost = std::max(storeCost, delayStoreCost);
|
||||
}
|
||||
if (alreadyStoredInAllEndings)
|
||||
@ -1867,7 +1866,7 @@ static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange)
|
||||
subrange->hasStore = false;
|
||||
subrange->hasStoreDelayed = true;
|
||||
}
|
||||
else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment))
|
||||
else if (delayStoreCost <= IMLRA_GetSegmentReadWriteCost(subrange->imlSegment))
|
||||
{
|
||||
subrange->hasStore = false;
|
||||
subrange->hasStoreDelayed = true;
|
||||
|
@ -642,7 +642,7 @@ void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 inde
|
||||
subrange->list_locations.emplace_back(index, isRead, isWrite);
|
||||
}
|
||||
|
||||
sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment)
|
||||
sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
|
||||
{
|
||||
sint32 v = imlSegment->loopDepth + 1;
|
||||
v *= 5;
|
||||
@ -668,13 +668,13 @@ sint32 PPCRecRARange_estimateTotalCost(std::span<raLivenessRange*> ranges)
|
||||
if (!subrange->interval2.ExtendsPreviousSegment())
|
||||
{
|
||||
//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
|
||||
mostExpensiveRead = std::max(mostExpensiveRead, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
|
||||
mostExpensiveRead = std::max(mostExpensiveRead, IMLRA_GetSegmentReadWriteCost(subrange->imlSegment));
|
||||
readCount++;
|
||||
}
|
||||
if (!subrange->interval2.ExtendsIntoNextSegment())
|
||||
{
|
||||
//cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment);
|
||||
mostExpensiveWrite = std::max(mostExpensiveWrite, PPCRecRARange_getReadWriteCost(subrange->imlSegment));
|
||||
mostExpensiveWrite = std::max(mostExpensiveWrite, IMLRA_GetSegmentReadWriteCost(subrange->imlSegment));
|
||||
writeCount++;
|
||||
}
|
||||
}
|
||||
@ -683,21 +683,34 @@ sint32 PPCRecRARange_estimateTotalCost(std::span<raLivenessRange*> ranges)
|
||||
return cost;
|
||||
}
|
||||
|
||||
// calculate cost of range that it would have after calling PPCRecRA_explodeRange() on it
|
||||
sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange)
|
||||
// calculate additional cost of range that it would have after calling _ExplodeRange() on it
|
||||
sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
|
||||
{
|
||||
auto ranges = subrange->GetAllSubrangesInCluster();
|
||||
sint32 cost = -PPCRecRARange_estimateTotalCost(ranges);
|
||||
sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges);
|
||||
for (auto& subrange : ranges)
|
||||
{
|
||||
if (subrange->list_locations.empty())
|
||||
continue;
|
||||
cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // we assume a read and a store
|
||||
continue; // this range would be deleted and thus has no cost
|
||||
sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
|
||||
bool hasAdditionalLoad = subrange->interval2.ExtendsPreviousSegment();
|
||||
bool hasAdditionalStore = subrange->interval2.ExtendsIntoNextSegment();
|
||||
if(hasAdditionalLoad && !subrange->list_locations.front().isRead && subrange->list_locations.front().isWrite) // if written before read, then a load isn't necessary
|
||||
{
|
||||
cost += segmentLoadStoreCost;
|
||||
}
|
||||
if(hasAdditionalStore)
|
||||
{
|
||||
bool hasWrite = std::find_if(subrange->list_locations.begin(), subrange->list_locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != subrange->list_locations.end();
|
||||
if(!hasWrite) // ranges which don't modify their value do not need to be stored
|
||||
cost += segmentLoadStoreCost;
|
||||
}
|
||||
}
|
||||
// todo - properly calculating all the data-flow dependency based costs is more complex so this currently is an approximation
|
||||
return cost;
|
||||
}
|
||||
|
||||
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition)
|
||||
sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition)
|
||||
{
|
||||
// validation
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
@ -719,9 +732,53 @@ sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange,
|
||||
if (splitInstructionIndex > subrange->list_locations.back().index)
|
||||
return 0;
|
||||
|
||||
// todo - determine exact cost of split subranges
|
||||
// this can be optimized, but we should change list_locations to track instruction edges instead of instruction indices
|
||||
std::vector<raLivenessLocation_t> headLocations;
|
||||
std::vector<raLivenessLocation_t> tailLocations;
|
||||
for (auto& location : subrange->list_locations)
|
||||
{
|
||||
if(location.GetReadPos() < splitPosition || location.GetWritePos() < splitPosition)
|
||||
headLocations.push_back(location);
|
||||
if(location.GetReadPos() >= splitPosition || location.GetWritePos() >= splitPosition)
|
||||
tailLocations.push_back(location);
|
||||
}
|
||||
// fixup locations
|
||||
if(!headLocations.empty() && headLocations.back().GetWritePos() >= splitPosition)
|
||||
{
|
||||
headLocations.back().isWrite = false;
|
||||
if(!headLocations.back().isRead && !headLocations.back().isWrite)
|
||||
headLocations.pop_back();
|
||||
}
|
||||
if(!tailLocations.empty() && tailLocations.front().GetReadPos() < splitPosition)
|
||||
{
|
||||
tailLocations.front().isRead = false;
|
||||
if(!tailLocations.front().isRead && !tailLocations.front().isWrite)
|
||||
tailLocations.erase(tailLocations.begin());
|
||||
}
|
||||
|
||||
cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // currently we assume that the additional region will require a read and a store
|
||||
// based on
|
||||
sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment);
|
||||
|
||||
auto CalculateCostFromLocationRange = [segmentLoadStoreCost](const std::vector<raLivenessLocation_t>& locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32
|
||||
{
|
||||
if(locations.empty())
|
||||
return 0;
|
||||
sint32 cost = 0;
|
||||
if(locations.front().isRead && trackLoadCost)
|
||||
cost += segmentLoadStoreCost; // not overwritten, so there is a load cost
|
||||
bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raLivenessLocation_t& loc) { return loc.isWrite; }) != locations.end();
|
||||
if(hasWrite && trackStoreCost)
|
||||
cost += segmentLoadStoreCost; // modified, so there is a store cost
|
||||
return cost;
|
||||
};
|
||||
|
||||
sint32 baseCost = CalculateCostFromLocationRange(subrange->list_locations);
|
||||
|
||||
bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().isRead && tailLocations.front().isWrite;
|
||||
|
||||
sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true);
|
||||
cemu_assert_debug(newCost >= baseCost);
|
||||
cost = newCost - baseCost;
|
||||
|
||||
return cost;
|
||||
}
|
@ -1,18 +1,6 @@
|
||||
#pragma once
|
||||
#include "IMLRegisterAllocator.h"
|
||||
|
||||
struct raLivenessLocation_t
|
||||
{
|
||||
sint32 index;
|
||||
bool isRead;
|
||||
bool isWrite;
|
||||
|
||||
raLivenessLocation_t() = default;
|
||||
|
||||
raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
|
||||
: index(index), isRead(isRead), isWrite(isWrite) {};
|
||||
};
|
||||
|
||||
struct raLivenessSubrangeLink
|
||||
{
|
||||
struct raLivenessRange* prev;
|
||||
@ -167,6 +155,28 @@ private:
|
||||
|
||||
};
|
||||
|
||||
struct raLivenessLocation_t
|
||||
{
|
||||
sint32 index;
|
||||
bool isRead;
|
||||
bool isWrite;
|
||||
|
||||
raLivenessLocation_t() = default;
|
||||
|
||||
raLivenessLocation_t(sint32 index, bool isRead, bool isWrite)
|
||||
: index(index), isRead(isRead), isWrite(isWrite) {};
|
||||
|
||||
raInstructionEdge GetReadPos()
|
||||
{
|
||||
return raInstructionEdge(index, true);
|
||||
}
|
||||
|
||||
raInstructionEdge GetWritePos()
|
||||
{
|
||||
return raInstructionEdge(index, false);
|
||||
}
|
||||
};
|
||||
|
||||
struct raInterval
|
||||
{
|
||||
raInterval()
|
||||
@ -354,7 +364,7 @@ void PPCRecRA_updateOrAddSubrangeLocation(raLivenessRange* subrange, sint32 inde
|
||||
void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange);
|
||||
|
||||
// cost estimation
|
||||
sint32 PPCRecRARange_getReadWriteCost(IMLSegment* imlSegment);
|
||||
sint32 PPCRecRARange_estimateCostAfterRangeExplode(raLivenessRange* subrange);
|
||||
sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment);
|
||||
sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange);
|
||||
//sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex);
|
||||
sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition);
|
||||
sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition);
|
Loading…
x
Reference in New Issue
Block a user