PPCRec: Optimizations

This commit is contained in:
Exzap 2024-10-28 23:09:56 +01:00
parent 099d1d4e1f
commit e33272651c
8 changed files with 91 additions and 123 deletions

View File

@ -11,4 +11,16 @@
</Expand>
</Type>
<!-- Debugger visualizer for boost::container::static_vector.
     The summary line shows the current element count; expanding the node lists
     [size] and [capacity] (both hidden in the "simple" view) followed by the stored elements. -->
<Type Name="boost::container::static_vector&lt;*&gt;">
<DisplayString>{{ size={m_holder.m_size} }}</DisplayString>
<Expand>
<Item Name="[size]" ExcludeView="simple">m_holder.m_size</Item>
<Item Name="[capacity]" ExcludeView="simple">static_capacity</Item>
<ArrayItems>
<Size>m_holder.m_size</Size>
<!-- $T1 is the first template argument matched by the wildcard in Name; the storage is viewed as an array of it -->
<ValuePointer>($T1*)m_holder.storage.data</ValuePointer>
</ArrayItems>
</Expand>
</Type>
</AutoVisualizer>

View File

@ -7,6 +7,7 @@
#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
#include "util/MemMapper/MemMapper.h"
#include "Common/cpu_features.h"
#include <boost/container/static_vector.hpp>
static x86Assembler64::GPR32 _reg32(IMLReg physReg)
{
@ -658,29 +659,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
return true;
}
// x64 emitter entry point for PPCREC_IML_TYPE_CONDITIONAL_R_S32 instructions.
// Currently a stub: it triggers cemu_assert_unimplemented() and returns false without emitting anything.
// The code generator loop sets codeGenerationFailed when this function returns false.
bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
cemu_assert_unimplemented();
// Disabled previous implementation, kept for reference:
// load the immediate into a temp register, test the selected CR bit with BT,
// then conditionally move the temp into the result register (CMOVcc on carry / not carry).
//if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
//{
// // registerResult = immS32 (conditional)
// if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
// {
// assert_dbg();
// }
// x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32);
// uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex;
// x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
// if (imlInstruction->op_conditional_r_s32.bitMustBeSet)
// x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
// else
// x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
// return true;
//}
return false;
}
bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
{
auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR);
@ -973,47 +951,71 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunc
return true;
}
bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
bool PPCRecompilerX64Gen_IsSameCompare(IMLInstruction* imlInstructionA, IMLInstruction* imlInstructionB)
{
auto regR = _reg8(imlInstruction->op_compare.regR);
auto regA = _reg32(imlInstruction->op_compare.regA);
auto regB = _reg32(imlInstruction->op_compare.regB);
X86Cond cond = _x86Cond(imlInstruction->op_compare.cond);
bool keepR = regR == regA || regR == regB;
if(!keepR)
{
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc
x64GenContext->emitter->CMP_dd(regA, regB);
x64GenContext->emitter->SETcc_b(cond, regR);
}
else
{
x64GenContext->emitter->CMP_dd(regA, regB);
x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0);
x64GenContext->emitter->SETcc_b(cond, regR);
}
return true;
if(imlInstructionA->type != imlInstructionB->type)
return false;
if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE)
return imlInstructionA->op_compare.regA == imlInstructionB->op_compare.regA && imlInstructionA->op_compare.regB == imlInstructionB->op_compare.regB;
else if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE_S32)
return imlInstructionA->op_compare_s32.regA == imlInstructionB->op_compare_s32.regA && imlInstructionA->op_compare_s32.immS32 == imlInstructionB->op_compare_s32.immS32;
return false;
}
bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
bool PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, sint32& extraInstructionsProcessed)
{
extraInstructionsProcessed = 0;
boost::container::static_vector<IMLInstruction*, 4> compareInstructions;
compareInstructions.push_back(imlInstruction);
for(sint32 i=1; i<4; i++)
{
IMLInstruction* nextIns = x64GenContext->GetNextInstruction(i);
if(!nextIns || !PPCRecompilerX64Gen_IsSameCompare(imlInstruction, nextIns))
break;
compareInstructions.push_back(nextIns);
}
auto OperandOverlapsWithR = [&](IMLInstruction* ins) -> bool
{
if(ins->type == PPCREC_IML_TYPE_COMPARE)
return _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regA) || _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regB);
else if(ins->type == PPCREC_IML_TYPE_COMPARE_S32)
return _reg32_from_reg8(_reg8(ins->op_compare_s32.regR)) == _reg32(ins->op_compare_s32.regA);
};
auto GetRegR = [](IMLInstruction* insn)
{
return insn->type == PPCREC_IML_TYPE_COMPARE ? _reg32_from_reg8(_reg8(insn->op_compare.regR)) : _reg32_from_reg8(_reg8(insn->op_compare_s32.regR));
};
// prefer XOR method for zeroing out registers if possible
for(auto& it : compareInstructions)
{
if(OperandOverlapsWithR(it))
continue;
auto regR = GetRegR(it);
x64GenContext->emitter->XOR_dd(regR, regR); // zero bytes unaffected by SETcc
}
// emit the compare instruction
if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
{
auto regA = _reg32(imlInstruction->op_compare.regA);
auto regB = _reg32(imlInstruction->op_compare.regB);
x64GenContext->emitter->CMP_dd(regA, regB);
}
else if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
{
auto regR = _reg8(imlInstruction->op_compare_s32.regR);
auto regA = _reg32(imlInstruction->op_compare_s32.regA);
sint32 imm = imlInstruction->op_compare_s32.immS32;
X86Cond cond = _x86Cond(imlInstruction->op_compare_s32.cond);
bool keepR = regR == regA;
if(!keepR)
{
x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); // zero bytes unaffected by SETcc
x64GenContext->emitter->CMP_di32(regA, imm);
x64GenContext->emitter->SETcc_b(cond, regR);
}
else
// emit the SETcc instructions
for(auto& it : compareInstructions)
{
x64GenContext->emitter->CMP_di32(regA, imm);
auto regR = _reg8(it->op_compare.regR);
X86Cond cond = _x86Cond(it->op_compare.cond);
if(OperandOverlapsWithR(it))
x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0);
x64GenContext->emitter->SETcc_b(cond, regR);
}
extraInstructionsProcessed = (sint32)compareInstructions.size() - 1;
return true;
}
@ -1383,6 +1385,7 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
segIt->x64Offset = x64GenContext.emitter->GetWriteIndex();
for(size_t i=0; i<segIt->imlList.size(); i++)
{
x64GenContext.m_currentInstructionEmitIndex = i;
IMLInstruction* imlInstruction = segIt->imlList.data() + i;
if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME )
@ -1403,11 +1406,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
codeGenerationFailed = true;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
if (PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
codeGenerationFailed = true;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32)
{
if (PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
@ -1428,13 +1426,11 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenCo
if (PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false)
codeGenerationFailed = true;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE)
else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
{
PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
}
else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32)
{
PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction);
sint32 extraInstructionsProcessed;
PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, extraInstructionsProcessed);
i += extraInstructionsProcessed;
}
else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP)
{

View File

@ -15,6 +15,7 @@ struct x64GenContext_t
{
IMLSegment* currentSegment{};
x86Assembler64* emitter;
sint32 m_currentInstructionEmitIndex;
x64GenContext_t()
{
@ -26,6 +27,14 @@ struct x64GenContext_t
delete emitter;
}
// Looks up an instruction of the current segment relative to the one being emitted.
// relativeIndex is added to m_currentInstructionEmitIndex (1 = the instruction that follows).
// Returns nullptr when no segment is set or when the resulting index lies outside
// the segment's imlList, so callers can probe ahead without bounds-checking themselves.
IMLInstruction* GetNextInstruction(sint32 relativeIndex = 1)
{
	// currentSegment is value-initialized to nullptr; treat "no segment" like "no such instruction"
	if (!currentSegment)
		return nullptr;
	const sint32 index = m_currentInstructionEmitIndex + relativeIndex;
	const sint32 instructionCount = static_cast<sint32>(currentSegment->imlList.size());
	if (index < 0 || index >= instructionCount)
		return nullptr;
	return currentSegment->imlList.data() + index;
}
// relocate offsets
std::vector<x64RelocEntry_t> relocateOffsetTable2;
};

View File

@ -424,23 +424,6 @@ void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& di
{
strOutput.addFmt("CYCLE_CHECK");
}
else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
strOutput.addFmt("{} ", IMLDebug_GetRegName(inst.op_conditional_r_s32.regR));
bool displayAsHex = false;
if (inst.operation == PPCREC_IML_OP_ASSIGN)
{
displayAsHex = true;
strOutput.add("=");
}
else
strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", inst.operation);
if (displayAsHex)
strOutput.addFmt(" 0x{:x}", inst.op_conditional_r_s32.immS32);
else
strOutput.addFmt(" {}", inst.op_conditional_r_s32.immS32);
strOutput.add(" (conditional)");
}
else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC)
{
strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond));

View File

@ -80,17 +80,6 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
registersUsed->writtenGPR1 = op_r_immS32.regR;
}
}
else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
if (operation == PPCREC_IML_OP_ASSIGN)
{
// result is written, but also considered read (in case the condition is false the input is preserved)
registersUsed->readGPR1 = op_conditional_r_s32.regR;
registersUsed->writtenGPR1 = op_conditional_r_s32.regR;
}
else
cemu_assert_unimplemented();
}
else if (type == PPCREC_IML_TYPE_R_R_S32)
{
registersUsed->writtenGPR1 = op_r_r_s32.regR;
@ -117,10 +106,14 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
else if (type == PPCREC_IML_TYPE_R_R_R)
{
// in all cases result is written and other operands are read only
// with the exception of XOR, where if regA == regB then all bits are zeroed out. So we don't consider it a read
registersUsed->writtenGPR1 = op_r_r_r.regR;
if(!(operation == PPCREC_IML_OP_XOR && op_r_r_r.regA == op_r_r_r.regB))
{
registersUsed->readGPR1 = op_r_r_r.regA;
registersUsed->readGPR2 = op_r_r_r.regB;
}
}
else if (type == PPCREC_IML_TYPE_R_R_R_CARRY)
{
registersUsed->writtenGPR1 = op_r_r_r_carry.regR;
@ -502,10 +495,6 @@ void IMLInstruction::RewriteGPR(const std::unordered_map<IMLRegID, IMLRegID>& tr
{
op_r_immS32.regR = replaceRegisterIdMultiple(op_r_immS32.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_CONDITIONAL_R_S32)
{
op_conditional_r_s32.regR = replaceRegisterIdMultiple(op_conditional_r_s32.regR, translationTable);
}
else if (type == PPCREC_IML_TYPE_R_R_S32)
{
op_r_r_s32.regR = replaceRegisterIdMultiple(op_r_r_s32.regR, translationTable);

View File

@ -250,9 +250,6 @@ enum
// atomic
PPCREC_IML_TYPE_ATOMIC_CMP_STORE,
// conditional (legacy)
PPCREC_IML_TYPE_CONDITIONAL_R_S32,
// function call
PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address

View File

@ -54,23 +54,6 @@ IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext
return &inst;
}
// Fills in a CONDITIONAL_R_S32 IML instruction.
// If imlInstruction is non-null that instruction is zeroed and reused, otherwise a new
// empty instruction is requested from PPCRecompilerImlGen_generateNewEmptyInstruction.
// registerIndex/immS32 describe the r_s32 operation; crRegisterIndex, crBitIndex and
// bitMustBeSet are stored as the condition fields.
void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet)
{
	if (imlInstruction != NULL)
	{
		// reuse the caller-provided slot, starting from a clean state
		memset(imlInstruction, 0, sizeof(IMLInstruction));
	}
	else
	{
		imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext);
	}

	imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32;
	imlInstruction->operation = operation;

	auto& operands = imlInstruction->op_conditional_r_s32;
	// target register and immediate
	operands.regR = registerIndex;
	operands.immS32 = immS32;
	// condition that gates the assignment
	operands.crRegisterIndex = crRegisterIndex;
	operands.crBitIndex = crBitIndex;
	operands.bitMustBeSet = bitMustBeSet;
}
void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian)
{
cemu_assert_debug(registerMemory1.IsValid());
@ -559,7 +542,6 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
uint32 BO, BI, BD;
PPC_OPC_TEMPL_B(opcode, BO, BI, BD);
// decodeOp_BC(uint32 opcode, uint32& BD, BOField& BO, uint32& BI, bool& AA, bool& LK)
Espresso::BOField boField(BO);
uint32 crRegister = BI/4;

View File

@ -2225,7 +2225,7 @@ void MainWindow::RecreateMenu()
wxMenu* debugDumpMenu = new wxMenu;
debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_TEXTURES, _("&Textures"), wxEmptyString)->Check(ActiveSettings::DumpTexturesEnabled());
debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_SHADERS, _("&Shaders"), wxEmptyString)->Check(ActiveSettings::DumpShadersEnabled());
debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_RECOMPILER_FUNCTIONS, _("&Recompiler functions"), wxEmptyString)->Check(ActiveSettings::DumpRecompilerFunctionsEnabled());
debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_RECOMPILER_FUNCTIONS, _("&Recompiled functions"), wxEmptyString)->Check(ActiveSettings::DumpRecompilerFunctionsEnabled());
debugDumpMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_DUMP_CURL_REQUESTS, _("&nlibcurl HTTP/HTTPS requests"), wxEmptyString);
// debug submenu
wxMenu* debugMenu = new wxMenu();