diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
index ed812b46..aa066310 100644
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp
@@ -57,24 +57,6 @@ void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContex
 	x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo);
 }
 
-/*
-* Overwrites the currently cached (in x64 cf) cr* register
-* Should be called before each x64 instruction which overwrites the current status flags (with mappedCRRegister set to PPCREC_CR_TEMPORARY unless explicitly set by PPC instruction)
-*/
-void PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 mappedCRRegister, sint32 crState)
-{
-	x64GenContext->activeCRRegister = mappedCRRegister;
-	x64GenContext->activeCRState = crState;
-}
-
-/*
-* Reset cached cr* register without storing it first
-*/
-void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext)
-{
-	x64GenContext->activeCRRegister = PPC_REC_INVALID_REGISTER;
-}
-
 void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset)
 {
 	uint8* instructionData = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset;
@@ -116,7 +98,6 @@ void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcIm
 	if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 )
 		x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, X86_REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ));
 	// todo: Set CR SO if XER SO bit is set
-	PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, crRegister, PPCREC_CR_STATE_TYPE_LOGICAL);
 }
 
 void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId)
@@ -153,7 +134,6 @@ void ATTR_MS_ABI PPCRecompiler_getTBU(PPCInterpreter_t* hCPU, uint32 gprIndex)
 
 bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG)
 	{
 		uint32 branchDstReg = imlInstruction->op_macro.param;
@@ -419,7 +399,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
 	}
 	else if( imlInstruction->op_storeLoad.copyWidth == 16 )
 	{
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		// todo: We can avoid this if MOVBE is available
 		if (indexed)
 		{
 			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
@@ -445,8 +424,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
 	}
 	else if( imlInstruction->op_storeLoad.copyWidth == 8 )
 	{
-		if( indexed )
-			PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		// todo: Optimize by using only MOVZX/MOVSX
 		if( indexed )
 			x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
@@ -459,7 +436,6 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
 	}
 	else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER )
 	{
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if( imlInstruction->op_storeLoad.immS32 != 0 )
 			assert_dbg(); // not supported
 		if( indexed )
@@ -507,8 +483,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
 	bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian;
 	if (imlInstruction->op_storeLoad.copyWidth == 32)
 	{
-		if (indexed)
-			PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		uint32 valueRegister;
 		if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData)
 		{
@@ -532,8 +506,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
 	}
 	else if (imlInstruction->op_storeLoad.copyWidth == 16)
 	{
-		if (indexed || swapEndian)
-			PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
 		if (swapEndian)
 			x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8);
@@ -546,8 +518,6 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
 	}
 	else if (imlInstruction->op_storeLoad.copyWidth == 8)
 	{
-		if (indexed)
-			PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if (indexed && realRegisterMem == realRegisterData)
 		{
 			x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
@@ -561,10 +531,9 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
 	}
 	else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER)
 	{
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if (imlInstruction->op_storeLoad.immS32 != 0)
 			assert_dbg(); // todo
-		// reset cr0 LT, GT and EQ 
+		// reset cr0 LT, GT and EQ
 		sint32 crRegister = 0;
 		x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0);
 		x64Gen_mov_mem8Reg64_imm8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0);
@@ -650,19 +619,16 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
 	else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
 	}
 	else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32)
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, imlInstruction->op_r_r.registerResult, reg32ToReg16(imlInstruction->op_r_r.registerA));
 	}
 	else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if( imlInstruction->operation == PPCREC_IML_OP_OR )
 		{
 			// registerResult |= registerA
@@ -683,7 +649,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 		// copy register content if different registers
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
 			x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
 		x64Gen_not_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult);
@@ -692,7 +657,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 		// copy register content if different registers
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if (imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA)
 			x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
 		x64Gen_neg_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult);
@@ -701,7 +665,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 		// count leading zeros
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 		// LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5])
 		if(g_CPUFeatures.x86.lzcnt)
@@ -726,7 +689,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
 	else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED )
 	{
 		// registerA CMP registerB (arithmetic compare)
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER )
 		{
 			return false; // a NO-OP instruction
@@ -735,11 +697,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
 		{
 			return false;
 		}
-		// update state of cr register
-		if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED )
-			PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC);
-		else
-			PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC);
 		// create compare instruction
 		x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_r_r.registerResult, imlInstruction->op_r_r.registerA);
 		// set cr bits
@@ -770,7 +727,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
 	else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA )
 		{
 			x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, imlInstruction->op_r_r.registerA);
@@ -810,30 +766,24 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
 	{
 		// registerResult &= immS32
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		x64Gen_and_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
 	}
 	else if( imlInstruction->operation == PPCREC_IML_OP_OR )
 	{
 		// registerResult |= immS32
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
-		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 		x64Gen_or_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
 	}
 	else if( imlInstruction->operation == PPCREC_IML_OP_XOR )
 	{
 		// registerResult ^= immS32
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
-		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 		x64Gen_xor_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, (uint32)imlInstruction->op_r_immS32.immS32);
 	}
 	else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 		// registerResult <<<= immS32
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 		if( (imlInstruction->op_r_immS32.immS32&0x80) )
 			assert_dbg(); // should not happen
@@ -842,7 +792,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
 	else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED )
 	{
 		// registerResult CMP immS32 (arithmetic compare)
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER )
 		{
 			debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): No-Op CMP found\n");
@@ -853,11 +802,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
 			debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported CMP with crRegister = 8\n");
 			return false;
 		}
-		// update state of cr register
-		if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED )
-			PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC);
-		else
-			PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC);
 		// create compare instruction
 		x64Gen_cmp_reg64Low32_imm32(x64GenContext, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32);
 		// set cr bits
@@ -886,7 +830,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
 	}
 	else if( imlInstruction->operation == PPCREC_IML_OP_MFCR )
 	{
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		uint32 destRegister = imlInstruction->op_r_immS32.registerIndex;
 		x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister);
 		for(sint32 f=0; f<32; f++)
@@ -897,7 +840,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction,
 	}
 	else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF)
 	{
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		uint32 srcRegister = imlInstruction->op_r_immS32.registerIndex;
 		uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32);
 		for (sint32 f = 0; f < 32; f++)
@@ -928,66 +870,7 @@ bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCR
 	}
 	x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32);
-
 	uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex;
-	if (imlInstruction->op_conditional_r_s32.crRegisterIndex == x64GenContext->activeCRRegister)
-	{
-		if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC)
-		{
-			if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-			else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-			else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-		}
-		else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC)
-		{
-			if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-			else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-			else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-		}
-		else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_LOGICAL)
-		{
-			if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-			else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-			else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT)
-			{
-				x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
-				return true;
-			}
-		}
-	}
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
 	if (imlInstruction->op_conditional_r_s32.bitMustBeSet)
 		x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, imlInstruction->op_conditional_r_s32.registerIndex, REG_RESV_TEMP);
@@ -1002,10 +885,9 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 {
 	cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
 
-	if( imlInstruction->operation == PPCREC_IML_OP_ADD)
+	if (imlInstruction->operation == PPCREC_IML_OP_ADD)
 	{
 		// registerResult = registerOperand1 + registerOperand2
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@@ -1027,7 +909,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 	else if( imlInstruction->operation == PPCREC_IML_OP_SUB )
 	{
 		// registerResult = registerOperand1 - registerOperand2
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@@ -1060,7 +941,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 	else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR)
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegA = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegB = imlInstruction->op_r_r_r.registerB;
@@ -1080,7 +960,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 	else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
 	{
 		// registerResult = registerOperand1 * registerOperand2
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@@ -1103,7 +982,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 	else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW )
 	{
 		// registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits)
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@@ -1146,7 +1024,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 	}
 	else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE )
 	{
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@@ -1184,8 +1061,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 		imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
 		imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT)
 	{
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
-
 		// x86's shift and rotate instruction have the shift amount hardwired to the CL register
 		// since our register allocator doesn't support instruction based fixed phys registers yet
 		// we'll instead have to temporarily shuffle registers around
@@ -1266,7 +1141,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 	else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@@ -1302,7 +1176,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 	else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@@ -1340,7 +1213,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 	else if( imlInstruction->operation == PPCREC_IML_OP_ORC )
 	{
 		// registerResult = registerOperand1 | ~registerOperand2
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_r.registerResult;
 		sint32 rRegOperand1 = imlInstruction->op_r_r_r.registerA;
 		sint32 rRegOperand2 = imlInstruction->op_r_r_r.registerB;
@@ -1361,8 +1233,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction,
 
 bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
-
 	auto regR = _reg32(imlInstruction->op_r_r_r_carry.regR);
 	auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA);
 	auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB);
@@ -1399,7 +1269,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunc
 
 bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	auto regR = _reg8(imlInstruction->op_compare.registerResult);
 	auto regA = _reg32(imlInstruction->op_compare.registerOperandA);
 	auto regB = _reg32(imlInstruction->op_compare.registerOperandB);
@@ -1412,7 +1281,6 @@ bool PPCRecompilerX64Gen_imlInstruction_compare(PPCRecFunction
 
 bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	auto regR = _reg8(imlInstruction->op_compare_s32.registerResult);
 	auto regA = _reg32(imlInstruction->op_compare_s32.registerOperandA);
 	sint32 imm = imlInstruction->op_compare_s32.immS32;
@@ -1425,7 +1293,6 @@ bool PPCRecompilerX64Gen_imlInstruction_compare_s32(PPCRecFunc
 
 bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	auto regBool = _reg8(imlInstruction->op_conditionalJump2.registerBool);
 	bool mustBeTrue = imlInstruction->op_conditionalJump2.mustBeTrue;
 	x64GenContext->emitter->TEST_bb(regBool, regBool);
@@ -1436,7 +1303,6 @@ bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction,
 
 bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
 	x64GenContext->emitter->JMP_j32(0);
 	return true;
@@ -1453,7 +1319,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction
 	if( imlInstruction->operation == PPCREC_IML_OP_ADD )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
 		sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
 		uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32;
@@ -1464,7 +1329,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction
 	else if (imlInstruction->operation == PPCREC_IML_OP_SUB)
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if (regResult != regOperand)
 			x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
 		x64Gen_sub_reg64Low32_imm32(x64GenContext, regResult, immS32);
@@ -1474,7 +1338,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction
 		imlInstruction->operation == PPCREC_IML_OP_XOR)
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if (regResult != regOperand)
 			x64Gen_mov_reg64_reg64(x64GenContext, regResult, regOperand);
 		if (imlInstruction->operation == PPCREC_IML_OP_AND)
@@ -1487,7 +1350,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction
 	else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI )
 	{
 		// registerResult = ((registerResult<<<SH)&mask) | (registerResult&~mask)
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		uint32 vImm = (uint32)imlInstruction->op_r_r_s32.immS32;
 		uint32 mb = (vImm>>0)&0xFF;
 		uint32 me = (vImm>>8)&0xFF;
@@ -1510,7 +1372,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction
 	else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
 	{
 		// registerResult = registerOperand * immS32
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		sint32 rRegResult = imlInstruction->op_r_r_s32.registerResult;
 		sint32 rRegOperand = imlInstruction->op_r_r_s32.registerA;
 		sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32;
@@ -1523,7 +1384,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction
 		imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U ||
 		imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S)
 	{
-		PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 		if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult )
 			x64Gen_mov_reg64_reg64(x64GenContext, imlInstruction->op_r_r_s32.registerResult, imlInstruction->op_r_r_s32.registerA);
 
@@ -1544,8 +1404,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction
 
 bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
-
 	auto regR = _reg32(imlInstruction->op_r_r_s32_carry.regR);
 	auto regA = _reg32(imlInstruction->op_r_r_s32_carry.regA);
 	sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32;
@@ -1604,73 +1462,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRec
 	else
 	{
 		uint8 crBitIndex = imlInstruction->op_conditionalJump.crRegisterIndex*4 + imlInstruction->op_conditionalJump.crBitIndex;
-		if (imlInstruction->op_conditionalJump.crRegisterIndex == x64GenContext->activeCRRegister )
-		{
-			if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC)
-			{
-				if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0);
-					return true;
-				}
-				else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
-					return true;
-				}
-				else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0);
-					return true;
-				}
-			}
-			else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC)
-			{
-				if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0);
-					return true;
-				}
-				else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
-					return true;
-				}
-				else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0);
-					return true;
-				}
-			}
-			else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_LOGICAL)
-			{
-				if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0);
-					return true;
-				}
-				else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0);
-					return true;
-				}
-				else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT)
-				{
-					PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken);
-					x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0);
-					return true;
-				}
-			}
-			cemu_assert_debug(false); // should not reach?
-		}
 		x64Gen_bt_mem8(x64GenContext, X86_REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0);
 		cemu_assert_debug(imlSegment->GetBranchTaken());
 		PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, (void*)imlSegment->GetBranchTaken());
@@ -1689,7 +1480,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRec
 
 bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	// some tests (all performed on a i7-4790K)
 	// 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency)
 	// 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC
@@ -1707,7 +1497,6 @@ bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction
 */
 bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	// while these instruction do not directly affect eflags, they change the CR bit
 	if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR)
 	{
 		// clear cr bit
@@ -1848,7 +1637,6 @@ uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size)
 
 bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext)
 {
 	x64GenContext_t x64GenContext{};
-	x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER;
 	// generate iml instruction code
 	bool codeGenerationFailed = false;
@@ -2104,7 +1892,6 @@ bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFun
 
 void PPCRecompilerX64Gen_generateEnterRecompilerCode()
 {
 	x64GenContext_t x64GenContext{};
-	x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER;
 	// start of recompiler entry function
 	x64Gen_push_reg64(&x64GenContext, X86_REG_RAX);
@@ -2180,7 +1967,6 @@ void PPCRecompilerX64Gen_generateEnterRecompilerCode()
 
 void* PPCRecompilerX64Gen_generateLeaveRecompilerCode()
 {
 	x64GenContext_t x64GenContext{};
-	x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER;
 	// update instruction pointer
 	// LR is in EDX
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
index b9cb0585..1683c5b9 100644
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h
@@ -26,9 +26,6 @@ struct x64GenContext_t
 		delete emitter;
 	}
 
-	// cr state
-	sint32 activeCRRegister{}; // current x86 condition flags reflect this cr* register
-	sint32 activeCRState{}; // describes the way in which x86 flags map to the cr register (signed / unsigned)
 	// relocate offsets
 	std::vector<x64RelocEntry_t> relocateOffsetTable2;
 };
@@ -75,8 +72,6 @@ enum
 
 bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext);
 
-void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext);
-
 void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset);
 
 void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions();
diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
index e50052d5..14d05d5a 100644
--- a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp
@@ -267,7 +267,6 @@ void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGen
 // load from memory
 bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData;
 	sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
 	sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
@@ -578,7 +577,6 @@ void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGe
 // store to memory
 bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	sint32 realRegisterXMM = imlInstruction->op_storeLoad.registerData;
 	sint32 realRegisterMem = imlInstruction->op_storeLoad.registerMem;
 	sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER;
@@ -690,7 +688,6 @@ void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg)
 // FPR op FPR
 void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP )
 	{
 		if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER )
@@ -969,8 +966,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction
 */
 void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
-
 	if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM)
 	{
 		if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER)
@@ -1062,7 +1057,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFuncti
 */
 void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
@@ -1156,7 +1150,6 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunc
 */
 void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction)
 {
-	PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
 	if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM )
 	{
 		cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
index 63fb5f72..f74cd225 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp
@@ -226,12 +226,8 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP
 	return ppcRecFunc;
 }
 
-void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext);
-
 bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext)
 {
-	PPCRecompiler_FixLoops(ppcImlGenContext);
-
 	// isolate entry points from function flow (enterable segments must not be the target of any other segment)
 	// this simplifies logic during register allocation
 	PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext);
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h
index a4dd4101..8377671a 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h
@@ -25,7 +25,6 @@ uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGe
 
 // IML instruction generation
 void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction);
-void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode);
 
 void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet);
diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
index f7492e59..435a5a7e 100644
--- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
+++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp
@@ -64,11 +64,6 @@ void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGe
 	ppcImlGenContext->emitInst().make_r_r(operation, registerResult, registerA, crRegister, crMode);
 }
 
-void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint8 crRegister, uint32 crMode)
-{
-	ppcImlGenContext->emitInst().make_r_s32(operation, registerIndex, immS32, crRegister, crMode);
-}
-
 void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name)
 {
 	// Store name (e.g. "'r3' = t0" which translates to MOV [ESP+offset_r3], reg32)
@@ -502,7 +497,7 @@ bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
 	sint32 rD, rA, rB;
 	PPC_OPC_TEMPL_X(opcode, rD, rA, rB);
 	uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
-	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0, PPC_REC_INVALID_REGISTER, 0);
+	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MFCR, gprReg, 0);
 	return true;
 }
 
@@ -512,7 +507,7 @@ bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
 	uint32 crMask;
 	PPC_OPC_TEMPL_XFX(opcode, rS, crMask);
 	uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS);
-	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask, PPC_REC_INVALID_REGISTER, 0);
+	ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_MTCRF, gprReg, crMask);
 	return true;
 }
 
@@ -730,7 +725,7 @@ bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
 			branchDestReg = tmpRegister;
 		}
 		uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR);
-		ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, PPC_REC_INVALID_REGISTER, 0);
+		ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4);
 	}
 
 	if (!BO.decrementerIgnore())
@@ -856,7 +851,7 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
 		// rA not used, instruction turns into simple value assignment
 		// rD = imm
 		uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD);
-		ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, PPC_REC_INVALID_REGISTER, 0);
+		ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm);
 	}
 	// never updates any cr
 	return true;
@@ -3970,19 +3965,8 @@ bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunction
 	return true;
 }
 
-bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set<uint32>& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker)
+void IMLOptimizer_replaceWithConditionalMov(ppcImlGenContext_t& ppcImlGenContext)
 {
-	ppcImlGenContext.boundaryTracker = &boundaryTracker;
-	if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses))
-		return false;
-
-	// set range
-	// todo - support non-continuous functions for the range tracking?
-	ppcRecRange_t recRange;
-	recRange.ppcAddress = ppcRecFunc->ppcAddress;
-	recRange.ppcSize = ppcRecFunc->ppcSize;
-	ppcRecFunc->list_ranges.push_back(recRange);
-
 	// optimization pass - replace segments with conditional MOVs if possible
 	for (IMLSegment* segIt : ppcImlGenContext.segmentList2)
 	{
@@ -4006,16 +3990,16 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext
 		for (sint32 f = 0; f < conditionalSegment->imlList.size(); f++)
 		{
 			IMLInstruction* imlInstruction = conditionalSegment->imlList.data() + f;
-			if( imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
+			if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN)
 				continue;
 			// todo: Register to register copy
 			canReduceSegment = false;
 			break;
 		}
 
-		if( canReduceSegment == false )
+		if (canReduceSegment == false)
 			continue;
-		
+
 		// remove the branch instruction
 		uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex;
 		uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex;
@@ -4068,104 +4052,23 @@ bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext
 		// todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources)
 	}
-	return true;
 }
 
-void PPCRecompiler_FixLoops(ppcImlGenContext_t& ppcImlGenContext)
+bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set<uint32>& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker)
 {
-	return; // deprecated
+	ppcImlGenContext.boundaryTracker = &boundaryTracker;
+	if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses))
+		return false;
-	//// find segments that have a (conditional) jump instruction that points in reverse direction of code flow
-	//// for these segments there is a risk that the recompiler could get trapped in an infinite busy loop.
-	//// todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example)
-	//uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located
-	//for (size_t s = 0; s < ppcImlGenContext.segmentList2.size(); s++)
-	//{
-	//	// todo: This currently uses segment->ppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?)
-	//	IMLSegment* imlSegment = ppcImlGenContext.segmentList2[s];
-	//	if (imlSegment->imlList.empty())
-	//		continue;
-	//	if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin)
-	//		continue;
-	//	if (imlSegment->imlList[imlSegment->imlList.size() - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlList.size() - 1].op_conditionalJump.jumpAccordingToSegment)
-	//		continue;
+	// IMLOptimizer_replaceWithConditionalMov(ppcImlGenContext);
-	//	// exclude non-infinite tight loops
-	//	if (IMLAnalyzer_IsTightFiniteLoop(imlSegment))
-	//		continue;
+	// set range
+	// todo - support non-continuous functions for the range tracking?
+	ppcRecRange_t recRange;
+	recRange.ppcAddress = ppcRecFunc->ppcAddress;
+	recRange.ppcSize = ppcRecFunc->ppcSize;
+	ppcRecFunc->list_ranges.push_back(recRange);
-	//	// potential loop segment found, split this segment into four:
-	//	// P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments)
-	//	// P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here.
-	//	// P2: This segment contains the iml instructions of the original segment
-	//	// PEntry: This segment is used to enter the function, it jumps to P0
-	//	// All segments are considered to be part of the same PPC instruction range
-	//	// The first segment also retains the jump destination and enterable properties from the original segment.
-	//	//debug_printf("--- Insert cycle counter check ---\n");
-	//	PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2);
-	//	imlSegment = NULL;
-	//	IMLSegment* imlSegmentP0 = ppcImlGenContext.segmentList2[s + 0];
-	//	IMLSegment* imlSegmentP1 = ppcImlGenContext.segmentList2[s + 1];
-	//	IMLSegment* imlSegmentP2 = ppcImlGenContext.segmentList2[s + 2];
-	//	// create entry point segment
-	//	PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentList2.size(), 1);
-	//	IMLSegment* imlSegmentPEntry = ppcImlGenContext.segmentList2[ppcImlGenContext.segmentList2.size() - 1];
-	//	// relink segments
-	//	IMLSegment_RelinkInputSegment(imlSegmentP2, imlSegmentP0);
-	//	IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1);
-	//	IMLSegment_SetLinkBranchTaken(imlSegmentP0, imlSegmentP2);
-	//	IMLSegment_SetLinkBranchTaken(imlSegmentPEntry, imlSegmentP0);
-	//	// update segments
-	//	uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin;
-	//	if (imlSegmentP2->isEnterable)
-	//		enterPPCAddress = imlSegmentP2->enterPPCAddress;
-	//	imlSegmentP0->ppcAddress = 0xFFFFFFFF;
-	//	imlSegmentP1->ppcAddress = 0xFFFFFFFF;
-	//	imlSegmentP2->ppcAddress = 0xFFFFFFFF;
-	//	cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0);
-	//	// move segment properties from segment P2 to segment P0
-	//	imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination;
-	//	imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress;
-	//	imlSegmentP0->isEnterable = false;
-	//	//imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress;
-	//	imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin;
-	//	imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax;
-	//	imlSegmentP2->isJumpDestination = false;
-	//	imlSegmentP2->jumpDestinationPPCAddress = 0;
-	//	imlSegmentP2->isEnterable = false;
-	//	imlSegmentP2->enterPPCAddress = 0;
-	//	imlSegmentP2->ppcAddrMin = 0;
-	//	imlSegmentP2->ppcAddrMax = 0;
-	//	// setup enterable segment
-	//	if (enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF)
-	//	{
-	//		imlSegmentPEntry->isEnterable = true;
-	//		imlSegmentPEntry->ppcAddress = enterPPCAddress;
-	//		imlSegmentPEntry->enterPPCAddress = enterPPCAddress;
-	//	}
-	//	// assign new jumpmark to segment P2
-	//	imlSegmentP2->isJumpDestination = true;
-	//	imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker;
-	//	currentLoopEscapeJumpMarker++;
-	//	// create ppc_leave instruction in segment P1
-	//	PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1);
-	//	imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO;
-	//	imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE;
-	//	imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
-	//	imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin;
-	//	imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin;
-	//	// create cycle-based conditional instruction in segment P0
-	//	PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1);
-	//	imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK;
-	//	imlSegmentP0->imlList[0].operation = 0;
-	//	imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER;
-	//	imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress;
-	//	imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin;
-	//	// jump instruction for PEntry
-	//	PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1);
-	//	PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList.data() + 0);
-
-	//	// skip the newly created segments
-	//	s += 2;
-	//}
-}
\ No newline at end of file
+
+	return true;
+}
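
Review note on the mechanism removed by this patch: the deleted `PPCRecompilerX64Gen_crConditionFlags_set`/`_forget` pair implemented a lazy flag cache. After emitting a compare, the backend recorded which cr field the host EFLAGS currently mirrored (`activeCRRegister`) and how the flags mapped onto cr bits (`activeCRState`: signed / unsigned / logical). A later conditional jump or CMOV on that same cr field could then branch directly on the live flags, while every emitter that clobbered EFLAGS had to call `_forget` first. After this change, consumers always re-read the cr bit from the interpreter context (`bt` on the cr byte followed by `jc`/`cmovc`), so the invalidation bookkeeping disappears. Below is a minimal standalone sketch of the two strategies; all types and names in it are illustrative stand-ins, not the actual Cemu APIs:

// sketch.cpp — illustrative only; simplified stand-ins for the real backend types
#include <cstdint>

enum class CRState { None, SignedArithmetic, UnsignedArithmetic, Logical };

struct GenCtx
{
	int32_t activeCRRegister = -1;          // cr field currently mirrored by host EFLAGS (-1 = none)
	CRState activeCRState = CRState::None;  // how EFLAGS map onto that cr field
};

// --- old scheme (removed by this diff) ---
// after a CMP, remember what the host flags mean
void onCompareEmitted(GenCtx& ctx, int32_t crRegister, bool isSigned)
{
	// emit: cmp a, b   (actual emission elided in this sketch)
	ctx.activeCRRegister = crRegister;
	ctx.activeCRState = isSigned ? CRState::SignedArithmetic : CRState::UnsignedArithmetic;
}

// before any instruction that clobbers EFLAGS, the cache had to be dropped
void onFlagsClobbered(GenCtx& ctx)
{
	ctx.activeCRRegister = -1; // "forget" — later consumers must re-read cr from memory
}

// a conditional jump could use the live flags while the cache still matched
bool tryEmitFastJump(const GenCtx& ctx, int32_t crRegister, int32_t crBit)
{
	if (crRegister == ctx.activeCRRegister && ctx.activeCRState == CRState::SignedArithmetic)
	{
		// emit e.g. jl/jge for the LT bit (emission elided)
		return true;
	}
	return false; // caller falls back to the memory-based path below
}

// --- new scheme (what remains after this diff) ---
void emitJumpViaCrByte(int32_t crRegister, int32_t crBit)
{
	// always: bt byte [rsp + offsetof(PPCInterpreter_t, cr) + crRegister*4 + crBit]
	//         jc target
	// (emission elided) — no cached state, so there is nothing to invalidate
}

The trade-off is the one visible throughout the diff: every emitter loses its scattered `_forget` calls, which were easy to miss and therefore a correctness hazard, at the cost of an always-executed `bt` on cr-dependent branches and conditional moves.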