diff --git a/Source/Core/Core/HW/DSP.cpp b/Source/Core/Core/HW/DSP.cpp index 5b16f23139..23e6b0698c 100644 --- a/Source/Core/Core/HW/DSP.cpp +++ b/Source/Core/Core/HW/DSP.cpp @@ -37,6 +37,7 @@ #include "Core/HW/Memmap.h" #include "Core/HW/MMIO.h" #include "Core/HW/ProcessorInterface.h" +#include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" namespace DSP @@ -157,6 +158,9 @@ static ARAMInfo g_ARAM; static DSPState g_dspState; static AudioDMA g_audioDMA; static ARAM_DMA g_arDMA; +static u32 last_mmaddr; +static u32 last_aram_dma_count; +static bool instant_dma; union ARAM_Info { @@ -194,6 +198,9 @@ void DoState(PointerWrap &p) p.Do(g_AR_MODE); p.Do(g_AR_REFRESH); p.Do(dsp_slice); + p.Do(last_mmaddr); + p.Do(last_aram_dma_count); + p.Do(instant_dma); dsp_emulator->DoState(p); } @@ -212,6 +219,12 @@ static void CompleteARAM(u64 userdata, int cyclesLate) GenerateDSPInterrupt(INT_ARAM); } +void EnableInstantDMA() +{ + CoreTiming::RemoveEvent(et_CompleteARAM); + CompleteARAM(0, 0); + instant_dma = true; +} DSPEmulator *GetDSPEmulator() { @@ -249,6 +262,11 @@ void Init(bool hle) g_AR_MODE = 1; // ARAM Controller has init'd g_AR_REFRESH = 156; // 156MHz + instant_dma = false; + + last_aram_dma_count = 0; + last_mmaddr = 0; + et_GenerateDSPInterrupt = CoreTiming::RegisterEvent("DSPint", GenerateDSPInterrupt); et_CompleteARAM = CoreTiming::RegisterEvent("ARAMint", CompleteARAM); } @@ -516,28 +534,20 @@ void UpdateAudioDMA() static void Do_ARAM_DMA() { g_dspState.DSPControl.DMAState = 1; - if (g_arDMA.Cnt.count == 32) - { - // Beyond Good and Evil (GGEE41) sends count 32 - // Lost Kingdoms 2 needs the exception check here in DSP HLE mode - CompleteARAM(0, 0); - CoreTiming::ForceExceptionCheck(100); - } - else - { - CoreTiming::ScheduleEvent_Threadsafe(0, et_CompleteARAM); - // Force an early exception check on large transfers. Fixes RE2 audio. 
- // NFS:HP2 (<= 6144) - // Viewtiful Joe (<= 6144) - // Sonic Mega Collection (> 2048) - // Paper Mario battles (> 32) - // Mario Super Baseball (> 32) - // Knockout Kings 2003 loading (> 32) - // WWE DOR (> 32) - if (g_arDMA.Cnt.count > 2048 && g_arDMA.Cnt.count <= 6144) - CoreTiming::ForceExceptionCheck(100); - } + // ARAM DMA transfer rate has been measured on real hw + int ticksToTransfer = (g_arDMA.Cnt.count / 32) * 246; + + if (instant_dma) + ticksToTransfer = 0; + + CoreTiming::ScheduleEvent_Threadsafe(ticksToTransfer, et_CompleteARAM); + + if (instant_dma) + CoreTiming::ForceExceptionCheck(100); + + last_mmaddr = g_arDMA.MMAddr; + last_aram_dma_count = g_arDMA.Cnt.count; // Real hardware DMAs in 32byte chunks, but we can get by with 8byte chunks if (g_arDMA.Cnt.dir) @@ -663,5 +673,14 @@ u8 *GetARAMPtr() return g_ARAM.ptr; } +u64 DMAInProgress() +{ + if (g_dspState.DSPControl.DMAState == 1) + { + return ((u64)last_mmaddr << 32 | (last_mmaddr + last_aram_dma_count)); + } + return 0; +} + } // end of namespace DSP diff --git a/Source/Core/Core/HW/DSP.h b/Source/Core/Core/HW/DSP.h index ecbd04c8ea..6051857113 100644 --- a/Source/Core/Core/HW/DSP.h +++ b/Source/Core/Core/HW/DSP.h @@ -76,5 +76,7 @@ u8* GetARAMPtr(); void UpdateAudioDMA(); void UpdateDSPSlice(int cycles); +u64 DMAInProgress(); +void EnableInstantDMA(); }// end of namespace DSP diff --git a/Source/Core/Core/HW/GPFifo.cpp b/Source/Core/Core/HW/GPFifo.cpp index 3643f88fca..033d253510 100644 --- a/Source/Core/Core/HW/GPFifo.cpp +++ b/Source/Core/Core/HW/GPFifo.cpp @@ -8,8 +8,8 @@ #include "Core/HW/GPFifo.h" #include "Core/HW/Memmap.h" #include "Core/HW/ProcessorInterface.h" +#include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/JitCommon/JitBase.h" #include "VideoCommon/VideoBackendBase.h" @@ -86,18 +86,7 @@ void STACKALIGN CheckGatherPipe() memmove(m_gatherPipe, m_gatherPipe + cnt, m_gatherPipeCount); // Profile where the FIFO writes are occurring. 
- if (jit && PC != 0 && (jit->js.fifoWriteAddresses.find(PC)) == (jit->js.fifoWriteAddresses.end())) - { - // Log only stores, fp stores and ps stores, filtering out other instructions arrived via optimizeGatherPipe - int type = GetOpInfo(Memory::ReadUnchecked_U32(PC))->type; - if (type == OPTYPE_STORE || type == OPTYPE_STOREFP || (type == OPTYPE_PS && !strcmp(GetOpInfo(Memory::ReadUnchecked_U32(PC))->opname, "psq_st"))) - { - jit->js.fifoWriteAddresses.insert(PC); - - // Invalidate the JIT block so that it gets recompiled with the external exception check included. - jit->GetBlockCache()->InvalidateICache(PC, 4); - } - } + JitInterface::CompileExceptionCheck(JitInterface::EXCEPTIONS_FIFO_WRITE); } } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 5b8e6351ff..86d42e3239 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -5,6 +5,7 @@ #include "Common/CommonTypes.h" #include "Common/MathUtil.h" +#include "Core/HW/DSP.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/Interpreter/Interpreter.h" #include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h" @@ -325,24 +326,40 @@ void Interpreter::dcbf(UGeckoInstruction _inst) { NPC = PC + 12; }*/ - u32 address = Helper_Get_EA_X(_inst); - JitInterface::InvalidateICache(address & ~0x1f, 32); + u32 address = Helper_Get_EA_X(_inst); + JitInterface::InvalidateICache(address & ~0x1f, 32); } void Interpreter::dcbi(UGeckoInstruction _inst) { // Removes a block from data cache. 
Since we don't emulate the data cache, we don't need to do anything to the data cache // However, we invalidate the jit block cache on dcbi - u32 address = Helper_Get_EA_X(_inst); - JitInterface::InvalidateICache(address & ~0x1f, 32); + u32 address = Helper_Get_EA_X(_inst); + JitInterface::InvalidateICache(address & ~0x1f, 32); + + // The following detects a situation where the game is writing to the dcache at the address being DMA'd. As we do not + // have dcache emulation, invalid data is being DMA'd causing audio glitches. The following code detects this and + // enables the DMA to complete instantly before the invalid data is written. Resident Evil 2 & 3 trigger this. + u64 dma_in_progress = DSP::DMAInProgress(); + if (dma_in_progress != 0) + { + u32 start_addr = (dma_in_progress >> 32) & Memory::RAM_MASK; + u32 end_addr = (dma_in_progress & Memory::RAM_MASK) & 0xffffffff; + u32 invalidated_addr = (address & Memory::RAM_MASK) & ~0x1f; + + if (invalidated_addr >= start_addr && invalidated_addr <= end_addr) + { + DSP::EnableInstantDMA(); + } + } } void Interpreter::dcbst(UGeckoInstruction _inst) { // Cache line flush. Since we don't emulate the data cache, we don't need to do anything. 
// Invalidate the jit block cache on dcbst in case new code has been loaded via the data cache - u32 address = Helper_Get_EA_X(_inst); - JitInterface::InvalidateICache(address & ~0x1f, 32); + u32 address = Helper_Get_EA_X(_inst); + JitInterface::InvalidateICache(address & ~0x1f, 32); } void Interpreter::dcbt(UGeckoInstruction _inst) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp index 6d60ab2a64..b98b90e013 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -79,10 +79,10 @@ static GekkoOPTemplate primarytable[] = {54, Interpreter::stfd, {"stfd", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, {55, Interpreter::stfdu, {"stfdu", OPTYPE_STOREFP, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {56, Interpreter::psq_l, {"psq_l", OPTYPE_PS, FL_OUT_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {57, Interpreter::psq_lu, {"psq_lu", OPTYPE_PS, FL_OUT_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {60, Interpreter::psq_st, {"psq_st", OPTYPE_PS, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {61, Interpreter::psq_stu, {"psq_stu", OPTYPE_PS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {56, Interpreter::psq_l, {"psq_l", OPTYPE_LOADPS, FL_OUT_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {57, Interpreter::psq_lu, {"psq_lu", OPTYPE_LOADPS, FL_OUT_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {60, Interpreter::psq_st, {"psq_st", OPTYPE_STOREPS, FL_IN_FLOAT_S | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {61, Interpreter::psq_stu, {"psq_stu", OPTYPE_STOREPS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, //missing: 0, 5, 6, 9, 22, 30, 62, 58 {0, 
Interpreter::unknown_instruction, {"unknown_instruction", OPTYPE_UNKNOWN, 0, 0, 0, 0, 0}}, diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 9b88538935..89a61e59d3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -697,7 +697,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc } // Add an external exception check if the instruction writes to the FIFO. - if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end()) + if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end() || + jit->js.dspARAMAddresses.find(ops[i].address) != jit->js.dspARAMAddresses.end()) { TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); FixupBranch clearInt = J_CC(CC_NZ); @@ -707,7 +708,14 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc SetJumpTarget(extException); TEST(32, PPCSTATE(msr), Imm32(0x0008000)); FixupBranch noExtIntEnable = J_CC(CC_Z, true); - TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); + if (jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end()) + { + TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); + } + else + { + TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_DSP)); + } FixupBranch noCPInt = J_CC(CC_Z, true); gpr.Flush(FLUSH_MAINTAIN_STATE); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 52463ec619..fc40d3cfd0 100644 --- 
a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -97,6 +97,7 @@ protected: JitBlock *curBlock; std::unordered_set<u32> fifoWriteAddresses; + std::unordered_set<u32> dspARAMAddresses; }; PPCAnalyst::CodeBlock code_block; diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 6bfd1c4009..a8986f7251 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -216,7 +216,7 @@ namespace JitInterface jit->GetBlockCache()->InvalidateICache(address, size); } - u32 Read_Opcode_JIT(u32 _Address) + u32 ReadOpcodeJIT(u32 _Address) { if (bMMU && !bFakeVMEM && (_Address & Memory::ADDR_MASK_MEM1)) { @@ -237,6 +237,41 @@ namespace JitInterface return inst; } + // Marks the store instruction at the current PC as needing an exception check of the given type. + // The PC is added to that type's address set and its JIT block is invalidated; a null jit, PC == 0, an already recorded PC or a non-store opcode leaves everything untouched. + void CompileExceptionCheck(int type) + { + if (!jit) + return; + + std::unordered_set<u32> *exception_addresses; + + switch (type) + { + case EXCEPTIONS_FIFO_WRITE: + { + exception_addresses = &jit->js.fifoWriteAddresses; + break; + } + default: + ERROR_LOG(POWERPC, "Unknown exception check type"); + // No address set exists for an unknown type, so stop here instead of using the unset pointer below. + return; + } + + if (PC != 0 && (exception_addresses->find(PC)) == (exception_addresses->end())) + { + int optype = GetOpInfo(Memory::ReadUnchecked_U32(PC))->type; + if (optype == OPTYPE_STORE || optype == OPTYPE_STOREFP || (optype == OPTYPE_STOREPS)) + { + exception_addresses->insert(PC); + + // Invalidate the JIT block so that it gets recompiled with the external exception check included. 
+ jit->GetBlockCache()->InvalidateICache(PC, 4); + } + } + } + void Shutdown() { if (jit) diff --git a/Source/Core/Core/PowerPC/JitInterface.h b/Source/Core/Core/PowerPC/JitInterface.h index a8ed783726..53b2b38312 100644 --- a/Source/Core/Core/PowerPC/JitInterface.h +++ b/Source/Core/Core/PowerPC/JitInterface.h @@ -11,6 +11,11 @@ namespace JitInterface { + enum + { + EXCEPTIONS_FIFO_WRITE + }; + void DoState(PointerWrap &p); CPUCoreBase *InitJitCore(int core); @@ -24,7 +29,7 @@ namespace JitInterface bool HandleFault(uintptr_t access_address, SContext* ctx); // used by JIT to read instructions - u32 Read_Opcode_JIT(const u32 _Address); + u32 ReadOpcodeJIT(const u32 _Address); // Clearing CodeCache void ClearCache(); @@ -33,6 +38,8 @@ namespace JitInterface void InvalidateICache(u32 address, u32 size); + void CompileExceptionCheck(int type); + void Shutdown(); } extern bool bMMU; diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index 6cd1043c38..923e621dcd 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -654,7 +654,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 for (u32 i = 0; i < blockSize; ++i) { - UGeckoInstruction inst = JitInterface::Read_Opcode_JIT(address); + UGeckoInstruction inst = JitInterface::ReadOpcodeJIT(address); if (inst.hex != 0) { diff --git a/Source/Core/Core/PowerPC/PPCTables.h b/Source/Core/Core/PowerPC/PPCTables.h index bfc5c5303a..ea6c2ac978 100644 --- a/Source/Core/Core/PowerPC/PPCTables.h +++ b/Source/Core/Core/PowerPC/PPCTables.h @@ -70,6 +70,9 @@ enum OPTYPE_STOREFP , OPTYPE_DOUBLEFP, OPTYPE_SINGLEFP, + OPTYPE_LOADPS , + OPTYPE_STOREPS , + OPTYPE_FPU , OPTYPE_PS , OPTYPE_DCACHE , OPTYPE_ICACHE , diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index 737a3eb50f..8d05d01fbc 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -63,7 +63,7 @@ static Common::Event
g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 34; +static const u32 STATE_VERSION = 35; enum {