From 1848e93790c8fc4f00e743c5c292ce334cf6c028 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sat, 16 Jan 2010 19:00:09 +0000 Subject: [PATCH] RIP "Optimize Quantizers" option. Now using the safe quantizer code from JITIL in all builds. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4854 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/ABI.cpp | 11 +- Source/Core/Core/Core.vcproj | 12 + Source/Core/Core/Src/ConfigManager.cpp | 2 - Source/Core/Core/Src/CoreParameter.h | 1 - Source/Core/Core/Src/HW/MemmapFunctions.cpp | 11 +- Source/Core/Core/Src/LuaInterface.cpp | 1 - Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 15 +- Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp | 59 +-- Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h | 7 +- .../Core/Core/Src/PowerPC/Jit64/JitRegCache.h | 2 +- .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 304 +++---------- Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h | 12 +- .../Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp | 398 ------------------ Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h | 11 +- .../PowerPC/Jit64IL/Jit_LoadStorePaired.cpp | 9 +- .../Src/PowerPC/JitCommon/JitAsmCommon.cpp | 394 +++++++++++++++++ .../Core/Src/PowerPC/JitCommon/JitAsmCommon.h | 47 +++ .../Core/Src/PowerPC/JitCommon/JitCache.cpp | 18 +- .../Core/Src/PowerPC/JitCommon/JitCache.h | 6 +- .../Core/Src/PowerPC/JitCommon/Jit_Util.cpp | 38 +- .../Core/Src/PowerPC/JitCommon/Jit_Util.h | 41 ++ Source/Core/Core/Src/SConscript | 3 +- Source/Core/DolphinWX/Src/BootManager.cpp | 1 - Source/Core/DolphinWX/Src/ConfigMain.cpp | 14 +- Source/Core/DolphinWX/Src/ConfigMain.h | 4 +- Source/Core/DolphinWX/Src/ISOProperties.cpp | 12 - Source/Core/DolphinWX/Src/ISOProperties.h | 3 +- 27 files changed, 613 insertions(+), 823 deletions(-) create mode 100644 Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp create mode 100644 Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h create mode 100644 Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/ABI.cpp index 681ba5f432..522a8a572a 100644 --- a/Source/Core/Common/Src/ABI.cpp +++ b/Source/Core/Common/Src/ABI.cpp @@ -107,6 +107,7 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { ABI_RestoreStack(1 * 4); } +// Pass two registers as parameters. void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2) { ABI_AlignStack(2 * 4); @@ -216,18 +217,18 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { CALL(func); } -// Pass a register as a paremeter. +// Pass two registers as paremeters. 
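// Editorial sketch (not part of the patch): the widening from MOV(32, ...) to
// MOV(64, ...) below matters on x64 whenever a full 64-bit value is passed,
// e.g. the packed u64 the new quantized-store code hands to WriteDual32; a
// 32-bit MOV would zero the upper half of the parameter register. The
// reg1/reg2 ordering guards against aliasing: in the simple non-aliasing case
// the helper expands to roughly
//   MOV(64, R(ABI_PARAM1), R(reg1));
//   MOV(64, R(ABI_PARAM2), R(reg2));
//   CALL(func);
// but if reg2 already sits in ABI_PARAM1, ABI_PARAM2 must be written first.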
void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) { if (reg2 != ABI_PARAM1) { if (reg1 != ABI_PARAM1) - MOV(32, R(ABI_PARAM1), R(reg1)); + MOV(64, R(ABI_PARAM1), R(reg1)); if (reg2 != ABI_PARAM2) - MOV(32, R(ABI_PARAM2), R(reg2)); + MOV(64, R(ABI_PARAM2), R(reg2)); } else { if (reg2 != ABI_PARAM2) - MOV(32, R(ABI_PARAM2), R(reg2)); + MOV(64, R(ABI_PARAM2), R(reg2)); if (reg1 != ABI_PARAM1) - MOV(32, R(ABI_PARAM1), R(reg1)); + MOV(64, R(ABI_PARAM1), R(reg1)); } CALL(func); } diff --git a/Source/Core/Core/Core.vcproj b/Source/Core/Core/Core.vcproj index 25a74714c0..c1d812659e 100644 --- a/Source/Core/Core/Core.vcproj +++ b/Source/Core/Core/Core.vcproj @@ -1938,6 +1938,18 @@ RelativePath=".\Src\PowerPC\JitCommon\Jit_Util.cpp" > + + + + + + diff --git a/Source/Core/Core/Src/ConfigManager.cpp b/Source/Core/Core/Src/ConfigManager.cpp index 766318d282..6c6fc5b555 100644 --- a/Source/Core/Core/Src/ConfigManager.cpp +++ b/Source/Core/Core/Src/ConfigManager.cpp @@ -108,7 +108,6 @@ void SConfig::SaveSettings() ini.Set("Core", "DefaultGCM", m_LocalCoreStartupParameter.m_strDefaultGCM); ini.Set("Core", "DVDRoot", m_LocalCoreStartupParameter.m_strDVDRoot); ini.Set("Core", "Apploader", m_LocalCoreStartupParameter.m_strApploader); - ini.Set("Core", "OptimizeQuantizers", m_LocalCoreStartupParameter.bOptimizeQuantizers); ini.Set("Core", "EnableCheats", m_LocalCoreStartupParameter.bEnableCheats); ini.Set("Core", "SelectedLanguage", m_LocalCoreStartupParameter.SelectedLanguage); ini.Set("Core", "MemcardA", m_strMemoryCardA); @@ -225,7 +224,6 @@ void SConfig::LoadSettings() ini.Get("Core", "DefaultGCM", &m_LocalCoreStartupParameter.m_strDefaultGCM); ini.Get("Core", "DVDRoot", &m_LocalCoreStartupParameter.m_strDVDRoot); ini.Get("Core", "Apploader", &m_LocalCoreStartupParameter.m_strApploader); - ini.Get("Core", "OptimizeQuantizers", &m_LocalCoreStartupParameter.bOptimizeQuantizers, true); ini.Get("Core", "EnableCheats", &m_LocalCoreStartupParameter.bEnableCheats, false); ini.Get("Core", "SelectedLanguage", &m_LocalCoreStartupParameter.SelectedLanguage, 0); ini.Get("Core", "MemcardA", &m_strMemoryCardA); diff --git a/Source/Core/Core/Src/CoreParameter.h b/Source/Core/Core/Src/CoreParameter.h index c3079a8835..eedeebfb89 100644 --- a/Source/Core/Core/Src/CoreParameter.h +++ b/Source/Core/Core/Src/CoreParameter.h @@ -63,7 +63,6 @@ struct SCoreStartupParameter bool bHLE_BS2; bool bUseFastMem; bool bLockThreads; - bool bOptimizeQuantizers; bool bEnableCheats; bool bEnableIsoCache; diff --git a/Source/Core/Core/Src/HW/MemmapFunctions.cpp b/Source/Core/Core/Src/HW/MemmapFunctions.cpp index b14be8156b..28e44baecf 100644 --- a/Source/Core/Core/Src/HW/MemmapFunctions.cpp +++ b/Source/Core/Core/Src/HW/MemmapFunctions.cpp @@ -126,7 +126,7 @@ inline void hwWriteIOBridge(u32 var, u32 addr) {WII_IOBridge::Write32(var, addr) inline void hwWriteIOBridge(u64 var, u32 addr) {PanicAlert("hwWriteIOBridge: There's no 64-bit HW write. %08x", addr);} template -void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag) +inline void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag) { // TODO: Figure out the fastest order of tests for both read and write (they are probably different). 
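// Editorial sketch (not part of the patch): the chain of range checks that
// follows classifies the effective address; the first test below appears to
// select the memory-mapped hardware area so the access can be routed to the
// device handlers, while everything else falls through to the ordinary RAM
// paths further down. As a plain-C version of that first predicate
// (hypothetical helper name):
//   static inline bool IsMappedHardware(u32 ea) { return (ea & 0xC8000000) == 0xC8000000; }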
if ((em_address & 0xC8000000) == 0xC8000000) @@ -204,7 +204,7 @@ void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XC template -void WriteToHardware(u32 em_address, const T data, u32 effective_address, Memory::XCheckTLBFlag flag) +inline void WriteToHardware(u32 em_address, const T data, u32 effective_address, Memory::XCheckTLBFlag flag) { /* Debugging: CheckForBadAddresses##_type(em_address, data, false);*/ if ((em_address & 0xC8000000) == 0xC8000000) @@ -343,13 +343,6 @@ u16 Read_U16(const u32 _Address) u32 Read_U32(const u32 _Address) { - /*#if MAX_LOGLEVEL >= 4 - if (_Address == 0x00000000) - { - //PanicAlert("Program tried to read from [00000000]"); - //return 0x00000000; - } - #endif*/ u32 _var = 0; ReadFromHardware(_var, _Address, _Address, FLAG_READ); #ifdef ENABLE_MEM_CHECK diff --git a/Source/Core/Core/Src/LuaInterface.cpp b/Source/Core/Core/Src/LuaInterface.cpp index ef71d5355f..1fedd31a05 100644 --- a/Source/Core/Core/Src/LuaInterface.cpp +++ b/Source/Core/Core/Src/LuaInterface.cpp @@ -2751,7 +2751,6 @@ DEFINE_LUA_FUNCTION(emulua_loadrom, "filename") // General settings game_ini.Get("Core", "CPUOnThread", &StartUp.bCPUThread, StartUp.bCPUThread); game_ini.Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle); - game_ini.Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers); game_ini.Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF); game_ini.Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack); // Wii settings diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 5280f0e0ad..40d68e0655 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -42,6 +42,7 @@ #include "../PPCAnalyst.h" #include "../JitCommon/JitCache.h" +#include "../JitCommon/Jit_Util.h" #include "JitRegCache.h" #include "x64Emitter.h" #include "x64Analyzer.h" @@ -93,7 +94,7 @@ public: }; -class Jit64 : public Gen::XCodeBlock +class Jit64 : public EmuCodeBlock { private: struct JitState @@ -182,26 +183,14 @@ public: void WriteRfiExitDestInEAX(); void WriteCallInterpreter(UGeckoInstruction _inst); void Cleanup(); - - void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); - void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0); - void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false); - void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset); - void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address); - void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address); void GenerateCarry(Gen::X64Reg temp_reg); - void ForceSinglePrecisionS(Gen::X64Reg xmm); - void ForceSinglePrecisionP(Gen::X64Reg xmm); - void JitClearCA(); - void JitSetCA(); void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); typedef u32 (*Operation)(u32 a, u32 b); void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false); void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); - // OPCODES void unknown_instruction(UGeckoInstruction _inst); void Default(UGeckoInstruction _inst); diff --git 
a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index 67567e9931..60c98721a7 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -216,61 +216,6 @@ void AsmRoutineManager::Generate() GenerateCommon(); } - -void AsmRoutineManager::GenFifoWrite(int size) -{ - // Assume value in ABI_PARAM1 - PUSH(ESI); - if (size != 32) - PUSH(EDX); - BSWAP(size, ABI_PARAM1); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); - MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - if (size != 32) { - MOV(32, R(EDX), R(ABI_PARAM1)); - MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX)); - } else { - MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1)); - } - ADD(32, R(ESI), Imm8(size >> 3)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - if (size != 32) - POP(EDX); - POP(ESI); - RET(); -} - -void AsmRoutineManager::GenFifoFloatWrite() -{ - // Assume value in XMM0 - PUSH(ESI); - PUSH(EDX); - MOVSS(M(&temp32), XMM0); - MOV(32, R(EDX), M(&temp32)); - BSWAP(32, EDX); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); - MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX)); - ADD(32, R(ESI), Imm8(4)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - POP(EDX); - POP(ESI); - RET(); -} - -void AsmRoutineManager::GenFifoXmm64Write() -{ - // Assume value in XMM0. Assume pre-byteswapped (unlike the others here!) - PUSH(ESI); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); - MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0); - ADD(32, R(ESI), Imm8(8)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - POP(ESI); - RET(); -} - void AsmRoutineManager::GenerateCommon() { // USES_CR @@ -298,7 +243,9 @@ void AsmRoutineManager::GenerateCommon() fifoDirectWriteXmm64 = AlignCode4(); GenFifoXmm64Write(); - computeRcFp = AlignCode16(); + GenQuantizedLoads(); + GenQuantizedStores(); + //CMPSD(R(XMM0), M(&zero), // TODO diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h index 923bee3469..ebfc871227 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.h @@ -19,6 +19,7 @@ #define _JITASM_H #include "x64Emitter.h" +#include "../JitCommon/JitAsmCommon.h" // In Dolphin, we don't use inline assembly. Instead, we generate all machine-near // code at runtime. In the case of fixed code like this, after writing it, we write @@ -34,14 +35,11 @@ // To add a new asm routine, just add another const here, and add the code to Generate. // Also, possibly increase the size of the code buffer. 
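// Hypothetical example of that recipe (names invented, not part of this patch):
//   const u8 *myNewRoutine;              // 1) add a pointer member to the class
//   ...
//   myNewRoutine = AlignCode4();         // 2) emit its body in Generate()/GenerateCommon()
//   MOV(32, R(EAX), Imm32(0));
//   RET();
// then make sure the code buffer allocated in Init() is still large enough to
// hold the extra code.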
-class AsmRoutineManager : public Gen::XCodeBlock +class AsmRoutineManager : public CommonAsmRoutines { private: void Generate(); void GenerateCommon(); - void GenFifoWrite(int size); - void GenFifoFloatWrite(); - void GenFifoXmm64Write(); public: void Init() { @@ -65,7 +63,6 @@ public: const u8 *fpException; const u8 *computeRc; - const u8 *computeRcFp; const u8 *testExceptions; const u8 *dispatchPcInEAX; const u8 *doTiming; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h index 6f28b4af72..43566e189e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitRegCache.h @@ -70,7 +70,6 @@ protected: PPCCachedReg saved_regs[32]; X64CachedReg saved_xregs[NUMXREGS]; - void DiscardRegContentsIfCached(int preg); virtual const int *GetAllocationOrder(int &count) = 0; XEmitter *emit; @@ -79,6 +78,7 @@ public: virtual ~RegCache() {} virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0; + void DiscardRegContentsIfCached(int preg); void SetEmitter(XEmitter *emitter) {emit = emitter;} void FlushR(X64Reg reg); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 758e547537..718e0ebe59 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -39,7 +39,7 @@ const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 static double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0}; static u64 GC_ALIGNED16(temp64); - + // TODO(ector): Improve 64-bit version static void WriteDual32(u64 value, u32 address) { @@ -95,27 +95,23 @@ void Jit64::psq_st(UGeckoInstruction inst) JITDISABLE(LoadStorePaired) js.block_flags |= BLOCK_USE_GQR0 << inst.I; - if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers) + if (js.blockSetsQuantizers || !inst.RA) { - Default(inst); - return; - } - if (!inst.RA) - { - // This really should never happen. Unless we change this to also support stwux + // TODO: Support these cases if it becomes necessary. Default(inst); return; } - const UGQR gqr(rSPR(SPR_GQR0 + inst.I)); - const EQuantizeType stType = static_cast(gqr.ST_TYPE); - int stScale = gqr.ST_SCALE; bool update = inst.OPCD == 61; int offset = inst.SIMM_12; int a = inst.RA; int s = inst.RS; // Fp numbers + const UGQR gqr(rSPR(SPR_GQR0 + inst.I)); + const EQuantizeType stType = static_cast(gqr.ST_TYPE); + int stScale = gqr.ST_SCALE; + if (inst.W) { // PanicAlert("W=1: stType %i stScale %i update %i", (int)stType, (int)stScale, (int)update); // It's fairly common that games write stuff to the pipe using this. Then, it's pretty much only @@ -165,9 +161,11 @@ void Jit64::psq_st(UGeckoInstruction inst) Default(inst); return; } - return; } + // Is this specialization still worth it? Let's keep it for now. It's probably + // not very risky since a game most likely wouldn't use the same code to process + // floats as integers (but you never know....). 
if (stType == QUANTIZE_FLOAT) { if (gpr.R(a).IsImm() && !update && cpu_info.bSSSE3) @@ -182,115 +180,30 @@ void Jit64::psq_st(UGeckoInstruction inst) return; } } + } - gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); - gpr.Lock(a); - fpr.Lock(s); - if (update) - gpr.LoadToX64(a, true, true); - MOV(32, R(ABI_PARAM2), gpr.R(a)); - if (offset) - ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); - TEST(32, R(ABI_PARAM2), Imm32(0x0C000000)); - if (update && offset) - MOV(32, gpr.R(a), R(ABI_PARAM2)); - CVTPD2PS(XMM0, fpr.R(s)); - SHUFPS(XMM0, R(XMM0), 1); - MOVQ_xmm(M(&temp64), XMM0); -#ifdef _M_X64 - MOV(64, R(ABI_PARAM1), M(&temp64)); - FixupBranch argh = J_CC(CC_NZ); - BSWAP(64, ABI_PARAM1); - MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); - FixupBranch arg2 = J(); - SetJumpTarget(argh); - CALL(thunks.ProtectFunction((void *)&WriteDual32, 0)); + gpr.FlushLockX(EAX, EDX); + gpr.FlushLockX(ECX); + if (update) + gpr.LoadToX64(inst.RA, true, true); + fpr.LoadToX64(inst.RS, true); + MOV(32, R(ECX), gpr.R(inst.RA)); + if (offset) + ADD(32, R(ECX), Imm32((u32)offset)); + if (update && offset) + MOV(32, gpr.R(a), R(ECX)); + MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I])); + MOVZX(32, 8, EDX, R(AL)); + // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! +#ifdef _M_IX86 + SHL(32, R(EDX), Imm8(2)); #else - FixupBranch argh = J_CC(CC_NZ); - MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4)); - BSWAP(32, ABI_PARAM1); - AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1)); - MOV(32, R(ABI_PARAM1), M(&temp64)); - BSWAP(32, ABI_PARAM1); - MOV(32, MDisp(ABI_PARAM2, 4+(u32)Memory::base), R(ABI_PARAM1)); - FixupBranch arg2 = J(); - SetJumpTarget(argh); - MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4)); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); - MOV(32, R(ABI_PARAM1), M(((char*)&temp64))); - ADD(32, R(ABI_PARAM2), Imm32(4)); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); + SHL(32, R(EDX), Imm8(3)); #endif - SetJumpTarget(arg2); - gpr.UnlockAll(); - gpr.UnlockAllX(); - fpr.UnlockAll(); - } - else if (stType == QUANTIZE_U8) - { - gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); - gpr.Lock(a); - fpr.Lock(s); - if (update) - gpr.LoadToX64(a, true, update); - MOV(32, R(ABI_PARAM2), gpr.R(a)); - if (offset) - ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); - if (update && offset) - MOV(32, gpr.R(a), R(ABI_PARAM2)); - MOVAPD(XMM0, fpr.R(s)); - MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale])); - MULPD(XMM0, R(XMM1)); - CVTPD2DQ(XMM0, R(XMM0)); - PACKSSDW(XMM0, R(XMM0)); - PACKUSWB(XMM0, R(XMM0)); - MOVD_xmm(M(&temp64), XMM0); - MOV(16, R(ABI_PARAM1), M(&temp64)); -#ifdef _M_X64 - MOV(16, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); -#else - MOV(32, R(EAX), R(ABI_PARAM2)); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(16, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1)); -#endif - if (update) - MOV(32, gpr.R(a), R(ABI_PARAM2)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - fpr.UnlockAll(); - } - else if (stType == QUANTIZE_S16) - { - gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); - gpr.Lock(a); - fpr.Lock(s); - if (update) - gpr.LoadToX64(a, true, update); - MOV(32, R(ABI_PARAM2), gpr.R(a)); - if (offset) - ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); - if (update) - MOV(32, gpr.R(a), R(ABI_PARAM2)); - MOVAPD(XMM0, fpr.R(s)); - MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale])); - MULPD(XMM0, R(XMM1)); - SHUFPD(XMM0, R(XMM0), 1); - 
CVTPD2DQ(XMM0, R(XMM0)); - PACKSSDW(XMM0, R(XMM0)); - MOVD_xmm(M(&temp64), XMM0); - MOV(32, R(ABI_PARAM1), M(&temp64)); - SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0); - gpr.UnlockAll(); - gpr.UnlockAllX(); - fpr.UnlockAll(); - } - else { - // Dodger uses this. - // mario tennis - //PanicAlert("st %i:%i", stType, inst.W); - Default(inst); - } + CVTPD2PS(XMM0, fpr.R(s)); + CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized)); + gpr.UnlockAll(); + gpr.UnlockAllX(); } void Jit64::psq_l(UGeckoInstruction inst) @@ -300,144 +213,35 @@ void Jit64::psq_l(UGeckoInstruction inst) js.block_flags |= BLOCK_USE_GQR0 << inst.I; - if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers) + if (js.blockSetsQuantizers || !inst.RA || inst.W) { Default(inst); return; } - const UGQR gqr(rSPR(SPR_GQR0 + inst.I)); - const EQuantizeType ldType = static_cast(gqr.LD_TYPE); - int ldScale = gqr.LD_SCALE; bool update = inst.OPCD == 57; - if (!inst.RA || inst.W) - { - // 0 1 during load - //PanicAlert("ld:%i %i", ldType, (int)inst.W); - Default(inst); - return; - } int offset = inst.SIMM_12; - switch (ldType) { - case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address. - { -#ifdef _M_X64 - gpr.LoadToX64(inst.RA, true, update); - fpr.LoadToX64(inst.RS, false); - if (cpu_info.bSSSE3) { - X64Reg xd = fpr.R(inst.RS).GetSimpleReg(); - MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); - PSHUFB(xd, M((void *)pbswapShuffle2x4)); - CVTPS2PD(xd, R(xd)); - } else { - MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); - BSWAP(64, RAX); - MOV(64, M(&psTemp[0]), R(RAX)); - X64Reg r = fpr.R(inst.RS).GetSimpleReg(); - CVTPS2PD(r, M(&psTemp[0])); - SHUFPD(r, R(r), 1); - } - if (update && offset != 0) - ADD(32, gpr.R(inst.RA), Imm32(offset)); - break; -#else - if (cpu_info.bSSSE3) { - gpr.LoadToX64(inst.RA, true, update); - fpr.LoadToX64(inst.RS, false); - X64Reg xd = fpr.R(inst.RS).GetSimpleReg(); - MOV(32, R(EAX), gpr.R(inst.RA)); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset)); - PSHUFB(xd, M((void *)pbswapShuffle2x4)); - CVTPS2PD(xd, R(xd)); - } else { - gpr.FlushLockX(ECX); - gpr.LoadToX64(inst.RA, true, update); - // This can probably be optimized somewhat. - LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base)); - BSWAP(32, RAX); - MOV(32, M(&psTemp[0]), R(RAX)); - MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4)); - BSWAP(32, RAX); - MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX)); - fpr.LoadToX64(inst.RS, false, true); - X64Reg r = fpr.R(inst.RS).GetSimpleReg(); - CVTPS2PD(r, M(&psTemp[0])); - gpr.UnlockAllX(); - } - if (update && offset != 0) - ADD(32, gpr.R(inst.RA), Imm32(offset)); - break; -#endif - } - case QUANTIZE_U8: - { - gpr.LoadToX64(inst.RA, true, update); -#ifdef _M_X64 - MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); -#else - LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOVZX(32, 16, EAX, MDisp(EAX, (u32)Memory::base)); -#endif - MOV(32, M(&temp64), R(EAX)); - MOVD_xmm(XMM0, M(&temp64)); - // SSE4 optimization opportunity here. 
- PXOR(XMM1, R(XMM1)); - PUNPCKLBW(XMM0, R(XMM1)); - PUNPCKLWD(XMM0, R(XMM1)); - CVTDQ2PD(XMM0, R(XMM0)); - fpr.LoadToX64(inst.RS, false, true); - X64Reg r = fpr.R(inst.RS).GetSimpleReg(); - MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale])); - MULPD(r, R(XMM0)); - if (update && offset != 0) - ADD(32, gpr.R(inst.RA), Imm32(offset)); - } - break; - case QUANTIZE_S16: - { - gpr.LoadToX64(inst.RA, true, update); -#ifdef _M_X64 - MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); -#else - LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, R(EAX), MDisp(EAX, (u32)Memory::base)); -#endif - BSWAP(32, EAX); - MOV(32, M(&temp64), R(EAX)); - fpr.LoadToX64(inst.RS, false, true); - X64Reg r = fpr.R(inst.RS).GetSimpleReg(); - MOVD_xmm(XMM0, M(&temp64)); - PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword.. - PSRAD(XMM0, 16); // then use this signed shift to sign extend. clever eh? :P - CVTDQ2PD(XMM0, R(XMM0)); - MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale])); - MULPD(r, R(XMM0)); - SHUFPD(r, R(r), 1); - if (update && offset != 0) - ADD(32, gpr.R(inst.RA), Imm32(offset)); - } - break; - /* - Dynamic quantizer. Todo when we have a test set. - MOVZX(32, 8, EAX, M(((char *)&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]) + 3)); // it's in the high byte. - AND(32, R(EAX), Imm8(0x3F)); - MOV(32, R(ECX), Imm32((u32)&m_dequantizeTableD)); - MOVDDUP(r, MComplex(RCX, EAX, 8, 0)); - */ - default: - // 4 0 - // 6 0 //power tennis - // 5 0 - // PanicAlert("ld:%i %i", ldType, (int)inst.W); - Default(inst); - return; - } - - //u32 EA = (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12; + gpr.FlushLockX(EAX, EDX); + gpr.FlushLockX(ECX); + gpr.LoadToX64(inst.RA, true, true); + fpr.LoadToX64(inst.RS, false, true); + if (offset) + LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset)); + else + MOV(32, R(ECX), gpr.R(inst.RA)); + if (update && offset) + MOV(32, gpr.R(inst.RA), R(ECX)); + MOVZX(32, 16, EAX, M(((char *)&GQR(inst.I)) + 2)); + MOVZX(32, 8, EDX, R(AL)); + // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! (MComplex can do this, no?) 
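// Editorial sketch (not part of the patch) of the dispatch emitted here: the
// MOVZX above leaves the GQR's type field in EDX, while EAX keeps the scale
// bits that the common routines later shift down into a table offset. The
// type index is pre-scaled by sizeof(void*) and used for an indirect call,
// roughly
//   CALLptr(asm_routines.pairedLoadQuantized[type]);   // result lands in XMM0
// The SHL below stands in for an [EDX*4/8 + disp32] operand in that call; the
// FIXME notes that MComplex may already be able to encode such a form.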
+#ifdef _M_IX86 + SHL(32, R(EDX), Imm8(2)); +#else + SHL(32, R(EDX), Imm8(3)); +#endif + CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized)); + CVTPS2PD(fpr.RX(inst.RS), R(XMM0)); + gpr.UnlockAll(); + gpr.UnlockAllX(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h index e0838321a0..b6b163a113 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h @@ -32,6 +32,7 @@ #include "../PPCAnalyst.h" #include "../JitCommon/JitCache.h" +#include "../JitCommon/Jit_Util.h" #include "x64Emitter.h" #include "x64Analyzer.h" #include "IR.h" @@ -85,7 +86,7 @@ public: }; -class Jit64 : public Gen::XCodeBlock +class Jit64 : public EmuCodeBlock { private: struct JitState @@ -175,19 +176,10 @@ public: void WriteCallInterpreter(UGeckoInstruction _inst); void Cleanup(); - void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); - void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0); - void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false); - void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset); - void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address); void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address); void GenerateCarry(Gen::X64Reg temp_reg); - void ForceSinglePrecisionS(Gen::X64Reg xmm); - void ForceSinglePrecisionP(Gen::X64Reg xmm); - void JitClearCA(); - void JitSetCA(); void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); typedef u32 (*Operation)(u32 a, u32 b); void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp index 329e103524..0723b97a01 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp @@ -215,403 +215,6 @@ void AsmRoutineManager::Generate() GenerateCommon(); } -const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15}; - -const float m_quantizeTableS[] = -{ - (1 << 0), (1 << 1), (1 << 2), (1 << 3), - (1 << 4), (1 << 5), (1 << 6), (1 << 7), - (1 << 8), (1 << 9), (1 << 10), (1 << 11), - (1 << 12), (1 << 13), (1 << 14), (1 << 15), - (1 << 16), (1 << 17), (1 << 18), (1 << 19), - (1 << 20), (1 << 21), (1 << 22), (1 << 23), - (1 << 24), (1 << 25), (1 << 26), (1 << 27), - (1 << 28), (1 << 29), (1 << 30), (1 << 31), - 1.0 / (1ULL << 32), 1.0 / (1 << 31), 1.0 / (1 << 30), 1.0 / (1 << 29), - 1.0 / (1 << 28), 1.0 / (1 << 27), 1.0 / (1 << 26), 1.0 / (1 << 25), - 1.0 / (1 << 24), 1.0 / (1 << 23), 1.0 / (1 << 22), 1.0 / (1 << 21), - 1.0 / (1 << 20), 1.0 / (1 << 19), 1.0 / (1 << 18), 1.0 / (1 << 17), - 1.0 / (1 << 16), 1.0 / (1 << 15), 1.0 / (1 << 14), 1.0 / (1 << 13), - 1.0 / (1 << 12), 1.0 / (1 << 11), 1.0 / (1 << 10), 1.0 / (1 << 9), - 1.0 / (1 << 8), 1.0 / (1 << 7), 1.0 / (1 << 6), 1.0 / (1 << 5), - 1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1), -}; - -const float m_dequantizeTableS[] = -{ - 1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3), - 1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7), - 1.0 / (1 << 8), 1.0 / (1 << 9), 1.0 / (1 
<< 10), 1.0 / (1 << 11), - 1.0 / (1 << 12), 1.0 / (1 << 13), 1.0 / (1 << 14), 1.0 / (1 << 15), - 1.0 / (1 << 16), 1.0 / (1 << 17), 1.0 / (1 << 18), 1.0 / (1 << 19), - 1.0 / (1 << 20), 1.0 / (1 << 21), 1.0 / (1 << 22), 1.0 / (1 << 23), - 1.0 / (1 << 24), 1.0 / (1 << 25), 1.0 / (1 << 26), 1.0 / (1 << 27), - 1.0 / (1 << 28), 1.0 / (1 << 29), 1.0 / (1 << 30), 1.0 / (1 << 31), - (1ULL << 32), (1 << 31), (1 << 30), (1 << 29), - (1 << 28), (1 << 27), (1 << 26), (1 << 25), - (1 << 24), (1 << 23), (1 << 22), (1 << 21), - (1 << 20), (1 << 19), (1 << 18), (1 << 17), - (1 << 16), (1 << 15), (1 << 14), (1 << 13), - (1 << 12), (1 << 11), (1 << 10), (1 << 9), - (1 << 8), (1 << 7), (1 << 6), (1 << 5), - (1 << 4), (1 << 3), (1 << 2), (1 << 1), -}; - -float psTemp[2]; - -const float m_65535 = 65535.0f; - - -#define QUANTIZE_OVERFLOW_SAFE - -// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range -// while it's OK for large negatives, it isn't for positives -// I don't know whether the overflow actually happens in any games -// but it potentially can cause problems, so we need some clamping - -// TODO(ector): Improve 64-bit version -static void WriteDual32(u64 value, u32 address) -{ - Memory::Write_U32((u32)(value >> 32), address); - Memory::Write_U32((u32)value, address + 4); -} - -void AsmRoutineManager::GenQuantizedStores() { - const u8* storePairedIllegal = AlignCode4(); - UD2(); - const u8* storePairedFloat = AlignCode4(); - // IN: value = XMM0, two singles in bottom. PPC address = ECX. -#ifdef _M_X64 - // INT3(); - MOVQ_xmm(M(&psTemp[0]), XMM0); - MOV(64, R(RAX), M(&psTemp[0])); - //INT3(); - //MOVQ_xmm(R(RAX), XMM0); - //INT3(); - ROL(64, R(RAX), Imm8(32)); // Swap the two - the big BSWAP will unswap. - TEST(32, R(ECX), Imm32(0x0C000000)); - FixupBranch argh = J_CC(CC_NZ); - BSWAP(64, RAX); - MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX)); - FixupBranch arg2 = J(); - SetJumpTarget(argh); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX); - SetJumpTarget(arg2); -#else - MOVQ_xmm(M(&psTemp[0]), XMM0); - TEST(32, R(ECX), Imm32(0x0C000000)); - FixupBranch argh = J_CC(CC_NZ); - MOV(32, R(EAX), M(&psTemp)); - BSWAP(32, EAX); - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX)); - MOV(32, R(EAX), M(((char*)&psTemp) + 4)); - BSWAP(32, EAX); - MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX)); - FixupBranch arg2 = J(); - SetJumpTarget(argh); - MOV(32, R(EAX), M(((char*)&psTemp))); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX); - MOV(32, R(EAX), M(((char*)&psTemp)+4)); - ADD(32, R(ECX), Imm32(4)); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX); - SetJumpTarget(arg2); -#endif - RET(); - - const u8* storePairedU8 = AlignCode4(); - //INT3(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); - PUNPCKLDQ(XMM1, R(XMM1)); - MULPS(XMM0, R(XMM1)); -#ifdef QUANTIZE_OVERFLOW_SAFE - MOVSS(XMM1, M((void *)&m_65535)); - PUNPCKLDQ(XMM1, R(XMM1)); - MINPS(XMM0, R(XMM1)); -#endif - CVTPS2DQ(XMM0, R(XMM0)); - PACKSSDW(XMM0, R(XMM0)); - PACKUSWB(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); -#ifdef _M_X64 - MOV(16, MComplex(RBX, RCX, 1, 0), R(AX)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(16, MDisp(ECX, (u32)Memory::base), R(AX)); -#endif - RET(); - - const u8* storePairedS8 = AlignCode4(); - //INT3(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, 
(u32)(u64)m_quantizeTableS)); - PUNPCKLDQ(XMM1, R(XMM1)); - MULPS(XMM0, R(XMM1)); -#ifdef QUANTIZE_OVERFLOW_SAFE - MOVSS(XMM1, M((void *)&m_65535)); - PUNPCKLDQ(XMM1, R(XMM1)); - MINPS(XMM0, R(XMM1)); -#endif - CVTPS2DQ(XMM0, R(XMM0)); - PACKSSDW(XMM0, R(XMM0)); - PACKSSWB(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); -#ifdef _M_X64 - MOV(16, MComplex(RBX, RCX, 1, 0), R(AX)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(16, MDisp(ECX, (u32)Memory::base), R(AX)); -#endif - RET(); - - const u8* storePairedU16 = AlignCode4(); - //INT3(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); - PUNPCKLDQ(XMM1, R(XMM1)); - MULPS(XMM0, R(XMM1)); - - // PACKUSDW is available only in SSE4 - PXOR(XMM1, R(XMM1)); - MAXPS(XMM0, R(XMM1)); - MOVSS(XMM1, M((void *)&m_65535)); - PUNPCKLDQ(XMM1, R(XMM1)); - MINPS(XMM0, R(XMM1)); - - CVTPS2DQ(XMM0, R(XMM0)); - MOVQ_xmm(M(psTemp), XMM0); - // place ps[0] into the higher word, ps[1] into the lower - // so no need in ROL after BSWAP - MOVZX(32, 16, EAX, M((char*)psTemp + 0)); - SHL(32, R(EAX), Imm8(16)); - MOV(16, R(AX), M((char*)psTemp + 4)); - - BSWAP(32, EAX); - //ROL(32, R(EAX), Imm8(16)); -#ifdef _M_X64 - MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX)); -#endif - RET(); - - const u8* storePairedS16 = AlignCode4(); - //INT3(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); - PUNPCKLDQ(XMM1, R(XMM1)); - MULPS(XMM0, R(XMM1)); -#ifdef QUANTIZE_OVERFLOW_SAFE - MOVSS(XMM1, M((void *)&m_65535)); - PUNPCKLDQ(XMM1, R(XMM1)); - MINPS(XMM0, R(XMM1)); -#endif - CVTPS2DQ(XMM0, R(XMM0)); - PACKSSDW(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); - BSWAP(32, EAX); - ROL(32, R(EAX), Imm8(16)); -#ifdef _M_X64 - MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX)); -#endif - RET(); - - pairedStoreQuantized[0] = storePairedFloat; - pairedStoreQuantized[1] = storePairedIllegal; - pairedStoreQuantized[2] = storePairedIllegal; - pairedStoreQuantized[3] = storePairedIllegal; - pairedStoreQuantized[4] = storePairedU8; - pairedStoreQuantized[5] = storePairedU16; - pairedStoreQuantized[6] = storePairedS8; - pairedStoreQuantized[7] = storePairedS16; -} - -void AsmRoutineManager::GenQuantizedLoads() { - const u8* loadPairedIllegal = AlignCode4(); - UD2(); - const u8* loadPairedFloat = AlignCode4(); - if (cpu_info.bSSSE3) { -#ifdef _M_X64 - MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base)); -#endif - PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); - } else { -#ifdef _M_X64 - MOV(64, R(RCX), MComplex(RBX, RCX, 1, 0)); - BSWAP(64, RCX); - ROL(64, R(RCX), Imm8(32)); - MOVQ_xmm(XMM0, R(RCX)); -#else -#if 0 - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base)); - PXOR(XMM1, R(XMM1)); - PSHUFLW(XMM0, R(XMM0), 0xB1); - MOVAPD(XMM1, R(XMM0)); - PSRLW(XMM0, 8); - PSLLW(XMM1, 8); - POR(XMM0, R(XMM1)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base)); - BSWAP(32, EAX); - MOV(32, M(&psTemp[0]), R(RAX)); - MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4)); - BSWAP(32, EAX); - MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX)); - MOVQ_xmm(XMM0, M(&psTemp[0])); -#endif -#endif - } - RET(); - - const u8* loadPairedU8 = AlignCode4(); -#ifdef _M_X64 - 
MOVZX(32, 16, ECX, MComplex(RBX, RCX, 1, 0)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOVZX(32, 16, ECX, MDisp(ECX, (u32)Memory::base)); -#endif - MOVD_xmm(XMM0, R(ECX)); - PXOR(XMM1, R(XMM1)); - PUNPCKLBW(XMM0, R(XMM1)); - PUNPCKLWD(XMM0, R(XMM1)); - CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); - PUNPCKLDQ(XMM1, R(XMM1)); - MULPS(XMM0, R(XMM1)); - RET(); - - const u8* loadPairedS8 = AlignCode4(); -#ifdef _M_X64 - MOVZX(32, 16, ECX, MComplex(RBX, RCX, 1, 0)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOVZX(32, 16, ECX, MDisp(ECX, (u32)Memory::base)); -#endif - MOVD_xmm(XMM0, R(ECX)); - PUNPCKLBW(XMM0, R(XMM0)); - PUNPCKLWD(XMM0, R(XMM0)); - PSRAD(XMM0, 24); - CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); - PUNPCKLDQ(XMM1, R(XMM1)); - MULPS(XMM0, R(XMM1)); - RET(); - - const u8* loadPairedU16 = AlignCode4(); -#ifdef _M_X64 - MOV(32, R(ECX), MComplex(RBX, RCX, 1, 0)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, R(ECX), MDisp(ECX, (u32)Memory::base)); -#endif - BSWAP(32, ECX); - ROL(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); - PXOR(XMM1, R(XMM1)); - PUNPCKLWD(XMM0, R(XMM1)); - CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); - PUNPCKLDQ(XMM1, R(XMM1)); - MULPS(XMM0, R(XMM1)); - RET(); - - const u8* loadPairedS16 = AlignCode4(); -#ifdef _M_X64 - MOV(32, R(ECX), MComplex(RBX, RCX, 1, 0)); -#else - AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, R(ECX), MDisp(ECX, (u32)Memory::base)); -#endif - BSWAP(32, ECX); - ROL(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); - PUNPCKLWD(XMM0, R(XMM0)); - PSRAD(XMM0, 16); - CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - AND(32, R(EAX), Imm32(0xFC)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); - PUNPCKLDQ(XMM1, R(XMM1)); - MULPS(XMM0, R(XMM1)); - RET(); - - pairedLoadQuantized[0] = loadPairedFloat; - pairedLoadQuantized[1] = loadPairedIllegal; - pairedLoadQuantized[2] = loadPairedIllegal; - pairedLoadQuantized[3] = loadPairedIllegal; - pairedLoadQuantized[4] = loadPairedU8; - pairedLoadQuantized[5] = loadPairedU16; - pairedLoadQuantized[6] = loadPairedS8; - pairedLoadQuantized[7] = loadPairedS16; -} - -void AsmRoutineManager::GenFifoWrite(int size) -{ - // Assume value in ABI_PARAM1 - PUSH(ESI); - if (size != 32) - PUSH(EDX); - BSWAP(size, ABI_PARAM1); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); - MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - if (size != 32) { - MOV(32, R(EDX), R(ABI_PARAM1)); - MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX)); - } else { - MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1)); - } - ADD(32, R(ESI), Imm8(size >> 3)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - if (size != 32) - POP(EDX); - POP(ESI); - RET(); -} - -void AsmRoutineManager::GenFifoFloatWrite() -{ - // Assume value in XMM0 - PUSH(ESI); - PUSH(EDX); - MOVSS(M(&temp32), XMM0); - MOV(32, R(EDX), M(&temp32)); - BSWAP(32, EDX); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); - MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX)); - ADD(32, R(ESI), Imm8(4)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - POP(EDX); - POP(ESI); - RET(); -} - -void AsmRoutineManager::GenFifoXmm64Write() -{ - // Assume value in XMM0. Assume pre-byteswapped (unlike the others here!) 
- PUSH(ESI); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); - MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0); - ADD(32, R(ESI), Imm8(8)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - POP(ESI); - RET(); -} - void AsmRoutineManager::GenerateCommon() { // USES_CR @@ -649,7 +252,6 @@ void AsmRoutineManager::GenerateCommon() GenQuantizedLoads(); GenQuantizedStores(); - computeRcFp = AlignCode16(); //CMPSD(R(XMM0), M(&zero), // TODO diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h index fb296b2cf9..051c486290 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.h @@ -19,6 +19,7 @@ #define _JITASM_H #include "x64Emitter.h" +#include "../JitCommon/JitAsmCommon.h" // In Dolphin, we don't use inline assembly. Instead, we generate all machine-near // code at runtime. In the case of fixed code like this, after writing it, we write @@ -34,16 +35,11 @@ // To add a new asm routine, just add another const here, and add the code to Generate. // Also, possibly increase the size of the code buffer. -class AsmRoutineManager : public Gen::XCodeBlock +class AsmRoutineManager : public CommonAsmRoutines { private: void Generate(); void GenerateCommon(); - void GenFifoWrite(int size); - void GenFifoFloatWrite(); - void GenFifoXmm64Write(); // yes, 32 & 64-bit compatible - void GenQuantizedLoads(); - void GenQuantizedStores(); public: void Init() { @@ -67,7 +63,6 @@ public: const u8 *fpException; const u8 *computeRc; - const u8 *computeRcFp; const u8 *testExceptions; const u8 *dispatchPcInEAX; const u8 *doTiming; @@ -82,8 +77,6 @@ public: const u8 *doReJit; - const u8 *pairedLoadQuantized[8]; - const u8 *pairedStoreQuantized[8]; bool compareEnabled; }; diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp index b39a3e1193..9c486efb54 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStorePaired.cpp @@ -15,9 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only. -// Should give a very noticable speed boost to paired single heavy code. 
- #include "Common.h" #include "Thunk.h" @@ -39,9 +36,8 @@ void Jit64::psq_st(UGeckoInstruction inst) { INSTRUCTION_START - DISABLE64 JITDISABLE(LoadStorePaired) - if (inst.W || !Core::GetStartupParameter().bOptimizeQuantizers) {Default(inst); return;} + if (inst.W) {Default(inst); return;} IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val; if (inst.RA) addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); @@ -55,9 +51,8 @@ void Jit64::psq_st(UGeckoInstruction inst) void Jit64::psq_l(UGeckoInstruction inst) { INSTRUCTION_START - DISABLE64 JITDISABLE(LoadStorePaired) - if (inst.W || !Core::GetStartupParameter().bOptimizeQuantizers) {Default(inst); return;} + if (inst.W) {Default(inst); return;} IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val; if (inst.RA) addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp new file mode 100644 index 0000000000..ad8e5c31d5 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp @@ -0,0 +1,394 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "ABI.h" +#include "Thunk.h" +#include "CPUDetect.h" +#include "x64Emitter.h" + +#include "../../HW/Memmap.h" + +#include "../PowerPC.h" +#include "../../CoreTiming.h" +#include "MemoryUtil.h" + +#include "ABI.h" +#include "../JitCommon/JitCache.h" + +#include "../../HW/GPFifo.h" +#include "../../Core.h" +#include "JitAsmCommon.h" + +using namespace Gen; + +static int temp32; + +void CommonAsmRoutines::GenFifoWrite(int size) +{ + // Assume value in ABI_PARAM1 + PUSH(ESI); + if (size != 32) + PUSH(EDX); + BSWAP(size, ABI_PARAM1); + MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); + MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); + if (size != 32) { + MOV(32, R(EDX), R(ABI_PARAM1)); + MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX)); + } else { + MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1)); + } + ADD(32, R(ESI), Imm8(size >> 3)); + MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); + if (size != 32) + POP(EDX); + POP(ESI); + RET(); +} + +void CommonAsmRoutines::GenFifoFloatWrite() +{ + // Assume value in XMM0 + PUSH(ESI); + PUSH(EDX); + MOVSS(M(&temp32), XMM0); + MOV(32, R(EDX), M(&temp32)); + BSWAP(32, EDX); + MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); + MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); + MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX)); + ADD(32, R(ESI), Imm8(4)); + MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); + POP(EDX); + POP(ESI); + RET(); +} + +void CommonAsmRoutines::GenFifoXmm64Write() +{ + // Assume value in XMM0. Assume pre-byteswapped (unlike the others here!) 
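// Editorial note (illustrative, not from the patch): the three FIFO writers in
// this file follow the same gather-pipe append pattern, roughly equivalent to
//   memcpy(GPFifo::m_gatherPipe + GPFifo::m_gatherPipeCount, &swapped_value, size_in_bytes);
//   GPFifo::m_gatherPipeCount += size_in_bytes;
// with the value byteswapped first where needed; this 64-bit variant expects
// the caller to have swapped it already, as the comment above notes.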
+ PUSH(ESI); + MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); + MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); + MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0); + ADD(32, R(ESI), Imm8(8)); + MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); + POP(ESI); + RET(); +} + +// Safe + Fast Quantizers, originally from JITIL by magumagu + +static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15}; + +static const float GC_ALIGNED16(m_quantizeTableS[]) = +{ + (1 << 0), (1 << 1), (1 << 2), (1 << 3), + (1 << 4), (1 << 5), (1 << 6), (1 << 7), + (1 << 8), (1 << 9), (1 << 10), (1 << 11), + (1 << 12), (1 << 13), (1 << 14), (1 << 15), + (1 << 16), (1 << 17), (1 << 18), (1 << 19), + (1 << 20), (1 << 21), (1 << 22), (1 << 23), + (1 << 24), (1 << 25), (1 << 26), (1 << 27), + (1 << 28), (1 << 29), (1 << 30), (1 << 31), + 1.0 / (1ULL << 32), 1.0 / (1 << 31), 1.0 / (1 << 30), 1.0 / (1 << 29), + 1.0 / (1 << 28), 1.0 / (1 << 27), 1.0 / (1 << 26), 1.0 / (1 << 25), + 1.0 / (1 << 24), 1.0 / (1 << 23), 1.0 / (1 << 22), 1.0 / (1 << 21), + 1.0 / (1 << 20), 1.0 / (1 << 19), 1.0 / (1 << 18), 1.0 / (1 << 17), + 1.0 / (1 << 16), 1.0 / (1 << 15), 1.0 / (1 << 14), 1.0 / (1 << 13), + 1.0 / (1 << 12), 1.0 / (1 << 11), 1.0 / (1 << 10), 1.0 / (1 << 9), + 1.0 / (1 << 8), 1.0 / (1 << 7), 1.0 / (1 << 6), 1.0 / (1 << 5), + 1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1), +}; + +static const float GC_ALIGNED16(m_dequantizeTableS[]) = +{ + 1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3), + 1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7), + 1.0 / (1 << 8), 1.0 / (1 << 9), 1.0 / (1 << 10), 1.0 / (1 << 11), + 1.0 / (1 << 12), 1.0 / (1 << 13), 1.0 / (1 << 14), 1.0 / (1 << 15), + 1.0 / (1 << 16), 1.0 / (1 << 17), 1.0 / (1 << 18), 1.0 / (1 << 19), + 1.0 / (1 << 20), 1.0 / (1 << 21), 1.0 / (1 << 22), 1.0 / (1 << 23), + 1.0 / (1 << 24), 1.0 / (1 << 25), 1.0 / (1 << 26), 1.0 / (1 << 27), + 1.0 / (1 << 28), 1.0 / (1 << 29), 1.0 / (1 << 30), 1.0 / (1 << 31), + (1ULL << 32), (1 << 31), (1 << 30), (1 << 29), + (1 << 28), (1 << 27), (1 << 26), (1 << 25), + (1 << 24), (1 << 23), (1 << 22), (1 << 21), + (1 << 20), (1 << 19), (1 << 18), (1 << 17), + (1 << 16), (1 << 15), (1 << 14), (1 << 13), + (1 << 12), (1 << 11), (1 << 10), (1 << 9), + (1 << 8), (1 << 7), (1 << 6), (1 << 5), + (1 << 4), (1 << 3), (1 << 2), (1 << 1), +}; + +static float GC_ALIGNED16(psTemp[4]); + +static const float m_65535 = 65535.0f; + + +#define QUANTIZE_OVERFLOW_SAFE + +// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range +// while it's OK for large negatives, it isn't for positives +// I don't know whether the overflow actually happens in any games +// but it potentially can cause problems, so we need some clamping + +// TODO(ector): Improve 64-bit version +static void WriteDual32(u64 value, u32 address) +{ + Memory::Write_U32((u32)(value >> 32), address); + Memory::Write_U32((u32)value, address + 4); +} + +// See comment in header for in/outs. 
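// Editorial sketch (scalar, illustrative only) of what one lane of the store
// paths below computes, using the clamp described above:
//   float scaled = ps0 * m_quantizeTableS[stScale];  // table indexed by the GQR scale field
//   scaled = std::min(scaled, 65535.0f);             // QUANTIZE_OVERFLOW_SAFE (MINPS)
//   s32 q = (s32)scaled;                             // CVTPS2DQ
//   // PACKSSDW / PACKUSWB (or PACKSSWB) then narrow q to 16 or 8 bits.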
+void CommonAsmRoutines::GenQuantizedStores() { + const u8* storePairedIllegal = AlignCode4(); + UD2(); + const u8* storePairedFloat = AlignCode4(); + +#ifdef _M_X64 + SHUFPS(XMM0, R(XMM0), 1); + MOVQ_xmm(M(&psTemp[0]), XMM0); + MOV(64, R(RAX), M(&psTemp[0])); + TEST(32, R(ECX), Imm32(0x0C000000)); + FixupBranch too_complex = J_CC(CC_NZ); + BSWAP(64, RAX); + MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX)); + FixupBranch skip_complex = J(); + SetJumpTarget(too_complex); + ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX); + SetJumpTarget(skip_complex); + RET(); +#else + MOVQ_xmm(M(&psTemp[0]), XMM0); + TEST(32, R(ECX), Imm32(0x0C000000)); + FixupBranch argh = J_CC(CC_NZ); + MOV(32, R(EAX), M(&psTemp)); + BSWAP(32, EAX); + AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); + MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX)); + MOV(32, R(EAX), M(((char*)&psTemp) + 4)); + BSWAP(32, EAX); + MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX)); + FixupBranch arg2 = J(); + SetJumpTarget(argh); + MOV(32, R(EAX), M(((char*)&psTemp))); + ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX); + MOV(32, R(EAX), M(((char*)&psTemp)+4)); + ADD(32, R(ECX), Imm32(4)); + ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX); + SetJumpTarget(arg2); + RET(); +#endif + + const u8* storePairedU8 = AlignCode4(); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); +#ifdef QUANTIZE_OVERFLOW_SAFE + MOVSS(XMM1, M((void *)&m_65535)); + PUNPCKLDQ(XMM1, R(XMM1)); + MINPS(XMM0, R(XMM1)); +#endif + CVTPS2DQ(XMM0, R(XMM0)); + PACKSSDW(XMM0, R(XMM0)); + PACKUSWB(XMM0, R(XMM0)); + MOVD_xmm(R(EAX), XMM0); + SafeWriteRegToReg(AX, ECX, 16, 0, false); + + RET(); + + const u8* storePairedS8 = AlignCode4(); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); +#ifdef QUANTIZE_OVERFLOW_SAFE + MOVSS(XMM1, M((void *)&m_65535)); + PUNPCKLDQ(XMM1, R(XMM1)); + MINPS(XMM0, R(XMM1)); +#endif + CVTPS2DQ(XMM0, R(XMM0)); + PACKSSDW(XMM0, R(XMM0)); + PACKSSWB(XMM0, R(XMM0)); + MOVD_xmm(R(EAX), XMM0); + + SafeWriteRegToReg(AX, ECX, 16, 0, false); + + RET(); + + const u8* storePairedU16 = AlignCode4(); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); + + // PACKUSDW is available only in SSE4 + PXOR(XMM1, R(XMM1)); + MAXPS(XMM0, R(XMM1)); + MOVSS(XMM1, M((void *)&m_65535)); + PUNPCKLDQ(XMM1, R(XMM1)); + MINPS(XMM0, R(XMM1)); + + CVTPS2DQ(XMM0, R(XMM0)); + MOVQ_xmm(M(psTemp), XMM0); + // place ps[0] into the higher word, ps[1] into the lower + // so no need in ROL after BSWAP + MOVZX(32, 16, EAX, M((char*)psTemp + 0)); + SHL(32, R(EAX), Imm8(16)); + MOV(16, R(AX), M((char*)psTemp + 4)); + + BSWAP(32, EAX); + SafeWriteRegToReg(EAX, ECX, 32, 0, false); + + RET(); + + const u8* storePairedS16 = AlignCode4(); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + // SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though. 
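// Editorial note: PUNPCKLDQ(XMM1, XMM1) duplicates the low 32-bit element into
// the two low lanes, which is all the following MULPS needs since only the
// bottom pair of floats is stored. SHUFPS(XMM1, R(XMM1), 0) would broadcast it
// to all four lanes, and UNPCKLPS moves data exactly like PUNPCKLDQ but in the
// single-precision domain, so "alias" is close to the truth.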
+ PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); +#ifdef QUANTIZE_OVERFLOW_SAFE + MOVSS(XMM1, M((void *)&m_65535)); + PUNPCKLDQ(XMM1, R(XMM1)); + MINPS(XMM0, R(XMM1)); +#endif + CVTPS2DQ(XMM0, R(XMM0)); + PACKSSDW(XMM0, R(XMM0)); + MOVD_xmm(R(EAX), XMM0); + BSWAP(32, EAX); + ROL(32, R(EAX), Imm8(16)); + SafeWriteRegToReg(EAX, ECX, 32, 0, false); + + RET(); + + pairedStoreQuantized[0] = storePairedFloat; + pairedStoreQuantized[1] = storePairedIllegal; + pairedStoreQuantized[2] = storePairedIllegal; + pairedStoreQuantized[3] = storePairedIllegal; + pairedStoreQuantized[4] = storePairedU8; + pairedStoreQuantized[5] = storePairedU16; + pairedStoreQuantized[6] = storePairedS8; + pairedStoreQuantized[7] = storePairedS16; +} + +void CommonAsmRoutines::GenQuantizedLoads() { + const u8* loadPairedIllegal = AlignCode4(); + UD2(); + const u8* loadPairedFloat = AlignCode4(); + if (cpu_info.bSSSE3) { +#ifdef _M_X64 + MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); +#else + AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); + MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base)); +#endif + PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); + } else { +#ifdef _M_X64 + MOV(64, R(RCX), MComplex(RBX, RCX, 1, 0)); + BSWAP(64, RCX); + ROL(64, R(RCX), Imm8(32)); + MOVQ_xmm(XMM0, R(RCX)); +#else +#if 0 + AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); + MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base)); + PXOR(XMM1, R(XMM1)); + PSHUFLW(XMM0, R(XMM0), 0xB1); + MOVAPD(XMM1, R(XMM0)); + PSRLW(XMM0, 8); + PSLLW(XMM1, 8); + POR(XMM0, R(XMM1)); +#else + AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); + MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base)); + BSWAP(32, EAX); + MOV(32, M(&psTemp[0]), R(RAX)); + MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4)); + BSWAP(32, EAX); + MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX)); + MOVQ_xmm(XMM0, M(&psTemp[0])); +#endif +#endif + } + RET(); + + const u8* loadPairedU8 = AlignCode4(); + UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0); + MOVD_xmm(XMM0, R(ECX)); + PXOR(XMM1, R(XMM1)); + PUNPCKLBW(XMM0, R(XMM1)); + PUNPCKLWD(XMM0, R(XMM1)); + CVTDQ2PS(XMM0, R(XMM0)); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); + RET(); + + const u8* loadPairedS8 = AlignCode4(); + UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0); + MOVD_xmm(XMM0, R(ECX)); + PUNPCKLBW(XMM0, R(XMM0)); + PUNPCKLWD(XMM0, R(XMM0)); + PSRAD(XMM0, 24); + CVTDQ2PS(XMM0, R(XMM0)); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); + RET(); + + const u8* loadPairedU16 = AlignCode4(); + UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); + ROL(32, R(ECX), Imm8(16)); + MOVD_xmm(XMM0, R(ECX)); + PXOR(XMM1, R(XMM1)); + PUNPCKLWD(XMM0, R(XMM1)); + CVTDQ2PS(XMM0, R(XMM0)); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); + RET(); + + const u8* loadPairedS16 = AlignCode4(); + UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); + ROL(32, R(ECX), Imm8(16)); + MOVD_xmm(XMM0, R(ECX)); + PUNPCKLWD(XMM0, R(XMM0)); + PSRAD(XMM0, 16); + CVTDQ2PS(XMM0, R(XMM0)); + SHR(32, R(EAX), Imm8(6)); + AND(32, R(EAX), Imm32(0xFC)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); + RET(); + + pairedLoadQuantized[0] = loadPairedFloat; + pairedLoadQuantized[1] = loadPairedIllegal; + pairedLoadQuantized[2] = loadPairedIllegal; + pairedLoadQuantized[3] = loadPairedIllegal; + 
pairedLoadQuantized[4] = loadPairedU8; + pairedLoadQuantized[5] = loadPairedU16; + pairedLoadQuantized[6] = loadPairedS8; + pairedLoadQuantized[7] = loadPairedS16; +} diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h new file mode 100644 index 0000000000..f84fa76fbf --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h @@ -0,0 +1,47 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _JITASMCOMMON_H +#define _JITASMCOMMON_H + +#include "../JitCommon/Jit_Util.h" + +class CommonAsmRoutines : public EmuCodeBlock { +protected: + void GenQuantizedLoads(); + void GenQuantizedStores(); +public: + void GenFifoWrite(int size); + void GenFifoXmm64Write(); + void GenFifoFloatWrite(); + + // In: array index: GQR to use. + // In: ECX: Address to read from. + // Out: XMM0: Bottom two 32-bit slots hold the read value, + // converted to a pair of floats. + // Trashes: EAX ECX EDX + const u8 GC_ALIGNED16(*pairedLoadQuantized[8]); + + // In: array index: GQR to use. + // In: ECX: Address to write to. + // In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written. + // Out: Nothing. 
+ // Trashes: EAX ECX EDX + const u8 GC_ALIGNED16(*pairedStoreQuantized[8]); +}; + +#endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp index 98f067fbd3..e15ba7d5df 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp @@ -288,7 +288,7 @@ bool JitBlock::ContainsAddress(u32 em_address) block_numbers->push_back(i); } - u32 JitBlockCache::GetOriginalFirstOp(u32 block_num) + u32 JitBlockCache::GetOriginalFirstOp(int block_num) { if (block_num >= num_blocks) { @@ -298,9 +298,9 @@ bool JitBlock::ContainsAddress(u32 em_address) return blocks[block_num].originalFirstOpcode; } - CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int blockNumber) + CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int block_num) { - return (CompiledCode)blockCodePointers[blockNumber]; + return (CompiledCode)blockCodePointers[block_num]; } //Block linker @@ -351,25 +351,25 @@ bool JitBlock::ContainsAddress(u32 em_address) } } - void JitBlockCache::DestroyBlock(int blocknum, bool invalidate) + void JitBlockCache::DestroyBlock(int block_num, bool invalidate) { - if (blocknum < 0 || blocknum >= num_blocks) + if (block_num < 0 || block_num >= num_blocks) { - PanicAlert("DestroyBlock: Invalid block number %d", blocknum); + PanicAlert("DestroyBlock: Invalid block number %d", block_num); return; } - JitBlock &b = blocks[blocknum]; + JitBlock &b = blocks[block_num]; if (b.invalid) { if (invalidate) - PanicAlert("Invalidating invalid block %d", blocknum); + PanicAlert("Invalidating invalid block %d", block_num); return; } b.invalid = true; #ifdef JIT_UNLIMITED_ICACHE Memory::Write_Opcode_JIT(b.originalAddress, b.originalFirstOpcode); #else - if (Memory::ReadFast32(b.originalAddress) == blocknum) + if (Memory::ReadFast32(b.originalAddress) == block_num) Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress); #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h index e72bb6b4e9..20f9d759bb 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h @@ -130,12 +130,12 @@ public: // This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime. 
void GetBlockNumbersFromAddress(u32 em_address, std::vector *block_numbers); - u32 GetOriginalFirstOp(u32 block_num); - CompiledCode GetCompiledCodeFromBlock(int blockNumber); + u32 GetOriginalFirstOp(int block_num); + CompiledCode GetCompiledCodeFromBlock(int block_num); // DOES NOT WORK CORRECTLY WITH INLINING void InvalidateICache(u32 em_address); - void DestroyBlock(int blocknum, bool invalidate); + void DestroyBlock(int block_num, bool invalidate); // Not currently used //void DestroyBlocksWithFlag(BlockFlag death_flag); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index 8f3e643468..74784e7d30 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -39,17 +39,17 @@ using namespace Gen; -void Jit64::JitClearCA() +void EmuCodeBlock::JitClearCA() { AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 } -void Jit64::JitSetCA() +void EmuCodeBlock::JitSetCA() { OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 } -void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) +void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) { #ifdef _M_IX86 AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); @@ -74,7 +74,17 @@ void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize } } -void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend) +void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset) +{ +#ifdef _M_IX86 + AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); + MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset)); +#else + MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); +#endif +} + +void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend) { if (offset) ADD(32, R(reg), Imm32((u32)offset)); @@ -96,12 +106,12 @@ void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signEx SetJumpTarget(arg2); } -void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset) +void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) { if (accessSize == 8 && reg_value >= 4) { PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); } - BSWAP(accessSize, reg_value); + if (swap) BSWAP(accessSize, reg_value); #ifdef _M_IX86 AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value)); @@ -111,7 +121,7 @@ void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSiz } // Destroys both arg registers -void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset) +void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) { if (offset) ADD(32, R(reg_addr), Imm32(offset)); @@ -125,11 +135,11 @@ void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, } FixupBranch arg2 = J(); SetJumpTarget(argh); - UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0); + UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); SetJumpTarget(arg2); } -void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 
address) +void EmuCodeBlock::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address) { #ifdef _M_X64 MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg); @@ -138,7 +148,7 @@ void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 ad #endif } -void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address) +void EmuCodeBlock::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address) { #ifdef _M_X64 MOV(32, R(RAX), Imm32(address)); @@ -148,18 +158,18 @@ void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address) #endif } -void Jit64::ForceSinglePrecisionS(X64Reg xmm) { +void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) { // Most games don't need these. Zelda requires it though - some platforms get stuck without them. - if (jo.accurateSinglePrecision) + if (jit.jo.accurateSinglePrecision) { CVTSD2SS(xmm, R(xmm)); CVTSS2SD(xmm, R(xmm)); } } -void Jit64::ForceSinglePrecisionP(X64Reg xmm) { +void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) { // Most games don't need these. Zelda requires it though - some platforms get stuck without them. - if (jo.accurateSinglePrecision) + if (jit.jo.accurateSinglePrecision) { CVTPD2PS(xmm, R(xmm)); CVTPS2PD(xmm, R(xmm)); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h new file mode 100644 index 0000000000..4fad3db64a --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h @@ -0,0 +1,41 @@ +// Copyright (C) 2003 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _JITUTIL_H +#define _JITUTIL_H + +#include "x64Emitter.h" + +// Like XCodeBlock but has some utilities for memory access. 
+class EmuCodeBlock : public Gen::XCodeBlock { +public: + void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false); + void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset); + void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true); + void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false); + void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true); + + void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address); + void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address); + void JitClearCA(); + void JitSetCA(); + + void ForceSinglePrecisionS(Gen::X64Reg xmm); + void ForceSinglePrecisionP(Gen::X64Reg xmm); +}; + +#endif // _JITUTIL_H diff --git a/Source/Core/Core/Src/SConscript b/Source/Core/Core/Src/SConscript index c3b03c6e9f..3ea24ce9b0 100644 --- a/Source/Core/Core/Src/SConscript +++ b/Source/Core/Core/Src/SConscript @@ -85,7 +85,8 @@ files = ["ActionReplay.cpp", "PowerPC/Interpreter/Interpreter_LoadStore.cpp", "PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp", "PowerPC/Interpreter/Interpreter_SystemRegisters.cpp", - "PowerPC/Interpreter/Interpreter_Tables.cpp", + "PowerPC/Interpreter/Interpreter_Tables.cpp", + "PowerPC/JitCommon/JitAsmCommon.cpp", "PowerPC/JitCommon/JitCache.cpp", "PowerPC/JitCommon/JitBackpatch.cpp", "PowerPC/JitCommon/Jit_Util.cpp", diff --git a/Source/Core/DolphinWX/Src/BootManager.cpp b/Source/Core/DolphinWX/Src/BootManager.cpp index cec4521124..10bef0d7bd 100644 --- a/Source/Core/DolphinWX/Src/BootManager.cpp +++ b/Source/Core/DolphinWX/Src/BootManager.cpp @@ -122,7 +122,6 @@ bool BootCore(const std::string& _rFilename) // General settings game_ini.Get("Core", "CPUOnThread", &StartUp.bCPUThread, StartUp.bCPUThread); game_ini.Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle); - game_ini.Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers); game_ini.Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF); game_ini.Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack); // Wii settings diff --git a/Source/Core/DolphinWX/Src/ConfigMain.cpp b/Source/Core/DolphinWX/Src/ConfigMain.cpp index 233a5f6937..2abeab78eb 100644 --- a/Source/Core/DolphinWX/Src/ConfigMain.cpp +++ b/Source/Core/DolphinWX/Src/ConfigMain.cpp @@ -60,13 +60,12 @@ EVT_CHECKBOX(ID_INTERFACE_WIIMOTE_LEDS, CConfigMain::CoreSettingsChanged) EVT_CHECKBOX(ID_INTERFACE_WIIMOTE_SPEAKERS, CConfigMain::CoreSettingsChanged) EVT_CHOICE(ID_INTERFACE_LANG, CConfigMain::CoreSettingsChanged) -EVT_CHECKBOX(ID_ALLWAYS_HLE_BS2, CConfigMain::CoreSettingsChanged) +EVT_CHECKBOX(ID_ALWAYS_HLE_BS2, CConfigMain::CoreSettingsChanged) EVT_RADIOBUTTON(ID_RADIOJIT, CConfigMain::CoreSettingsChanged) EVT_RADIOBUTTON(ID_RADIOINT, CConfigMain::CoreSettingsChanged) EVT_CHECKBOX(ID_CPUTHREAD, CConfigMain::CoreSettingsChanged) EVT_CHECKBOX(ID_DSPTHREAD, CConfigMain::CoreSettingsChanged) EVT_CHECKBOX(ID_LOCKTHREADS, CConfigMain::CoreSettingsChanged) -EVT_CHECKBOX(ID_OPTIMIZEQUANTIZERS, CConfigMain::CoreSettingsChanged) EVT_CHECKBOX(ID_IDLESKIP, CConfigMain::CoreSettingsChanged) EVT_CHECKBOX(ID_ENABLECHEATS, CConfigMain::CoreSettingsChanged) EVT_CHOICE(ID_FRAMELIMIT, CConfigMain::CoreSettingsChanged) @@ -142,7 +141,6 @@ void CConfigMain::UpdateGUI() 
CPUThread->Disable(); DSPThread->Disable(); LockThreads->Disable(); - OptimizeQuantizers->Disable(); SkipIdle->Disable(); EnableCheats->Disable(); @@ -222,15 +220,13 @@ void CConfigMain::CreateGUIControls() // Core Settings - Advanced // - AlwaysHLE_BS2 = new wxCheckBox(GeneralPage, ID_ALLWAYS_HLE_BS2, wxT("HLE the IPL (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator); + AlwaysHLE_BS2 = new wxCheckBox(GeneralPage, ID_ALWAYS_HLE_BS2, wxT("HLE the IPL (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator); AlwaysHLE_BS2->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bHLE_BS2); m_RadioJIT = new wxRadioButton(GeneralPage, ID_RADIOJIT, wxT("JIT Recompiler (recommended)")); m_RadioInt = new wxRadioButton(GeneralPage, ID_RADIOINT, wxT("Interpreter (very slow)")); SConfig::GetInstance().m_LocalCoreStartupParameter.bUseJIT ? m_RadioJIT->SetValue(true) : m_RadioInt->SetValue(true); LockThreads = new wxCheckBox(GeneralPage, ID_LOCKTHREADS, wxT("Lock threads to cores"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator); LockThreads->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bLockThreads); - OptimizeQuantizers = new wxCheckBox(GeneralPage, ID_OPTIMIZEQUANTIZERS, wxT("Optimize Quantizers (speedup)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator); - OptimizeQuantizers->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bOptimizeQuantizers); DSPThread = new wxCheckBox(GeneralPage, ID_DSPTHREAD, wxT("DSP on thread (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator); DSPThread->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bDSPThread); @@ -317,7 +313,6 @@ void CConfigMain::CreateGUIControls() sizerCoreType->Add(m_RadioInt, 0, wxALL | wxEXPAND, 5); sbAdvanced->Add(sizerCoreType, 0, wxALL, 5); sbAdvanced->Add(LockThreads, 0, wxALL, 5); - sbAdvanced->Add(OptimizeQuantizers, 0, wxALL, 5); sbAdvanced->Add(DSPThread, 0, wxALL, 5); sCore->Add(sbBasic, 0, wxEXPAND); sCore->AddStretchSpacer(); @@ -690,7 +685,7 @@ void CConfigMain::CoreSettingsChanged(wxCommandEvent& event) case ID_FRAMELIMIT: SConfig::GetInstance().m_Framelimit = (u32)Framelimit->GetSelection(); break; - case ID_ALLWAYS_HLE_BS2: // Core + case ID_ALWAYS_HLE_BS2: // Core SConfig::GetInstance().m_LocalCoreStartupParameter.bHLE_BS2 = AlwaysHLE_BS2->IsChecked(); break; case ID_RADIOJIT: @@ -710,9 +705,6 @@ void CConfigMain::CoreSettingsChanged(wxCommandEvent& event) case ID_LOCKTHREADS: SConfig::GetInstance().m_LocalCoreStartupParameter.bLockThreads = LockThreads->IsChecked(); break; - case ID_OPTIMIZEQUANTIZERS: - SConfig::GetInstance().m_LocalCoreStartupParameter.bOptimizeQuantizers = OptimizeQuantizers->IsChecked(); - break; case ID_IDLESKIP: SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle = SkipIdle->IsChecked(); break; diff --git a/Source/Core/DolphinWX/Src/ConfigMain.h b/Source/Core/DolphinWX/Src/ConfigMain.h index 79da4bdb66..584aec8640 100644 --- a/Source/Core/DolphinWX/Src/ConfigMain.h +++ b/Source/Core/DolphinWX/Src/ConfigMain.h @@ -68,7 +68,6 @@ private: wxCheckBox* CPUThread; wxCheckBox* DSPThread; wxCheckBox* LockThreads; - wxCheckBox* OptimizeQuantizers; wxCheckBox* SkipIdle; wxCheckBox* EnableCheats; @@ -159,13 +158,12 @@ private: ID_PATHSPAGE, ID_PLUGINPAGE, - ID_ALLWAYS_HLE_BS2, + ID_ALWAYS_HLE_BS2, ID_RADIOJIT, ID_RADIOINT, ID_CPUTHREAD, ID_DSPTHREAD, ID_LOCKTHREADS, - ID_OPTIMIZEQUANTIZERS, ID_IDLESKIP, ID_ENABLECHEATS, diff --git a/Source/Core/DolphinWX/Src/ISOProperties.cpp 
b/Source/Core/DolphinWX/Src/ISOProperties.cpp index 3b30c614b1..b6f14eafe6 100644 --- a/Source/Core/DolphinWX/Src/ISOProperties.cpp +++ b/Source/Core/DolphinWX/Src/ISOProperties.cpp @@ -290,7 +290,6 @@ void CISOProperties::CreateGUIControls(bool IsWad) sbCoreOverrides = new wxStaticBoxSizer(wxVERTICAL, m_GameConfig, _("Core")); CPUThread = new wxCheckBox(m_GameConfig, ID_USEDUALCORE, _("Enable Dual Core"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator); SkipIdle = new wxCheckBox(m_GameConfig, ID_IDLESKIP, _("Enable Idle Skipping"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator); - OptimizeQuantizers = new wxCheckBox(m_GameConfig, ID_OPTIMIZEQUANTIZERS, _("Optimize Quantizers"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator); TLBHack = new wxCheckBox(m_GameConfig, ID_TLBHACK, _("TLB Hack"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator); // Wii Console sbWiiOverrides = new wxStaticBoxSizer(wxVERTICAL, m_GameConfig, _("Wii Console")); @@ -347,7 +346,6 @@ void CISOProperties::CreateGUIControls(bool IsWad) sbCoreOverrides->Add(CPUThread, 0, wxEXPAND|wxLEFT, 5); sbCoreOverrides->Add(SkipIdle, 0, wxEXPAND|wxLEFT, 5); sbCoreOverrides->Add(TLBHack, 0, wxEXPAND|wxLEFT, 5); - sbCoreOverrides->Add(OptimizeQuantizers, 0, wxEXPAND|wxLEFT, 5); sbWiiOverrides->Add(EnableProgressiveScan, 0, wxEXPAND|wxLEFT, 5); sbWiiOverrides->Add(EnableWideScreen, 0, wxEXPAND|wxLEFT, 5); sbVideoOverrides->Add(ForceFiltering, 0, wxEXPAND|wxLEFT, 5); @@ -806,11 +804,6 @@ void CISOProperties::LoadGameConfig() else SkipIdle->Set3StateValue(wxCHK_UNDETERMINED); - if (GameIni.Get("Core", "OptimizeQuantizers", &bTemp)) - OptimizeQuantizers->Set3StateValue((wxCheckBoxState)bTemp); - else - OptimizeQuantizers->Set3StateValue(wxCHK_UNDETERMINED); - if (GameIni.Get("Core", "TLBHack", &bTemp)) TLBHack->Set3StateValue((wxCheckBoxState)bTemp); else @@ -896,11 +889,6 @@ bool CISOProperties::SaveGameConfig() else GameIni.Set("Core", "SkipIdle", SkipIdle->Get3StateValue()); - if (OptimizeQuantizers->Get3StateValue() == wxCHK_UNDETERMINED) - GameIni.DeleteKey("Core", "OptimizeQuantizers"); - else - GameIni.Set("Core", "OptimizeQuantizers", OptimizeQuantizers->Get3StateValue()); - if (TLBHack->Get3StateValue() == wxCHK_UNDETERMINED) GameIni.DeleteKey("Core", "TLBHack"); else diff --git a/Source/Core/DolphinWX/Src/ISOProperties.h b/Source/Core/DolphinWX/Src/ISOProperties.h index bee7ad7742..347b2ce637 100644 --- a/Source/Core/DolphinWX/Src/ISOProperties.h +++ b/Source/Core/DolphinWX/Src/ISOProperties.h @@ -81,7 +81,7 @@ class CISOProperties : public wxDialog wxStaticText *OverrideText; // Core - wxCheckBox *CPUThread, *SkipIdle, *OptimizeQuantizers, *TLBHack, *BPHack; + wxCheckBox *CPUThread, *SkipIdle, *TLBHack, *BPHack; // Wii wxCheckBox *EnableProgressiveScan, *EnableWideScreen; // Video @@ -172,7 +172,6 @@ class CISOProperties : public wxDialog ID_RE0FIX, ID_ENABLEPROGRESSIVESCAN, ID_ENABLEWIDESCREEN, - ID_OPTIMIZEQUANTIZERS, ID_EDITCONFIG, ID_EMUSTATE_TEXT, ID_EMUSTATE,
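The register-contract comments in the new JitAsmCommon.h above describe pairedLoadQuantized and pairedStoreQuantized as eight-entry tables of code pointers, selected according to the GQR that a quantized load or store uses (slots 4-7 are filled with the U8/U16/S8/S16 routines in the table setup shown earlier). For readers following the change, here is a minimal standalone C++ sketch of that table-dispatch idea; the *Sketch names, the example GQR value, and the exact bit position of the load-type field are illustrative assumptions, not code from this patch:

#include <cstdint>
#include <cstdio>

typedef void (*PairedLoadFn)(uint32_t address);

// Stand-ins for the quantized-load routines; each would normally read a pair of
// values at the given address and convert them to floats.
static void loadPairedFloatSketch(uint32_t addr) { std::printf("float pair @ %08x\n", (unsigned)addr); }
static void loadPairedU8Sketch(uint32_t addr)    { std::printf("u8 pair    @ %08x\n", (unsigned)addr); }
static void loadPairedU16Sketch(uint32_t addr)   { std::printf("u16 pair   @ %08x\n", (unsigned)addr); }
static void loadPairedS8Sketch(uint32_t addr)    { std::printf("s8 pair    @ %08x\n", (unsigned)addr); }
static void loadPairedS16Sketch(uint32_t addr)   { std::printf("s16 pair   @ %08x\n", (unsigned)addr); }

int main()
{
    // Same slot assignment as the table setup above:
    // 0 = float, 4 = u8, 5 = u16, 6 = s8, 7 = s16; slots 1-3 fall back to float in this sketch.
    PairedLoadFn table[8] = {
        loadPairedFloatSketch, loadPairedFloatSketch, loadPairedFloatSketch, loadPairedFloatSketch,
        loadPairedU8Sketch,    loadPairedU16Sketch,   loadPairedS8Sketch,    loadPairedS16Sketch,
    };

    uint32_t gqr = 0x00070007;          // example GQR value (bit layout assumed for illustration)
    uint32_t ldType = (gqr >> 16) & 7;  // assumed position of the 3-bit load-type field
    table[ldType](0x80001234u);         // one indirect call instead of per-type branching
    return 0;
}

In the emitted JIT code the selection works the same way, except the table entries are raw code pointers (const u8 *) reached through an indirect call, which is why CommonAsmRoutines exposes them as arrays of generated-code addresses rather than C function pointers.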