From e7f49692e875e080f4bca6653c4f6c43e5b4db48 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 20 Aug 2014 10:50:40 -0400 Subject: [PATCH] Core: Clean up body/brace placements in Jit64 and JitCommon --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 8 +- Source/Core/Core/PowerPC/Jit64/Jit.h | 12 +- Source/Core/Core/PowerPC/Jit64/JitAsm.h | 6 +- .../Core/Core/PowerPC/Jit64/JitRegCache.cpp | 29 ++- Source/Core/Core/PowerPC/Jit64/JitRegCache.h | 27 ++- Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 3 +- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 10 +- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 188 ++++++++++++------ .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 121 +++++++---- .../PowerPC/Jit64/Jit_LoadStorePaired.cpp | 7 +- Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp | 17 +- .../Core/PowerPC/JitCommon/JitAsmCommon.cpp | 21 +- .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 8 +- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 8 +- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 12 +- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 87 +++++--- 16 files changed, 397 insertions(+), 167 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index e6d20a0b7e..2894429cd8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -162,7 +162,9 @@ void Jit64::Init() jo.enableBlocklink = false; } else + { jo.enableBlocklink = !Core::g_CoreStartupParameter.bMMU; + } } jo.fpAccurateFcmp = Core::g_CoreStartupParameter.bEnableFPRF; jo.optimizeGatherPipe = true; @@ -435,7 +437,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful // Conditionally add profiling code. - if (Profiler::g_ProfileBlocks) { + if (Profiler::g_ProfileBlocks) + { ADD(32, M(&b->runCount), Imm8(1)); #ifdef _WIN32 b->ticCounter = 0; @@ -617,7 +620,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc //NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str()); } #endif - if (js.skipnext) { + if (js.skipnext) + { js.skipnext = false; i++; // Skip next instruction } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 3ea3ec81ab..95fbcd1f7b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -68,18 +68,22 @@ public: void ClearCache() override; - const u8 *GetDispatcher() { + const u8 *GetDispatcher() + { return asm_routines.dispatcher; } - const CommonAsmRoutines *GetAsmRoutines() override { + + const CommonAsmRoutines *GetAsmRoutines() override + { return &asm_routines; } - const char *GetName() override { + const char *GetName() override + { return "JIT64"; } - // Run! + // Run! void Run() override; void SingleStep() override; diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h index 19679247ec..e3cc4371f7 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h @@ -27,13 +27,15 @@ private: void GenerateCommon(); public: - void Init() { + void Init() + { AllocCodeSpace(8192); Generate(); WriteProtect(); } - void Shutdown() { + void Shutdown() + { FreeCodeSpace(); } }; diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index 28e9de7441..119c41612b 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -52,21 +52,35 @@ void RegCache::Start() void RegCache::Lock(int p1, int p2, int p3, int p4) { regs[p1].locked = true; - if (p2 != 0xFF) regs[p2].locked = true; - if (p3 != 0xFF) regs[p3].locked = true; - if (p4 != 0xFF) regs[p4].locked = true; + + if (p2 != 0xFF) + regs[p2].locked = true; + + if (p3 != 0xFF) + regs[p3].locked = true; + + if (p4 != 0xFF) + regs[p4].locked = true; } // these are x64 reg indices void RegCache::LockX(int x1, int x2, int x3, int x4) { - if (xregs[x1].locked) { + if (xregs[x1].locked) + { PanicAlert("RegCache: x %i already locked!", x1); } + xregs[x1].locked = true; - if (x2 != 0xFF) xregs[x2].locked = true; - if (x3 != 0xFF) xregs[x3].locked = true; - if (x4 != 0xFF) xregs[x4].locked = true; + + if (x2 != 0xFF) + xregs[x2].locked = true; + + if (x3 != 0xFF) + xregs[x3].locked = true; + + if (x4 != 0xFF) + xregs[x4].locked = true; } void RegCache::UnlockAll() @@ -321,6 +335,7 @@ void RegCache::Flush(FlushMode mode) { PanicAlert("Someone forgot to unlock PPC reg %" PRIx64 " (X64 reg %i).", i, RX(i)); } + if (regs[i].away) { if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm()) diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h index d4e984f537..e01da5dc5f 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h @@ -47,23 +47,34 @@ protected: public: RegCache(); - virtual ~RegCache() {} + void Start(); void DiscardRegContentsIfCached(size_t preg); - void SetEmitter(Gen::XEmitter *emitter) {emit = emitter;} + void SetEmitter(Gen::XEmitter *emitter) + { + emit = emitter; + } void FlushR(Gen::X64Reg reg); - void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2) {FlushR(reg); FlushR(reg2);} - void FlushLockX(Gen::X64Reg reg) { + void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2) + { + FlushR(reg); + FlushR(reg2); + } + + void FlushLockX(Gen::X64Reg reg) + { FlushR(reg); LockX(reg); } - void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2) { + void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2) + { FlushR(reg1); FlushR(reg2); LockX(reg1); LockX(reg2); } + void Flush(FlushMode mode = FLUSH_ALL); void Flush(PPCAnalyst::CodeOp *op) {Flush();} int SanityCheck() const; @@ -76,7 +87,11 @@ public: virtual void StoreRegister(size_t preg, Gen::OpArg newLoc) = 0; virtual void LoadRegister(size_t preg, Gen::X64Reg newLoc) = 0; - const Gen::OpArg &R(size_t preg) const {return regs[preg].location;} + const Gen::OpArg &R(size_t preg) const + { + return regs[preg].location; + } + Gen::X64Reg RX(size_t preg) const { if (IsBound(preg)) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 135e0c4f3c..8bef37cb51 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -67,7 +67,8 @@ void Jit64::bx(UGeckoInstruction inst) // If this is not the last instruction of a block, // we will skip the rest process. // Because PPCAnalyst::Flatten() merged the blocks. - if (!js.isLastInstruction) { + if (!js.isLastInstruction) + { return; } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index a35797b80c..7761c636d2 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -136,10 +136,13 @@ void Jit64::fmaddXX(UGeckoInstruction inst) fpr.BindToRegister(d, false); //YES it is necessary to dupe the result :( //TODO : analysis - does the top reg get used? If so, dupe, if not, don't. - if (single_precision) { + if (single_precision) + { ForceSinglePrecisionS(XMM0); MOVDDUP(fpr.RX(d), R(XMM0)); - } else { + } + else + { MOVSD(fpr.RX(d), R(XMM0)); } // SMB checks flags after this op. Let's lie. @@ -159,7 +162,8 @@ void Jit64::fsign(UGeckoInstruction inst) fpr.Lock(b, d); fpr.BindToRegister(d, true, true); MOVSD(XMM0, fpr.R(b)); - switch (inst.SUBOP10) { + switch (inst.SUBOP10) + { case 40: // fnegx PXOR(XMM0, M((void*)&psSignBits2)); break; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 6ee50b1a5c..9ca4dee930 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -137,10 +137,26 @@ void Jit64::ComputeRC(const Gen::OpArg & arg) } } -static u32 Add(u32 a, u32 b) {return a + b;} -static u32 Or (u32 a, u32 b) {return a | b;} -static u32 And(u32 a, u32 b) {return a & b;} -static u32 Xor(u32 a, u32 b) {return a ^ b;} +// Following static functions are used in conjunction with regimmop +static u32 Add(u32 a, u32 b) +{ + return a + b; +} + +static u32 Or(u32 a, u32 b) +{ + return a | b; +} + +static u32 And(u32 a, u32 b) +{ + return a & b; +} + +static u32 Xor(u32 a, u32 b) +{ + return a ^ b; +} void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry) { @@ -196,7 +212,7 @@ void Jit64::reg_imm(UGeckoInstruction inst) u32 d = inst.RD, a = inst.RA, s = inst.RS; switch (inst.OPCD) { - case 14: // addi + case 14: // addi // occasionally used as MOV - emulate, with immediate propagation if (gpr.R(a).IsImm() && d != a && a != 0) { @@ -244,18 +260,36 @@ void Jit64::reg_imm(UGeckoInstruction inst) regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD); } break; - case 24: + case 24: // ori if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop - {NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one. + { + // Make the nop visible in the generated code. not much use but interesting if we see one. + NOP(); + return; + } regimmop(a, s, true, inst.UIMM, Or, &XEmitter::OR); - break; //ori - case 25: regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false); break;//oris - case 28: regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true); break; - case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break; - case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori - case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris - case 12: regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true); break; //addic - case 13: regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true); break; //addic_rc + break; + case 25: // oris + regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false); + break; + case 28: // andi + regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true); + break; + case 29: // andis + regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); + break; + case 26: // xori + regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); + break; + case 27: // xoris + regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); + break; + case 12: // addic + regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true); + break; + case 13: // addic_rc + regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true); + break; default: FALLBACK_IF(true); } @@ -274,20 +308,23 @@ void Jit64::cmpXX(UGeckoInstruction inst) int test_crf = js.next_inst.BI >> 2; // Check if the next instruction is a branch - if it is, merge the two. if (((js.next_inst.OPCD == 16 /* bcx */) || - ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528) /* bcctrx */) || - ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16) /* bclrx */)) && - (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) && - !(js.next_inst.BO & BO_DONT_CHECK_CONDITION)) { + ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528) /* bcctrx */) || + ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16) /* bclrx */)) && + (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) && + !(js.next_inst.BO & BO_DONT_CHECK_CONDITION)) + { // Looks like a decent conditional branch that we can merge with. // It only test CR, not CTR. - if (test_crf == crf) { + if (test_crf == crf) + { merge_branch = true; } } OpArg comparand; bool signedCompare; - if (inst.OPCD == 31) { + if (inst.OPCD == 31) + { // cmp / cmpl gpr.Lock(a, b); comparand = gpr.R(b); @@ -402,6 +439,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) MOV(64, R(RAX), Imm32((s32)gpr.R(a).offset)); else MOVSX(64, 32, RAX, gpr.R(a)); + if (!comparand.IsImm()) { MOVSX(64, 32, ABI_PARAM1, comparand); @@ -419,6 +457,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) MOV(32, R(ABI_PARAM1), comparand); else MOVZX(64, 32, ABI_PARAM1, comparand); + comparand = R(ABI_PARAM1); } SUB(64, R(RAX), comparand); @@ -466,6 +505,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) { if (js.next_inst.LK) MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + MOV(32, R(EAX), M(&CTR)); AND(32, R(EAX), Imm32(0xFFFFFFFC)); WriteExitDestInEAX(); @@ -474,8 +514,10 @@ void Jit64::cmpXX(UGeckoInstruction inst) { MOV(32, R(EAX), M(&LR)); AND(32, R(EAX), Imm32(0xFFFFFFFC)); + if (js.next_inst.LK) MOV(32, M(&LR), Imm32(js.compilerPC + 4)); + WriteExitDestInEAX(); } else @@ -506,22 +548,23 @@ void Jit64::boolX(UGeckoInstruction inst) if (gpr.R(s).IsImm() && gpr.R(b).IsImm()) { - if (inst.SUBOP10 == 28) /* andx */ + if (inst.SUBOP10 == 28) // andx gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (u32)gpr.R(b).offset); - else if (inst.SUBOP10 == 476) /* nandx */ + else if (inst.SUBOP10 == 476) // nandx gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset & (u32)gpr.R(b).offset)); - else if (inst.SUBOP10 == 60) /* andcx */ + else if (inst.SUBOP10 == 60) // andcx gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (~(u32)gpr.R(b).offset)); - else if (inst.SUBOP10 == 444) /* orx */ + else if (inst.SUBOP10 == 444) // orx gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (u32)gpr.R(b).offset); - else if (inst.SUBOP10 == 124) /* norx */ + else if (inst.SUBOP10 == 124) // norx gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset | (u32)gpr.R(b).offset)); - else if (inst.SUBOP10 == 412) /* orcx */ + else if (inst.SUBOP10 == 412) // orcx gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (~(u32)gpr.R(b).offset)); - else if (inst.SUBOP10 == 316) /* xorx */ + else if (inst.SUBOP10 == 316) // xorx gpr.SetImmediate32(a, (u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset); - else if (inst.SUBOP10 == 284) /* eqvx */ + else if (inst.SUBOP10 == 284) // eqvx gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset)); + if (inst.Rc) { ComputeRC(gpr.R(a)); @@ -575,16 +618,16 @@ void Jit64::boolX(UGeckoInstruction inst) OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s)); gpr.BindToRegister(a, true, true); - if (inst.SUBOP10 == 28) /* andx */ + if (inst.SUBOP10 == 28) // andx { AND(32, gpr.R(a), operand); } - else if (inst.SUBOP10 == 476) /* nandx */ + else if (inst.SUBOP10 == 476) // nandx { AND(32, gpr.R(a), operand); NOT(32, gpr.R(a)); } - else if (inst.SUBOP10 == 60) /* andcx */ + else if (inst.SUBOP10 == 60) // andcx { if (a == b) { @@ -598,16 +641,16 @@ void Jit64::boolX(UGeckoInstruction inst) AND(32, gpr.R(a), R(EAX)); } } - else if (inst.SUBOP10 == 444) /* orx */ + else if (inst.SUBOP10 == 444) // orx { OR(32, gpr.R(a), operand); } - else if (inst.SUBOP10 == 124) /* norx */ + else if (inst.SUBOP10 == 124) // norx { OR(32, gpr.R(a), operand); NOT(32, gpr.R(a)); } - else if (inst.SUBOP10 == 412) /* orcx */ + else if (inst.SUBOP10 == 412) // orcx { if (a == b) { @@ -621,11 +664,11 @@ void Jit64::boolX(UGeckoInstruction inst) OR(32, gpr.R(a), R(EAX)); } } - else if (inst.SUBOP10 == 316) /* xorx */ + else if (inst.SUBOP10 == 316) // xorx { XOR(32, gpr.R(a), operand); } - else if (inst.SUBOP10 == 284) /* eqvx */ + else if (inst.SUBOP10 == 284) // eqvx { NOT(32, gpr.R(a)); XOR(32, gpr.R(a), operand); @@ -643,46 +686,46 @@ void Jit64::boolX(UGeckoInstruction inst) gpr.Lock(a,s,b); gpr.BindToRegister(a, false, true); - if (inst.SUBOP10 == 28) /* andx */ + if (inst.SUBOP10 == 28) // andx { MOV(32, gpr.R(a), gpr.R(s)); AND(32, gpr.R(a), gpr.R(b)); } - else if (inst.SUBOP10 == 476) /* nandx */ + else if (inst.SUBOP10 == 476) // nandx { MOV(32, gpr.R(a), gpr.R(s)); AND(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); } - else if (inst.SUBOP10 == 60) /* andcx */ + else if (inst.SUBOP10 == 60) // andcx { MOV(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); AND(32, gpr.R(a), gpr.R(s)); } - else if (inst.SUBOP10 == 444) /* orx */ + else if (inst.SUBOP10 == 444) // orx { MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); } - else if (inst.SUBOP10 == 124) /* norx */ + else if (inst.SUBOP10 == 124) // norx { MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); } - else if (inst.SUBOP10 == 412) /* orcx */ + else if (inst.SUBOP10 == 412) // orcx { MOV(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); OR(32, gpr.R(a), gpr.R(s)); } - else if (inst.SUBOP10 == 316) /* xorx */ + else if (inst.SUBOP10 == 316) // xorx { MOV(32, gpr.R(a), gpr.R(s)); XOR(32, gpr.R(a), gpr.R(b)); } - else if (inst.SUBOP10 == 284) /* eqvx */ + else if (inst.SUBOP10 == 284) // eqvx { MOV(32, gpr.R(a), gpr.R(s)); NOT(32, gpr.R(a)); @@ -992,13 +1035,25 @@ void Jit64::mulli(UGeckoInstruction inst) else if ((imm & (imm - 1)) == 0) { u32 shift = 0; - if (imm & 0xFFFF0000) shift |= 16; - if (imm & 0xFF00FF00) shift |= 8; - if (imm & 0xF0F0F0F0) shift |= 4; - if (imm & 0xCCCCCCCC) shift |= 2; - if (imm & 0xAAAAAAAA) shift |= 1; + + if (imm & 0xFFFF0000) + shift |= 16; + + if (imm & 0xFF00FF00) + shift |= 8; + + if (imm & 0xF0F0F0F0) + shift |= 4; + + if (imm & 0xCCCCCCCC) + shift |= 2; + + if (imm & 0xAAAAAAAA) + shift |= 1; + if (d != a) MOV(32, gpr.R(d), gpr.R(a)); + if (shift) SHL(32, gpr.R(d), Imm8(shift)); } @@ -1047,13 +1102,25 @@ void Jit64::mullwx(UGeckoInstruction inst) else if ((imm & (imm - 1)) == 0 && !inst.OE) { u32 shift = 0; - if (imm & 0xFFFF0000) shift |= 16; - if (imm & 0xFF00FF00) shift |= 8; - if (imm & 0xF0F0F0F0) shift |= 4; - if (imm & 0xCCCCCCCC) shift |= 2; - if (imm & 0xAAAAAAAA) shift |= 1; + + if (imm & 0xFFFF0000) + shift |= 16; + + if (imm & 0xFF00FF00) + shift |= 8; + + if (imm & 0xF0F0F0F0) + shift |= 4; + + if (imm & 0xCCCCCCCC) + shift |= 2; + + if (imm & 0xAAAAAAAA) + shift |= 1; + if (d != src) MOV(32, gpr.R(d), gpr.R(src)); + if (shift) SHL(32, gpr.R(d), Imm8(shift)); } @@ -1554,6 +1621,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { ROL(32, gpr.R(a), Imm8(inst.SH)); } + if (!(inst.MB==0 && inst.ME==31)) { AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME))); @@ -1604,10 +1672,12 @@ void Jit64::rlwimix(UGeckoInstruction inst) { MOV(32, gpr.R(a), gpr.R(s)); } + if (inst.SH) { ROL(32, gpr.R(a), Imm8(inst.SH)); } + if (inst.Rc) { ComputeRC(gpr.R(a)); @@ -1637,6 +1707,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) AND(32, R(EAX), Imm32(mask)); XOR(32, gpr.R(a), R(EAX)); } + if (inst.Rc) ComputeRC(gpr.R(a)); } @@ -1700,6 +1771,7 @@ void Jit64::negx(UGeckoInstruction inst) { ComputeRC(gpr.R(d)); } + if (inst.OE) { GenerateConstantOverflow(gpr.R(d).offset == 0x80000000); @@ -1821,7 +1893,9 @@ void Jit64::srawx(UGeckoInstruction inst) SetJumpTarget(nocarry); gpr.UnlockAll(); gpr.UnlockAllX(); - if (inst.Rc) { + + if (inst.Rc) + { ComputeRC(gpr.R(a)); } } @@ -1888,8 +1962,10 @@ void Jit64::cntlzwx(UGeckoInstruction inst) u32 mask = 0x80000000; u32 i = 0; for (; i < 32; i++, mask >>= 1) + { if ((u32)gpr.R(s).offset & mask) break; + } gpr.SetImmediate32(a, i); } else diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 484c072166..735b163bbf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -30,26 +30,26 @@ void Jit64::lXXx(UGeckoInstruction inst) bool signExtend = false; switch (inst.OPCD) { - case 32: /* lwz */ - case 33: /* lwzu */ + case 32: // lwz + case 33: // lwzu accessSize = 32; signExtend = false; break; - case 34: /* lbz */ - case 35: /* lbzu */ + case 34: // lbz + case 35: // lbzu accessSize = 8; signExtend = false; break; - case 40: /* lhz */ - case 41: /* lhzu */ + case 40: // lhz + case 41: // lhzu accessSize = 16; signExtend = false; break; - case 42: /* lha */ - case 43: /* lhau */ + case 42: // lha + case 43: // lhau accessSize = 16; signExtend = true; break; @@ -57,25 +57,25 @@ void Jit64::lXXx(UGeckoInstruction inst) case 31: switch (inst.SUBOP10) { - case 23: /* lwzx */ - case 55: /* lwzux */ + case 23: // lwzx + case 55: // lwzux accessSize = 32; signExtend = false; break; - case 87: /* lbzx */ - case 119: /* lbzux */ + case 87: // lbzx + case 119: // lbzux accessSize = 8; signExtend = false; break; - case 279: /* lhzx */ - case 311: /* lhzux */ + case 279: // lhzx + case 311: // lhzux accessSize = 16; signExtend = false; break; - case 343: /* lhax */ - case 375: /* lhaux */ + case 343: // lhax + case 375: // lhaux accessSize = 16; signExtend = true; break; @@ -96,11 +96,11 @@ void Jit64::lXXx(UGeckoInstruction inst) // ... maybe the throttle one already do that :p // if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping()) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && - inst.OPCD == 32 && - (inst.hex & 0xFFFF0000) == 0x800D0000 && - (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || - (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && - Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) + inst.OPCD == 32 && + (inst.hex & 0xFFFF0000) == 0x800D0000 && + (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || + (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && + Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) { // TODO(LinesPrower): // - Rewrite this! @@ -259,10 +259,18 @@ void Jit64::stX(UGeckoInstruction inst) int accessSize; switch (inst.OPCD & ~1) { - case 36: accessSize = 32; break; //stw - case 44: accessSize = 16; break; //sth - case 38: accessSize = 8; break; //stb - default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; + case 36: // stw + accessSize = 32; + break; + case 44: // sth + accessSize = 16; + break; + case 38: // stb + accessSize = 8; + break; + default: + _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); + return; } if ((a == 0) || gpr.R(a).IsImm()) @@ -273,18 +281,27 @@ void Jit64::stX(UGeckoInstruction inst) addr += offset; if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) { - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write + // Helps external systems know which instruction triggered the write + MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + gpr.FlushLockX(ABI_PARAM1); MOV(32, R(ABI_PARAM1), gpr.R(s)); if (update) gpr.SetImmediate32(a, addr); + + // No need to protect these, they don't touch any state + // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat switch (accessSize) { - // No need to protect these, they don't touch any state - // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat - case 8: CALL((void *)asm_routines.fifoDirectWrite8); break; - case 16: CALL((void *)asm_routines.fifoDirectWrite16); break; - case 32: CALL((void *)asm_routines.fifoDirectWrite32); break; + case 8: + CALL((void *)asm_routines.fifoDirectWrite8); + break; + case 16: + CALL((void *)asm_routines.fifoDirectWrite16); + break; + case 32: + CALL((void *)asm_routines.fifoDirectWrite32); + break; } js.fifoBytesThisBlock += accessSize >> 3; gpr.UnlockAllX(); @@ -300,14 +317,22 @@ void Jit64::stX(UGeckoInstruction inst) } else { - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write + // Helps external systems know which instruction triggered the write + MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + u32 registersInUse = RegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr); break; - case 16: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr); break; - case 8: ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); break; + case 32: + ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr); + break; + case 16: + ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr); + break; + case 8: + ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); + break; } ABI_PopRegistersAndAdjustStack(registersInUse, false); if (update) @@ -359,17 +384,29 @@ void Jit64::stXx(UGeckoInstruction inst) ADD(32, gpr.R(a), gpr.R(b)); MOV(32, R(EDX), gpr.R(a)); MEMCHECK_END - } else { + } + else + { MOV(32, R(EDX), gpr.R(a)); ADD(32, R(EDX), gpr.R(b)); } + int accessSize; - switch (inst.SUBOP10 & ~32) { - case 151: accessSize = 32; break; - case 407: accessSize = 16; break; - case 215: accessSize = 8; break; - default: PanicAlert("stXx: invalid access size"); - accessSize = 0; break; + switch (inst.SUBOP10 & ~32) + { + case 151: + accessSize = 32; + break; + case 407: + accessSize = 16; + break; + case 215: + accessSize = 8; + break; + default: + PanicAlert("stXx: invalid access size"); + accessSize = 0; + break; } MOV(32, R(ECX), gpr.R(s)); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index fbff119cb7..1129d5e833 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -47,12 +47,15 @@ void Jit64::psq_st(UGeckoInstruction inst) MOVZX(32, 8, EDX, R(AL)); // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register! - if (inst.W) { + if (inst.W) + { // One value PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. CVTSD2SS(XMM0, fpr.R(s)); CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized)); - } else { + } + else + { // Pair of values CVTPD2PS(XMM0, fpr.R(s)); CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp index 18bb56ebb9..6934f56d42 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp @@ -156,12 +156,21 @@ void Jit64::ps_arith(UGeckoInstruction inst) switch (inst.SUBOP5) { - case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div - case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub - case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add - case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul + case 18: // div + tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); + break; + case 20: // sub + tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); + break; + case 21: // add + tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); + break; + case 25: // mul + tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); + break; default: _assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!"); + break; } } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 6e8a21ebfa..5b74980e0d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -247,13 +247,16 @@ void CommonAsmRoutines::GenQuantizedSingleStores() SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); /* - if (cpu_info.bSSSE3) { + if (cpu_info.bSSSE3) + { PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); // TODO: SafeWriteFloat MOVSS(M(&psTemp[0]), XMM0); MOV(32, R(EAX), M(&psTemp[0])); SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - } else { + } + else + { MOVSS(M(&psTemp[0]), XMM0); MOV(32, R(EAX), M(&psTemp[0])); SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); @@ -320,10 +323,13 @@ void CommonAsmRoutines::GenQuantizedLoads() UD2(); const u8* loadPairedFloatTwo = AlignCode4(); - if (cpu_info.bSSSE3) { + if (cpu_info.bSSSE3) + { MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); - } else { + } + else + { LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0)); ROL(64, R(RCX), Imm8(32)); MOVQ_xmm(XMM0, R(RCX)); @@ -331,11 +337,14 @@ void CommonAsmRoutines::GenQuantizedLoads() RET(); const u8* loadPairedFloatOne = AlignCode4(); - if (cpu_info.bSSSE3) { + if (cpu_info.bSSSE3) + { MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); PSHUFB(XMM0, M((void *)pbswapShuffle1x4)); UNPCKLPS(XMM0, M((void*)m_one)); - } else { + } + else + { LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0)); MOVD_xmm(XMM0, R(RCX)); UNPCKLPS(XMM0, M((void*)m_one)); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index 047901ad46..fcde1aebe5 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -21,7 +21,8 @@ using namespace Gen; extern u8 *trampolineCodePtr; -static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) { +static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) +{ u64 code_addr = (u64)codePtr; disassembler disasm; char disbuf[256]; @@ -61,9 +62,10 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re if (addrReg != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); - if (info.displacement) { + + if (info.displacement) ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); - } + ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index dbf7b69ff6..2b927ba0d9 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -135,9 +135,9 @@ using namespace Gen; { // check if any endpoint is inside the other range if ((s1 >= s2 && s1 <= e2) || - (e1 >= s2 && e1 <= e2) || - (s2 >= s1 && s2 <= e1) || - (e2 >= s1 && e2 <= e1)) + (e1 >= s2 && e1 <= e2) || + (s2 >= s1 && s2 <= e1) || + (e2 >= s1 && e2 <= e1)) return true; else return false; @@ -360,11 +360,13 @@ using namespace Gen; } } } + void JitBlockCache::WriteLinkBlock(u8* location, const u8* address) { XEmitter emit(location); emit.JMP(address, true); } + void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) { XEmitter emit((u8 *)location); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 2b0db88c64..0d711dcfa2 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -42,7 +42,8 @@ struct JitBlock bool invalid; - struct LinkData { + struct LinkData + { u8 *exitPtrs; // to be able to rewrite the exit jum u32 exitAddress; bool linkStatus; // is it already linked? @@ -81,18 +82,22 @@ public: m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]); ClearAll(); } + void Set(u32 bit) { m_valid_block[bit / 32] |= 1u << (bit % 32); } + void Clear(u32 bit) { m_valid_block[bit / 32] &= ~(1u << (bit % 32)); } + void ClearAll() { memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS); } + bool Test(u32 bit) { return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0; @@ -125,7 +130,10 @@ class JitBaseBlockCache public: JitBaseBlockCache() : blockCodePointers(nullptr), blocks(nullptr), num_blocks(0), - iCache(nullptr), iCacheEx(nullptr), iCacheVMEM(nullptr) {} + iCache(nullptr), iCacheEx(nullptr), iCacheVMEM(nullptr) + { + } + int AllocateBlock(u32 em_address); void FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr); diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index cc233846a5..9491187cdb 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -77,7 +77,8 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac // offsets with the wrong sign, so whatever. Since the original code // *could* try to wrap an address around, however, this is the correct // place to address the issue.) - if ((u32) offset >= 0x1000) { + if ((u32) offset >= 0x1000) + { LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset)); opAddress = R(reg_value); offset = 0; @@ -186,7 +187,9 @@ private: // then mask, then sign extend if needed (1 instr vs. 2/3). u32 all_ones = (1ULL << sbits) - 1; if ((all_ones & mask) == all_ones) + { MoveOpArgToReg(sbits, MDisp(EAX, 0)); + } else { m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0)); @@ -342,10 +345,18 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 64: ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc); break; - case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); break; - case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); break; - case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break; + case 64: + ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc); + break; + case 32: + ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); + break; + case 16: + ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); + break; + case 8: + ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); + break; } ABI_PopRegistersAndAdjustStack(registersInUse, false); @@ -373,11 +384,12 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) { - u8 *result; - if (accessSize == 8 && reg_value >= 4) { + if (accessSize == 8 && reg_value >= 4) + { PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); } - result = GetWritableCodePtr(); + + u8* result = GetWritableCodePtr(); OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset); if (swap) { @@ -396,6 +408,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc { MOV(accessSize, dest, R(reg_value)); } + return result; } @@ -450,10 +463,18 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); switch (accessSize) { - case 64: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); break; - case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break; - case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break; - case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break; + case 64: + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); + break; + case 32: + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); + break; + case 16: + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); + break; + case 8: + ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); + break; } ABI_PopRegistersAndAdjustStack(registersInUse, noProlog); FixupBranch exit = J(); @@ -478,7 +499,8 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 a MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg)); } -void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) { +void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) +{ // Most games don't need these. Zelda requires it though - some platforms get stuck without them. if (jit->jo.accurateSinglePrecision) { @@ -487,7 +509,8 @@ void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) { } } -void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) { +void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) +{ // Most games don't need these. Zelda requires it though - some platforms get stuck without them. if (jit->jo.accurateSinglePrecision) { @@ -600,10 +623,13 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) MOVSD(XMM1, R(src)); FLD(64, M(&temp64)); CCFlags cond; - if (cpu_info.bSSE4_1) { + if (cpu_info.bSSE4_1) + { PTEST(XMM1, M((void *)&double_exponent)); cond = CC_NC; - } else { + } + else + { // emulate PTEST; checking FPU flags is incorrect because the NaN bits // are sticky (persist between instructions) MOVSD(XMM0, M((void *)&double_exponent)); @@ -619,9 +645,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) PANDN(XMM1, M((void *)&double_qnan_bit)); PSRLQ(XMM1, 29); - if (cpu_info.bAVX) { + if (cpu_info.bAVX) + { VPANDN(XMM0, XMM1, R(XMM0)); - } else { + } + else + { PANDN(XMM1, R(XMM0)); MOVSS(XMM0, R(XMM1)); } @@ -633,19 +662,26 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr) { - if (src_is_gpr) { + if (src_is_gpr) + { MOV(32, M(&temp32), R(src)); MOVD_xmm(XMM1, R(src)); - } else { + } + else + { MOVSS(M(&temp32), src); MOVSS(R(XMM1), src); } + FLD(32, M(&temp32)); CCFlags cond; - if (cpu_info.bSSE4_1) { + if (cpu_info.bSSE4_1) + { PTEST(XMM1, M((void *)&single_exponent)); cond = CC_NC; - } else { + } + else + { // emulate PTEST; checking FPU flags is incorrect because the NaN bits // are sticky (persist between instructions) MOVSS(XMM0, M((void *)&single_exponent)); @@ -661,9 +697,12 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr PANDN(XMM1, M((void *)&single_qnan_bit)); PSLLQ(XMM1, 29); - if (cpu_info.bAVX) { + if (cpu_info.bAVX) + { VPANDN(dst, XMM1, R(dst)); - } else { + } + else + { PANDN(XMM1, R(dst)); MOVSD(dst, R(XMM1)); }