JIT: Clean up float loads and stores.

Less code is good, and this should make future changes to memory handling easier.

commit 06864e9fee (parent 07da9cbcf4), dolphin-emu/dolphin
@@ -1286,9 +1286,7 @@ void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) {
 }

 void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
-	if (arg.IsSimpleReg())
-		PanicAlert("Emitter: MOVQ_xmm doesn't support single registers as destination");
-	if (src > 7)
+	if (src > 7 || arg.IsSimpleReg())
 	{
 		// Alternate encoding
 		// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
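The relaxed guard is what makes register destinations legal at all: later in this same commit, stfd moves a double's bits out of an XMM register with MOVQ_xmm(R(RAX), fpr.RX(s)). A standalone sketch of the data movement MOVQ r64, xmm performs, using portable SSE2 intrinsics rather than Dolphin's emitter (the intrinsic-based modeling is mine, not from the diff):

	#include <cstdint>
	#include <cstdio>
	#include <emmintrin.h>

	int main()
	{
		// MOVQ r64, xmm copies the low 64 bits of an XMM register into a
		// general-purpose register; _mm_cvtsi128_si64 is the SSE2 equivalent.
		__m128d v = _mm_set_sd(1.5); // low lane holds the double 1.5
		uint64_t bits = (uint64_t)_mm_cvtsi128_si64(_mm_castpd_si128(v));
		printf("%016llx\n", (unsigned long long)bits); // 3ff8000000000000
		return 0;
	}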
@@ -88,7 +88,7 @@ static GekkoOPTemplate primarytable[] =
 	{51, &Jit64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},

 	{52, &Jit64::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
-	{53, &Jit64::stfs}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
+	{53, &Jit64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
 	{54, &Jit64::stfd}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
 	{55, &Jit64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},

@@ -2,9 +2,6 @@
 // Licensed under GPLv2
 // Refer to the license.txt file included.

-// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
-// Should give a very noticeable speed boost to paired single heavy code.
-
 #include "Common/Common.h"
 #include "Common/CPUDetect.h"

@@ -12,20 +9,8 @@
 #include "Core/PowerPC/Jit64/JitAsm.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"

-namespace {
-
-// pshufb todo: MOVQ
-const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
-const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
-
-u64 GC_ALIGNED16(temp64);
-
-}
-
 // TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
 // and pshufb could help a lot.
-// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves.

 void Jit64::lfs(UGeckoInstruction inst)
 {
@@ -40,12 +25,11 @@ void Jit64::lfs(UGeckoInstruction inst)

 	SafeLoadToReg(EAX, gpr.R(a), 32, offset, RegistersInUse(), false);

-	MEMCHECK_START
-
 	fpr.Lock(d);
-	fpr.BindToRegister(d, false);
-	ConvertSingleToDouble(fpr.RX(d), EAX, true);
+	fpr.BindToRegister(d, js.memcheck);

+	MEMCHECK_START
+	ConvertSingleToDouble(fpr.RX(d), EAX, true);
 	MEMCHECK_END

 	fpr.UnlockAll();
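The reordering above binds the destination FPR before the MEMCHECK region so that only the conversion needs to be guarded. For reference, a minimal standalone model of what the guarded step computes: lfs widens the loaded single to the double kept in the FPR (roughly CVTSS2SD; PPC-specific rounding subtleties ignored):

	#include <cstdint>
	#include <cstring>
	#include <cstdio>

	// lfs in this scheme: SafeLoadToReg fetches and byteswaps a 32-bit word
	// into EAX, then ConvertSingleToDouble widens those float bits to the
	// double kept in the PPC FPR. Portable model of that last step:
	double SingleBitsToDouble(uint32_t bits)
	{
		float f;
		std::memcpy(&f, &bits, sizeof(f));
		return (double)f; // roughly CVTSS2SD in the emitted code
	}

	int main()
	{
		printf("%g\n", SingleBitsToDouble(0x3F800000u)); // 1
		return 0;
	}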
@@ -56,61 +40,23 @@ void Jit64::lfd(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITLoadStoreFloatingOff);
-	FALLBACK_IF(js.memcheck || !inst.RA);
+	FALLBACK_IF(!inst.RA);

 	int d = inst.RD;
 	int a = inst.RA;

 	s32 offset = (s32)(s16)inst.SIMM_16;
-	gpr.FlushLockX(ABI_PARAM1);
-	gpr.Lock(a);
-	MOV(32, R(ABI_PARAM1), gpr.R(a));
-	// TODO - optimize. This has to load the previous value - upper double should stay unmodified.
+
+	SafeLoadToReg(RAX, gpr.R(a), 64, offset, RegistersInUse(), false);
+
 	fpr.Lock(d);
 	fpr.BindToRegister(d, true);
-	X64Reg xd = fpr.RX(d);

-	if (cpu_info.bSSSE3)
-	{
-#if _M_X86_64
-		MOVQ_xmm(XMM0, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
-#else
-		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
-		MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
-#endif
-		PSHUFB(XMM0, M((void *)bswapShuffle1x8Dupe));
-		MOVSD(xd, R(XMM0));
-	} else {
-#if _M_X86_64
-		LoadAndSwap(64, EAX, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
-		MOV(64, M(&temp64), R(EAX));
-
-		MEMCHECK_START
-
-		MOVSD(XMM0, M(&temp64));
-		MOVSD(xd, R(XMM0));
-
-		MEMCHECK_END
-#else
-		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
-		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
-		BSWAP(32, EAX);
-		MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));
-
-		MEMCHECK_START
-
-		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
-		BSWAP(32, EAX);
-		MOV(32, M(&temp64), R(EAX));
-		MOVSD(XMM0, M(&temp64));
-		MOVSD(xd, R(XMM0));
+	MEMCHECK_START
+	MOVQ_xmm(XMM0, R(RAX));
+	MOVSD(fpr.RX(d), R(XMM0));
+	MEMCHECK_END

-		MEMCHECK_END
-#endif
-	}
-
-	gpr.UnlockAll();
-	gpr.UnlockAllX();
 	fpr.UnlockAll();
 }
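The rewritten lfd replaces both the SSSE3 and non-SSSE3 paths with one byteswapped 64-bit load followed by a bit-for-bit move into the FPR. A standalone model of that sequence (assumes a GCC/Clang __builtin_bswap64 and big-endian guest memory, as in the diff):

	#include <cstdint>
	#include <cstring>
	#include <cstdio>

	// Model of the new lfd path: SafeLoadToReg does one byteswapped 64-bit
	// load into RAX; MOVQ_xmm/MOVSD then move those bits into the FPR.
	double LoadGuestDouble(const uint8_t* guest_mem)
	{
		uint64_t raw;
		std::memcpy(&raw, guest_mem, sizeof(raw));
		uint64_t bits = __builtin_bswap64(raw); // guest memory is big-endian
		double d;
		std::memcpy(&d, &bits, sizeof(d));
		return d;
	}

	int main()
	{
		const uint8_t one_be[8] = {0x3F, 0xF0, 0, 0, 0, 0, 0, 0}; // 1.0, big-endian
		printf("%g\n", LoadGuestDouble(one_be)); // 1
		return 0;
	}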
@@ -119,146 +65,49 @@ void Jit64::stfd(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITLoadStoreFloatingOff);
-	FALLBACK_IF(js.memcheck || !inst.RA);
+	FALLBACK_IF(!inst.RA);

 	int s = inst.RS;
 	int a = inst.RA;

-	u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
-	if (Core::g_CoreStartupParameter.bMMU ||
-		Core::g_CoreStartupParameter.bTLBHack) {
-		mem_mask |= Memory::ADDR_MASK_MEM1;
-	}
-#ifdef ENABLE_MEM_CHECK
-	if (Core::g_CoreStartupParameter.bEnableDebugging)
-	{
-		mem_mask |= Memory::EXRAM_MASK;
-	}
-#endif
-
 	gpr.FlushLockX(ABI_PARAM1);
 	gpr.Lock(a);
 	fpr.Lock(s);
-	gpr.BindToRegister(a, true, false);
 	MOV(32, R(ABI_PARAM1), gpr.R(a));

+	if (fpr.R(s).IsSimpleReg())
+		MOVQ_xmm(R(RAX), fpr.RX(s));
+	else
+		MOV(64, R(RAX), fpr.R(s));
+
 	s32 offset = (s32)(s16)inst.SIMM_16;
-	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
-	TEST(32, R(ABI_PARAM1), Imm32(mem_mask));
-	FixupBranch safe = J_CC(CC_NZ);
+	SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, RegistersInUse());

-	// Fast routine
-	if (cpu_info.bSSSE3) {
-		MOVAPD(XMM0, fpr.R(s));
-		PSHUFB(XMM0, M((void*)bswapShuffle1x8));
-#if _M_X86_64
-		MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, 0), XMM0);
-#else
-		AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
-		MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base), XMM0);
-#endif
-	} else {
-		MOVAPD(XMM0, fpr.R(s));
-		MOVD_xmm(R(EAX), XMM0);
-		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);
-
-		PSRLQ(XMM0, 32);
-		MOVD_xmm(R(EAX), XMM0);
-		UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
-	}
-	FixupBranch exit = J(true);
-	SetJumpTarget(safe);
-
-	// Safe but slow routine
-	MOVAPD(XMM0, fpr.R(s));
-	PSRLQ(XMM0, 32);
-	MOVD_xmm(R(EAX), XMM0);
-	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));
-
-	MOVAPD(XMM0, fpr.R(s));
-	MOVD_xmm(R(EAX), XMM0);
-	LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
-	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());
-
-	SetJumpTarget(exit);
-
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
 	fpr.UnlockAll();
 }
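The IsSimpleReg() branch above covers the two places the double's bits can live before the single 64-bit SafeWriteRegToReg. A sketch with hypothetical stand-ins for the register-cache state (FprSlot and its fields are illustrative, not Dolphin types):

	#include <cstdint>

	// The double's bits reach RAX either straight from an XMM register or
	// from the register cache's spill slot in memory; either way one
	// 64-bit byteswapping store follows.
	struct FprSlot {
		bool in_xmm;       // models fpr.R(s).IsSimpleReg()
		uint64_t xmm_bits; // low lane of the XMM register
		uint64_t mem_bits; // spill slot contents
	};

	uint64_t FetchStfdSource(const FprSlot& s)
	{
		return s.in_xmm ? s.xmm_bits  // MOVQ_xmm(R(RAX), fpr.RX(s))
		                : s.mem_bits; // MOV(64, R(RAX), fpr.R(s))
	}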

-// In Release on 32bit build,
-// this seemed to cause a problem with PokePark2
-// at start after talking to first pokemon,
-// you run and smash a box, then he goes on about
-// following him and then you cant do anything.
-// I have enabled interpreter for this function
-// in the mean time.
-// Parlane
 void Jit64::stfs(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITLoadStoreFloatingOff);
+	FALLBACK_IF(!inst.RA);

-	bool update = inst.OPCD & 1;
 	int s = inst.RS;
 	int a = inst.RA;
 	s32 offset = (s32)(s16)inst.SIMM_16;

-	FALLBACK_IF(!a || update);
-
 	fpr.BindToRegister(s, true, false);
 	ConvertDoubleToSingle(XMM0, fpr.RX(s));

-	if (gpr.R(a).IsImm())
-	{
-		u32 addr = (u32)(gpr.R(a).offset + offset);
-		if (Memory::IsRAMAddress(addr))
-		{
-			if (cpu_info.bSSSE3) {
-				PSHUFB(XMM0, M((void *)bswapShuffle1x4));
-				WriteFloatToConstRamAddress(XMM0, addr);
-				return;
-			}
-		}
-		else if (addr == 0xCC008000)
-		{
-			// Float directly to write gather pipe! Fun!
-			CALL((void*)asm_routines.fifoDirectWriteFloat);
-			// TODO
-			js.fifoBytesThisBlock += 4;
-			return;
-		}
-	}
-
-	gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
-	gpr.Lock(a);
-	MOV(32, R(ABI_PARAM2), gpr.R(a));
-	ADD(32, R(ABI_PARAM2), Imm32(offset));
-	if (update && offset)
-	{
-		// We must flush immediate values from the following register because
-		// it may take another value at runtime if no MMU exception has been raised
-		gpr.KillImmediate(a, true, true);
-
-		MEMCHECK_START
-
-		MOV(32, gpr.R(a), R(ABI_PARAM2));
-
-		MEMCHECK_END
-	}
-	SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
-	gpr.UnlockAll();
-	gpr.UnlockAllX();
+	gpr.FlushLockX(ABI_PARAM1);
+	MOV(32, R(ABI_PARAM1), gpr.R(a));
+	SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, RegistersInUse());
 	fpr.UnlockAll();
+	gpr.UnlockAllX();
 }


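The rewritten stfs is now conversion plus one ordinary 32-bit store; the update form (stfsu) goes back to the interpreter, per the table change above. A standalone model, ignoring the PPC-specific rounding details of ConvertDoubleToSingle:

	#include <cstdint>
	#include <cstring>

	// Model of the new stfs path: ConvertDoubleToSingle narrows the FPR to
	// float bits in XMM0, and SafeWriteF32ToReg moves them through EAX into
	// the ordinary 32-bit byteswapping store.
	void StoreGuestFloat(uint8_t* guest_mem, double fpr_value)
	{
		float f = (float)fpr_value;           // ConvertDoubleToSingle (roughly)
		uint32_t bits;
		std::memcpy(&bits, &f, sizeof(bits)); // MOVD_xmm(R(EAX), XMM0)
		bits = __builtin_bswap32(bits);       // guest memory is big-endian
		std::memcpy(guest_mem, &bits, sizeof(bits));
	}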
 void Jit64::stfsx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITLoadStoreFloatingOff);

 	// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
 	gpr.FlushLockX(ABI_PARAM1);
 	MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
 	if (inst.RA)
@@ -268,14 +117,11 @@ void Jit64::stfsx(UGeckoInstruction inst)
 	fpr.Lock(s);
 	fpr.BindToRegister(s, true, false);
 	ConvertDoubleToSingle(XMM0, fpr.RX(s));
-	MOVD_xmm(R(EAX), XMM0);
-	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
-
-	gpr.UnlockAllX();
+	SafeWriteF32ToReg(XMM0, ABI_PARAM1, 0, RegistersInUse());
 	fpr.UnlockAll();
+	gpr.UnlockAllX();
 }


 void Jit64::lfsx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -283,30 +129,17 @@ void Jit64::lfsx(UGeckoInstruction inst)

 	MOV(32, R(EAX), gpr.R(inst.RB));
 	if (inst.RA)
 	{
 		ADD(32, R(EAX), gpr.R(inst.RA));
 	}

+	SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false);
+
 	fpr.Lock(inst.RS);
-	fpr.BindToRegister(inst.RS, false);
-	X64Reg s = fpr.RX(inst.RS);
-	if (cpu_info.bSSSE3 && !js.memcheck) {
-#if _M_X86_32
-		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
-		MOVD_xmm(XMM0, MDisp(EAX, (u32)Memory::base));
-#else
-		MOVD_xmm(XMM0, MComplex(RBX, EAX, SCALE_1, 0));
-#endif
-		PSHUFB(XMM0, M((void *)bswapShuffle1x4));
-		ConvertSingleToDouble(s, XMM0);
-	} else {
-		SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false);
+	fpr.BindToRegister(inst.RS, js.memcheck);

-		MEMCHECK_START
-
-		ConvertSingleToDouble(s, EAX, true);
-
-		MEMCHECK_END
-	}
+	MEMCHECK_START
+	ConvertSingleToDouble(fpr.RX(inst.RS), EAX, true);
+	MEMCHECK_END
+
 	fpr.UnlockAll();
 	gpr.UnlockAllX();
 }
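Both indexed forms above (stfsx and lfsx) compute the effective address with the same MOV/ADD pair before calling into the common load/store helpers. A one-line model of that computation; on PPC, rA == 0 means a literal zero base, not GPR 0:

	#include <cstdint>

	// Effective address for the indexed forms, mirroring the MOV/ADD pair
	// in the emitted code: EA = (rA ? GPR[rA] : 0) + GPR[rB].
	uint32_t IndexedEA(int ra, const uint32_t gpr[32], uint32_t rb_value)
	{
		return rb_value + (ra ? gpr[ra] : 0);
	}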
@@ -266,7 +266,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()

 	// Easy!
 	const u8* storeSingleFloat = AlignCode4();
-	SafeWriteFloatToReg(XMM0, ECX, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
+	SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
 	RET();
 	/*
 	if (cpu_info.bSSSE3) {
@@ -101,7 +101,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
 		if (accessSize == 8 && signExtend)
 			MOVSX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
 		else
-			MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
+			MOVZX(64, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
 	}
 	else
 	{
@@ -110,7 +110,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
 		if (accessSize == 8 && signExtend)
 			MOVSX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
 		else
-			MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
+			MOVZX(64, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
 	}
 #else
 	if (opAddress.IsImm())
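Widening the MOVZX forms from 32 to 64 bits is what lets the same code path carry the new 64-bit accesses: narrow loads already zero-extend into the full register, and for a 64-bit access the emitter presumably degrades MOVZX to a plain MOV (an assumption; the diff only shows the call sites). A standalone model of the widening rules, using plain C++ casts rather than emitter calls:

	#include <cstdint>
	#include <cstdio>

	int main()
	{
		// MOVZX: narrow unsigned loads zero-extend into the 64-bit register.
		uint8_t  b = 0xFF;
		uint32_t w = 0xDEADBEEF;
		printf("%016llx\n", (unsigned long long)(uint64_t)b); // 00000000000000ff
		printf("%016llx\n", (unsigned long long)(uint64_t)w); // 00000000deadbeef

		// MOVSX: the 8-bit signExtend case sign-extends instead.
		int8_t s = -1;
		printf("%016llx\n", (unsigned long long)(uint64_t)(int64_t)s); // ffffffffffffffff
		return 0;
	}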
@@ -151,6 +151,10 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
 	case 32:
 		BSWAP(32, reg_value);
 		break;
+
+	case 64:
+		BSWAP(64, reg_value);
+		break;
 	}

 	return result;
@@ -272,6 +276,8 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
 	}
 }

+// Always clobbers EAX. Preserves the address.
+// Preserves the value if the load fails and js.memcheck is enabled.
 void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
 {
 	if (!jit->js.memcheck)
@@ -325,7 +331,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
 		{
 			UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
 		}
-		else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address))
+		else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address) && accessSize != 64)
 		{
 			MMIOLoadToReg(Memory::mmio_mapping, reg_value, registersInUse,
 						  address, accessSize, signExtend);
@@ -335,6 +341,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
 			ABI_PushRegistersAndAdjustStack(registersInUse, false);
 			switch (accessSize)
 			{
+			case 64: ABI_CallFunctionC((void *)&Memory::Read_U64, address); break;
 			case 32: ABI_CallFunctionC((void *)&Memory::Read_U32, address); break;
 			case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break;
 			case 8:  ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break;
@@ -350,7 +357,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
 			}
 			else if (reg_value != EAX)
 			{
-				MOVZX(32, accessSize, reg_value, R(EAX));
+				MOVZX(64, accessSize, reg_value, R(EAX));
 			}

 			MEMCHECK_END
@@ -372,6 +379,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
 		ABI_PushRegistersAndAdjustStack(registersInUse, false);
 		switch (accessSize)
 		{
+		case 64: ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc); break;
 		case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); break;
 		case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); break;
 		case 8:  ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break;
@@ -387,7 +395,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
 		}
 		else if (reg_value != EAX)
 		{
-			MOVZX(32, accessSize, reg_value, R(EAX));
+			MOVZX(64, accessSize, reg_value, R(EAX));
 		}

 		MEMCHECK_END
@@ -490,6 +498,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
 		ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);
 		switch (accessSize)
 		{
+		case 64: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); break;
 		case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break;
 		case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break;
 		case 8:  ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break;
@@ -501,43 +510,12 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
 	SetJumpTarget(exit);
 }

-void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
+// Destroys both arg registers and EAX
+void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags)
 {
-	// FIXME
-	if (false && cpu_info.bSSSE3) {
-		// This path should be faster but for some reason it causes errors so I've disabled it.
-		u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
-
-		if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
-			mem_mask |= Memory::ADDR_MASK_MEM1;
-
-#ifdef ENABLE_MEM_CHECK
-		if (Core::g_CoreStartupParameter.bEnableDebugging)
-			mem_mask |= Memory::EXRAM_MASK;
-#endif
-		TEST(32, R(reg_addr), Imm32(mem_mask));
-		FixupBranch argh = J_CC(CC_Z);
-		MOVSS(M(&float_buffer), xmm_value);
-		LoadAndSwap(32, EAX, M(&float_buffer));
-		MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
-		ABI_PushRegistersAndAdjustStack(registersInUse, false);
-		ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr);
-		ABI_PopRegistersAndAdjustStack(registersInUse, false);
-		FixupBranch arg2 = J();
-		SetJumpTarget(argh);
-		PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));
-#if _M_X86_64
-		MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
-#else
-		AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
-		MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value);
-#endif
-		SetJumpTarget(arg2);
-	} else {
-		MOVSS(M(&float_buffer), xmm_value);
-		MOV(32, R(EAX), M(&float_buffer));
-		SafeWriteRegToReg(EAX, reg_addr, 32, 0, registersInUse, flags);
-	}
+	// TODO: PSHUFB might be faster if fastmem supported MOVSS.
+	MOVD_xmm(R(EAX), xmm_value);
+	SafeWriteRegToReg(EAX, reg_addr, 32, offset, registersInUse, flags);
 }

 void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
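The replacement body is the point of the rename: the old routine bounced the float through a static buffer (MOVSS, then a 32-bit reload), while SafeWriteF32ToReg moves the bits register-to-register and defers to the common store path. A standalone contrast of the two bit-moves, using SSE intrinsics as stand-ins for the emitted instructions:

	#include <cstdint>
	#include <cstring>
	#include <cstdio>
	#include <emmintrin.h>

	int main()
	{
		__m128 x = _mm_set_ss(2.0f);

		// Old SafeWriteFloatToReg: spill to a buffer, reload as an integer.
		static float float_buffer;
		_mm_store_ss(&float_buffer, x);          // MOVSS(M(&float_buffer), xmm)
		uint32_t via_mem;
		std::memcpy(&via_mem, &float_buffer, 4); // MOV(32, R(EAX), M(&float_buffer))

		// New SafeWriteF32ToReg: move the bits register-to-register.
		uint32_t direct = (uint32_t)_mm_cvtsi128_si32(_mm_castps_si128(x)); // MOVD_xmm(R(EAX), xmm)

		printf("%08x %08x\n", via_mem, direct); // 40000000 40000000
		return 0;
	}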
@@ -47,8 +47,7 @@ public:
 	void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0);
 	void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);

-	// Trashes both inputs and EAX.
-	void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, u32 registersInUse, int flags = 0);
+	void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);

 	void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
 	void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);