mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-09 15:49:25 +01:00
For unknown reasons, this patch fixes Beyond Good and Evil and Metroid intro in 32-bit mode only. Yeah, I have some work to do on the JIT.
Also adds some minor stuff like memory card write notification, plus some minor SSSE3 optimizations. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@179 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
93429219ab
commit
29102ecbc6
@ -2,3 +2,18 @@
|
|||||||
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
||||||
DSPPlugin = Plugins\Plugin_DSP_NULL.dll
|
DSPPlugin = Plugins\Plugin_DSP_NULL.dll
|
||||||
PadPlugin = Plugins\Plugin_PadSimple.dll
|
PadPlugin = Plugins\Plugin_PadSimple.dll
|
||||||
|
[General]
|
||||||
|
LastFilename =
|
||||||
|
GCMPathes = 1
|
||||||
|
GCMPath0 = E:\GCM
|
||||||
|
[Core]
|
||||||
|
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
||||||
|
DSPPlugin = Plugins\Plugin_DSP.dll
|
||||||
|
PadPlugin = Plugins\Plugin_PadSimple.dll
|
||||||
|
HLEBios = True
|
||||||
|
UseDynarec = True
|
||||||
|
UseDualCore = True
|
||||||
|
Throttle = False
|
||||||
|
LockThreads = True
|
||||||
|
DefaultGCM =
|
||||||
|
OptimizeQuantizers = True
|
||||||
|
@ -2,3 +2,18 @@
|
|||||||
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
||||||
DSPPlugin = Plugins\Plugin_DSP_NULL.dll
|
DSPPlugin = Plugins\Plugin_DSP_NULL.dll
|
||||||
PadPlugin = Plugins\Plugin_PadSimple.dll
|
PadPlugin = Plugins\Plugin_PadSimple.dll
|
||||||
|
[General]
|
||||||
|
LastFilename =
|
||||||
|
GCMPathes = 1
|
||||||
|
GCMPath0 = E:\GCM
|
||||||
|
[Core]
|
||||||
|
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
||||||
|
DSPPlugin = Plugins\Plugin_DSP.dll
|
||||||
|
PadPlugin = Plugins\Plugin_PadSimple.dll
|
||||||
|
HLEBios = True
|
||||||
|
UseDynarec = True
|
||||||
|
UseDualCore = False
|
||||||
|
Throttle = False
|
||||||
|
LockThreads = True
|
||||||
|
DefaultGCM =
|
||||||
|
OptimizeQuantizers = True
|
||||||
|
@ -71,6 +71,7 @@ typedef signed __int16 s16;
|
|||||||
typedef signed __int8 s8;
|
typedef signed __int8 s8;
|
||||||
|
|
||||||
#define GC_ALIGNED16(x) __declspec(align(16)) x
|
#define GC_ALIGNED16(x) __declspec(align(16)) x
|
||||||
|
#define GC_ALIGNED32(x) __declspec(align(32)) x
|
||||||
#define GC_ALIGNED64(x) __declspec(align(64)) x
|
#define GC_ALIGNED64(x) __declspec(align(64)) x
|
||||||
#define GC_ALIGNED16_DECL(x) __declspec(align(16)) x
|
#define GC_ALIGNED16_DECL(x) __declspec(align(16)) x
|
||||||
#define GC_ALIGNED64_DECL(x) __declspec(align(64)) x
|
#define GC_ALIGNED64_DECL(x) __declspec(align(64)) x
|
||||||
@ -101,6 +102,7 @@ typedef union _LARGE_INTEGER
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define GC_ALIGNED16(x) __attribute((aligned(16))) x
|
#define GC_ALIGNED16(x) __attribute((aligned(16))) x
|
||||||
|
// Aligns the declaration x to a 32-byte boundary (GCC/Clang attribute syntax).
// The alignment value must match the macro name and the MSVC variant, which
// uses __declspec(align(32)).
#define GC_ALIGNED32(x) __attribute((aligned(32))) x
|
||||||
#define GC_ALIGNED64(x) __attribute((aligned(64))) x
|
#define GC_ALIGNED64(x) __attribute((aligned(64))) x
|
||||||
#define GC_ALIGNED16_DECL(x) __attribute((aligned(16))) x
|
#define GC_ALIGNED16_DECL(x) __attribute((aligned(16))) x
|
||||||
#define GC_ALIGNED64_DECL(x) __attribute((aligned(64))) x
|
#define GC_ALIGNED64_DECL(x) __attribute((aligned(64))) x
|
||||||
|
@ -974,8 +974,7 @@ namespace Gen
|
|||||||
void MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);}
|
void MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);}
|
||||||
|
|
||||||
void MOVQ_xmm(X64Reg dest, OpArg arg) {
|
void MOVQ_xmm(X64Reg dest, OpArg arg) {
|
||||||
if (dest > 7)
|
#ifdef _M_X64
|
||||||
{
|
|
||||||
// Alternate encoding
|
// Alternate encoding
|
||||||
// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
|
// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
|
||||||
arg.operandReg = dest;
|
arg.operandReg = dest;
|
||||||
@ -984,14 +983,13 @@ namespace Gen
|
|||||||
Write8(0x0f);
|
Write8(0x0f);
|
||||||
Write8(0x6E);
|
Write8(0x6E);
|
||||||
arg.WriteRest(0);
|
arg.WriteRest(0);
|
||||||
} else {
|
#else
|
||||||
arg.operandReg = dest;
|
arg.operandReg = dest;
|
||||||
arg.WriteRex(false);
|
|
||||||
Write8(0xF3);
|
Write8(0xF3);
|
||||||
Write8(0x0f);
|
Write8(0x0f);
|
||||||
Write8(0x7E);
|
Write8(0x7E);
|
||||||
arg.WriteRest(0);
|
arg.WriteRest(0);
|
||||||
}
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);}
|
void MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);}
|
||||||
|
@ -93,7 +93,7 @@ Common::Event emuThreadGoing;
|
|||||||
|
|
||||||
bool PanicAlertToVideo(const char* text, bool yes_no)
|
bool PanicAlertToVideo(const char* text, bool yes_no)
|
||||||
{
|
{
|
||||||
PluginVideo::Video_AddMessage(text,3000);
|
DisplayMessage(text, 3000);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,13 +140,24 @@ bool Init(const SCoreStartupParameter _CoreParameter)
|
|||||||
// all right ... here we go
|
// all right ... here we go
|
||||||
Host_SetWaitCursor(false);
|
Host_SetWaitCursor(false);
|
||||||
|
|
||||||
PluginVideo::Video_AddMessage("Emulation started.",3000);
|
DisplayMessage("Emulation started.", 3000);
|
||||||
|
|
||||||
//RegisterPanicAlertHandler(PanicAlertToVideo);
|
//RegisterPanicAlertHandler(PanicAlertToVideo);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DisplayMessage(const std::string &message, int time_in_ms)
|
||||||
|
{
|
||||||
|
PluginVideo::Video_AddMessage(message.c_str(), time_in_ms);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DisplayMessage(const char *message, int time_in_ms)
|
||||||
|
{
|
||||||
|
PluginVideo::Video_AddMessage(message, time_in_ms);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Called from GUI thread or VI thread
|
// Called from GUI thread or VI thread
|
||||||
void Stop() // - Hammertime!
|
void Stop() // - Hammertime!
|
||||||
{
|
{
|
||||||
|
@ -54,6 +54,8 @@ namespace Core
|
|||||||
extern bool bWriteTrace;
|
extern bool bWriteTrace;
|
||||||
|
|
||||||
void StartTrace(bool write);
|
void StartTrace(bool write);
|
||||||
|
void DisplayMessage(const std::string &message, int time_in_ms); // This displays messages in a user-visible way.
|
||||||
|
void DisplayMessage(const char *message, int time_in_ms); // This displays messages in a user-visible way.
|
||||||
|
|
||||||
int SyncTrace();
|
int SyncTrace();
|
||||||
void SetBlockStart(u32 addr);
|
void SetBlockStart(u32 addr);
|
||||||
|
@ -32,7 +32,7 @@ CEXIChannel::CEXIChannel() :
|
|||||||
|
|
||||||
m_Status.CHIP_SELECT = 1;
|
m_Status.CHIP_SELECT = 1;
|
||||||
|
|
||||||
for (int i=0; i<NUM_DEVICES; i++)
|
for (int i = 0; i < NUM_DEVICES; i++)
|
||||||
{
|
{
|
||||||
m_pDevices[i] = EXIDevice_Create(EXIDEVICE_DUMMY);
|
m_pDevices[i] = EXIDevice_Create(EXIDEVICE_DUMMY);
|
||||||
_dbg_assert_(EXPANSIONINTERFACE, m_pDevices[i] != NULL);
|
_dbg_assert_(EXPANSIONINTERFACE, m_pDevices[i] != NULL);
|
||||||
|
@ -84,6 +84,8 @@ void CEXIMemoryCard::Flush()
|
|||||||
}
|
}
|
||||||
fwrite(memory_card_content, memory_card_size, 1, pFile);
|
fwrite(memory_card_content, memory_card_size, 1, pFile);
|
||||||
fclose(pFile);
|
fclose(pFile);
|
||||||
|
|
||||||
|
Core::DisplayMessage(StringFromFormat("Wrote memory card contents to %s", m_strFilename.c_str()), 4000);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CEXIMemoryCard::FlushCallback(u64 userdata, int cyclesLate)
|
void CEXIMemoryCard::FlushCallback(u64 userdata, int cyclesLate)
|
||||||
|
@ -37,7 +37,7 @@ namespace GPFifo
|
|||||||
// Both of these should actually work! Only problem is that we have to decide at run time,
|
// Both of these should actually work! Only problem is that we have to decide at run time,
|
||||||
// the same function could use both methods. Compile 2 different versions of each such block?
|
// the same function could use both methods. Compile 2 different versions of each such block?
|
||||||
|
|
||||||
u8 m_gatherPipe[GATHER_PIPE_SIZE*16]; //more room, for the fastmodes
|
u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes
|
||||||
|
|
||||||
// pipe counter
|
// pipe counter
|
||||||
u32 m_gatherPipeCount = 0;
|
u32 m_gatherPipeCount = 0;
|
||||||
@ -73,7 +73,7 @@ void CheckGatherPipe()
|
|||||||
// increase the CPUWritePointer
|
// increase the CPUWritePointer
|
||||||
CPeripheralInterface::Fifo_CPUWritePointer += GATHER_PIPE_SIZE;
|
CPeripheralInterface::Fifo_CPUWritePointer += GATHER_PIPE_SIZE;
|
||||||
if (CPeripheralInterface::Fifo_CPUWritePointer > CPeripheralInterface::Fifo_CPUEnd)
|
if (CPeripheralInterface::Fifo_CPUWritePointer > CPeripheralInterface::Fifo_CPUEnd)
|
||||||
_assert_msg_(DYNA_REC,0,"ARGH");
|
_assert_msg_(DYNA_REC, 0, "ARGH");
|
||||||
|
|
||||||
if (CPeripheralInterface::Fifo_CPUWritePointer >= CPeripheralInterface::Fifo_CPUEnd)
|
if (CPeripheralInterface::Fifo_CPUWritePointer >= CPeripheralInterface::Fifo_CPUEnd)
|
||||||
CPeripheralInterface::Fifo_CPUWritePointer = CPeripheralInterface::Fifo_CPUBase;
|
CPeripheralInterface::Fifo_CPUWritePointer = CPeripheralInterface::Fifo_CPUBase;
|
||||||
|
@ -28,6 +28,11 @@ enum
|
|||||||
GATHER_PIPE_SIZE = 32
|
GATHER_PIPE_SIZE = 32
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern u8 m_gatherPipe[GATHER_PIPE_SIZE*16]; //more room, for the fastmodes
|
||||||
|
|
||||||
|
// pipe counter
|
||||||
|
extern u32 m_gatherPipeCount;
|
||||||
|
|
||||||
// Init
|
// Init
|
||||||
void Init();
|
void Init();
|
||||||
|
|
||||||
|
@ -183,6 +183,20 @@ void Generate()
|
|||||||
SetJumpTarget(pLesser);
|
SetJumpTarget(pLesser);
|
||||||
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
|
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
|
// Fast write routines - special case the most common hardware write
|
||||||
|
// TODO: use this.
|
||||||
|
// Even in x86, the param values will be in the right registers.
|
||||||
|
/*
|
||||||
|
const u8 *fastMemWrite8 = AlignCode16();
|
||||||
|
CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
|
||||||
|
FixupBranch skip_fast_write = J_CC(CC_NE, false);
|
||||||
|
MOV(32, EAX, M(&m_gatherPipeCount));
|
||||||
|
MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
|
||||||
|
ADD(32, 1, M(&m_gatherPipeCount));
|
||||||
|
RET();
|
||||||
|
SetJumpTarget(skip_fast_write);
|
||||||
|
CALL((void *)&Memory::Write_U8);*/
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(_M_X64)
|
#elif defined(_M_X64)
|
||||||
|
@ -139,6 +139,15 @@ namespace Jit64
|
|||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// JIT entry for the fmrx instruction. No native code is emitted here: after
// INSTRUCTION_START the instruction is handed to Default(inst)
// (presumably the generic fallback path -- confirm against its definition).
void fmrx(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	Default(inst);
	return;
}
|
||||||
|
|
||||||
void fcmpx(UGeckoInstruction inst)
|
void fcmpx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include "../../HW/PixelEngine.h"
|
#include "../../HW/PixelEngine.h"
|
||||||
#include "../../HW/Memmap.h"
|
#include "../../HW/Memmap.h"
|
||||||
#include "../PPCTables.h"
|
#include "../PPCTables.h"
|
||||||
|
#include "CPUDetect.h"
|
||||||
#include "x64Emitter.h"
|
#include "x64Emitter.h"
|
||||||
#include "ABI.h"
|
#include "ABI.h"
|
||||||
|
|
||||||
@ -51,6 +52,7 @@ namespace Jit64
|
|||||||
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
|
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
|
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
|
||||||
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
|
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
|
||||||
|
|
||||||
static u64 GC_ALIGNED16(temp64);
|
static u64 GC_ALIGNED16(temp64);
|
||||||
@ -115,12 +117,18 @@ void lfd(UGeckoInstruction inst)
|
|||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
gpr.Lock(a);
|
gpr.Lock(a);
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
|
||||||
BSWAP(64, EAX);
|
|
||||||
MOV(64, M(&temp64), R(EAX));
|
|
||||||
fpr.Lock(d);
|
|
||||||
fpr.LoadToX64(d, false);
|
fpr.LoadToX64(d, false);
|
||||||
MOVDDUP(fpr.RX(d), M(&temp64));
|
fpr.Lock(d);
|
||||||
|
if (cpu_info.bSSE3NewInstructions) {
|
||||||
|
X64Reg xd = fpr.RX(d);
|
||||||
|
MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
||||||
|
PSHUFB(xd, M((void *)bswapShuffle1x8Dupe));
|
||||||
|
} else {
|
||||||
|
MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
||||||
|
BSWAP(64, EAX);
|
||||||
|
MOV(64, M(&temp64), R(EAX));
|
||||||
|
MOVDDUP(fpr.RX(d), M(&temp64));
|
||||||
|
}
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
@ -128,7 +136,10 @@ void lfd(UGeckoInstruction inst)
|
|||||||
void stfd(UGeckoInstruction inst)
|
void stfd(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
DISABLE_32BIT;
|
if (!cpu_info.bSSSE3NewInstructions)
|
||||||
|
{
|
||||||
|
DISABLE_32BIT;
|
||||||
|
}
|
||||||
int s = inst.RS;
|
int s = inst.RS;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
if (!a)
|
if (!a)
|
||||||
@ -140,12 +151,25 @@ void stfd(UGeckoInstruction inst)
|
|||||||
gpr.Lock(a);
|
gpr.Lock(a);
|
||||||
fpr.Lock(s);
|
fpr.Lock(s);
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
fpr.LoadToX64(s, true, false);
|
|
||||||
MOVSD(M(&temp64), fpr.RX(s));
|
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
MOV(64, R(EAX), M(&temp64));
|
#ifdef _M_IX86
|
||||||
BSWAP(64, EAX);
|
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX));
|
#endif
|
||||||
|
if (cpu_info.bSSSE3NewInstructions) {
|
||||||
|
MOVAPS(XMM0, fpr.R(s));
|
||||||
|
PSHUFB(XMM0, M((void *)bswapShuffle1x8));
|
||||||
|
#ifdef _M_X64
|
||||||
|
MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0);
|
||||||
|
#else
|
||||||
|
MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base + offset), XMM0);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
fpr.LoadToX64(s, true, false);
|
||||||
|
MOVSD(M(&temp64), fpr.RX(s));
|
||||||
|
MOV(64, R(EAX), M(&temp64));
|
||||||
|
BSWAP(64, EAX);
|
||||||
|
MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX));
|
||||||
|
}
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
@ -154,6 +178,7 @@ void stfd(UGeckoInstruction inst)
|
|||||||
void stfs(UGeckoInstruction inst)
|
void stfs(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
|
DISABLE_32BIT;
|
||||||
bool update = inst.OPCD & 1;
|
bool update = inst.OPCD & 1;
|
||||||
int s = inst.RS;
|
int s = inst.RS;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
@ -192,10 +217,24 @@ void lfsx(UGeckoInstruction inst)
|
|||||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||||
if (inst.RA)
|
if (inst.RA)
|
||||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||||
UnsafeLoadRegToReg(EAX, EAX, 32, false);
|
if (cpu_info.bSSSE3NewInstructions) {
|
||||||
MOV(32, M(&temp32), R(EAX));
|
// PanicAlert("SSE3 supported!");
|
||||||
CVTSS2SD(XMM0, M(&temp32));
|
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||||
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
|
#ifdef _M_IX86
|
||||||
|
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOVD_xmm(r, MDisp(EAX, (u32)Memory::base));
|
||||||
|
#else
|
||||||
|
MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
|
||||||
|
#endif
|
||||||
|
PSHUFB(r, M((void *)bswapShuffle1x4));
|
||||||
|
CVTSS2SD(r, R(r));
|
||||||
|
MOVDDUP(r, R(r));
|
||||||
|
} else {
|
||||||
|
UnsafeLoadRegToReg(EAX, EAX, 32, false);
|
||||||
|
MOV(32, M(&temp32), R(EAX));
|
||||||
|
CVTSS2SD(XMM0, M(&temp32));
|
||||||
|
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
|
||||||
|
}
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include "../../HW/PixelEngine.h"
|
#include "../../HW/PixelEngine.h"
|
||||||
#include "../../HW/Memmap.h"
|
#include "../../HW/Memmap.h"
|
||||||
#include "../PPCTables.h"
|
#include "../PPCTables.h"
|
||||||
|
#include "CPUDetect.h"
|
||||||
#include "x64Emitter.h"
|
#include "x64Emitter.h"
|
||||||
#include "ABI.h"
|
#include "ABI.h"
|
||||||
|
|
||||||
@ -225,6 +226,8 @@ void psq_st(UGeckoInstruction inst)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
|
const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
|
|
||||||
void psq_l(UGeckoInstruction inst)
|
void psq_l(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
@ -247,39 +250,57 @@ void psq_l(UGeckoInstruction inst)
|
|||||||
}
|
}
|
||||||
int offset = inst.SIMM_12;
|
int offset = inst.SIMM_12;
|
||||||
switch (ldType) {
|
switch (ldType) {
|
||||||
case QUANTIZE_FLOAT:
|
case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address.
|
||||||
{
|
{
|
||||||
#ifdef _M_X64
|
#ifdef _M_X64
|
||||||
gpr.LoadToX64(inst.RA);
|
gpr.LoadToX64(inst.RA, true, update);
|
||||||
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
|
||||||
BSWAP(64, RAX);
|
|
||||||
MOV(64, M(&psTemp[0]), R(RAX));
|
|
||||||
fpr.LoadToX64(inst.RS, false);
|
fpr.LoadToX64(inst.RS, false);
|
||||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
if (cpu_info.bSSSE3NewInstructions) {
|
||||||
CVTPS2PD(r, M(&psTemp[0]));
|
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
|
||||||
SHUFPD(r, R(r), 1);
|
MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||||
|
PSHUFB(xd, M((void *)pbswapShuffle2x4));
|
||||||
|
CVTPS2PD(xd, R(xd));
|
||||||
|
} else {
|
||||||
|
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||||
|
BSWAP(64, RAX);
|
||||||
|
MOV(64, M(&psTemp[0]), R(RAX));
|
||||||
|
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||||
|
CVTPS2PD(r, M(&psTemp[0]));
|
||||||
|
SHUFPD(r, R(r), 1);
|
||||||
|
}
|
||||||
if (update)
|
if (update)
|
||||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||||
break;
|
break;
|
||||||
#else
|
#else
|
||||||
gpr.FlushR(ECX);
|
if (cpu_info.bSSSE3NewInstructions) {
|
||||||
gpr.LockX(ECX);
|
gpr.LoadToX64(inst.RA, true, update);
|
||||||
gpr.LoadToX64(inst.RA);
|
fpr.LoadToX64(inst.RS, false);
|
||||||
// This can probably be optimized somewhat.
|
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
|
||||||
LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
|
MOV(32, R(EAX), gpr.R(inst.RA));
|
||||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
|
MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
|
||||||
BSWAP(32, RAX);
|
PSHUFB(xd, M((void *)pbswapShuffle2x4));
|
||||||
MOV(32, M(&psTemp[0]), R(RAX));
|
CVTPS2PD(xd, R(xd));
|
||||||
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
|
} else {
|
||||||
BSWAP(32, RAX);
|
gpr.FlushR(ECX);
|
||||||
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
|
gpr.LockX(ECX);
|
||||||
fpr.LoadToX64(inst.RS, false);
|
gpr.LoadToX64(inst.RA);
|
||||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
// This can probably be optimized somewhat.
|
||||||
CVTPS2PD(r, M(&psTemp[0]));
|
LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
|
||||||
|
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
|
||||||
|
BSWAP(32, RAX);
|
||||||
|
MOV(32, M(&psTemp[0]), R(RAX));
|
||||||
|
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
|
||||||
|
BSWAP(32, RAX);
|
||||||
|
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
|
||||||
|
fpr.LoadToX64(inst.RS, false);
|
||||||
|
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||||
|
CVTPS2PD(r, M(&psTemp[0]));
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
}
|
||||||
if (update)
|
if (update)
|
||||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||||
gpr.UnlockAllX();
|
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,7 @@ public:
|
|||||||
{
|
{
|
||||||
TCacheEntry() : texture(0), addr(0), hash(0), w(0), h(0), isRenderTarget(false), isUpsideDown(false), isNonPow2(true), bHaveMipMaps(false) { mode.hex = 0xFCFCFCFC; }
|
TCacheEntry() : texture(0), addr(0), hash(0), w(0), h(0), isRenderTarget(false), isUpsideDown(false), isNonPow2(true), bHaveMipMaps(false) { mode.hex = 0xFCFCFCFC; }
|
||||||
|
|
||||||
u32 texture;
|
GLuint texture;
|
||||||
u32 addr;
|
u32 addr;
|
||||||
u32 hash;
|
u32 hash;
|
||||||
u32 paletteHash;
|
u32 paletteHash;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user