mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-09 15:49:25 +01:00
For unknown reasons, this patch fixes Beyond Good and Evil and Metroid intro in 32-bit mode only. Yeah, I have some work to do on the JIT.
Also adds some minor stuff like memory card write notification, plus some minor SSSE3 optimizations.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@179 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
93429219ab
commit
29102ecbc6
@ -2,3 +2,18 @@
|
||||
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
||||
DSPPlugin = Plugins\Plugin_DSP_NULL.dll
|
||||
PadPlugin = Plugins\Plugin_PadSimple.dll
|
||||
[General]
|
||||
LastFilename =
|
||||
GCMPathes = 1
|
||||
GCMPath0 = E:\GCM
|
||||
[Core]
|
||||
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
||||
DSPPlugin = Plugins\Plugin_DSP.dll
|
||||
PadPlugin = Plugins\Plugin_PadSimple.dll
|
||||
HLEBios = True
|
||||
UseDynarec = True
|
||||
UseDualCore = True
|
||||
Throttle = False
|
||||
LockThreads = True
|
||||
DefaultGCM =
|
||||
OptimizeQuantizers = True
|
||||
|
@ -2,3 +2,18 @@
|
||||
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
||||
DSPPlugin = Plugins\Plugin_DSP_NULL.dll
|
||||
PadPlugin = Plugins\Plugin_PadSimple.dll
|
||||
[General]
|
||||
LastFilename =
|
||||
GCMPathes = 1
|
||||
GCMPath0 = E:\GCM
|
||||
[Core]
|
||||
GFXPlugin = Plugins\Plugin_VideoOGL.dll
|
||||
DSPPlugin = Plugins\Plugin_DSP.dll
|
||||
PadPlugin = Plugins\Plugin_PadSimple.dll
|
||||
HLEBios = True
|
||||
UseDynarec = True
|
||||
UseDualCore = False
|
||||
Throttle = False
|
||||
LockThreads = True
|
||||
DefaultGCM =
|
||||
OptimizeQuantizers = True
|
||||
|
@ -71,6 +71,7 @@ typedef signed __int16 s16;
|
||||
typedef signed __int8 s8;
|
||||
|
||||
#define GC_ALIGNED16(x) __declspec(align(16)) x
|
||||
#define GC_ALIGNED32(x) __declspec(align(32)) x
|
||||
#define GC_ALIGNED64(x) __declspec(align(64)) x
|
||||
#define GC_ALIGNED16_DECL(x) __declspec(align(16)) x
|
||||
#define GC_ALIGNED64_DECL(x) __declspec(align(64)) x
|
||||
@ -101,6 +102,7 @@ typedef union _LARGE_INTEGER
|
||||
#endif
|
||||
|
||||
#define GC_ALIGNED16(x) __attribute((aligned(16))) x
|
||||
// Declares x with 32-byte alignment (GCC/Clang spelling). The value must be 32 so that it
// matches the __declspec(align(32)) variant used for MSVC builds; it was previously 16.
#define GC_ALIGNED32(x) __attribute((aligned(32))) x
|
||||
#define GC_ALIGNED64(x) __attribute((aligned(64))) x
|
||||
#define GC_ALIGNED16_DECL(x) __attribute((aligned(16))) x
|
||||
#define GC_ALIGNED64_DECL(x) __attribute((aligned(64))) x
|
||||
|
@ -974,8 +974,7 @@ namespace Gen
|
||||
void MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);}
|
||||
|
||||
void MOVQ_xmm(X64Reg dest, OpArg arg) {
|
||||
if (dest > 7)
|
||||
{
|
||||
#ifdef _M_X64
|
||||
// Alternate encoding
|
||||
// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
|
||||
arg.operandReg = dest;
|
||||
@ -984,14 +983,13 @@ namespace Gen
|
||||
Write8(0x0f);
|
||||
Write8(0x6E);
|
||||
arg.WriteRest(0);
|
||||
} else {
|
||||
#else
|
||||
arg.operandReg = dest;
|
||||
arg.WriteRex(false);
|
||||
Write8(0xF3);
|
||||
Write8(0x0f);
|
||||
Write8(0x7E);
|
||||
arg.WriteRest(0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);}
|
||||
|
@ -93,7 +93,7 @@ Common::Event emuThreadGoing;
|
||||
|
||||
bool PanicAlertToVideo(const char* text, bool yes_no)
|
||||
{
|
||||
PluginVideo::Video_AddMessage(text,3000);
|
||||
DisplayMessage(text, 3000);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -140,13 +140,24 @@ bool Init(const SCoreStartupParameter _CoreParameter)
|
||||
// all right ... here we go
|
||||
Host_SetWaitCursor(false);
|
||||
|
||||
PluginVideo::Video_AddMessage("Emulation started.",3000);
|
||||
DisplayMessage("Emulation started.", 3000);
|
||||
|
||||
//RegisterPanicAlertHandler(PanicAlertToVideo);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Shows a message to the user by forwarding it to the video plugin.
// message:    text to show; passed on as a C string via c_str().
// time_in_ms: passed through unchanged as the second argument of Video_AddMessage.
void DisplayMessage(const std::string &message, int time_in_ms)
|
||||
{
|
||||
PluginVideo::Video_AddMessage(message.c_str(), time_in_ms);
|
||||
}
|
||||
|
||||
// C-string overload: forwards message and time_in_ms unchanged to
// PluginVideo::Video_AddMessage.
void DisplayMessage(const char *message, int time_in_ms)
|
||||
{
|
||||
PluginVideo::Video_AddMessage(message, time_in_ms);
|
||||
}
|
||||
|
||||
|
||||
// Called from GUI thread or VI thread
|
||||
void Stop() // - Hammertime!
|
||||
{
|
||||
|
@ -54,6 +54,8 @@ namespace Core
|
||||
extern bool bWriteTrace;
|
||||
|
||||
void StartTrace(bool write);
|
||||
void DisplayMessage(const std::string &message, int time_in_ms); // This displays messages in a user-visible way.
|
||||
void DisplayMessage(const char *message, int time_in_ms); // This displays messages in a user-visible way.
|
||||
|
||||
int SyncTrace();
|
||||
void SetBlockStart(u32 addr);
|
||||
|
@ -84,6 +84,8 @@ void CEXIMemoryCard::Flush()
|
||||
}
|
||||
fwrite(memory_card_content, memory_card_size, 1, pFile);
|
||||
fclose(pFile);
|
||||
|
||||
Core::DisplayMessage(StringFromFormat("Wrote memory card contents to %s", m_strFilename.c_str()), 4000);
|
||||
}
|
||||
|
||||
void CEXIMemoryCard::FlushCallback(u64 userdata, int cyclesLate)
|
||||
|
@ -37,7 +37,7 @@ namespace GPFifo
|
||||
// Both of these should actually work! Only problem is that we have to decide at run time,
|
||||
// the same function could use both methods. Compile 2 different versions of each such block?
|
||||
|
||||
u8 m_gatherPipe[GATHER_PIPE_SIZE*16]; //more room, for the fastmodes
|
||||
u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes
|
||||
|
||||
// pipe counter
|
||||
u32 m_gatherPipeCount = 0;
|
||||
|
@ -28,6 +28,11 @@ enum
|
||||
GATHER_PIPE_SIZE = 32
|
||||
};
|
||||
|
||||
extern u8 m_gatherPipe[GATHER_PIPE_SIZE*16]; //more room, for the fastmodes
|
||||
|
||||
// pipe counter
|
||||
extern u32 m_gatherPipeCount;
|
||||
|
||||
// Init
|
||||
void Init();
|
||||
|
||||
|
@ -183,6 +183,20 @@ void Generate()
|
||||
SetJumpTarget(pLesser);
|
||||
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
|
||||
RET();
|
||||
|
||||
// Fast write routines - special case the most common hardware write
|
||||
// TODO: use this.
|
||||
// Even in x86, the param values will be in the right registers.
|
||||
/*
|
||||
const u8 *fastMemWrite8 = AlignCode16();
|
||||
CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
|
||||
FixupBranch skip_fast_write = J_CC(CC_NE, false);
|
||||
MOV(32, EAX, M(&m_gatherPipeCount));
|
||||
MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
|
||||
ADD(32, 1, M(&m_gatherPipeCount));
|
||||
RET();
|
||||
SetJumpTarget(skip_fast_write);
|
||||
CALL((void *)&Memory::Write_U8);*/
|
||||
}
|
||||
|
||||
#elif defined(_M_X64)
|
||||
|
@ -139,6 +139,15 @@ namespace Jit64
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
// fmrx: no specialised code is emitted here. After INSTRUCTION_START the instruction is
// handed straight to Default(inst) and the function returns.
// NOTE(review): Default() is defined outside this view - presumably the generic fallback
// path; confirm.
void fmrx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START;
|
||||
Default(inst); return;
|
||||
|
||||
|
||||
}
|
||||
|
||||
void fcmpx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START;
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "../../HW/PixelEngine.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "CPUDetect.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
|
||||
@ -51,6 +52,7 @@ namespace Jit64
|
||||
// 16-byte-aligned byte-index tables; some are passed to PSHUFB in this file as shuffle
// masks. With PSHUFB, output byte i is taken from input byte table[i].
// 1x4: reverses bytes 0-3; bytes 4-15 keep their position.
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
// 2x4: reverses bytes 0-3 and, separately, bytes 4-7; bytes 8-15 keep their position.
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
// 1x8: reverses bytes 0-7; bytes 8-15 keep their position.
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
// 1x8Dupe: reverses bytes 0-7 and writes that same result into both 8-byte halves.
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
|
||||
// 2x8: reverses bytes 0-7 and, separately, bytes 8-15.
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
|
||||
|
||||
static u64 GC_ALIGNED16(temp64);
|
||||
@ -115,12 +117,18 @@ void lfd(UGeckoInstruction inst)
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
gpr.Lock(a);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
fpr.LoadToX64(d, false);
|
||||
fpr.Lock(d);
|
||||
if (cpu_info.bSSE3NewInstructions) {
|
||||
X64Reg xd = fpr.RX(d);
|
||||
MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
||||
PSHUFB(xd, M((void *)bswapShuffle1x8Dupe));
|
||||
} else {
|
||||
MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
||||
BSWAP(64, EAX);
|
||||
MOV(64, M(&temp64), R(EAX));
|
||||
fpr.Lock(d);
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVDDUP(fpr.RX(d), M(&temp64));
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
@ -128,7 +136,10 @@ void lfd(UGeckoInstruction inst)
|
||||
void stfd(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START;
|
||||
if (!cpu_info.bSSSE3NewInstructions)
|
||||
{
|
||||
DISABLE_32BIT;
|
||||
}
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
if (!a)
|
||||
@ -140,12 +151,25 @@ void stfd(UGeckoInstruction inst)
|
||||
gpr.Lock(a);
|
||||
fpr.Lock(s);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
#ifdef _M_IX86
|
||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
||||
#endif
|
||||
if (cpu_info.bSSSE3NewInstructions) {
|
||||
MOVAPS(XMM0, fpr.R(s));
|
||||
PSHUFB(XMM0, M((void *)bswapShuffle1x8));
|
||||
#ifdef _M_X64
|
||||
MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0);
|
||||
#else
|
||||
MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base + offset), XMM0);
|
||||
#endif
|
||||
} else {
|
||||
fpr.LoadToX64(s, true, false);
|
||||
MOVSD(M(&temp64), fpr.RX(s));
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
MOV(64, R(EAX), M(&temp64));
|
||||
BSWAP(64, EAX);
|
||||
MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX));
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
fpr.UnlockAll();
|
||||
@ -154,6 +178,7 @@ void stfd(UGeckoInstruction inst)
|
||||
void stfs(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START;
|
||||
DISABLE_32BIT;
|
||||
bool update = inst.OPCD & 1;
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
@ -192,10 +217,24 @@ void lfsx(UGeckoInstruction inst)
|
||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
if (cpu_info.bSSSE3NewInstructions) {
|
||||
// PanicAlert("SSE3 supported!");
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
#ifdef _M_IX86
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVD_xmm(r, MDisp(EAX, (u32)Memory::base));
|
||||
#else
|
||||
MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
|
||||
#endif
|
||||
PSHUFB(r, M((void *)bswapShuffle1x4));
|
||||
CVTSS2SD(r, R(r));
|
||||
MOVDDUP(r, R(r));
|
||||
} else {
|
||||
UnsafeLoadRegToReg(EAX, EAX, 32, false);
|
||||
MOV(32, M(&temp32), R(EAX));
|
||||
CVTSS2SD(XMM0, M(&temp32));
|
||||
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
|
||||
}
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "../../HW/PixelEngine.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "CPUDetect.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
|
||||
@ -225,6 +226,8 @@ void psq_st(UGeckoInstruction inst)
|
||||
}
|
||||
}
|
||||
|
||||
// PSHUFB mask used by psq_l: reverses bytes 0-3 and, separately, bytes 4-7;
// bytes 8-15 keep their position.
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
// Identity table (entry i is i). Not referenced in the code visible here.
const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
|
||||
void psq_l(UGeckoInstruction inst)
|
||||
{
|
||||
@ -247,21 +250,38 @@ void psq_l(UGeckoInstruction inst)
|
||||
}
|
||||
int offset = inst.SIMM_12;
|
||||
switch (ldType) {
|
||||
case QUANTIZE_FLOAT:
|
||||
case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address.
|
||||
{
|
||||
#ifdef _M_X64
|
||||
gpr.LoadToX64(inst.RA);
|
||||
gpr.LoadToX64(inst.RA, true, update);
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
if (cpu_info.bSSSE3NewInstructions) {
|
||||
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
|
||||
MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
PSHUFB(xd, M((void *)pbswapShuffle2x4));
|
||||
CVTPS2PD(xd, R(xd));
|
||||
} else {
|
||||
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
BSWAP(64, RAX);
|
||||
MOV(64, M(&psTemp[0]), R(RAX));
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
CVTPS2PD(r, M(&psTemp[0]));
|
||||
SHUFPD(r, R(r), 1);
|
||||
}
|
||||
if (update)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
break;
|
||||
#else
|
||||
if (cpu_info.bSSSE3NewInstructions) {
|
||||
gpr.LoadToX64(inst.RA, true, update);
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
|
||||
MOV(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
|
||||
PSHUFB(xd, M((void *)pbswapShuffle2x4));
|
||||
CVTPS2PD(xd, R(xd));
|
||||
} else {
|
||||
gpr.FlushR(ECX);
|
||||
gpr.LockX(ECX);
|
||||
gpr.LoadToX64(inst.RA);
|
||||
@ -277,9 +297,10 @@ void psq_l(UGeckoInstruction inst)
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
CVTPS2PD(r, M(&psTemp[0]));
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
if (update)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
gpr.UnlockAllX();
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ public:
|
||||
{
|
||||
TCacheEntry() : texture(0), addr(0), hash(0), w(0), h(0), isRenderTarget(false), isUpsideDown(false), isNonPow2(true), bHaveMipMaps(false) { mode.hex = 0xFCFCFCFC; }
|
||||
|
||||
u32 texture;
|
||||
GLuint texture;
|
||||
u32 addr;
|
||||
u32 hash;
|
||||
u32 paletteHash;
|
||||
|
Loading…
x
Reference in New Issue
Block a user