For unknown reasons, this patch fixes Beyond Good and Evil and the Metroid intro, but only in 32-bit mode. Yeah, I still have some work to do on the JIT.

Also adds some minor things, such as an on-screen notification when the memory card is written, plus a few small SSSE3 optimizations.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@179 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-11 19:35:38 +00:00
parent 93429219ab
commit 29102ecbc6
15 changed files with 183 additions and 50 deletions

View File

@ -2,3 +2,18 @@
GFXPlugin = Plugins\Plugin_VideoOGL.dll
DSPPlugin = Plugins\Plugin_DSP_NULL.dll
PadPlugin = Plugins\Plugin_PadSimple.dll
[General]
LastFilename =
GCMPathes = 1
GCMPath0 = E:\GCM
[Core]
GFXPlugin = Plugins\Plugin_VideoOGL.dll
DSPPlugin = Plugins\Plugin_DSP.dll
PadPlugin = Plugins\Plugin_PadSimple.dll
HLEBios = True
UseDynarec = True
UseDualCore = True
Throttle = False
LockThreads = True
DefaultGCM =
OptimizeQuantizers = True

View File

@ -2,3 +2,18 @@
GFXPlugin = Plugins\Plugin_VideoOGL.dll
DSPPlugin = Plugins\Plugin_DSP_NULL.dll
PadPlugin = Plugins\Plugin_PadSimple.dll
[General]
LastFilename =
GCMPathes = 1
GCMPath0 = E:\GCM
[Core]
GFXPlugin = Plugins\Plugin_VideoOGL.dll
DSPPlugin = Plugins\Plugin_DSP.dll
PadPlugin = Plugins\Plugin_PadSimple.dll
HLEBios = True
UseDynarec = True
UseDualCore = False
Throttle = False
LockThreads = True
DefaultGCM =
OptimizeQuantizers = True

View File

@ -71,6 +71,7 @@ typedef signed __int16 s16;
typedef signed __int8 s8;
// Alignment helpers (__declspec(align) flavour). Each macro wraps a declarator
// so that the declared object gets the stated byte alignment, e.g.
//   u8 GC_ALIGNED32(buffer[64]);
#define GC_ALIGNED16(x) __declspec(align(16)) x
#define GC_ALIGNED32(x) __declspec(align(32)) x
#define GC_ALIGNED64(x) __declspec(align(64)) x
// The _DECL variants expand to exactly the same text as the plain ones.
#define GC_ALIGNED16_DECL(x) __declspec(align(16)) x
#define GC_ALIGNED64_DECL(x) __declspec(align(64)) x
@ -101,6 +102,7 @@ typedef union _LARGE_INTEGER
#endif
// Alignment helpers (GNU __attribute((aligned)) flavour). Each macro wraps a
// declarator so that the declared object gets the stated byte alignment, e.g.
//   u8 GC_ALIGNED32(buffer[64]);
#define GC_ALIGNED16(x) __attribute((aligned(16))) x
// Must really be 32: this previously expanded to aligned(16), which silently
// under-aligned every object declared with GC_ALIGNED32.
#define GC_ALIGNED32(x) __attribute((aligned(32))) x
#define GC_ALIGNED64(x) __attribute((aligned(64))) x
// The _DECL variants expand to exactly the same text as the plain ones.
#define GC_ALIGNED16_DECL(x) __attribute((aligned(16))) x
#define GC_ALIGNED64_DECL(x) __attribute((aligned(64))) x

View File

@ -974,8 +974,7 @@ namespace Gen
void MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);}
void MOVQ_xmm(X64Reg dest, OpArg arg) {
if (dest > 7)
{
#ifdef _M_X64
// Alternate encoding
// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
arg.operandReg = dest;
@ -984,14 +983,13 @@ namespace Gen
Write8(0x0f);
Write8(0x6E);
arg.WriteRest(0);
} else {
#else
arg.operandReg = dest;
arg.WriteRex(false);
Write8(0xF3);
Write8(0x0f);
Write8(0x7E);
arg.WriteRest(0);
}
#endif
}
void MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);}

View File

@ -93,7 +93,7 @@ Common::Event emuThreadGoing;
bool PanicAlertToVideo(const char* text, bool yes_no)
{
PluginVideo::Video_AddMessage(text,3000);
DisplayMessage(text, 3000);
return true;
}
@ -140,13 +140,24 @@ bool Init(const SCoreStartupParameter _CoreParameter)
// all right ... here we go
Host_SetWaitCursor(false);
PluginVideo::Video_AddMessage("Emulation started.",3000);
DisplayMessage("Emulation started.", 3000);
//RegisterPanicAlertHandler(PanicAlertToVideo);
return true;
}
// Forwards a C-string message and its display time (time_in_ms) to
// PluginVideo::Video_AddMessage.
void DisplayMessage(const char *message, int time_in_ms)
{
	PluginVideo::Video_AddMessage(message, time_in_ms);
}

// std::string convenience overload: hands the underlying C string to the
// const char * overload above, which does the actual forwarding.
void DisplayMessage(const std::string &message, int time_in_ms)
{
	const char *text = message.c_str();
	DisplayMessage(text, time_in_ms);
}
// Called from GUI thread or VI thread
void Stop() // - Hammertime!
{

View File

@ -54,6 +54,8 @@ namespace Core
extern bool bWriteTrace;
void StartTrace(bool write);
void DisplayMessage(const std::string &message, int time_in_ms); // This displays messages in a user-visible way.
void DisplayMessage(const char *message, int time_in_ms); // This displays messages in a user-visible way.
int SyncTrace();
void SetBlockStart(u32 addr);

View File

@ -84,6 +84,8 @@ void CEXIMemoryCard::Flush()
}
fwrite(memory_card_content, memory_card_size, 1, pFile);
fclose(pFile);
Core::DisplayMessage(StringFromFormat("Wrote memory card contents to %s", m_strFilename.c_str()), 4000);
}
void CEXIMemoryCard::FlushCallback(u64 userdata, int cyclesLate)

View File

@ -37,7 +37,7 @@ namespace GPFifo
// Both of these should actually work! Only problem is that we have to decide at run time,
// the same function could use both methods. Compile 2 different versions of each such block?
u8 m_gatherPipe[GATHER_PIPE_SIZE*16]; //more room, for the fastmodes
u8 GC_ALIGNED32(m_gatherPipe[GATHER_PIPE_SIZE*16]); //more room, for the fastmodes
// pipe counter
u32 m_gatherPipeCount = 0;

View File

@ -28,6 +28,11 @@ enum
GATHER_PIPE_SIZE = 32
};
extern u8 m_gatherPipe[GATHER_PIPE_SIZE*16]; //more room, for the fastmodes
// pipe counter
extern u32 m_gatherPipeCount;
// Init
void Init();

View File

@ -183,6 +183,20 @@ void Generate()
SetJumpTarget(pLesser);
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
RET();
// Fast write routines - special case the most common hardware write
// TODO: use this.
// Even in x86, the param values will be in the right registers.
/*
const u8 *fastMemWrite8 = AlignCode16();
CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
FixupBranch skip_fast_write = J_CC(CC_NE, false);
MOV(32, EAX, M(&m_gatherPipeCount));
MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
ADD(32, 1, M(&m_gatherPipeCount));
RET();
SetJumpTarget(skip_fast_write);
CALL((void *)&Memory::Write_U8);*/
}
#elif defined(_M_X64)

View File

@ -139,6 +139,15 @@ namespace Jit64
fpr.UnlockAll();
}
// fmrx: no dedicated code is emitted here; after INSTRUCTION_START the
// instruction is simply handed to Default(inst).
void fmrx(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	Default(inst);
}
void fcmpx(UGeckoInstruction inst)
{
INSTRUCTION_START;

View File

@ -27,6 +27,7 @@
#include "../../HW/PixelEngine.h"
#include "../../HW/Memmap.h"
#include "../PPCTables.h"
#include "CPUDetect.h"
#include "x64Emitter.h"
#include "ABI.h"
@ -51,6 +52,7 @@ namespace Jit64
// Byte-permutation tables used as PSHUFB control masks by the SSSE3 load/store
// paths in this file. Entry i is the index of the source byte that ends up in
// destination byte i.
// Reverses bytes 0-3; bytes 4-15 stay where they are.
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
// Reverses bytes 0-3 and bytes 4-7 independently; bytes 8-15 stay where they are.
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
// Reverses bytes 0-7; bytes 8-15 stay where they are.
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
// Reverses bytes 0-7 and writes that same reversed group into bytes 8-15 as well.
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
// Reverses bytes 0-7 and bytes 8-15 independently.
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
static u64 GC_ALIGNED16(temp64);
@ -115,12 +117,18 @@ void lfd(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
fpr.LoadToX64(d, false);
fpr.Lock(d);
// PSHUFB is an SSSE3 instruction, so this fast path has to be gated on the
// SSSE3 flag (it was checking the SSE3 flag), like the other PSHUFB users in
// this file.
if (cpu_info.bSSSE3NewInstructions) {
	X64Reg xd = fpr.RX(d);
	// Load the 8 bytes, then byte-reverse them into both halves of xd with one shuffle.
	MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
	PSHUFB(xd, M((void *)bswapShuffle1x8Dupe));
} else {
	// Fallback: byte-swap through a general-purpose register and temp64,
	// then duplicate the value into both halves with MOVDDUP.
	MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
	BSWAP(64, EAX);
	MOV(64, M(&temp64), R(EAX));
	fpr.Lock(d);
	fpr.LoadToX64(d, false);
	MOVDDUP(fpr.RX(d), M(&temp64));
}
gpr.UnlockAll();
fpr.UnlockAll();
}
@ -128,7 +136,10 @@ void lfd(UGeckoInstruction inst)
void stfd(UGeckoInstruction inst)
{
INSTRUCTION_START;
if (!cpu_info.bSSSE3NewInstructions)
{
DISABLE_32BIT;
}
int s = inst.RS;
int a = inst.RA;
if (!a)
@ -140,12 +151,25 @@ void stfd(UGeckoInstruction inst)
gpr.Lock(a);
fpr.Lock(s);
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(a));
#ifdef _M_IX86
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
#endif
if (cpu_info.bSSSE3NewInstructions) {
MOVAPS(XMM0, fpr.R(s));
PSHUFB(XMM0, M((void *)bswapShuffle1x8));
#ifdef _M_X64
MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0);
#else
MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base + offset), XMM0);
#endif
} else {
fpr.LoadToX64(s, true, false);
MOVSD(M(&temp64), fpr.RX(s));
MOV(32, R(ABI_PARAM1), gpr.R(a));
MOV(64, R(EAX), M(&temp64));
BSWAP(64, EAX);
MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX));
}
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
@ -154,6 +178,7 @@ void stfd(UGeckoInstruction inst)
void stfs(UGeckoInstruction inst)
{
INSTRUCTION_START;
DISABLE_32BIT;
bool update = inst.OPCD & 1;
int s = inst.RS;
int a = inst.RA;
@ -192,10 +217,24 @@ void lfsx(UGeckoInstruction inst)
MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA));
if (cpu_info.bSSSE3NewInstructions) {
// PanicAlert("SSE3 supported!");
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
#ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVD_xmm(r, MDisp(EAX, (u32)Memory::base));
#else
MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
#endif
PSHUFB(r, M((void *)bswapShuffle1x4));
CVTSS2SD(r, R(r));
MOVDDUP(r, R(r));
} else {
UnsafeLoadRegToReg(EAX, EAX, 32, false);
MOV(32, M(&temp32), R(EAX));
CVTSS2SD(XMM0, M(&temp32));
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
}
fpr.UnlockAll();
}

View File

@ -27,6 +27,7 @@
#include "../../HW/PixelEngine.h"
#include "../../HW/Memmap.h"
#include "../PPCTables.h"
#include "CPUDetect.h"
#include "x64Emitter.h"
#include "ABI.h"
@ -225,6 +226,8 @@ void psq_st(UGeckoInstruction inst)
}
}
// PSHUFB control masks; entry i is the index of the source byte that ends up
// in destination byte i.
// Reverses bytes 0-3 and bytes 4-7 independently; bytes 8-15 stay where they
// are. Used by the SSSE3 QUANTIZE_FLOAT path of psq_l.
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
// Identity permutation: every byte stays in place.
const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
void psq_l(UGeckoInstruction inst)
{
@ -247,21 +250,38 @@ void psq_l(UGeckoInstruction inst)
}
int offset = inst.SIMM_12;
switch (ldType) {
case QUANTIZE_FLOAT:
case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address.
{
#ifdef _M_X64
gpr.LoadToX64(inst.RA);
gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false);
if (cpu_info.bSSSE3NewInstructions) {
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
PSHUFB(xd, M((void *)pbswapShuffle2x4));
CVTPS2PD(xd, R(xd));
} else {
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
BSWAP(64, RAX);
MOV(64, M(&psTemp[0]), R(RAX));
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0]));
SHUFPD(r, R(r), 1);
}
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
#else
if (cpu_info.bSSSE3NewInstructions) {
gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false);
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
MOV(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
PSHUFB(xd, M((void *)pbswapShuffle2x4));
CVTPS2PD(xd, R(xd));
} else {
gpr.FlushR(ECX);
gpr.LockX(ECX);
gpr.LoadToX64(inst.RA);
@ -277,9 +297,10 @@ void psq_l(UGeckoInstruction inst)
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0]));
gpr.UnlockAllX();
}
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
gpr.UnlockAllX();
break;
#endif
}

View File

@ -33,7 +33,7 @@ public:
{
TCacheEntry() : texture(0), addr(0), hash(0), w(0), h(0), isRenderTarget(false), isUpsideDown(false), isNonPow2(true), bHaveMipMaps(false) { mode.hex = 0xFCFCFCFC; }
u32 texture;
GLuint texture;
u32 addr;
u32 hash;
u32 paletteHash;