Lots of various changes. CPU detect fix. Maybe a minor speed increase. CPU bugs remain.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@180 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-12 20:05:45 +00:00
parent 29102ecbc6
commit 575bdd9166
27 changed files with 400 additions and 192 deletions

View File

@ -659,6 +659,14 @@
RelativePath=".\Src\Thread.h"
>
</File>
<File
RelativePath=".\Src\Thunk.cpp"
>
</File>
<File
RelativePath=".\Src\Thunk.h"
>
</File>
<File
RelativePath=".\Src\Timer.cpp"
>

View File

@ -103,6 +103,11 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack();
void ABI_PushAllCallerSavedRegsAndAdjustStack();
void ABI_PopAllCallerSavedRegsAndAdjustStack();
// Number of XMM registers that generated code may use on this build:
// 8 when _M_IX86 is defined, 16 otherwise.
inline int ABI_GetNumXMMRegs()
{
#ifndef _M_IX86
	return 16;
#else
	return 8;
#endif
}
#endif // _JIT_ABI_H

View File

@ -21,7 +21,7 @@
//#include <config/i386/cpuid.h>
#include <xmmintrin.h>
// Stub __cpuid: performs no query and leaves info unmodified.
void __cpuid(int info[4], int x) {}
// Stub __cpuid: reports "no information" by zeroing all four output words.
// info is an array parameter and therefore really an int*, so sizeof(info)
// would only be the size of a pointer; clear the four ints explicitly instead.
// x (the requested info type) is ignored.
void __cpuid(int info[4], int x) { memset(info, 0, 4 * sizeof(info[0])); }
#endif
@ -72,14 +72,11 @@ void CPUInfoStruct::Detect()
isAMD = true;
}
if (nIds >= 2)
{
// Get the information associated with each valid Id
for (unsigned int i = 0; i <= nIds; ++i)
{
__cpuid(CPUInfo, i);
__cpuid(CPUInfo, 1);
// Interpret CPU feature information.
if (i == 1)
{
nSteppingID = CPUInfo[0] & 0xf;
nModel = (CPUInfo[0] >> 4) & 0xf;
nFamily = (CPUInfo[0] >> 8) & 0xf;
@ -89,8 +86,8 @@ void CPUInfoStruct::Detect()
nBrandIndex = CPUInfo[1] & 0xff;
nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8;
nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff;
bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false;
bSSSE3NewInstructions = (CPUInfo[2] & 0x200) || false;
bSSE3 = (CPUInfo[2] & 0x1) || false;
bSSSE3 = (CPUInfo[2] & 0x200) || false;
bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false;
bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false;
bThermalMonitor2 = (CPUInfo[2] & 0x100) || false;
@ -111,8 +108,10 @@ void CPUInfoStruct::Detect()
bSSE4_2 = true;
}
}
}
if (bSSE3)
{
// Only SSE3 CPU-s support extended infotypes
// Calling __cpuid with 0x80000000 as the InfoType argument
// gets the number of valid extended IDs.
__cpuid(CPUInfo, 0x80000000);
@ -162,6 +161,7 @@ void CPUInfoStruct::Detect()
// numCores = coresPerDie;
}
}
}
// Display all the information in user-friendly format.
// printf_s("\n\nCPU String: %s\n", CPUString);
@ -222,9 +222,9 @@ void CPUInfoStruct::Detect()
nIds <<= 1;
bFXSAVE_FXRSTOR = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1;
bSSEExtensions = (nFeatureInfo & nIds) ? true : false;
bSSE = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1;
bSSE2Extensions = (nFeatureInfo & nIds) ? true : false;
bSSE2 = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1;
bSelfSnoop = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1;

View File

@ -77,16 +77,16 @@ struct CPUInfoStruct
bool bThermalMonitorandClockCtrl;
bool bMMXTechnology;
bool bFXSAVE_FXRSTOR;
bool bSSEExtensions;
bool bSSE2Extensions;
bool bSSE3NewInstructions;
bool bSSSE3NewInstructions;
bool bSelfSnoop;
bool bHyper_threadingTechnology;
bool bThermalMonitor;
bool bUnknown4;
bool bPendBrkEN;
bool bSSE;
bool bSSE2;
bool bSSE3;
bool bSSSE3;
bool bPOPCNT;
bool bSSE4_1;
bool bSSE4_2;

View File

@ -17,6 +17,7 @@ files = ["ABI.cpp",
"PortableSockets.cpp",
"StringUtil.cpp",
"TestFramework.cpp",
"Thunk.cpp",
"Timer.cpp",
"Thread.cpp",
"x64Emitter.cpp",

View File

@ -0,0 +1,147 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include <map>
#include "Common.h"
#include "Thunk.h"
#include "x64Emitter.h"
#include "MemoryUtil.h"
#include "ABI.h"
using namespace Gen;
// Size in bytes of the executable arena that holds the generated thunks (1 MiB).
// Parenthesized so the macro expands correctly inside larger expressions.
#define THUNK_ARENA_SIZE (1024*1024*1)
namespace {
// Cache of already generated thunks: wrapped function pointer -> thunk entry point.
static std::map<void *, const u8 *> thunks;
// Global save area for XMM registers; save_regs/load_regs use one 16-byte slot
// per register, indexed by register number.
u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]);
// Global save area for general purpose registers; save_regs/load_regs use
// 8-byte slots on _M_X64 builds and 4-byte slots otherwise.
u8 GC_ALIGNED32(saved_gpr_state[16 * 8]);
}
// Base of the executable arena allocated in Thunk_Init (0 when not allocated).
static u8 *thunk_memory;
// Current emit position inside the arena; advanced as code is generated.
static u8 *thunk_code;
// Entry points of the shared register save / restore routines emitted by Thunk_Init.
static const u8 *save_regs;
static const u8 *load_regs;
// NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed.
u32 saved_return;
// Allocates the executable thunk arena and emits the two shared helper routines,
// save_regs and load_regs, at its start. Every thunk built by ProtectFunction
// calls these helpers around the wrapped function.
// NOTE(review): the result of AllocateExecutableMemory is not checked here.
void Thunk_Init()
{
thunk_memory = (u8 *)AllocateExecutableMemory(THUNK_ARENA_SIZE);
thunk_code = thunk_memory;
// Redirect the emitter into the arena. When ctx is destroyed at the end of this
// function, thunk_code is updated to point just past the code emitted below.
GenContext ctx(&thunk_code);
// save_regs: store registers into the global save areas, then return.
save_regs = GetCodePtr();
// XMM0 and XMM1 are not saved (the loop starts at 2); each register gets a 16-byte slot.
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i));
#ifdef _M_X64
MOV(64, M(saved_gpr_state + 0 ), R(RCX));
MOV(64, M(saved_gpr_state + 8 ), R(RDX));
MOV(64, M(saved_gpr_state + 16), R(R8) );
MOV(64, M(saved_gpr_state + 24), R(R9) );
MOV(64, M(saved_gpr_state + 32), R(R10));
MOV(64, M(saved_gpr_state + 40), R(R11));
#ifndef _WIN32
// RSI/RDI are only saved on non-Windows builds
// (presumably because they are callee-saved in the Windows x64 ABI - confirm).
MOV(64, M(saved_gpr_state + 48), R(RSI));
MOV(64, M(saved_gpr_state + 56), R(RDI));
#endif
#else
// Non-x64 build: only the RCX and RDX operands are saved, as 32-bit values in 4-byte slots.
MOV(32, M(saved_gpr_state + 0 ), R(RCX));
MOV(32, M(saved_gpr_state + 4 ), R(RDX));
#endif
RET();
// load_regs: exact mirror of save_regs, reloading every register stored above.
load_regs = GetCodePtr();
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16));
#ifdef _M_X64
MOV(64, R(RCX), M(saved_gpr_state + 0 ));
MOV(64, R(RDX), M(saved_gpr_state + 8 ));
MOV(64, R(R8) , M(saved_gpr_state + 16));
MOV(64, R(R9) , M(saved_gpr_state + 24));
MOV(64, R(R10), M(saved_gpr_state + 32));
MOV(64, R(R11), M(saved_gpr_state + 40));
#ifndef _WIN32
MOV(64, R(RSI), M(saved_gpr_state + 48));
MOV(64, R(RDI), M(saved_gpr_state + 56));
#endif
#else
MOV(32, R(RCX), M(saved_gpr_state + 0 ));
MOV(32, R(RDX), M(saved_gpr_state + 4 ));
#endif
RET();
}
// Forgets all generated thunks and rewinds the emit position to the start of the arena.
// NOTE(review): Thunk_Init emits save_regs/load_regs at the start of the arena, so
// thunks generated after a reset would be written over those helpers unless
// Thunk_Init runs again first - confirm the intended usage.
void Thunk_Reset()
{
thunks.clear();
thunk_code = thunk_memory;
}
// Tears the thunk system down: drops the thunk cache, releases the executable
// arena and clears both arena pointers so later use can be detected.
void Thunk_Shutdown()
{
	// Clear the cache first so no stale entry points survive the free below.
	Thunk_Reset();
	FreeMemoryPages(thunk_memory, THUNK_ARENA_SIZE);
	thunk_code = thunk_memory = 0;
}
// Returns the entry point of a thunk that wraps `function`, generating it on first use.
//
// The thunk calls save_regs, then `function`, then load_regs, so the registers
// handled by those helpers have the same values after the call as before it.
// Thunks are cached: calling this again with the same `function` returns the
// same entry point.
//
//   function   - address of the C/C++ function to wrap.
//   num_params - number of 4-byte stack parameters to forward; only used by the
//                non-x64 code path, which re-pushes them for the wrapped call.
//
// Returns 0 (after raising a PanicAlert) if the thunk arena has not been allocated.
void *ProtectFunction(void *function, int num_params)
{
	std::map<void *, const u8 *>::iterator iter;
	iter = thunks.find(function);
	if (iter != thunks.end())
		return (void *)iter->second;
	if (!thunk_memory)
	{
		PanicAlert("Trying to protect functions before the emu is started. Bad bad bad.");
		// There is no arena to emit into; bail out instead of generating code
		// through a null pointer.
		return 0;
	}

	// Redirect the emitter into the arena for the lifetime of `gen`.
	GenContext gen(&thunk_code);
	const u8 *call_point = GetCodePtr();
	// Make sure to align stack.
#ifdef _M_X64
#ifdef _WIN32
	SUB(64, R(ESP), Imm8(0x28));
#else
	SUB(64, R(ESP), Imm8(0x8));
#endif
	CALL((void*)save_regs);
	CALL((void*)function);
	CALL((void*)load_regs);
#ifdef _WIN32
	ADD(64, R(ESP), Imm8(0x28));
#else
	ADD(64, R(ESP), Imm8(0x8));
#endif
	RET();
#else
	//INT3();
	CALL((void*)save_regs);
	// Re-push parameters from previous stack frame
	for (int i = 0; i < num_params; i++) {
		// ESP is changing, so we do not need i
		PUSH(32, MDisp(ESP, (num_params) * 4));
	}
	CALL(function);
	// Pop the re-pushed parameters again.
	if (num_params)
		ADD(32, R(ESP), Imm8(num_params * 4));
	CALL((void*)load_regs);
	RET();
#endif

	thunks[function] = call_point;
	return (void *)call_point;
}

View File

@ -0,0 +1,39 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _THUNK_H
#define _THUNK_H
// This small module creates wrappers ("thunks") around C/C++ functions. A thunk saves
// register state before entering the wrapped function and restores it upon exit.
// This is required to be able to selectively call functions from generated code,
// without inflicting the performance hit and increase of complexity that it would
// mean to make the generated code itself protect its registers around every call.
// This process is called thunking.

// There will only ever be one level of thunking on the stack, plus,
// we don't want to pollute the stack, so we store away regs somewhere global.

// NOT THREAD SAFE. This may only be used from the CPU thread.
// Any other thread using this stuff will be FATAL.

// Allocates the thunk code arena and generates the shared save/restore routines.
// Must be called before ProtectFunction.
void Thunk_Init();
// Discards all previously generated thunks.
void Thunk_Reset();
// Discards all thunks and frees the thunk code arena.
void Thunk_Shutdown();
// Returns a pointer to a thunk wrapping `function`; repeated calls with the same
// function return the same thunk. `num_params` is the number of stack parameters
// the thunk has to forward (used by the 32-bit code path).
void *ProtectFunction(void *function, int num_params);
#endif

View File

@ -1156,7 +1156,7 @@ namespace Gen
void MOVDDUP(X64Reg regOp, OpArg arg)
{
// TODO(ector): check SSE3 flag
if (cpu_info.bSSE3NewInstructions)
if (cpu_info.bSSE3)
{
WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3
}
@ -1205,7 +1205,7 @@ namespace Gen
}
void PSHUFB(X64Reg dest, OpArg arg) {
if (!cpu_info.bSSE3NewInstructions) {
if (!cpu_info.bSSSE3) {
PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer.");
}
Write8(0x66);

View File

@ -14,6 +14,9 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
#ifndef _DOLPHIN_INTEL_CODEGEN
#define _DOLPHIN_INTEL_CODEGEN
@ -92,6 +95,26 @@ namespace Gen
const u8 *GetCodePtr();
u8 *GetWritableCodePtr();
// Safe way to temporarily redirect the code generator.
//
// On construction, remembers the emitter's current write pointer and points the
// emitter at *code_ptr_ptr_. On destruction, stores the emitter's (advanced)
// write pointer back through code_ptr_ptr_ and restores the remembered pointer.
//
// Non-copyable: a copy would run the restore step a second time.
class GenContext
{
	u8 **code_ptr_ptr;  // where to write the advanced code pointer back to
	u8 *saved_ptr;      // emitter position to restore on destruction

	// Copying is intentionally disabled (declared, never defined).
	GenContext(const GenContext &);
	GenContext &operator=(const GenContext &);
public:
	explicit GenContext(u8 **code_ptr_ptr_)
		: code_ptr_ptr(code_ptr_ptr_), saved_ptr(GetWritableCodePtr())
	{
		SetCodePtr(*code_ptr_ptr);
	}
	~GenContext()
	{
		*code_ptr_ptr = GetWritableCodePtr();
		SetCodePtr(saved_ptr);
	}
};
enum NormalOp {
nrmADD,
nrmADC,

View File

@ -26,6 +26,7 @@
#include "Console.h"
#include "Core.h"
#include "CPUDetect.h"
#include "CoreTiming.h"
#include "Boot/Boot.h"
#include "PatchEngine.h"
@ -140,7 +141,9 @@ bool Init(const SCoreStartupParameter _CoreParameter)
// all right ... here we go
Host_SetWaitCursor(false);
DisplayMessage("Emulation started.", 3000);
DisplayMessage(cpu_info.CPUBrandString, 3000);
DisplayMessage(_CoreParameter.m_strFilename, 3000);
//RegisterPanicAlertHandler(PanicAlertToVideo);

View File

@ -16,6 +16,7 @@
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../Core.h"
#include "HW.h"
#include "../PowerPC/PowerPC.h"
@ -42,6 +43,7 @@ namespace HW
{
void Init()
{
Thunk_Init(); // not really hw, but this way we know it's inited first :P
// Init the whole Hardware
PixelEngine::Init();
CommandProcessor::Init();
@ -72,5 +74,6 @@ namespace HW
WII_IPC_HLE_Interface::Shutdown();
WII_IPCInterface::Shutdown();
Thunk_Shutdown();
}
}

View File

@ -1054,10 +1054,14 @@ void SDRUpdated()
u32 CheckDTLB(u32 _Address, XCheckTLBFlag _Flag)
{
PanicAlert("TLB: %s unknown memory (0x%08x)\n"
"This is either the game crashing randomly, or a TLB write."
"Several games uses the TLB to map memory. This\n"
"function is not support in dolphin. Cheers!",
"function is not supported in Dolphin. "
"Also, unfortunately there is no way to recover from this error,"
"so Dolphin will now exit abruptly. Sorry!",
_Flag == FLAG_WRITE ? "Write to" : "Read from", _Address);
exit(0);
u32 sr = PowerPC::ppcState.sr[EA_SR(_Address)];
u32 offset = EA_Offset(_Address); // 12 bit

View File

@ -132,7 +132,7 @@ void CPeripheralInterface::Write32(const u32 _uValue, const u32 _iAddress)
if ((_uValue != 0x80000001) && (_uValue != 0x80000005)) // DVDLowReset
{
TCHAR szTemp[256];
sprintf(szTemp, "Unknown write to PI_RESET_CODE (%08x)", _uValue);
sprintf(szTemp, "Game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue);
PanicAlert(szTemp);
}
}

View File

@ -145,7 +145,7 @@ void CInterpreter::lmw(UGeckoInstruction _inst)
u32 TempReg = Memory::Read_U32(uAddress);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
PanicAlert("DSI exception in lmv. This is very bad.");
PanicAlert("DSI exception in lmv.");
return;
}
@ -500,9 +500,9 @@ void CInterpreter::lswi(UGeckoInstruction _inst)
u32 n;
if (_inst.NB == 0)
n=32;
n = 32;
else
n=_inst.NB;
n = _inst.NB;
int r = _inst.RD - 1;
int i = 0;
@ -511,22 +511,22 @@ void CInterpreter::lswi(UGeckoInstruction _inst)
if (i==0)
{
r++;
r&=31;
r &= 31;
m_GPR[r] = 0;
}
u32 TempValue = Memory::Read_U8(EA) << (24-i);
u32 TempValue = Memory::Read_U8(EA) << (24 - i);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
PanicAlert("DSI exception in lsw. This is very bad.");
PanicAlert("DSI exception in lsw.");
return;
}
m_GPR[r] |= TempValue;
i+=8;
if (i==32)
i=0;
i += 8;
if (i == 32)
i = 0;
EA++;
n--;
}
@ -546,26 +546,26 @@ void CInterpreter::stswi(UGeckoInstruction _inst)
u32 n;
if (_inst.NB == 0)
n=32;
n = 32;
else
n=_inst.NB;
n = _inst.NB;
int r = _inst.RS - 1;
int i = 0;
while (n>0)
while (n > 0)
{
if (i==0)
if (i == 0)
{
r++;
r&=31;
r &= 31;
}
Memory::Write_U8((m_GPR[r] >> (24-i)) & 0xFF, EA);
Memory::Write_U8((m_GPR[r] >> (24 - i)) & 0xFF, EA);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
return;
i+=8;
if (i==32)
i=0;
i += 8;
if (i == 32)
i = 0;
EA++;
n--;
}

View File

@ -231,9 +231,8 @@ namespace Jit64
void WriteCallInterpreter(UGeckoInstruction _inst)
{
gpr.Flush(js.op);
if (PPCTables::UsesFPU(_inst))
fpr.Flush(js.op);
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
if (js.isLastInstruction)
{
MOV(32, M(&PC), Imm32(js.compilerPC));
@ -250,7 +249,8 @@ namespace Jit64
void HLEFunction(UGeckoInstruction _inst)
{
FlushRegCaches();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
MOV(32, R(EAX), M(&NPC));
WriteExitDestInEAX(0);

View File

@ -24,6 +24,7 @@
#include "x64Emitter.h"
#include "ABI.h"
#include "Thunk.h"
#include "x64Analyzer.h"
#include "StringUtil.h"
@ -109,7 +110,7 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress)
// break;
case 4:
// THIS FUNCTION CANNOT TOUCH FLOATING POINT REGISTERS.
CALL((void *)&Memory::Read_U32);
CALL(ProtectFunction((void *)&Memory::Read_U32, 1));
break;
default:
BackPatchError(StringFromFormat("We don't handle the size %i yet in backpatch", info.operandSize), codePtr, emAddress);

View File

@ -259,12 +259,12 @@ namespace Jit64
{
#ifdef _M_X64
#ifdef _WIN32
RSI, RDI, R12, R13, R14, R8, R9, RDX, R10, R11 //, RCX
RSI, RDI, R12, R13, R14, R8, R9, R10, R11 //, RCX
#else
R12, R13, R14, R8, R9, R10, R11, RSI, RDI //, RCX
#endif
#elif _M_IX86
ESI, EDI, EBX, EBP, EDX
ESI, EDI, EBX, EBP, EDX, ECX,
#endif
};
count = sizeof(allocationOrder) / sizeof(const int);
@ -412,10 +412,7 @@ namespace Jit64
if (regs[i].location.IsSimpleReg())
{
X64Reg xr = RX(i);
if (mode != FLUSH_VOLATILE || IsXRegVolatile(xr))
{
StoreFromX64(i);
}
xregs[xr].dirty = false;
}
else if (regs[i].location.IsImm())

View File

@ -25,7 +25,6 @@ namespace Jit64
using namespace Gen;
enum FlushMode
{
FLUSH_VOLATILE,
// FLUSH_ALLNONSTATIC,
FLUSH_ALL
};

View File

@ -48,7 +48,7 @@ namespace Jit64
void rfi(UGeckoInstruction _inst)
{
FlushRegCaches();
//Bits SRR1[0,5-9,16<31>23, 25<32>27, 30<33>31] are placed into the corresponding bits of the MSR.
//Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed into the corresponding bits of the MSR.
//MSR[13] is set to 0.
const int mask = 0x87C0FF73;
// MSR = (MSR & ~mask) | (SRR1 & mask);
@ -105,7 +105,7 @@ namespace Jit64
const bool only_counter_check = ((inst.BO >> 4) & 1);
const bool only_condition_check = ((inst.BO >> 2) & 1);
if (only_condition_check && only_counter_check)
PanicAlert("Stupid bcx encountered. Likely bad or corrupt code.");
PanicAlert("Bizarre bcx encountered. Likely bad or corrupt code.");
bool doFullTest = (inst.BO & 16) == 0 && (inst.BO & 4) == 0;
bool ctrDecremented = false;
@ -182,7 +182,7 @@ namespace Jit64
bool fastway = true;
if((inst.BO & 16) == 0)
if ((inst.BO & 16) == 0)
{
PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex);
_assert_msg_(DYNA_REC, 0, "Bizarro bcctrx");

View File

@ -27,6 +27,7 @@
#ifdef _WIN32
#define INSTRUCTION_START
//#define INSTRUCTION_START Default(inst); return;
#else
#define INSTRUCTION_START Default(inst); return;
#endif

View File

@ -317,8 +317,7 @@ namespace Jit64
{
INSTRUCTION_START;
int a = inst.RA, d = inst.RD;
gpr.FlushR(ECX);
gpr.LockX(ECX);
gpr.FlushLockX(ECX);
gpr.Lock(a, d);
if (a != d)
gpr.LoadToX64(d, false, true);
@ -376,8 +375,7 @@ namespace Jit64
{
INSTRUCTION_START;
int a = inst.RA, d = inst.RD;
gpr.FlushR(EDX);
gpr.LockX(EDX);
gpr.FlushLockX(EDX);
gpr.Lock(a, d);
if (d != a) {
gpr.LoadToX64(d, false, true);
@ -396,8 +394,7 @@ namespace Jit64
{
INSTRUCTION_START;
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushR(EDX);
gpr.LockX(EDX);
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d);
if (d != a && d != b) {
gpr.LoadToX64(d, false, true);
@ -420,8 +417,7 @@ namespace Jit64
{
INSTRUCTION_START;
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushR(EDX);
gpr.LockX(EDX);
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d);
if (d != a && d != b) {
gpr.LoadToX64(d, false, true);
@ -450,8 +446,7 @@ namespace Jit64
Default(inst); return;
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushR(EDX);
gpr.LockX(EDX);
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d);
if (d != a && d != b) {
gpr.LoadToX64(d, false, true);
@ -534,8 +529,7 @@ namespace Jit64
{
INSTRUCTION_START;
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushR(ECX);
gpr.LockX(ECX);
gpr.FlushLockX(ECX);
gpr.Lock(a, b, d);
if (d != a && d != b)
gpr.LoadToX64(d, false);
@ -649,8 +643,7 @@ namespace Jit64
}
u32 mask = Helper_Mask(inst.MB, inst.ME);
gpr.FlushR(ECX);
gpr.LockX(ECX);
gpr.FlushLockX(ECX);
gpr.Lock(a, b, s);
MOV(32, R(EAX), gpr.R(s));
MOV(32, R(ECX), gpr.R(b));
@ -691,8 +684,7 @@ namespace Jit64
int a = inst.RA;
int b = inst.RB;
int s = inst.RS;
gpr.FlushR(ECX);
gpr.LockX(ECX);
gpr.FlushLockX(ECX);
gpr.Lock(a, b, s);
gpr.LoadToX64(a, a == s || a == b || s == b, true);
MOV(32, R(ECX), gpr.R(b));
@ -719,8 +711,7 @@ namespace Jit64
int a = inst.RA;
int b = inst.RB;
int s = inst.RS;
gpr.FlushR(ECX);
gpr.LockX(ECX);
gpr.FlushLockX(ECX);
gpr.Lock(a, b, s);
gpr.LoadToX64(a, a == s || a == b || s == b, true);
MOV(32, R(ECX), gpr.R(b));

View File

@ -19,6 +19,7 @@
// Should give a very noticeable speed boost to paired single heavy code.
#include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h"
#include "../../Core.h"
@ -82,9 +83,9 @@ namespace Jit64
SetJumpTarget(argh);
switch (accessSize)
{
case 32: ABI_CallFunctionR((void *)&Memory::Read_U32, reg); break;
case 16: ABI_CallFunctionR((void *)&Memory::Read_U16, reg); break;
case 8: ABI_CallFunctionR((void *)&Memory::Read_U8, reg); break;
case 32: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
case 16: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U16, 1), reg); break;
case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
}
SetJumpTarget(arg2);
}
@ -97,9 +98,9 @@ namespace Jit64
BSWAP(32, reg_value);
#ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base), R(reg_value));
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#else
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, 0), R(reg_value));
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
#endif
}
@ -113,17 +114,16 @@ namespace Jit64
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
FixupBranch arg2 = J();
SetJumpTarget(argh);
ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2);
ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
SetJumpTarget(arg2);
}
void lbzx(UGeckoInstruction inst)
{
INSTRUCTION_START;
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.FlushLockX(ABI_PARAM1);
if (b == d || a == d)
gpr.LoadToX64(d, true, true);
else
@ -134,6 +134,7 @@ namespace Jit64
SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
gpr.UnlockAllX();
}
void lXz(UGeckoInstruction inst)
@ -145,7 +146,6 @@ namespace Jit64
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
// Will give nice boost to dual core mode
// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
if (!Core::GetStartupParameter().bUseDualCore &&
inst.OPCD == 32 &&
(inst.hex & 0xFFFF0000) == 0x800D0000 &&
@ -183,8 +183,6 @@ namespace Jit64
if (true) {
#endif
// Safe and boring
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
@ -221,8 +219,6 @@ namespace Jit64
int a = inst.RA;
s32 offset = (s32)(s16)inst.SIMM_16;
// Safe and boring
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
@ -272,8 +268,6 @@ namespace Jit64
s32 offset = (s32)(s16)inst.SIMM_16;
if (a || update)
{
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
int accessSize;
switch (inst.OPCD & ~1)
{
@ -358,6 +352,7 @@ namespace Jit64
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update && offset)
{
gpr.LoadToX64(a, true, true);
MOV(32, gpr.R(a), R(ABI_PARAM2));
}
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
@ -380,9 +375,9 @@ namespace Jit64
SetJumpTarget(argh);
switch (accessSize)
{
case 32: ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2); break;
case 16: ABI_CallFunctionRR((void *)&Memory::Write_U16, ABI_PARAM1, ABI_PARAM2); break;
case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, ABI_PARAM1, ABI_PARAM2); break;
case 32: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break;
case 16: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break;
case 8: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break;
}
SetJumpTarget(arg2);
gpr.UnlockAll();

View File

@ -65,7 +65,6 @@ static u32 GC_ALIGNED16(temp32);
void lfs(UGeckoInstruction inst)
{
INSTRUCTION_START;
DISABLE_32BIT;
int d = inst.RD;
int a = inst.RA;
if (!a)
@ -74,15 +73,8 @@ void lfs(UGeckoInstruction inst)
return;
}
s32 offset = (s32)(s16)inst.SIMM_16;
if (jo.noAssumeFPLoadFromMem) {
// We might call a function.
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1);
}
gpr.Lock(d, a);
gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
if (!jo.noAssumeFPLoadFromMem)
{
@ -103,6 +95,7 @@ void lfs(UGeckoInstruction inst)
fpr.UnlockAll();
}
void lfd(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -115,11 +108,12 @@ void lfd(UGeckoInstruction inst)
return;
}
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
fpr.LoadToX64(d, false);
fpr.Lock(d);
if (cpu_info.bSSE3NewInstructions) {
if (cpu_info.bSSSE3) {
X64Reg xd = fpr.RX(d);
MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
PSHUFB(xd, M((void *)bswapShuffle1x8Dupe));
@ -130,13 +124,14 @@ void lfd(UGeckoInstruction inst)
MOVDDUP(fpr.RX(d), M(&temp64));
}
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}
void stfd(UGeckoInstruction inst)
{
INSTRUCTION_START;
if (!cpu_info.bSSSE3NewInstructions)
if (!cpu_info.bSSSE3)
{
DISABLE_32BIT;
}
@ -148,14 +143,14 @@ void stfd(UGeckoInstruction inst)
return;
}
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
fpr.Lock(s);
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(a));
#ifdef _M_IX86
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
#endif
if (cpu_info.bSSSE3NewInstructions) {
if (cpu_info.bSSSE3) {
MOVAPS(XMM0, fpr.R(s));
PSHUFB(XMM0, M((void *)bswapShuffle1x8));
#ifdef _M_X64
@ -175,6 +170,7 @@ void stfd(UGeckoInstruction inst)
fpr.UnlockAll();
}
void stfs(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -185,12 +181,11 @@ void stfs(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16;
if (a && !update)
{
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
MOV(32, R(ABI_PARAM2), gpr.R(a));
ADD(32, R(ABI_PARAM2), Imm32(offset));
if (update && offset)
{
MOV(32, gpr.R(a), R(ABI_PARAM2));
@ -198,7 +193,7 @@ void stfs(UGeckoInstruction inst)
CVTSD2SS(XMM0, fpr.R(s));
MOVSS(M(&temp32), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp32));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, offset);
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
@ -209,6 +204,7 @@ void stfs(UGeckoInstruction inst)
}
}
void lfsx(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -217,8 +213,7 @@ void lfsx(UGeckoInstruction inst)
MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA));
if (cpu_info.bSSSE3NewInstructions) {
// PanicAlert("SSE3 supported!");
if (cpu_info.bSSSE3) {
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
#ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));

View File

@ -20,6 +20,7 @@
#include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h"
#include "../../Core.h"
#include "../../HW/GPFifo.h"
@ -36,7 +37,7 @@
#include "JitAsm.h"
#include "JitRegCache.h"
// #define INSTRUCTION_START Default(inst); return;
//#define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
#ifdef _M_IX86
@ -125,8 +126,6 @@ void psq_st(UGeckoInstruction inst)
if (stType == QUANTIZE_FLOAT)
{
DISABLE_32BIT;
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
@ -147,7 +146,7 @@ void psq_st(UGeckoInstruction inst)
MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
FixupBranch arg2 = J();
SetJumpTarget(argh);
CALL((void *)&WriteDual32);
CALL(ProtectFunction((void *)&WriteDual32, 0));
SetJumpTarget(arg2);
gpr.UnlockAll();
gpr.UnlockAllX();
@ -255,7 +254,7 @@ void psq_l(UGeckoInstruction inst)
#ifdef _M_X64
gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false);
if (cpu_info.bSSSE3NewInstructions) {
if (cpu_info.bSSSE3) {
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
PSHUFB(xd, M((void *)pbswapShuffle2x4));
@ -272,7 +271,7 @@ void psq_l(UGeckoInstruction inst)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
#else
if (cpu_info.bSSSE3NewInstructions) {
if (cpu_info.bSSSE3) {
gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false);
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
@ -282,8 +281,7 @@ void psq_l(UGeckoInstruction inst)
PSHUFB(xd, M((void *)pbswapShuffle2x4));
CVTPS2PD(xd, R(xd));
} else {
gpr.FlushR(ECX);
gpr.LockX(ECX);
gpr.FlushLockX(ECX);
gpr.LoadToX64(inst.RA);
// This can probably be optimized somewhat.
LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));

View File

@ -93,8 +93,6 @@ namespace Jit64
fpr.UnlockAll();
}
//add a, b, c
//mov a, b

View File

@ -59,7 +59,7 @@ namespace Jit64
case SPR_GQR0 + 5:
case SPR_GQR0 + 6:
case SPR_GQR0 + 7:
js.blockSetsQuantizers = false;
js.blockSetsQuantizers = true;
// Prevent recompiler from compiling in old quantizer values.
// TODO - actually save the set state and use it in following quantizer ops.
break;

View File

@ -45,7 +45,7 @@ bool DolphinApp::OnInit()
#ifdef _WIN32
// TODO: if First Boot
if (!cpu_info.bSSE2Extensions)
if (!cpu_info.bSSE2)
{
MessageBox(0, _T("Hi,\n\nDolphin requires that your CPU has support for SSE2 extensions.\n"
"Unfortunately your CPU does not support them, so Dolphin will not run.\n\n"