New LockThreads option. Also added new INI core option - OptimizeQuantizers. Set to False to work around Resident Evil 1 bug (this will slow down other games somewhat).

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@20 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-07-17 18:51:53 +00:00
parent b673d8d770
commit d23af1a15e
10 changed files with 358 additions and 279 deletions

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="Windows-1252"?> <?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject <VisualStudioProject
ProjectType="Visual C++" ProjectType="Visual C++"
Version="8,00" Version="8.00"
Name="zlib" Name="zlib"
ProjectGUID="{3E03C179-8251-46E4-81F4-466F114BAC63}" ProjectGUID="{3E03C179-8251-46E4-81F4-466F114BAC63}"
RootNamespace="zlib" RootNamespace="zlib"
@ -24,6 +24,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="1" CharacterSet="1"
WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -86,6 +87,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="1" CharacterSet="1"
WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -149,7 +151,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="1" CharacterSet="1"
WholeProgramOptimization="1" WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -213,7 +215,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="1" CharacterSet="1"
WholeProgramOptimization="1" WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -278,7 +280,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="1" CharacterSet="1"
WholeProgramOptimization="1" WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -342,7 +344,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="1" CharacterSet="1"
WholeProgramOptimization="1" WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"

View File

@ -885,6 +885,10 @@
RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStore.cpp" RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStore.cpp"
> >
</File> </File>
<File
RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStorePaired.cpp"
>
</File>
<File <File
RelativePath=".\Src\PowerPC\Jit64\Jit_Paired.cpp" RelativePath=".\Src\PowerPC\Jit64\Jit_Paired.cpp"
> >

View File

@ -182,6 +182,7 @@ THREAD_RETURN CpuThread(void *pArg)
CPUCompare::ConnectAsClient(); CPUCompare::ConnectAsClient();
} }
if (_CoreParameter.bLockThreads)
Common::Thread::SetCurrentThreadAffinity(1); //Force to first core Common::Thread::SetCurrentThreadAffinity(1); //Force to first core
// Let's run under memory watch // Let's run under memory watch
@ -208,6 +209,7 @@ THREAD_RETURN EmuThread(void *pArg)
Common::SetCurrentThreadName("Emuthread - starting"); Common::SetCurrentThreadName("Emuthread - starting");
const SCoreStartupParameter& _CoreParameter = *(SCoreStartupParameter*)pArg; const SCoreStartupParameter& _CoreParameter = *(SCoreStartupParameter*)pArg;
if (_CoreParameter.bLockThreads)
Common::Thread::SetCurrentThreadAffinity(2); //Force to second core Common::Thread::SetCurrentThreadAffinity(2); //Force to second core
LOG(OSREPORT, "Starting core = %s mode", _CoreParameter.bWii ? "Wii" : "Gamecube"); LOG(OSREPORT, "Starting core = %s mode", _CoreParameter.bWii ? "Wii" : "Gamecube");
@ -222,7 +224,7 @@ THREAD_RETURN EmuThread(void *pArg)
VideoInitialize.pGetMemoryPointer = Memory::GetPointer; VideoInitialize.pGetMemoryPointer = Memory::GetPointer;
VideoInitialize.pSetPEToken = PixelEngine::SetToken; VideoInitialize.pSetPEToken = PixelEngine::SetToken;
VideoInitialize.pSetPEFinish = PixelEngine::SetFinish; VideoInitialize.pSetPEFinish = PixelEngine::SetFinish;
VideoInitialize.pWindowHandle = _CoreParameter.hMainWindow; // NULL; // filled by video_initialize VideoInitialize.pWindowHandle = NULL; // _CoreParameter.hMainWindow; // NULL; // filled by video_initialize
VideoInitialize.pLog = Callback_VideoLog; VideoInitialize.pLog = Callback_VideoLog;
VideoInitialize.pRequestWindowSize = NULL; //Callback_VideoRequestWindowSize; VideoInitialize.pRequestWindowSize = NULL; //Callback_VideoRequestWindowSize;
VideoInitialize.pCopiedToXFB = Callback_VideoCopiedToXFB; VideoInitialize.pCopiedToXFB = Callback_VideoCopiedToXFB;

View File

@ -32,6 +32,7 @@ void SCoreStartupParameter::LoadDefaults()
bUseDynarec = false; bUseDynarec = false;
bUseDualCore = false; bUseDualCore = false;
bRunCompareServer = false; bRunCompareServer = false;
bLockThreads = true;
bWii = false; bWii = false;
} }

View File

@ -41,6 +41,8 @@ struct SCoreStartupParameter
bool bHLEBios; bool bHLEBios;
bool bThrottle; bool bThrottle;
bool bUseFastMem; bool bUseFastMem;
bool bLockThreads;
bool bOptimizeQuantizers;
bool bRunCompareServer; bool bRunCompareServer;
bool bRunCompareClient; bool bRunCompareClient;

View File

@ -43,6 +43,9 @@
namespace Jit64 namespace Jit64
{ {
// File-local scratch variables (internal linkage via `static`).
// NOTE(review): GC_ALIGNED16 is defined outside this view; presumably it requests
// 16-byte alignment so the variables can be used as SSE memory operands - confirm.
static u64 GC_ALIGNED16(temp64);
static u32 GC_ALIGNED16(temp32);
#ifdef _M_X64 #ifdef _M_X64
void SafeLoadECXtoEAX(int accessSize, s32 offset) void SafeLoadECXtoEAX(int accessSize, s32 offset)
{ {
@ -119,11 +122,6 @@ namespace Jit64
gpr.UnlockAll(); gpr.UnlockAll();
} }
// Empty stub: the body contains no statements, so calling it emits nothing and
// both parameters are ignored.
// NOTE(review): judging by the name this was meant to mirror SafeLoadECXtoEAX for
// stores, but no implementation is visible here - confirm it has no callers.
void SafeStoreECXtoEDX(int accessSize, int offset)
{
}
void lXz(UGeckoInstruction inst) void lXz(UGeckoInstruction inst)
{ {
int d = inst.RD; int d = inst.RD;
@ -209,9 +207,6 @@ namespace Jit64
gpr.UnlockAll(); gpr.UnlockAll();
} }
// Scratch variables with external linkage (no `static`). The generated code in
// psq_st/psq_l spills SSE and integer registers through temp64.
// NOTE(review): GC_ALIGNED16 is defined outside this view; presumably it requests
// 16-byte alignment - confirm.
u32 GC_ALIGNED16(temp32);
u64 GC_ALIGNED16(temp64);
void lfs(UGeckoInstruction inst) void lfs(UGeckoInstruction inst)
{ {
// BIT32OLD; // BIT32OLD;
@ -252,7 +247,6 @@ namespace Jit64
fpr.UnlockAll(); fpr.UnlockAll();
} }
void lfd(UGeckoInstruction inst) void lfd(UGeckoInstruction inst)
{ {
BIT32OLD; BIT32OLD;
@ -301,8 +295,6 @@ namespace Jit64
fpr.UnlockAll(); fpr.UnlockAll();
} }
// Two-double scratch buffer, initialised to {1.0, 1.0}. psq_l's QUANTIZE_FLOAT path
// stores the byte-swapped 64-bit load into psTemp[0] and converts from there.
double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
void stfs(UGeckoInstruction inst) void stfs(UGeckoInstruction inst)
{ {
BIT32OLD; BIT32OLD;
@ -364,255 +356,7 @@ namespace Jit64
fpr.UnlockAll(); fpr.UnlockAll();
} }
// Zero cache line.
// TODO(ector): Improve 64-bit version
// Stores a 64-bit value as two consecutive 32-bit writes through Memory::Write_U32:
// the upper half goes to `address`, the lower half to `address + 4`.
// Generated code in psq_st calls this by address for stores that fail the
// fast-path address test.
void WriteDual32(u64 value, u32 address)
{
	const u32 upperHalf = (u32)(value >> 32);
	const u32 lowerHalf = (u32)value;
	Memory::Write_U32(upperHalf, address);
	Memory::Write_U32(lowerHalf, address + 4);
}
// Multipliers applied before converting doubles to integers in the quantized
// store paths; psq_st indexes this table with the GQR store scale (stScale).
// 64 entries: [0..31] hold 2^0 .. 2^31, [32..63] hold 2^-32 .. 2^-1.
// NOTE(review): presumably the index is the raw 6-bit two's-complement scale
// field, so 32..63 stand for scales -32..-1 - confirm against the UGQR definition.
//
// Every power of two is built from a 64-bit unsigned shift. The previous
// `(1 << 31)` shifted into the sign bit of a 32-bit int and yielded -2^31, which
// made entries 31 and 33 negative.
const double m_quantizeTableD[] =
{
	(1ULL <<  0), (1ULL <<  1), (1ULL <<  2), (1ULL <<  3),
	(1ULL <<  4), (1ULL <<  5), (1ULL <<  6), (1ULL <<  7),
	(1ULL <<  8), (1ULL <<  9), (1ULL << 10), (1ULL << 11),
	(1ULL << 12), (1ULL << 13), (1ULL << 14), (1ULL << 15),
	(1ULL << 16), (1ULL << 17), (1ULL << 18), (1ULL << 19),
	(1ULL << 20), (1ULL << 21), (1ULL << 22), (1ULL << 23),
	(1ULL << 24), (1ULL << 25), (1ULL << 26), (1ULL << 27),
	(1ULL << 28), (1ULL << 29), (1ULL << 30), (1ULL << 31),
	1.0 / (1ULL << 32), 1.0 / (1ULL << 31), 1.0 / (1ULL << 30), 1.0 / (1ULL << 29),
	1.0 / (1ULL << 28), 1.0 / (1ULL << 27), 1.0 / (1ULL << 26), 1.0 / (1ULL << 25),
	1.0 / (1ULL << 24), 1.0 / (1ULL << 23), 1.0 / (1ULL << 22), 1.0 / (1ULL << 21),
	1.0 / (1ULL << 20), 1.0 / (1ULL << 19), 1.0 / (1ULL << 18), 1.0 / (1ULL << 17),
	1.0 / (1ULL << 16), 1.0 / (1ULL << 15), 1.0 / (1ULL << 14), 1.0 / (1ULL << 13),
	1.0 / (1ULL << 12), 1.0 / (1ULL << 11), 1.0 / (1ULL << 10), 1.0 / (1ULL <<  9),
	1.0 / (1ULL <<  8), 1.0 / (1ULL <<  7), 1.0 / (1ULL <<  6), 1.0 / (1ULL <<  5),
	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  3), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  1),
};
// Multipliers applied after converting loaded integers to doubles in the
// quantized load paths; psq_l indexes this table with the GQR load scale (ldScale).
// 64 entries: [0..31] hold 2^0 .. 2^-31, [32..63] hold 2^32 .. 2^1, i.e. the
// reciprocal of the matching m_quantizeTableD entry.
// NOTE(review): presumably the index is the raw 6-bit two's-complement scale
// field, so 32..63 stand for scales -32..-1 - confirm against the UGQR definition.
//
// Every power of two is built from a 64-bit unsigned shift. The previous
// `(1 << 31)` shifted into the sign bit of a 32-bit int and yielded -2^31, which
// made entries 31 and 33 negative.
const double m_dequantizeTableD[] =
{
	1.0 / (1ULL <<  0), 1.0 / (1ULL <<  1), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  3),
	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  5), 1.0 / (1ULL <<  6), 1.0 / (1ULL <<  7),
	1.0 / (1ULL <<  8), 1.0 / (1ULL <<  9), 1.0 / (1ULL << 10), 1.0 / (1ULL << 11),
	1.0 / (1ULL << 12), 1.0 / (1ULL << 13), 1.0 / (1ULL << 14), 1.0 / (1ULL << 15),
	1.0 / (1ULL << 16), 1.0 / (1ULL << 17), 1.0 / (1ULL << 18), 1.0 / (1ULL << 19),
	1.0 / (1ULL << 20), 1.0 / (1ULL << 21), 1.0 / (1ULL << 22), 1.0 / (1ULL << 23),
	1.0 / (1ULL << 24), 1.0 / (1ULL << 25), 1.0 / (1ULL << 26), 1.0 / (1ULL << 27),
	1.0 / (1ULL << 28), 1.0 / (1ULL << 29), 1.0 / (1ULL << 30), 1.0 / (1ULL << 31),
	(1ULL << 32), (1ULL << 31), (1ULL << 30), (1ULL << 29),
	(1ULL << 28), (1ULL << 27), (1ULL << 26), (1ULL << 25),
	(1ULL << 24), (1ULL << 23), (1ULL << 22), (1ULL << 21),
	(1ULL << 20), (1ULL << 19), (1ULL << 18), (1ULL << 17),
	(1ULL << 16), (1ULL << 15), (1ULL << 14), (1ULL << 13),
	(1ULL << 12), (1ULL << 11), (1ULL << 10), (1ULL <<  9),
	(1ULL <<  8), (1ULL <<  7), (1ULL <<  6), (1ULL <<  5),
	(1ULL <<  4), (1ULL <<  3), (1ULL <<  2), (1ULL <<  1),
};
// NOTE(review): not referenced by any code visible in this view; it has external
// linkage, so confirm nothing elsewhere uses it before removing.
u32 temp;
// Emits x64 code for a paired-single quantized store (opcode 61 is treated as the
// update form). Handles the FLOAT, U8 and S16 store types inline; every other case
// is handed to Default(inst).
//
// inst: the instruction being translated. Fields read: I (GQR index), RA (base
//       register), RS (source FP register), SIMM_12 (displacement), W, OPCD.
//
// The GQR is read here, while emitting, so the store type and scale are baked into
// the generated code rather than re-read when that code executes.
void psq_st(UGeckoInstruction inst)
{
// NOTE(review): BIT32OLD and OLD are macros defined outside this view; judging by
// the names they can short-circuit to the fallback - confirm.
BIT32OLD;
OLD;
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
int stScale = gqr.ST_SCALE;
bool update = inst.OPCD == 61;
// RA == 0 and the W (single-value) form are not handled inline.
if (!inst.RA || inst.W)
{
// PanicAlert(inst.RA ? "W" : "inst");
Default(inst);
return;
}
int offset = inst.SIMM_12;
int a = inst.RA;
int s = inst.RS; // Fp numbers
if (stType == QUANTIZE_FLOAT)
{
gpr.Flush(FLUSH_VOLATILE);
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, true);
// EDX = effective address (RA + displacement).
MOV(32, R(EDX), gpr.R(a));
if (offset)
ADD(32, R(EDX), Imm32((u32)offset));
// Sets the flags consumed by J_CC(CC_NZ) below; the instructions emitted in
// between (MOV / SSE conversions) are expected to leave the flags untouched.
TEST(32, R(EDX), Imm32(0x0C000000));
if (update && offset)
MOV(32, gpr.R(a), R(EDX));
// Convert the two doubles to floats, reorder them, and move them to RCX via memory.
CVTPD2PS(XMM0, fpr.R(s));
SHUFPS(XMM0, R(XMM0), 1);
// NOTE(review): MOVAPS is a 128-bit store on x86 while temp64 is declared as a
// u64 - this looks like it writes 16 bytes into an 8-byte variable; confirm.
MOVAPS(M(&temp64), XMM0);
MOV(64, R(ECX), M(&temp64));
// Address has bits in 0x0C000000 set -> slow path through WriteDual32.
FixupBranch argh = J_CC(CC_NZ);
// Fast path: byte-swap and store 64 bits at [RBX + EDX].
// NOTE(review): presumably RBX holds the emulated memory base - confirm.
BSWAP(64, ECX);
MOV(64, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
FixupBranch arg2 = J();
SetJumpTarget(argh);
// Slow path: value in RCX, address in EDX.
CALL((void *)&WriteDual32);
SetJumpTarget(arg2);
// NOTE(review): EDX is caller-saved in the common x64 calling conventions, so
// after the CALL above it may no longer hold the address; RA was also already
// written before the branch when offset != 0 - confirm this second write-back.
if (update)
MOV(32, gpr.R(a), R(EDX));
gpr.UnlockAll();
fpr.UnlockAll();
}
else if (stType == QUANTIZE_U8)
{
gpr.Flush(FLUSH_VOLATILE);
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, update);
MOV(32, R(EDX), gpr.R(a));
if (offset)
ADD(32,R(EDX),Imm32((u32)offset));
// Scale both doubles by the table entry, convert to int32, then pack with
// saturation down to two unsigned bytes.
MOVAPS(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1));
CVTPD2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
// NOTE(review): same 128-bit store into the 8-byte temp64 as in the FLOAT path.
MOVAPS(M(&temp64), XMM0);
MOV(16, R(ECX), M(&temp64));
#ifdef _M_X64
// Direct 16-bit store; unlike the FLOAT path there is no address test here.
MOV(16, MComplex(RBX, RDX, SCALE_1, 0), R(ECX));
#else
BSWAP(32, ECX);
SHR(32, R(ECX), Imm8(16));
CALL(&Memory::Write_U16);
#endif
if (update)
MOV(32, gpr.R(a), R(EDX));
gpr.UnlockAll();
fpr.UnlockAll();
}
else if (stType == QUANTIZE_S16)
{
// NOTE(review): unlike the two paths above there is no gpr.Flush(FLUSH_VOLATILE)
// before EDX/ECX are used as scratch - confirm this is intentional.
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, update);
MOV(32, R(EDX), gpr.R(a));
if (offset)
ADD(32,R(EDX),Imm32((u32)offset));
// Scale, swap the two doubles, convert to int32 and pack to two signed 16-bit values.
MOVAPS(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1));
SHUFPD(XMM0, R(XMM0), 1);
CVTPD2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
MOVD_xmm(M(&temp64), XMM0);
MOV(32, R(ECX), M(&temp64));
#ifdef _M_X64
BSWAP(32, ECX);
MOV(32, MComplex(RBX, RDX, SCALE_1, 0), R(ECX));
#else
BSWAP(32, ECX);
CALL(&Memory::Write_U32);
#endif
if (update)
MOV(32, gpr.R(a), R(EDX));
gpr.UnlockAll();
fpr.UnlockAll();
}
else {
// Dodger uses this.
// Unhandled store type: report it, then fall back.
PanicAlert("st %i:%i", stType, inst.W);
Default(inst);
}
}
// Emits x64 code for a paired-single quantized load (opcode 57 is treated as the
// update form). On x64 builds the FLOAT, U8 and S16 load types are handled inline;
// every other case raises a PanicAlert and is handed to Default(inst).
//
// inst: the instruction being translated. Fields read: I (GQR index), RA (base
//       register), RS (destination FP register), SIMM_12 (displacement), W, OPCD.
//
// The GQR is read here, while emitting, so the load type and scale are baked into
// the generated code rather than re-read when that code executes.
void psq_l(UGeckoInstruction inst)
{
// NOTE(review): BIT32OLD and OLD are macros defined outside this view; judging by
// the names they can short-circuit to the fallback - confirm.
BIT32OLD;
OLD;
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
int ldScale = gqr.LD_SCALE;
// RA == 0 and the W (single-value) form are not handled inline.
if (!inst.RA || inst.W)
{
// 0 1 during load
//PanicAlert("ld:%i %i", ldType, (int)inst.W);
Default(inst);
return;
}
bool update = inst.OPCD == 57;
int offset = inst.SIMM_12;
//INT3();
switch (ldType) {
#ifdef _M_X64
case QUANTIZE_FLOAT:
{
// Load 64 bits from [RBX + RA + offset], byte-swap, and spill to psTemp so the
// two floats can be widened to doubles.
// NOTE(review): presumably RBX holds the emulated memory base - confirm.
gpr.LoadToX64(inst.RA);
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
BSWAP(64, RAX);
MOV(64, M(&psTemp[0]),R(RAX));
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0]));
// Swap the two doubles (the 64-bit byte swap reversed their order).
SHUFPD(r, R(r),1);
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
}
case QUANTIZE_U8:
{
// Load two bytes, zero-extend them byte -> word -> dword by interleaving with a
// zeroed XMM1, convert to doubles and multiply by the dequantize scale.
gpr.LoadToX64(inst.RA);
XOR(32, R(EAX), R(EAX));
MOV(16, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
MOV(32, M(&temp64), R(EAX));
MOVD_xmm(XMM0, M(&temp64));
// SSE4 optimization opportunity here.
PXOR(XMM1, R(XMM1));
PUNPCKLBW(XMM0, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PD(XMM0, R(XMM0));
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0));
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;
case QUANTIZE_S16:
{
// Load 32 bits (two 16-bit values), byte-swap, sign-extend each half to 32 bits,
// convert to doubles, scale, then swap the pair back into order.
gpr.LoadToX64(inst.RA);
MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
BSWAP(32, EAX);
MOV(32, M(&temp64), R(EAX));
//INT3();
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVD_xmm(XMM0, M(&temp64));
PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword..
PSRAD(XMM0, 16); // then use this signed shift to sign extend. clever eh? :P
CVTDQ2PD(XMM0, R(XMM0));
MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0));
SHUFPD(r, R(r), 1);
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;
#endif
default:
// 4 0
// Unhandled load type (and every type on non-x64 builds): report it, then fall back.
PanicAlert("ld:%i %i", ldType, (int)inst.W);
Default(inst);
return;
}
//u32 EA = (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
}
void dcbz(UGeckoInstruction inst) void dcbz(UGeckoInstruction inst)
{ {
#ifdef _M_IX86 #ifdef _M_IX86

View File

@ -0,0 +1,312 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
// Should give a very noticable speed boost to paired single heavy code.
#include "../PowerPC.h"
#include "../../Core.h"
#include "../../HW/GPFifo.h"
#include "../../HW/CommandProcessor.h"
#include "../../HW/PixelEngine.h"
#include "../../HW/Memmap.h"
#include "../PPCTables.h"
#include "x64Emitter.h"
#include "Jit.h"
#include "JitCache.h"
#include "JitAsm.h"
#include "JitRegCache.h"
// OLD is placed at the top of each emitter below. It is defined empty here, so it
// has no effect; swapping in the commented-out definition would make every emitter
// that uses it fall back to Default(inst) and return immediately.
#define OLD
//#define OLD Default(inst); return;
// BIT32OLD does the same fallback unconditionally on 32-bit x86 builds (_M_IX86),
// and expands to an empty statement everywhere else.
#ifdef _M_IX86
#define BIT32OLD Default(inst); return;
#else
#define BIT32OLD ;
#endif
namespace Jit64 {
// File-local scratch storage that the generated code uses to move data between
// integer and SSE registers.
// NOTE(review): GC_ALIGNED16 is defined outside this view; presumably it requests
// 16-byte alignment - confirm.
// psTemp: psq_l's QUANTIZE_FLOAT path stores the byte-swapped 64-bit load here.
static double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
// temp64: spill slot used by psq_st and psq_l.
static u64 GC_ALIGNED16(temp64);
// temp32: NOTE(review): not referenced by the code visible in this file - confirm
// whether it is still needed.
static u32 GC_ALIGNED16(temp32);
// TODO(ector): Improve 64-bit version
// Stores a 64-bit value as two consecutive 32-bit writes through Memory::Write_U32:
// the upper half goes to `address`, the lower half to `address + 4`.
// Generated code in psq_st calls this by address for stores that fail the
// fast-path address test.
void WriteDual32(u64 value, u32 address)
{
	const u32 upperHalf = (u32)(value >> 32);
	const u32 lowerHalf = (u32)value;
	Memory::Write_U32(upperHalf, address);
	Memory::Write_U32(lowerHalf, address + 4);
}
// Multipliers applied before converting doubles to integers in the quantized
// store paths; psq_st indexes this table with the GQR store scale (stScale).
// 64 entries: [0..31] hold 2^0 .. 2^31, [32..63] hold 2^-32 .. 2^-1.
// NOTE(review): presumably the index is the raw 6-bit two's-complement scale
// field, so 32..63 stand for scales -32..-1 - confirm against the UGQR definition.
//
// Every power of two is built from a 64-bit unsigned shift. The previous
// `(1 << 31)` shifted into the sign bit of a 32-bit int and yielded -2^31, which
// made entries 31 and 33 negative.
const double m_quantizeTableD[] =
{
	(1ULL <<  0), (1ULL <<  1), (1ULL <<  2), (1ULL <<  3),
	(1ULL <<  4), (1ULL <<  5), (1ULL <<  6), (1ULL <<  7),
	(1ULL <<  8), (1ULL <<  9), (1ULL << 10), (1ULL << 11),
	(1ULL << 12), (1ULL << 13), (1ULL << 14), (1ULL << 15),
	(1ULL << 16), (1ULL << 17), (1ULL << 18), (1ULL << 19),
	(1ULL << 20), (1ULL << 21), (1ULL << 22), (1ULL << 23),
	(1ULL << 24), (1ULL << 25), (1ULL << 26), (1ULL << 27),
	(1ULL << 28), (1ULL << 29), (1ULL << 30), (1ULL << 31),
	1.0 / (1ULL << 32), 1.0 / (1ULL << 31), 1.0 / (1ULL << 30), 1.0 / (1ULL << 29),
	1.0 / (1ULL << 28), 1.0 / (1ULL << 27), 1.0 / (1ULL << 26), 1.0 / (1ULL << 25),
	1.0 / (1ULL << 24), 1.0 / (1ULL << 23), 1.0 / (1ULL << 22), 1.0 / (1ULL << 21),
	1.0 / (1ULL << 20), 1.0 / (1ULL << 19), 1.0 / (1ULL << 18), 1.0 / (1ULL << 17),
	1.0 / (1ULL << 16), 1.0 / (1ULL << 15), 1.0 / (1ULL << 14), 1.0 / (1ULL << 13),
	1.0 / (1ULL << 12), 1.0 / (1ULL << 11), 1.0 / (1ULL << 10), 1.0 / (1ULL <<  9),
	1.0 / (1ULL <<  8), 1.0 / (1ULL <<  7), 1.0 / (1ULL <<  6), 1.0 / (1ULL <<  5),
	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  3), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  1),
};
// Multipliers applied after converting loaded integers to doubles in the
// quantized load paths; psq_l indexes this table with the GQR load scale (ldScale).
// 64 entries: [0..31] hold 2^0 .. 2^-31, [32..63] hold 2^32 .. 2^1, i.e. the
// reciprocal of the matching m_quantizeTableD entry.
// NOTE(review): presumably the index is the raw 6-bit two's-complement scale
// field, so 32..63 stand for scales -32..-1 - confirm against the UGQR definition.
//
// Every power of two is built from a 64-bit unsigned shift. The previous
// `(1 << 31)` shifted into the sign bit of a 32-bit int and yielded -2^31, which
// made entries 31 and 33 negative.
const double m_dequantizeTableD[] =
{
	1.0 / (1ULL <<  0), 1.0 / (1ULL <<  1), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  3),
	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  5), 1.0 / (1ULL <<  6), 1.0 / (1ULL <<  7),
	1.0 / (1ULL <<  8), 1.0 / (1ULL <<  9), 1.0 / (1ULL << 10), 1.0 / (1ULL << 11),
	1.0 / (1ULL << 12), 1.0 / (1ULL << 13), 1.0 / (1ULL << 14), 1.0 / (1ULL << 15),
	1.0 / (1ULL << 16), 1.0 / (1ULL << 17), 1.0 / (1ULL << 18), 1.0 / (1ULL << 19),
	1.0 / (1ULL << 20), 1.0 / (1ULL << 21), 1.0 / (1ULL << 22), 1.0 / (1ULL << 23),
	1.0 / (1ULL << 24), 1.0 / (1ULL << 25), 1.0 / (1ULL << 26), 1.0 / (1ULL << 27),
	1.0 / (1ULL << 28), 1.0 / (1ULL << 29), 1.0 / (1ULL << 30), 1.0 / (1ULL << 31),
	(1ULL << 32), (1ULL << 31), (1ULL << 30), (1ULL << 29),
	(1ULL << 28), (1ULL << 27), (1ULL << 26), (1ULL << 25),
	(1ULL << 24), (1ULL << 23), (1ULL << 22), (1ULL << 21),
	(1ULL << 20), (1ULL << 19), (1ULL << 18), (1ULL << 17),
	(1ULL << 16), (1ULL << 15), (1ULL << 14), (1ULL << 13),
	(1ULL << 12), (1ULL << 11), (1ULL << 10), (1ULL <<  9),
	(1ULL <<  8), (1ULL <<  7), (1ULL <<  6), (1ULL <<  5),
	(1ULL <<  4), (1ULL <<  3), (1ULL <<  2), (1ULL <<  1),
};
// NOTE(review): not referenced by any code visible in this file; it has external
// linkage, so confirm nothing elsewhere uses it before removing.
u32 temp;
// Emits x64 code for a paired-single quantized store (opcode 61 is treated as the
// update form). Handles the FLOAT, U8 and S16 store types inline; every other case
// is handed to Default(inst).
//
// inst: the instruction being translated. Fields read: I (GQR index), RA (base
//       register), RS (source FP register), SIMM_12 (displacement), W, OPCD.
//
// The GQR is read here, while emitting, so the store type and scale are baked into
// the generated code rather than re-read when that code executes. The
// bOptimizeQuantizers startup option disables the inline paths entirely.
void psq_st(UGeckoInstruction inst)
{
// On 32-bit x86 builds BIT32OLD expands to "Default(inst); return;"; OLD is
// currently defined empty.
BIT32OLD;
OLD;
// Option off -> always use the fallback.
if (!Core::GetStartupParameter().bOptimizeQuantizers)
{
Default(inst);
return;
}
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
int stScale = gqr.ST_SCALE;
bool update = inst.OPCD == 61;
// RA == 0 and the W (single-value) form are not handled inline.
if (!inst.RA || inst.W)
{
// PanicAlert(inst.RA ? "W" : "inst");
Default(inst);
return;
}
int offset = inst.SIMM_12;
int a = inst.RA;
int s = inst.RS; // Fp numbers
if (stType == QUANTIZE_FLOAT)
{
gpr.Flush(FLUSH_VOLATILE);
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, true);
// EDX = effective address (RA + displacement).
MOV(32, R(EDX), gpr.R(a));
if (offset)
ADD(32, R(EDX), Imm32((u32)offset));
// Sets the flags consumed by J_CC(CC_NZ) below; the instructions emitted in
// between (MOV / SSE conversions) are expected to leave the flags untouched.
TEST(32, R(EDX), Imm32(0x0C000000));
if (update && offset)
MOV(32, gpr.R(a), R(EDX));
// Convert the two doubles to floats, reorder them, and move them to RCX via memory.
CVTPD2PS(XMM0, fpr.R(s));
SHUFPS(XMM0, R(XMM0), 1);
// NOTE(review): MOVAPS is a 128-bit store on x86 while temp64 is declared as a
// u64 - this looks like it writes 16 bytes into an 8-byte variable; confirm.
MOVAPS(M(&temp64), XMM0);
MOV(64, R(ECX), M(&temp64));
// Address has bits in 0x0C000000 set -> slow path through WriteDual32.
FixupBranch argh = J_CC(CC_NZ);
// Fast path: byte-swap and store 64 bits at [RBX + EDX].
// NOTE(review): presumably RBX holds the emulated memory base - confirm.
BSWAP(64, ECX);
MOV(64, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
FixupBranch arg2 = J();
SetJumpTarget(argh);
// Slow path: value in RCX, address in EDX.
CALL((void *)&WriteDual32);
SetJumpTarget(arg2);
// NOTE(review): EDX is caller-saved in the common x64 calling conventions, so
// after the CALL above it may no longer hold the address; RA was also already
// written before the branch when offset != 0 - confirm this second write-back.
if (update)
MOV(32, gpr.R(a), R(EDX));
gpr.UnlockAll();
fpr.UnlockAll();
}
else if (stType == QUANTIZE_U8)
{
gpr.Flush(FLUSH_VOLATILE);
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, update);
MOV(32, R(EDX), gpr.R(a));
if (offset)
ADD(32,R(EDX),Imm32((u32)offset));
// Scale both doubles by the table entry, convert to int32, then pack with
// saturation down to two unsigned bytes.
MOVAPS(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1));
CVTPD2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
// NOTE(review): same 128-bit store into the 8-byte temp64 as in the FLOAT path.
MOVAPS(M(&temp64), XMM0);
MOV(16, R(ECX), M(&temp64));
#ifdef _M_X64
// Direct 16-bit store; unlike the FLOAT path there is no address test here.
MOV(16, MComplex(RBX, RDX, SCALE_1, 0), R(ECX));
#else
BSWAP(32, ECX);
SHR(32, R(ECX), Imm8(16));
CALL(&Memory::Write_U16);
#endif
if (update)
MOV(32, gpr.R(a), R(EDX));
gpr.UnlockAll();
fpr.UnlockAll();
}
else if (stType == QUANTIZE_S16)
{
// NOTE(review): unlike the two paths above there is no gpr.Flush(FLUSH_VOLATILE)
// before EDX/ECX are used as scratch - confirm this is intentional.
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, update);
MOV(32, R(EDX), gpr.R(a));
if (offset)
ADD(32,R(EDX),Imm32((u32)offset));
// Scale, swap the two doubles, convert to int32 and pack to two signed 16-bit values.
MOVAPS(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1));
SHUFPD(XMM0, R(XMM0), 1);
CVTPD2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
MOVD_xmm(M(&temp64), XMM0);
MOV(32, R(ECX), M(&temp64));
#ifdef _M_X64
BSWAP(32, ECX);
MOV(32, MComplex(RBX, RDX, SCALE_1, 0), R(ECX));
#else
BSWAP(32, ECX);
CALL(&Memory::Write_U32);
#endif
if (update)
MOV(32, gpr.R(a), R(EDX));
gpr.UnlockAll();
fpr.UnlockAll();
}
else {
// Dodger uses this.
// Unhandled store type: report it, then fall back.
PanicAlert("st %i:%i", stType, inst.W);
Default(inst);
}
}
// Emits x64 code for a paired-single quantized load (opcode 57 is treated as the
// update form). On x64 builds the FLOAT, U8 and S16 load types are handled inline;
// every other case raises a PanicAlert and is handed to Default(inst).
//
// inst: the instruction being translated. Fields read: I (GQR index), RA (base
//       register), RS (destination FP register), SIMM_12 (displacement), W, OPCD.
//
// The GQR is read here, while emitting, so the load type and scale are baked into
// the generated code rather than re-read when that code executes. The
// bOptimizeQuantizers startup option disables the inline paths entirely.
void psq_l(UGeckoInstruction inst)
{
// On 32-bit x86 builds BIT32OLD expands to "Default(inst); return;"; OLD is
// currently defined empty.
BIT32OLD;
OLD;
// Option off -> always use the fallback.
if (!Core::GetStartupParameter().bOptimizeQuantizers)
{
Default(inst);
return;
}
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
int ldScale = gqr.LD_SCALE;
// RA == 0 and the W (single-value) form are not handled inline.
if (!inst.RA || inst.W)
{
// 0 1 during load
//PanicAlert("ld:%i %i", ldType, (int)inst.W);
Default(inst);
return;
}
bool update = inst.OPCD == 57;
int offset = inst.SIMM_12;
//INT3();
switch (ldType) {
#ifdef _M_X64
case QUANTIZE_FLOAT:
{
// Load 64 bits from [RBX + RA + offset], byte-swap, and spill to psTemp so the
// two floats can be widened to doubles.
// NOTE(review): presumably RBX holds the emulated memory base - confirm.
gpr.LoadToX64(inst.RA);
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
BSWAP(64, RAX);
MOV(64, M(&psTemp[0]),R(RAX));
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0]));
// Swap the two doubles (the 64-bit byte swap reversed their order).
SHUFPD(r, R(r),1);
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
}
case QUANTIZE_U8:
{
// Load two bytes, zero-extend them byte -> word -> dword by interleaving with a
// zeroed XMM1, convert to doubles and multiply by the dequantize scale.
gpr.LoadToX64(inst.RA);
XOR(32, R(EAX), R(EAX));
MOV(16, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
MOV(32, M(&temp64), R(EAX));
MOVD_xmm(XMM0, M(&temp64));
// SSE4 optimization opportunity here.
PXOR(XMM1, R(XMM1));
PUNPCKLBW(XMM0, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PD(XMM0, R(XMM0));
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0));
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;
case QUANTIZE_S16:
{
// Load 32 bits (two 16-bit values), byte-swap, sign-extend each half to 32 bits,
// convert to doubles, scale, then swap the pair back into order.
gpr.LoadToX64(inst.RA);
MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
BSWAP(32, EAX);
MOV(32, M(&temp64), R(EAX));
//INT3();
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVD_xmm(XMM0, M(&temp64));
PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword..
PSRAD(XMM0, 16); // then use this signed shift to sign extend. clever eh? :P
CVTDQ2PD(XMM0, R(XMM0));
MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0));
SHUFPD(r, R(r), 1);
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;
#endif
default:
// 4 0
// 6 0 //power tennis
// 5 0
// Unhandled load type (and every type on non-x64 builds): report it, then fall back.
PanicAlert("ld:%i %i", ldType, (int)inst.W);
Default(inst);
return;
}
//u32 EA = (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
}
} // namespace

View File

@ -24,6 +24,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="2" CharacterSet="2"
WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -43,6 +44,7 @@
<Tool <Tool
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
Optimization="0" Optimization="0"
WholeProgramOptimization="false"
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src" AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__WXMSW__" PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__WXMSW__"
MinimalRebuild="true" MinimalRebuild="true"
@ -88,6 +90,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="2" CharacterSet="2"
WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -108,6 +111,7 @@
<Tool <Tool
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
Optimization="0" Optimization="0"
WholeProgramOptimization="false"
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src" AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__WXMSW__" PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__WXMSW__"
MinimalRebuild="true" MinimalRebuild="true"
@ -153,7 +157,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="2" CharacterSet="2"
WholeProgramOptimization="1" WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -172,6 +176,7 @@
/> />
<Tool <Tool
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
WholeProgramOptimization="false"
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src" AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__WXMSW__" PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__WXMSW__"
RuntimeLibrary="0" RuntimeLibrary="0"
@ -215,7 +220,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="2" CharacterSet="2"
WholeProgramOptimization="1" WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -235,6 +240,7 @@
/> />
<Tool <Tool
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
WholeProgramOptimization="false"
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src" AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__WXMSW__" PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__WXMSW__"
RuntimeLibrary="0" RuntimeLibrary="0"
@ -279,6 +285,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="2" CharacterSet="2"
WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -298,6 +305,7 @@
<Tool <Tool
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
Optimization="0" Optimization="0"
WholeProgramOptimization="false"
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src" AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
PreprocessorDefinitions="WIN32;__WXMSW__;_WINDOWS;NOPCH;_SECURE_SCL=0;_CRT_SECURE_NO_WARNINGS" PreprocessorDefinitions="WIN32;__WXMSW__;_WINDOWS;NOPCH;_SECURE_SCL=0;_CRT_SECURE_NO_WARNINGS"
MinimalRebuild="true" MinimalRebuild="true"
@ -343,6 +351,7 @@
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)" IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="4" ConfigurationType="4"
CharacterSet="2" CharacterSet="2"
WholeProgramOptimization="0"
> >
<Tool <Tool
Name="VCPreBuildEventTool" Name="VCPreBuildEventTool"
@ -363,6 +372,7 @@
<Tool <Tool
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
Optimization="0" Optimization="0"
WholeProgramOptimization="false"
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src" AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
PreprocessorDefinitions="WIN32;__WXMSW__;_WINDOWS;NOPCH;_SECURE_SCL=0;_CRT_SECURE_NO_WARNINGS" PreprocessorDefinitions="WIN32;__WXMSW__;_WINDOWS;NOPCH;_SECURE_SCL=0;_CRT_SECURE_NO_WARNINGS"
MinimalRebuild="true" MinimalRebuild="true"

View File

@ -67,7 +67,9 @@ void SConfig::SaveSettings()
ini.Set("Core", "UseDynarec", m_LocalCoreStartupParameter.bUseDynarec); ini.Set("Core", "UseDynarec", m_LocalCoreStartupParameter.bUseDynarec);
ini.Set("Core", "UseDualCore", m_LocalCoreStartupParameter.bUseDualCore); ini.Set("Core", "UseDualCore", m_LocalCoreStartupParameter.bUseDualCore);
ini.Set("Core", "Throttle", m_LocalCoreStartupParameter.bThrottle); ini.Set("Core", "Throttle", m_LocalCoreStartupParameter.bThrottle);
ini.Set("Core", "LockThreads", m_LocalCoreStartupParameter.bLockThreads);
ini.Set("Core", "DefaultGCM", m_LocalCoreStartupParameter.m_strDefaultGCM); ini.Set("Core", "DefaultGCM", m_LocalCoreStartupParameter.m_strDefaultGCM);
ini.Set("Core", "OptimizeQuantizers", m_LocalCoreStartupParameter.bOptimizeQuantizers);
} }
ini.Save("Dolphin.ini"); ini.Save("Dolphin.ini");
@ -115,7 +117,7 @@ void SConfig::LoadSettings()
ini.Get("Core", "UseDynarec", &m_LocalCoreStartupParameter.bUseDynarec, false); ini.Get("Core", "UseDynarec", &m_LocalCoreStartupParameter.bUseDynarec, false);
ini.Get("Core", "UseDualCore", &m_LocalCoreStartupParameter.bUseDualCore, false); ini.Get("Core", "UseDualCore", &m_LocalCoreStartupParameter.bUseDualCore, false);
ini.Get("Core", "Throttle", &m_LocalCoreStartupParameter.bThrottle, true); ini.Get("Core", "Throttle", &m_LocalCoreStartupParameter.bThrottle, true);
ini.Get("Core", "LockThreads", &m_LocalCoreStartupParameter.bLockThreads, true);
ini.Get("Core", "OptimizeQuantizers", &m_LocalCoreStartupParameter.bOptimizeQuantizers, true);
} }
} }