mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-09 23:59:27 +01:00
Merge pull request #6197 from degasus/GP
Jit: Optimize gather pipe usage.
This commit is contained in:
commit
9178a6e636
@ -13,6 +13,7 @@
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/HW/ProcessorInterface.h"
|
||||
#include "Core/PowerPC/JitInterface.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
#include "VideoCommon/CommandProcessor.h"
|
||||
|
||||
namespace GPFifo
|
||||
@ -31,17 +32,14 @@ namespace GPFifo
|
||||
// More room for the fastmodes
|
||||
alignas(32) static u8 s_gather_pipe[GATHER_PIPE_SIZE * 16];
|
||||
|
||||
// pipe pointer
|
||||
u8* g_gather_pipe_ptr = s_gather_pipe;
|
||||
|
||||
static size_t GetGatherPipeCount()
|
||||
{
|
||||
return g_gather_pipe_ptr - s_gather_pipe;
|
||||
return PowerPC::ppcState.gather_pipe_ptr - s_gather_pipe;
|
||||
}
|
||||
|
||||
static void SetGatherPipeCount(size_t size)
|
||||
{
|
||||
g_gather_pipe_ptr = s_gather_pipe + size;
|
||||
PowerPC::ppcState.gather_pipe_ptr = s_gather_pipe + size;
|
||||
}
|
||||
|
||||
void DoState(PointerWrap& p)
|
||||
@ -55,6 +53,7 @@ void DoState(PointerWrap& p)
|
||||
void Init()
|
||||
{
|
||||
ResetGatherPipe();
|
||||
PowerPC::ppcState.gather_pipe_base_ptr = s_gather_pipe;
|
||||
memset(s_gather_pipe, 0, sizeof(s_gather_pipe));
|
||||
}
|
||||
|
||||
@ -68,7 +67,7 @@ void ResetGatherPipe()
|
||||
SetGatherPipeCount(0);
|
||||
}
|
||||
|
||||
static void UpdateGatherPipe()
|
||||
void UpdateGatherPipe()
|
||||
{
|
||||
size_t pipe_count = GetGatherPipeCount();
|
||||
size_t processed;
|
||||
@ -144,29 +143,29 @@ void Write64(const u64 value)
|
||||
|
||||
void FastWrite8(const u8 value)
|
||||
{
|
||||
*g_gather_pipe_ptr = value;
|
||||
g_gather_pipe_ptr += sizeof(u8);
|
||||
*PowerPC::ppcState.gather_pipe_ptr = value;
|
||||
PowerPC::ppcState.gather_pipe_ptr += sizeof(u8);
|
||||
}
|
||||
|
||||
void FastWrite16(u16 value)
|
||||
{
|
||||
value = Common::swap16(value);
|
||||
std::memcpy(g_gather_pipe_ptr, &value, sizeof(u16));
|
||||
g_gather_pipe_ptr += sizeof(u16);
|
||||
std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u16));
|
||||
PowerPC::ppcState.gather_pipe_ptr += sizeof(u16);
|
||||
}
|
||||
|
||||
void FastWrite32(u32 value)
|
||||
{
|
||||
value = Common::swap32(value);
|
||||
std::memcpy(g_gather_pipe_ptr, &value, sizeof(u32));
|
||||
g_gather_pipe_ptr += sizeof(u32);
|
||||
std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u32));
|
||||
PowerPC::ppcState.gather_pipe_ptr += sizeof(u32);
|
||||
}
|
||||
|
||||
void FastWrite64(u64 value)
|
||||
{
|
||||
value = Common::swap64(value);
|
||||
std::memcpy(g_gather_pipe_ptr, &value, sizeof(u64));
|
||||
g_gather_pipe_ptr += sizeof(u64);
|
||||
std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u64));
|
||||
PowerPC::ppcState.gather_pipe_ptr += sizeof(u64);
|
||||
}
|
||||
|
||||
} // end of namespace GPFifo
|
||||
|
@ -15,15 +15,13 @@ enum
|
||||
GATHER_PIPE_SIZE = 32
|
||||
};
|
||||
|
||||
// pipe pointer for JIT access
|
||||
extern u8* g_gather_pipe_ptr;
|
||||
|
||||
// Init
|
||||
void Init();
|
||||
void DoState(PointerWrap& p);
|
||||
|
||||
// ResetGatherPipe
|
||||
void ResetGatherPipe();
|
||||
void UpdateGatherPipe();
|
||||
void CheckGatherPipe();
|
||||
void FastCheckGatherPipe();
|
||||
|
||||
|
@ -355,9 +355,14 @@ bool Jit64::Cleanup()
|
||||
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
|
||||
{
|
||||
MOV(64, R(RSCRATCH), PPCSTATE(gather_pipe_ptr));
|
||||
SUB(64, R(RSCRATCH), PPCSTATE(gather_pipe_base_ptr));
|
||||
CMP(64, R(RSCRATCH), Imm32(GPFifo::GATHER_PIPE_SIZE));
|
||||
FixupBranch exit = J_CC(CC_L);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(GPFifo::FastCheckGatherPipe);
|
||||
ABI_CallFunction(GPFifo::UpdateGatherPipe);
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
SetJumpTarget(exit);
|
||||
did_something = true;
|
||||
}
|
||||
|
||||
|
@ -222,14 +222,6 @@ void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter)
|
||||
|
||||
void Jit64AsmRoutineManager::GenerateCommon()
|
||||
{
|
||||
fifoDirectWrite8 = AlignCode4();
|
||||
GenFifoWrite(8);
|
||||
fifoDirectWrite16 = AlignCode4();
|
||||
GenFifoWrite(16);
|
||||
fifoDirectWrite32 = AlignCode4();
|
||||
GenFifoWrite(32);
|
||||
fifoDirectWrite64 = AlignCode4();
|
||||
GenFifoWrite(64);
|
||||
frsqrte = AlignCode4();
|
||||
GenFrsqrte();
|
||||
fres = AlignCode4();
|
||||
|
@ -203,28 +203,6 @@ bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int access
|
||||
return offsetAddedToAddress;
|
||||
}
|
||||
|
||||
void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize)
|
||||
{
|
||||
// No need to protect these, they don't touch any state
|
||||
// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
|
||||
switch (accessSize)
|
||||
{
|
||||
case 8:
|
||||
CALL(g_jit->GetAsmRoutines()->fifoDirectWrite8);
|
||||
break;
|
||||
case 16:
|
||||
CALL(g_jit->GetAsmRoutines()->fifoDirectWrite16);
|
||||
break;
|
||||
case 32:
|
||||
CALL(g_jit->GetAsmRoutines()->fifoDirectWrite32);
|
||||
break;
|
||||
case 64:
|
||||
CALL(g_jit->GetAsmRoutines()->fifoDirectWrite64);
|
||||
break;
|
||||
}
|
||||
g_jit->js.fifoBytesSinceCheck += accessSize >> 3;
|
||||
}
|
||||
|
||||
// Visitor that generates code to read a MMIO value.
|
||||
template <typename T>
|
||||
class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>
|
||||
@ -622,10 +600,22 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
|
||||
// fun tricks...
|
||||
if (g_jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(address))
|
||||
{
|
||||
if (!arg.IsSimpleReg(RSCRATCH))
|
||||
MOV(accessSize, R(RSCRATCH), arg);
|
||||
X64Reg arg_reg = RSCRATCH;
|
||||
|
||||
UnsafeWriteGatherPipe(accessSize);
|
||||
// With movbe, we can store inplace without temporary register
|
||||
if (arg.IsSimpleReg() && cpu_info.bMOVBE)
|
||||
arg_reg = arg.GetSimpleReg();
|
||||
|
||||
if (!arg.IsSimpleReg(arg_reg))
|
||||
MOV(accessSize, R(arg_reg), arg);
|
||||
|
||||
// And store it in the gather pipe
|
||||
MOV(64, R(RSCRATCH2), PPCSTATE(gather_pipe_ptr));
|
||||
SwapAndStore(accessSize, MatR(RSCRATCH2), arg_reg);
|
||||
ADD(64, R(RSCRATCH2), Imm8(accessSize >> 3));
|
||||
MOV(64, PPCSTATE(gather_pipe_ptr), R(RSCRATCH2));
|
||||
|
||||
g_jit->js.fifoBytesSinceCheck += accessSize >> 3;
|
||||
return false;
|
||||
}
|
||||
else if (PowerPC::IsOptimizableRAMAddress(address))
|
||||
|
@ -61,7 +61,6 @@ public:
|
||||
|
||||
bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend, Gen::MovInfo* info = nullptr);
|
||||
void UnsafeWriteGatherPipe(int accessSize);
|
||||
|
||||
// Generate a load/write from the MMIO handler for a given address. Only
|
||||
// call for known addresses in MMIO range (MMIO::IsMMIOAddress).
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include "Common/MathUtil.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/HW/GPFifo.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64Base.h"
|
||||
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
|
||||
@ -25,22 +24,6 @@
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
void CommonAsmRoutines::GenFifoWrite(int size)
|
||||
{
|
||||
const void* start = GetCodePtr();
|
||||
|
||||
// Assume value in RSCRATCH
|
||||
MOV(64, R(RSCRATCH2), ImmPtr(&GPFifo::g_gather_pipe_ptr));
|
||||
MOV(64, R(RSCRATCH2), MatR(RSCRATCH2));
|
||||
SwapAndStore(size, MatR(RSCRATCH2), RSCRATCH);
|
||||
MOV(64, R(RSCRATCH), ImmPtr(&GPFifo::g_gather_pipe_ptr));
|
||||
ADD(64, R(RSCRATCH2), Imm8(size >> 3));
|
||||
MOV(64, MatR(RSCRATCH), R(RSCRATCH2));
|
||||
RET();
|
||||
|
||||
JitRegister::Register(start, GetCodePtr(), "JIT_FifoWrite_%i", size);
|
||||
}
|
||||
|
||||
void CommonAsmRoutines::GenFrsqrte()
|
||||
{
|
||||
const void* start = GetCodePtr();
|
||||
|
@ -24,7 +24,6 @@ private:
|
||||
class CommonAsmRoutines : public CommonAsmRoutinesBase, public QuantizedMemoryRoutines
|
||||
{
|
||||
public:
|
||||
void GenFifoWrite(int size);
|
||||
void GenFrsqrte();
|
||||
void GenFres();
|
||||
void GenMfcr();
|
||||
|
@ -231,8 +231,13 @@ void JitArm64::Cleanup()
|
||||
{
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
|
||||
{
|
||||
MOVP2R(X0, &GPFifo::FastCheckGatherPipe);
|
||||
LDP(INDEX_SIGNED, X0, X1, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
|
||||
SUB(X0, X0, X1);
|
||||
CMP(X0, GPFifo::GATHER_PIPE_SIZE);
|
||||
FixupBranch exit = B(CC_LT);
|
||||
MOVP2R(X0, &GPFifo::UpdateGatherPipe);
|
||||
BLR(X0);
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/HW/DSP.h"
|
||||
#include "Core/HW/GPFifo.h"
|
||||
#include "Core/HW/MMIO.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||
@ -230,7 +229,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
||||
|
||||
if (is_immediate && jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))
|
||||
{
|
||||
ARM64Reg WA = INVALID_REG;
|
||||
int accessSize;
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
accessSize = 32;
|
||||
@ -239,30 +237,23 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
|
||||
else
|
||||
accessSize = 8;
|
||||
|
||||
if (accessSize != 8)
|
||||
WA = gpr.GetReg();
|
||||
|
||||
MOVP2R(X1, &GPFifo::g_gather_pipe_ptr);
|
||||
LDR(INDEX_UNSIGNED, X0, X1, 0);
|
||||
LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
|
||||
if (accessSize == 32)
|
||||
{
|
||||
REV32(WA, RS);
|
||||
STR(INDEX_POST, WA, X0, 4);
|
||||
REV32(W1, RS);
|
||||
STR(INDEX_POST, W1, X0, 4);
|
||||
}
|
||||
else if (accessSize == 16)
|
||||
{
|
||||
REV16(WA, RS);
|
||||
STRH(INDEX_POST, WA, X0, 2);
|
||||
REV16(W1, RS);
|
||||
STRH(INDEX_POST, W1, X0, 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
STRB(INDEX_POST, RS, X0, 1);
|
||||
}
|
||||
STR(INDEX_UNSIGNED, X0, X1, 0);
|
||||
STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
|
||||
js.fifoBytesSinceCheck += accessSize >> 3;
|
||||
|
||||
if (accessSize != 8)
|
||||
gpr.Unlock(WA);
|
||||
}
|
||||
else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
|
||||
{
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/HW/GPFifo.h"
|
||||
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
|
||||
#include "Core/PowerPC/PPCTables.h"
|
||||
@ -357,8 +356,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
else
|
||||
accessSize = 32;
|
||||
|
||||
MOVP2R(X1, &GPFifo::g_gather_pipe_ptr);
|
||||
LDR(INDEX_UNSIGNED, X0, X1, 0);
|
||||
LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F64)
|
||||
{
|
||||
m_float_emit.REV64(8, Q0, V0);
|
||||
@ -375,7 +373,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
|
||||
m_float_emit.STR(accessSize, INDEX_POST, accessSize == 64 ? Q0 : D0, X0, accessSize >> 3);
|
||||
|
||||
STR(INDEX_UNSIGNED, X0, X1, 0);
|
||||
STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
|
||||
js.fifoBytesSinceCheck += accessSize >> 3;
|
||||
|
||||
if (update)
|
||||
|
@ -15,11 +15,6 @@ alignas(16) extern const float m_dequantizeTableS[128];
|
||||
class CommonAsmRoutinesBase
|
||||
{
|
||||
public:
|
||||
const u8* fifoDirectWrite8;
|
||||
const u8* fifoDirectWrite16;
|
||||
const u8* fifoDirectWrite32;
|
||||
const u8* fifoDirectWrite64;
|
||||
|
||||
const u8* enterCode;
|
||||
|
||||
const u8* dispatcherMispredictedBLR;
|
||||
|
@ -93,6 +93,10 @@ struct PowerPCState
|
||||
// lscbx
|
||||
u16 xer_stringctrl;
|
||||
|
||||
// gather pipe pointer for JIT access
|
||||
u8* gather_pipe_ptr;
|
||||
u8* gather_pipe_base_ptr;
|
||||
|
||||
#if _M_X86_64
|
||||
// This member exists for the purpose of an assertion in x86 JitBase.cpp
|
||||
// that its offset <= 0x100. To minimize code size on x86, we want as much
|
||||
|
Loading…
x
Reference in New Issue
Block a user