Merge pull request #8537 from degasus/fastmem

Core/HW -> PowerPC/JIT: Fastmem arena construction
This commit is contained in:
Connor McLaughlin 2020-01-14 09:38:15 +10:00 committed by GitHub
commit efc1ee8e6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 100 additions and 33 deletions

View File

@ -42,6 +42,7 @@ namespace Memory
// Store the MemArena here // Store the MemArena here
u8* physical_base = nullptr; u8* physical_base = nullptr;
u8* logical_base = nullptr; u8* logical_base = nullptr;
static bool is_fastmem_arena_initialized = false;
// The MemArena class // The MemArena class
static Common::MemArena g_arena; static Common::MemArena g_arena;
@ -131,7 +132,7 @@ struct LogicalMemoryView
// other devices, like the GPU, use other rules, approximated by // other devices, like the GPU, use other rules, approximated by
// Memory::GetPointer.) This memory is laid out as follows: // Memory::GetPointer.) This memory is laid out as follows:
// [0x00000000, 0x02000000) - 32MB RAM // [0x00000000, 0x02000000) - 32MB RAM
// [0x02000000, 0x08000000) - Mirrors of 32MB RAM // [0x02000000, 0x08000000) - Mirrors of 32MB RAM (not handled here)
// [0x08000000, 0x0C000000) - EFB "mapping" (not handled here) // [0x08000000, 0x0C000000) - EFB "mapping" (not handled here)
// [0x0C000000, 0x0E000000) - MMIO etc. (not handled here) // [0x0C000000, 0x0E000000) - MMIO etc. (not handled here)
// [0x10000000, 0x14000000) - 64MB RAM (Wii-only; slightly slower) // [0x10000000, 0x14000000) - 64MB RAM (Wii-only; slightly slower)
@ -155,6 +156,7 @@ struct LogicalMemoryView
// //
// TODO: The actual size of RAM is REALRAM_SIZE (24MB); the other 8MB shouldn't // TODO: The actual size of RAM is REALRAM_SIZE (24MB); the other 8MB shouldn't
// be backed by actual memory. // be backed by actual memory.
// TODO: Do we want to handle the mirrors of the GC RAM?
static PhysicalMemoryRegion physical_regions[] = { static PhysicalMemoryRegion physical_regions[] = {
{&m_pRAM, 0x00000000, RAM_SIZE, PhysicalMemoryRegion::ALWAYS}, {&m_pRAM, 0x00000000, RAM_SIZE, PhysicalMemoryRegion::ALWAYS},
{&m_pL1Cache, 0xE0000000, L1_CACHE_SIZE, PhysicalMemoryRegion::ALWAYS}, {&m_pL1Cache, 0xE0000000, L1_CACHE_SIZE, PhysicalMemoryRegion::ALWAYS},
@ -164,7 +166,7 @@ static PhysicalMemoryRegion physical_regions[] = {
static std::vector<LogicalMemoryView> logical_mapped_entries; static std::vector<LogicalMemoryView> logical_mapped_entries;
void Init() static u32 GetFlags()
{ {
bool wii = SConfig::GetInstance().bWii; bool wii = SConfig::GetInstance().bWii;
bool bMMU = SConfig::GetInstance().bMMU; bool bMMU = SConfig::GetInstance().bMMU;
@ -181,6 +183,14 @@ void Init()
flags |= PhysicalMemoryRegion::WII_ONLY; flags |= PhysicalMemoryRegion::WII_ONLY;
if (bFakeVMEM) if (bFakeVMEM)
flags |= PhysicalMemoryRegion::FAKE_VMEM; flags |= PhysicalMemoryRegion::FAKE_VMEM;
return flags;
}
void Init()
{
bool wii = SConfig::GetInstance().bWii;
u32 flags = GetFlags();
u32 mem_size = 0; u32 mem_size = 0;
for (PhysicalMemoryRegion& region : physical_regions) for (PhysicalMemoryRegion& region : physical_regions)
{ {
@ -190,15 +200,14 @@ void Init()
mem_size += region.size; mem_size += region.size;
} }
g_arena.GrabSHMSegment(mem_size); g_arena.GrabSHMSegment(mem_size);
physical_base = Common::MemArena::FindMemoryBase();
// Create an anonymous view of the physical memory
for (PhysicalMemoryRegion& region : physical_regions) for (PhysicalMemoryRegion& region : physical_regions)
{ {
if ((flags & region.flags) != region.flags) if ((flags & region.flags) != region.flags)
continue; continue;
u8* base = physical_base + region.physical_address; *region.out_pointer = (u8*)g_arena.CreateView(region.shm_position, region.size);
*region.out_pointer = (u8*)g_arena.CreateView(region.shm_position, region.size, base);
if (!*region.out_pointer) if (!*region.out_pointer)
{ {
@ -207,10 +216,6 @@ void Init()
} }
} }
#ifndef _ARCH_32
logical_base = physical_base + 0x200000000;
#endif
if (wii) if (wii)
mmio_mapping = InitMMIOWii(); mmio_mapping = InitMMIOWii();
else else
@ -222,8 +227,41 @@ void Init()
m_IsInitialized = true; m_IsInitialized = true;
} }
bool InitFastmemArena()
{
u32 flags = GetFlags();
physical_base = Common::MemArena::FindMemoryBase();
if (!physical_base)
return false;
for (PhysicalMemoryRegion& region : physical_regions)
{
if ((flags & region.flags) != region.flags)
continue;
u8* base = physical_base + region.physical_address;
u8* view = (u8*)g_arena.CreateView(region.shm_position, region.size, base);
if (base != view)
{
return false;
}
}
#ifndef _ARCH_32
logical_base = physical_base + 0x200000000;
#endif
is_fastmem_arena_initialized = true;
return true;
}
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table) void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
{ {
if (!is_fastmem_arena_initialized)
return;
for (auto& entry : logical_mapped_entries) for (auto& entry : logical_mapped_entries)
{ {
g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size); g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size);
@ -279,12 +317,10 @@ void DoState(PointerWrap& p)
void Shutdown() void Shutdown()
{ {
ShutdownFastmemArena();
m_IsInitialized = false; m_IsInitialized = false;
u32 flags = 0; u32 flags = GetFlags();
if (SConfig::GetInstance().bWii)
flags |= PhysicalMemoryRegion::WII_ONLY;
if (m_pFakeVMEM)
flags |= PhysicalMemoryRegion::FAKE_VMEM;
for (PhysicalMemoryRegion& region : physical_regions) for (PhysicalMemoryRegion& region : physical_regions)
{ {
if ((flags & region.flags) != region.flags) if ((flags & region.flags) != region.flags)
@ -292,16 +328,36 @@ void Shutdown()
g_arena.ReleaseView(*region.out_pointer, region.size); g_arena.ReleaseView(*region.out_pointer, region.size);
*region.out_pointer = nullptr; *region.out_pointer = nullptr;
} }
g_arena.ReleaseSHMSegment();
mmio_mapping.reset();
INFO_LOG(MEMMAP, "Memory system shut down.");
}
void ShutdownFastmemArena()
{
if (!is_fastmem_arena_initialized)
return;
u32 flags = GetFlags();
for (PhysicalMemoryRegion& region : physical_regions)
{
if ((flags & region.flags) != region.flags)
continue;
u8* base = physical_base + region.physical_address;
g_arena.ReleaseView(base, region.size);
}
for (auto& entry : logical_mapped_entries) for (auto& entry : logical_mapped_entries)
{ {
g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size); g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size);
} }
logical_mapped_entries.clear(); logical_mapped_entries.clear();
g_arena.ReleaseSHMSegment();
physical_base = nullptr; physical_base = nullptr;
logical_base = nullptr; logical_base = nullptr;
mmio_mapping.reset();
INFO_LOG(MEMMAP, "Memory system shut down."); is_fastmem_arena_initialized = false;
} }
void Clear() void Clear()

View File

@ -62,6 +62,8 @@ extern std::unique_ptr<MMIO::Mapping> mmio_mapping;
bool IsInitialized(); bool IsInitialized();
void Init(); void Init();
void Shutdown(); void Shutdown();
bool InitFastmemArena();
void ShutdownFastmemArena();
void DoState(PointerWrap& p); void DoState(PointerWrap& p);
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table); void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table);

View File

@ -333,6 +333,7 @@ void Jit64::Init()
InitializeInstructionTables(); InitializeInstructionTables();
EnableBlockLink(); EnableBlockLink();
jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena();
jo.optimizeGatherPipe = true; jo.optimizeGatherPipe = true;
jo.accurateSinglePrecision = true; jo.accurateSinglePrecision = true;
UpdateMemoryOptions(); UpdateMemoryOptions();
@ -393,6 +394,8 @@ void Jit64::Shutdown()
FreeStack(); FreeStack();
FreeCodeSpace(); FreeCodeSpace();
Memory::ShutdownFastmemArena();
blocks.Shutdown(); blocks.Shutdown();
m_far_code.Shutdown(); m_far_code.Shutdown();
m_const_pool.Shutdown(); m_const_pool.Shutdown();

View File

@ -304,7 +304,9 @@ void Jit64::dcbz(UGeckoInstruction inst)
AND(32, R(RSCRATCH), Imm32(~31)); AND(32, R(RSCRATCH), Imm32(~31));
} }
if (MSR.DR) bool emit_fast_path = MSR.DR && m_jit.jo.fastmem_arena;
if (emit_fast_path)
{ {
// Perform lookup to see if we can use fast path. // Perform lookup to see if we can use fast path.
MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::dbat_table[0])); MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::dbat_table[0]));
@ -329,7 +331,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH); ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, 0); ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (MSR.DR) if (emit_fast_path)
{ {
FixupBranch end = J(true); FixupBranch end = J(true);
SwitchToNearCode(); SwitchToNearCode();

View File

@ -371,7 +371,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
FixupBranch exit; FixupBranch exit;
const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || MSR.DR; const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || MSR.DR;
const bool fast_check_address = !slowmem && dr_set; const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena;
if (fast_check_address) if (fast_check_address)
{ {
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse); FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse);
@ -435,7 +435,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
BitSet32 registersInUse, bool signExtend) BitSet32 registersInUse, bool signExtend)
{ {
// If the address is known to be RAM, just load it directly. // If the address is known to be RAM, just load it directly.
if (PowerPC::IsOptimizableRAMAddress(address)) if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address))
{ {
UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend); UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend);
return; return;
@ -539,7 +539,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
FixupBranch exit; FixupBranch exit;
const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || MSR.DR; const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || MSR.DR;
const bool fast_check_address = !slowmem && dr_set; const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena;
if (fast_check_address) if (fast_check_address)
{ {
FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse); FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse);
@ -641,7 +641,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
m_jit.js.fifoBytesSinceCheck += accessSize >> 3; m_jit.js.fifoBytesSinceCheck += accessSize >> 3;
return false; return false;
} }
else if (PowerPC::IsOptimizableRAMAddress(address)) else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address))
{ {
WriteToConstRamAddress(accessSize, arg, address); WriteToConstRamAddress(accessSize, arg, address);
return false; return false;

View File

@ -489,6 +489,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
int size = sizes[type] * (single ? 1 : 2); int size = sizes[type] * (single ? 1 : 2);
bool isInline = quantize != -1; bool isInline = quantize != -1;
bool safe_access = m_jit.jo.memcheck || !m_jit.jo.fastmem;
// illegal // illegal
if (type == QUANTIZE_INVALID1 || type == QUANTIZE_INVALID2 || type == QUANTIZE_INVALID3) if (type == QUANTIZE_INVALID1 || type == QUANTIZE_INVALID2 || type == QUANTIZE_INVALID3)
@ -506,7 +507,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
bool extend = single && (type == QUANTIZE_S8 || type == QUANTIZE_S16); bool extend = single && (type == QUANTIZE_S8 || type == QUANTIZE_S16);
if (m_jit.jo.memcheck) if (safe_access)
{ {
BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE_LOAD; BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE_LOAD;
int flags = isInline ? 0 : int flags = isInline ? 0 :
@ -632,8 +633,9 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
{ {
int size = single ? 32 : 64; int size = single ? 32 : 64;
bool extend = false; bool extend = false;
bool safe_access = m_jit.jo.memcheck || !m_jit.jo.fastmem;
if (m_jit.jo.memcheck) if (safe_access)
{ {
BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE; BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE;
int flags = isInline ? 0 : int flags = isInline ? 0 :
@ -644,7 +646,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
if (single) if (single)
{ {
if (m_jit.jo.memcheck) if (safe_access)
{ {
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
} }
@ -669,7 +671,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
// for a good reason, or merely because no game does this. // for a good reason, or merely because no game does this.
// If we find something that actually does do this, maybe this should be changed. How // If we find something that actually does do this, maybe this should be changed. How
// much of a performance hit would it be? // much of a performance hit would it be?
if (m_jit.jo.memcheck) if (safe_access)
{ {
ROL(64, R(RSCRATCH_EXTRA), Imm8(32)); ROL(64, R(RSCRATCH_EXTRA), Imm8(32));
MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA)); MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA));

View File

@ -49,6 +49,8 @@ void JitArm64::Init()
size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE; size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE;
AllocCodeSpace(CODE_SIZE + child_code_size); AllocCodeSpace(CODE_SIZE + child_code_size);
AddChildCodeSpace(&farcode, child_code_size); AddChildCodeSpace(&farcode, child_code_size);
jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena();
jo.enableBlocklink = true; jo.enableBlocklink = true;
jo.optimizeGatherPipe = true; jo.optimizeGatherPipe = true;
UpdateMemoryOptions(); UpdateMemoryOptions();
@ -133,6 +135,7 @@ void JitArm64::ClearCache()
void JitArm64::Shutdown() void JitArm64::Shutdown()
{ {
Memory::ShutdownFastmemArena();
FreeCodeSpace(); FreeCodeSpace();
blocks.Shutdown(); blocks.Shutdown();
FreeStack(); FreeStack();

View File

@ -45,6 +45,6 @@ bool JitBase::CanMergeNextInstructions(int count) const
void JitBase::UpdateMemoryOptions() void JitBase::UpdateMemoryOptions()
{ {
bool any_watchpoints = PowerPC::memchecks.HasAny(); bool any_watchpoints = PowerPC::memchecks.HasAny();
jo.fastmem = SConfig::GetInstance().bFastmem && (MSR.DR || !any_watchpoints); jo.fastmem = SConfig::GetInstance().bFastmem && jo.fastmem_arena && (MSR.DR || !any_watchpoints);
jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints; jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints;
} }

View File

@ -48,6 +48,7 @@ protected:
bool optimizeGatherPipe; bool optimizeGatherPipe;
bool accurateSinglePrecision; bool accurateSinglePrecision;
bool fastmem; bool fastmem;
bool fastmem_arena;
bool memcheck; bool memcheck;
bool profile_blocks; bool profile_blocks;
}; };

View File

@ -1117,13 +1117,12 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe
for (int i = 0; i < 8; i++, pteg_addr += 8) for (int i = 0; i < 8; i++, pteg_addr += 8)
{ {
u32 pteg; u32 pteg = Common::swap32(Memory::Read_U32(pteg_addr));
std::memcpy(&pteg, &Memory::physical_base[pteg_addr], sizeof(u32));
if (pte1 == pteg) if (pte1 == pteg)
{ {
UPTE2 PTE2; UPTE2 PTE2;
PTE2.Hex = Common::swap32(&Memory::physical_base[pteg_addr + 4]); PTE2.Hex = Memory::Read_U32(pteg_addr + 4);
// set the access bits // set the access bits
switch (flag) switch (flag)
@ -1145,8 +1144,7 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe
if (!IsNoExceptionFlag(flag)) if (!IsNoExceptionFlag(flag))
{ {
const u32 swapped_pte2 = Common::swap32(PTE2.Hex); Memory::Write_U32(PTE2.Hex, pteg_addr + 4);
std::memcpy(&Memory::physical_base[pteg_addr + 4], &swapped_pte2, sizeof(u32));
} }
// We already updated the TLB entry if this was caused by a C bit. // We already updated the TLB entry if this was caused by a C bit.