mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-09 23:59:27 +01:00
Merge pull request #12664 from JosJuice/jitarm64-256-mib
JitArm64: Increase farcode & nearcode cache size
This commit is contained in:
commit
e69486d2cb
@ -82,6 +82,10 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool IsInSpace(const u8* ptr) const { return ptr >= region && ptr < (region + region_size); }
|
bool IsInSpace(const u8* ptr) const { return ptr >= region && ptr < (region + region_size); }
|
||||||
|
bool IsInSpaceOrChildSpace(const u8* ptr) const
|
||||||
|
{
|
||||||
|
return ptr >= region && ptr < (region + total_region_size);
|
||||||
|
}
|
||||||
void WriteProtect(bool allow_execute)
|
void WriteProtect(bool allow_execute)
|
||||||
{
|
{
|
||||||
Common::WriteProtectMemory(region, region_size, allow_execute);
|
Common::WriteProtectMemory(region, region_size, allow_execute);
|
||||||
@ -106,7 +110,7 @@ public:
|
|||||||
bool HasChildren() const { return region_size != total_region_size; }
|
bool HasChildren() const { return region_size != total_region_size; }
|
||||||
u8* AllocChildCodeSpace(size_t child_size)
|
u8* AllocChildCodeSpace(size_t child_size)
|
||||||
{
|
{
|
||||||
ASSERT_MSG(DYNA_REC, child_size < GetSpaceLeft(), "Insufficient space for child allocation.");
|
ASSERT_MSG(DYNA_REC, child_size <= GetSpaceLeft(), "Insufficient space for child allocation.");
|
||||||
u8* child_region = region + region_size - child_size;
|
u8* child_region = region + region_size - child_size;
|
||||||
region_size -= child_size;
|
region_size -= child_size;
|
||||||
ResetCodePtr();
|
ResetCodePtr();
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include "Core/PowerPC/JitArm64/Jit.h"
|
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
#include "Common/Arm64Emitter.h"
|
#include "Common/Arm64Emitter.h"
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
@ -29,13 +30,13 @@
|
|||||||
|
|
||||||
using namespace Arm64Gen;
|
using namespace Arm64Gen;
|
||||||
|
|
||||||
constexpr size_t CODE_SIZE = 1024 * 1024 * 32;
|
constexpr size_t NEAR_CODE_SIZE = 1024 * 1024 * 64;
|
||||||
// We use a bigger farcode size for JitArm64 than Jit64, because JitArm64 always emits farcode
|
// We use a bigger farcode size for JitArm64 than Jit64, because JitArm64 always emits farcode
|
||||||
// for the slow path of each loadstore instruction. Jit64 postpones emitting farcode until the
|
// for the slow path of each loadstore instruction. Jit64 postpones emitting farcode until the
|
||||||
// farcode actually is needed, saving it from having to emit farcode for most instructions.
|
// farcode actually is needed, saving it from having to emit farcode for most instructions.
|
||||||
// TODO: Perhaps implement something similar to Jit64. But using more RAM isn't much of a problem.
|
// TODO: Perhaps implement something similar to Jit64. But using more RAM isn't much of a problem.
|
||||||
constexpr size_t FARCODE_SIZE = 1024 * 1024 * 64;
|
constexpr size_t FAR_CODE_SIZE = 1024 * 1024 * 64;
|
||||||
constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 64;
|
constexpr size_t TOTAL_CODE_SIZE = NEAR_CODE_SIZE * 2 + FAR_CODE_SIZE * 2;
|
||||||
|
|
||||||
JitArm64::JitArm64(Core::System& system) : JitBase(system), m_float_emit(this)
|
JitArm64::JitArm64(Core::System& system) : JitBase(system), m_float_emit(this)
|
||||||
{
|
{
|
||||||
@ -49,9 +50,18 @@ void JitArm64::Init()
|
|||||||
|
|
||||||
RefreshConfig();
|
RefreshConfig();
|
||||||
|
|
||||||
const size_t child_code_size = jo.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE;
|
// We want the regions to be laid out in this order in memory:
|
||||||
AllocCodeSpace(CODE_SIZE + child_code_size);
|
// m_far_code_0, m_near_code_0, m_near_code_1, m_far_code_1.
|
||||||
AddChildCodeSpace(&m_far_code, child_code_size);
|
// AddChildCodeSpace grabs space from the end of the parent region,
|
||||||
|
// so we have to call AddChildCodeSpace in reverse order.
|
||||||
|
AllocCodeSpace(TOTAL_CODE_SIZE);
|
||||||
|
AddChildCodeSpace(&m_far_code_1, FAR_CODE_SIZE);
|
||||||
|
AddChildCodeSpace(&m_near_code_1, NEAR_CODE_SIZE);
|
||||||
|
AddChildCodeSpace(&m_near_code_0, NEAR_CODE_SIZE);
|
||||||
|
AddChildCodeSpace(&m_far_code_0, FAR_CODE_SIZE);
|
||||||
|
ASSERT(m_far_code_0.GetCodeEnd() == m_near_code_0.GetCodePtr());
|
||||||
|
ASSERT(m_near_code_0.GetCodeEnd() == m_near_code_1.GetCodePtr());
|
||||||
|
ASSERT(m_near_code_1.GetCodeEnd() == m_far_code_1.GetCodePtr());
|
||||||
|
|
||||||
jo.optimizeGatherPipe = true;
|
jo.optimizeGatherPipe = true;
|
||||||
SetBlockLinkingEnabled(true);
|
SetBlockLinkingEnabled(true);
|
||||||
@ -66,9 +76,7 @@ void JitArm64::Init()
|
|||||||
|
|
||||||
InitBLROptimization();
|
InitBLROptimization();
|
||||||
|
|
||||||
GenerateAsm();
|
GenerateAsmAndResetFreeMemoryRanges();
|
||||||
|
|
||||||
ResetFreeMemoryRanges();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::SetBlockLinkingEnabled(bool enabled)
|
void JitArm64::SetBlockLinkingEnabled(bool enabled)
|
||||||
@ -113,7 +121,7 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
|
|||||||
success = HandleStackFault();
|
success = HandleStackFault();
|
||||||
|
|
||||||
// If the fault is in JIT code space, look for fastmem areas.
|
// If the fault is in JIT code space, look for fastmem areas.
|
||||||
if (!success && IsInSpace(reinterpret_cast<u8*>(ctx->CTX_PC)))
|
if (!success && IsInSpaceOrChildSpace(reinterpret_cast<u8*>(ctx->CTX_PC)))
|
||||||
{
|
{
|
||||||
auto& memory = m_system.GetMemory();
|
auto& memory = m_system.GetMemory();
|
||||||
if (memory.IsAddressInFastmemArea(reinterpret_cast<u8*>(access_address)))
|
if (memory.IsAddressInFastmemArea(reinterpret_cast<u8*>(access_address)))
|
||||||
@ -153,22 +161,47 @@ void JitArm64::ClearCache()
|
|||||||
blocks.Clear();
|
blocks.Clear();
|
||||||
blocks.ClearRangesToFree();
|
blocks.ClearRangesToFree();
|
||||||
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
|
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
|
||||||
ClearCodeSpace();
|
m_far_code_0.ClearCodeSpace();
|
||||||
m_far_code.ClearCodeSpace();
|
m_near_code_0.ClearCodeSpace();
|
||||||
|
m_near_code_1.ClearCodeSpace();
|
||||||
|
m_far_code_1.ClearCodeSpace();
|
||||||
RefreshConfig();
|
RefreshConfig();
|
||||||
|
|
||||||
|
GenerateAsmAndResetFreeMemoryRanges();
|
||||||
|
}
|
||||||
|
|
||||||
|
void JitArm64::GenerateAsmAndResetFreeMemoryRanges()
|
||||||
|
{
|
||||||
|
SetCodePtr(m_near_code_1.GetWritableCodePtr(), m_near_code_1.GetWritableCodeEnd());
|
||||||
|
m_far_code.SetCodePtr(m_far_code_1.GetWritableCodePtr(), m_far_code_1.GetWritableCodeEnd());
|
||||||
|
|
||||||
|
const u8* routines_near_start = GetCodePtr();
|
||||||
|
const u8* routines_far_start = m_far_code.GetCodePtr();
|
||||||
|
|
||||||
GenerateAsm();
|
GenerateAsm();
|
||||||
|
|
||||||
ResetFreeMemoryRanges();
|
const u8* routines_near_end = GetCodePtr();
|
||||||
|
const u8* routines_far_end = m_far_code.GetCodePtr();
|
||||||
|
|
||||||
|
ResetFreeMemoryRanges(routines_near_end - routines_near_start,
|
||||||
|
routines_far_end - routines_far_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ResetFreeMemoryRanges()
|
void JitArm64::ResetFreeMemoryRanges(size_t routines_near_size, size_t routines_far_size)
|
||||||
{
|
{
|
||||||
// Set the near and far code regions as unused.
|
// Set the near and far code regions as unused.
|
||||||
m_free_ranges_near.clear();
|
m_free_ranges_far_0.clear();
|
||||||
m_free_ranges_near.insert(GetWritableCodePtr(), GetWritableCodeEnd());
|
m_free_ranges_far_0.insert(m_far_code_0.GetWritableCodePtr() + routines_near_size,
|
||||||
m_free_ranges_far.clear();
|
m_far_code_0.GetWritableCodeEnd());
|
||||||
m_free_ranges_far.insert(m_far_code.GetWritableCodePtr(), m_far_code.GetWritableCodeEnd());
|
m_free_ranges_near_0.clear();
|
||||||
|
m_free_ranges_near_0.insert(m_near_code_0.GetWritableCodePtr(),
|
||||||
|
m_near_code_0.GetWritableCodeEnd());
|
||||||
|
m_free_ranges_near_1.clear();
|
||||||
|
m_free_ranges_near_1.insert(m_near_code_1.GetWritableCodePtr() + routines_near_size,
|
||||||
|
m_near_code_1.GetWritableCodeEnd());
|
||||||
|
m_free_ranges_far_1.clear();
|
||||||
|
m_free_ranges_far_1.insert(m_far_code_1.GetWritableCodePtr() + routines_far_size,
|
||||||
|
m_far_code_1.GetWritableCodeEnd());
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::Shutdown()
|
void JitArm64::Shutdown()
|
||||||
@ -889,11 +922,17 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
|
|||||||
++last_fastmem_area;
|
++last_fastmem_area;
|
||||||
m_fault_to_handler.erase(first_fastmem_area, last_fastmem_area);
|
m_fault_to_handler.erase(first_fastmem_area, last_fastmem_area);
|
||||||
|
|
||||||
m_free_ranges_near.insert(range.first, range.second);
|
if (range.first < m_near_code_0.GetCodeEnd())
|
||||||
|
m_free_ranges_near_0.insert(range.first, range.second);
|
||||||
|
else
|
||||||
|
m_free_ranges_near_1.insert(range.first, range.second);
|
||||||
}
|
}
|
||||||
for (auto range : blocks.GetRangesToFreeFar())
|
for (auto range : blocks.GetRangesToFreeFar())
|
||||||
{
|
{
|
||||||
m_free_ranges_far.insert(range.first, range.second);
|
if (range.first < m_far_code_0.GetCodeEnd())
|
||||||
|
m_free_ranges_far_0.insert(range.first, range.second);
|
||||||
|
else
|
||||||
|
m_free_ranges_far_1.insert(range.first, range.second);
|
||||||
}
|
}
|
||||||
blocks.ClearRangesToFree();
|
blocks.ClearRangesToFree();
|
||||||
|
|
||||||
@ -939,7 +978,7 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (SetEmitterStateToFreeCodeRegion())
|
if (std::optional<size_t> code_region_index = SetEmitterStateToFreeCodeRegion())
|
||||||
{
|
{
|
||||||
u8* near_start = GetWritableCodePtr();
|
u8* near_start = GetWritableCodePtr();
|
||||||
u8* far_start = m_far_code.GetWritableCodePtr();
|
u8* far_start = m_far_code.GetWritableCodePtr();
|
||||||
@ -952,10 +991,16 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
|
|||||||
// Mark the memory regions that this code block uses as used in the local rangesets.
|
// Mark the memory regions that this code block uses as used in the local rangesets.
|
||||||
u8* near_end = GetWritableCodePtr();
|
u8* near_end = GetWritableCodePtr();
|
||||||
if (near_start != near_end)
|
if (near_start != near_end)
|
||||||
m_free_ranges_near.erase(near_start, near_end);
|
{
|
||||||
|
(code_region_index == 0 ? m_free_ranges_near_0 : m_free_ranges_near_1)
|
||||||
|
.erase(near_start, near_end);
|
||||||
|
}
|
||||||
u8* far_end = m_far_code.GetWritableCodePtr();
|
u8* far_end = m_far_code.GetWritableCodePtr();
|
||||||
if (far_start != far_end)
|
if (far_start != far_end)
|
||||||
m_free_ranges_far.erase(far_start, far_end);
|
{
|
||||||
|
(code_region_index == 0 ? m_free_ranges_far_0 : m_free_ranges_far_1)
|
||||||
|
.erase(far_start, far_end);
|
||||||
|
}
|
||||||
|
|
||||||
// Store the used memory regions in the block so we know what to mark as unused when the
|
// Store the used memory regions in the block so we know what to mark as unused when the
|
||||||
// block gets invalidated.
|
// block gets invalidated.
|
||||||
@ -984,27 +1029,52 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
|
|||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool JitArm64::SetEmitterStateToFreeCodeRegion()
|
std::optional<size_t> JitArm64::SetEmitterStateToFreeCodeRegion()
|
||||||
{
|
{
|
||||||
// Find the largest free memory blocks and set code emitters to point at them.
|
// Find some large free memory blocks and set code emitters to point at them. If we can't find
|
||||||
// If we can't find a free block return false instead, which will trigger a JIT cache clear.
|
// free blocks, return std::nullopt instead, which will trigger a JIT cache clear.
|
||||||
auto free_near = m_free_ranges_near.by_size_begin();
|
const auto free_near_0 = m_free_ranges_near_0.by_size_begin();
|
||||||
if (free_near == m_free_ranges_near.by_size_end())
|
const auto free_near_1 = m_free_ranges_near_1.by_size_begin();
|
||||||
{
|
const auto free_far_0 = m_free_ranges_far_0.by_size_begin();
|
||||||
WARN_LOG_FMT(DYNA_REC, "Failed to find free memory region in near code region.");
|
const auto free_far_1 = m_free_ranges_far_1.by_size_begin();
|
||||||
return false;
|
|
||||||
}
|
|
||||||
SetCodePtr(free_near.from(), free_near.to());
|
|
||||||
|
|
||||||
auto free_far = m_free_ranges_far.by_size_begin();
|
const size_t free_near_1_size = free_near_1.to() - free_near_1.from();
|
||||||
if (free_far == m_free_ranges_far.by_size_end())
|
const size_t free_far_1_size = free_far_1.to() - free_far_1.from();
|
||||||
{
|
const size_t free_1_smallest_size = std::min(free_near_1_size, free_far_1_size);
|
||||||
WARN_LOG_FMT(DYNA_REC, "Failed to find free memory region in far code region.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
m_far_code.SetCodePtr(free_far.from(), free_far.to());
|
|
||||||
|
|
||||||
return true;
|
if (free_1_smallest_size >= 1024 * 1024)
|
||||||
|
{
|
||||||
|
// Don't use region 0 unless region 1 is getting full. This improves cache friendliness.
|
||||||
|
SetCodePtr(free_near_1.from(), free_near_1.to());
|
||||||
|
m_far_code.SetCodePtr(free_far_1.from(), free_far_1.to());
|
||||||
|
return std::make_optional(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t free_near_0_size = free_near_0.to() - free_near_0.from();
|
||||||
|
const size_t free_far_0_size = free_far_0.to() - free_far_0.from();
|
||||||
|
const size_t free_0_smallest_size = std::min(free_near_0_size, free_far_0_size);
|
||||||
|
|
||||||
|
if (free_0_smallest_size == 0 && free_1_smallest_size == 0)
|
||||||
|
{
|
||||||
|
if (free_near_0_size == 0 && free_near_1_size == 0)
|
||||||
|
WARN_LOG_FMT(DYNA_REC, "Failed to find free memory region in near code regions.");
|
||||||
|
else if (free_far_0_size == 0 && free_far_1_size == 0)
|
||||||
|
WARN_LOG_FMT(DYNA_REC, "Failed to find free memory region in far code regions.");
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (free_0_smallest_size > free_1_smallest_size)
|
||||||
|
{
|
||||||
|
SetCodePtr(free_near_0.from(), free_near_0.to());
|
||||||
|
m_far_code.SetCodePtr(free_far_0.from(), free_far_0.to());
|
||||||
|
return std::make_optional(0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SetCodePtr(free_near_1.from(), free_near_1.to());
|
||||||
|
m_far_code.SetCodePtr(free_far_1.from(), free_far_1.to());
|
||||||
|
return std::make_optional(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <optional>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
#include <rangeset/rangesizeset.h>
|
#include <rangeset/rangesizeset.h>
|
||||||
@ -285,14 +286,16 @@ protected:
|
|||||||
void Trace();
|
void Trace();
|
||||||
|
|
||||||
// Finds a free memory region and sets the near and far code emitters to point at that region.
|
// Finds a free memory region and sets the near and far code emitters to point at that region.
|
||||||
// Returns false if no free memory region can be found for either of the two.
|
// On success, returns the index of the memory region (either 0 or 1).
|
||||||
bool SetEmitterStateToFreeCodeRegion();
|
// If either near code or far code is full, returns std::nullopt.
|
||||||
|
std::optional<size_t> SetEmitterStateToFreeCodeRegion();
|
||||||
|
|
||||||
void DoDownCount();
|
void DoDownCount();
|
||||||
void Cleanup();
|
void Cleanup();
|
||||||
void ResetStack();
|
void ResetStack();
|
||||||
|
|
||||||
void ResetFreeMemoryRanges();
|
void GenerateAsmAndResetFreeMemoryRanges();
|
||||||
|
void ResetFreeMemoryRanges(size_t routines_near_size, size_t routines_far_size);
|
||||||
|
|
||||||
void IntializeSpeculativeConstants();
|
void IntializeSpeculativeConstants();
|
||||||
|
|
||||||
@ -372,6 +375,28 @@ protected:
|
|||||||
|
|
||||||
Arm64Gen::ARM64FloatEmitter m_float_emit;
|
Arm64Gen::ARM64FloatEmitter m_float_emit;
|
||||||
|
|
||||||
|
// Because B instructions can't jump farther than +/- 128 MiB, code memory is allocated like this:
|
||||||
|
//
|
||||||
|
// m_far_code_0: x MiB of unused space, followed by 64 - x MiB of far code
|
||||||
|
// m_near_code_0: 64 MiB of near code
|
||||||
|
// m_near_code_1: x MiB of asm routines, followed by 64 - x MiB of near code
|
||||||
|
// m_far_code_1: 64 MiB of far code
|
||||||
|
//
|
||||||
|
// This ensures that:
|
||||||
|
//
|
||||||
|
// * Any code in m_near_code_0 can reach any code in m_far_code_0, and vice versa
|
||||||
|
// * Any code in m_near_code_1 can reach any code in m_far_code_1, and vice versa
|
||||||
|
// * Any near code can reach any near code
|
||||||
|
// * Any code can reach any asm routine
|
||||||
|
//
|
||||||
|
// m_far_code_0 and m_far_code_1 can't reach each other, but that isn't needed, because all blocks
|
||||||
|
// have their entry points in near code.
|
||||||
|
|
||||||
|
Arm64Gen::ARM64CodeBlock m_near_code_0;
|
||||||
|
Arm64Gen::ARM64CodeBlock m_near_code_1;
|
||||||
|
Arm64Gen::ARM64CodeBlock m_far_code_0;
|
||||||
|
Arm64Gen::ARM64CodeBlock m_far_code_1;
|
||||||
|
|
||||||
Arm64Gen::ARM64CodeBlock m_far_code;
|
Arm64Gen::ARM64CodeBlock m_far_code;
|
||||||
bool m_in_far_code = false;
|
bool m_in_far_code = false;
|
||||||
|
|
||||||
@ -380,6 +405,8 @@ protected:
|
|||||||
u8* m_near_code_end = nullptr;
|
u8* m_near_code_end = nullptr;
|
||||||
bool m_near_code_write_failed = false;
|
bool m_near_code_write_failed = false;
|
||||||
|
|
||||||
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near;
|
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near_0;
|
||||||
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far;
|
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near_1;
|
||||||
|
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far_0;
|
||||||
|
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far_1;
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user