From 70109f8fbd6810655047e63812f2d7a11bace495 Mon Sep 17 00:00:00 2001 From: lynxnb Date: Thu, 17 Nov 2022 19:28:48 +0100 Subject: [PATCH] Work around invalid values in `CNTFRQ_EL0` register Exynos SoCs have a bug where the `CNTFRQ_EL0` register is either set to 0 or contains incoherent values. With this patch, the frequency value is loaded into a static variable and used instead of reading the register. The value will be initialised to the correct value for affected SoCs, while unaffected ones will use the value from the register. --- app/src/main/cpp/skyline/common/utils.h | 33 ++++++++++++-- app/src/main/cpp/skyline/nce.cpp | 57 +++++++++++++++++++++---- app/src/main/cpp/skyline/nce/guest.S | 20 --------- app/src/main/cpp/skyline/nce/guest.h | 7 --- 4 files changed, 77 insertions(+), 40 deletions(-) diff --git a/app/src/main/cpp/skyline/common/utils.h b/app/src/main/cpp/skyline/common/utils.h index c187d672..8c330a03 100644 --- a/app/src/main/cpp/skyline/common/utils.h +++ b/app/src/main/cpp/skyline/common/utils.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include "base.h" @@ -20,13 +21,38 @@ namespace skyline::util { template concept TrivialObject = std::is_trivially_copyable_v && !requires(T v) { v.data(); }; + namespace detail { + /** + * @brief Retrieves the system counter clock frequency + * @note Some devices report an incorrect value so they need special handling + */ + inline u64 InitFrequency() { + char buffer[PROP_VALUE_MAX]; + int len{__system_property_get("ro.product.board", buffer)}; + std::string_view board{buffer, static_cast(len)}; + + u64 frequency; + if (board == "s5e9925") // Exynos 2200 + frequency = 25600000; + else if (board == "exynos2100") // Exynos 2100 + frequency = 26000000; + else if (board == "exynos9810") // Exynos 9810 + frequency = 26000000; + else + asm volatile("MRS %0, CNTFRQ_EL0" : "=r"(frequency)); + + return frequency; + } + } + + inline const u64 ClockFrequency{detail::InitFrequency()}; //!< The 
system counter clock frequency in Hz + /** * @brief Returns the current time in nanoseconds * @return The current time in nanoseconds */ inline i64 GetTimeNs() { - u64 frequency; - asm("MRS %0, CNTFRQ_EL0" : "=r"(frequency)); + u64 frequency{ClockFrequency}; u64 ticks; asm volatile("MRS %0, CNTVCT_EL0" : "=r"(ticks)); return static_cast(((ticks / frequency) * constant::NsInSecond) + (((ticks % frequency) * constant::NsInSecond + (frequency / 2)) / frequency)); @@ -245,7 +271,7 @@ namespace skyline::util { }; namespace detail { - static thread_local std::mt19937_64 generator{GetTimeTicks()}; + inline thread_local std::mt19937_64 generator{GetTimeTicks()}; } /** @@ -326,7 +352,6 @@ namespace skyline::util { using Type = T; }; - template struct IsIncrementingT : std::false_type {}; diff --git a/app/src/main/cpp/skyline/nce.cpp b/app/src/main/cpp/skyline/nce.cpp index 638f36a9..5e79f619 100644 --- a/app/src/main/cpp/skyline/nce.cpp +++ b/app/src/main/cpp/skyline/nce.cpp @@ -297,6 +297,50 @@ namespace skyline::nce { return code; } + constexpr size_t RescaleClockSize{17}; //!< The size of the RescaleClock function in 32-bit ARMv8 instructions + + /** + * @brief Writes instructions to rescale the host clock to Tegra X1 levels + * @note Output is on stack with the stack pointer offset 32B from the initial point + */ + u32 *WriteRescaleClock(u32 *code) { + /* Reserve 32B of stack */ + /* Save working registers */ + *code++ = 0xD10083FF; // SUB SP, SP, #32 + *code++ = 0xA90107E0; // STP X0, X1, [SP, #16] + + /* Load magic constant */ + *code++ = 0xD28F0860; // MOV X0, #30787 + *code++ = 0xF2AE3680; // MOVK X0, #29108, LSL #16 + *code++ = 0xF2CB5880; // MOVK X0, #23236, LSL #32 + *code++ = 0xF2E14F80; // MOVK X0, #2684, LSL #48 + + /* Load clock frequency value */ + for (const auto &mov : instructions::MoveRegister(registers::X1, util::ClockFrequency)) + if (mov) + *code++ = mov; + + /* Multiply clock frequency by magic constant */ + *code++ = 0xD345FC21; // LSR X1, X1, #5 + 
*code++ = 0x9BC07C21; // UMULH X1, X1, X0 + *code++ = 0xD347FC21; // LSR X1, X1, #7 + + /* Load counter value */ + *code++ = 0xD53BE040; // MRS X0, CNTVCT_EL0 + + /* Rescale counter value */ + *code++ = 0x9AC10801; // UDIV X1, X0, X1 + *code++ = 0x8B010421; // ADD X1, X1, X1, LSL #1 + *code++ = 0xD37AE420; // LSL X0, X1, #6 + + /* Store result */ + /* Restore registers */ + *code++ = 0xF90003E0; // STR X0, [SP, #0] + *code++ = 0xA94107E0; // LDP X0, X1, [SP, #16] + + return code; + } + constexpr u32 TpidrEl0{0x5E82}; // ID of TPIDR_EL0 in MRS constexpr u32 TpidrroEl0{0x5E83}; // ID of TPIDRRO_EL0 in MRS constexpr u32 CntfrqEl0{0x5F00}; // ID of CNTFRQ_EL0 in MRS @@ -308,9 +352,7 @@ namespace skyline::nce { size_t size{guest::SaveCtxSize + guest::LoadCtxSize + TrampolineSize}; std::vector offsets; - u64 frequency; - asm("MRS %0, CNTFRQ_EL0" : "=r"(frequency)); - bool rescaleClock{frequency != TegraX1Freq}; + bool rescaleClock{util::ClockFrequency != TegraX1Freq}; auto start{reinterpret_cast(text.data())}, end{reinterpret_cast(text.data() + text.size())}; for (const u32 *instruction{start}; instruction < end; instruction++) { @@ -329,7 +371,7 @@ namespace skyline::nce { } else { if (rescaleClock) { if (mrs.srcReg == CntpctEl0) { - size += guest::RescaleClockSize + 3; + size += RescaleClockSize + 3; offsets.push_back(instructionOffset); } else if (mrs.srcReg == CntfrqEl0) { size += 3; @@ -359,9 +401,7 @@ namespace skyline::nce { std::memcpy(patch, reinterpret_cast(&guest::LoadCtx), guest::LoadCtxSize * sizeof(u32)); patch += guest::LoadCtxSize; - u64 frequency; - asm("MRS %0, CNTFRQ_EL0" : "=r"(frequency)); - bool rescaleClock{frequency != TegraX1Freq}; + bool rescaleClock{util::ClockFrequency != TegraX1Freq}; for (auto offset : offsets) { u32 *instruction{reinterpret_cast(text.data()) + offset}; @@ -424,8 +464,7 @@ namespace skyline::nce { *instruction = instructions::B(static_cast(endOffset() + offset), true).raw; /* Rescale host clock */ - std::memcpy(patch, 
reinterpret_cast(&guest::RescaleClock), guest::RescaleClockSize * sizeof(u32)); - patch += guest::RescaleClockSize; + patch = WriteRescaleClock(patch); /* Load result from stack into destination register */ instructions::Ldr ldr(0xF94003E0); // LDR XOUT, [SP] diff --git a/app/src/main/cpp/skyline/nce/guest.S b/app/src/main/cpp/skyline/nce/guest.S index 70068024..969bff3b 100644 --- a/app/src/main/cpp/skyline/nce/guest.S +++ b/app/src/main/cpp/skyline/nce/guest.S @@ -93,23 +93,3 @@ LoadCtx: /* Restore Scratch Register */ LDR LR, [SP, #8] RET - -.global RescaleClock -RescaleClock: - SUB SP, SP, #32 - STP X0, X1, [SP, #16] - MOV X0, #30787 - MOVK X0, #29108, LSL #16 - MOVK X0, #23236, LSL #32 - MOVK X0, #2684, LSL #48 - MRS X1, CNTFRQ_EL0 - LSR X1, X1, #5 - UMULH X1, X1, X0 - LSR X1, X1, #7 - MRS X0, CNTVCT_EL0 - UDIV X1, X0, X1 - ADD X1, X1, X1, LSL #1 - LSL X0, X1, #6 - STR X0, [SP, #0] - LDP X0, X1, [SP, #16] - diff --git a/app/src/main/cpp/skyline/nce/guest.h b/app/src/main/cpp/skyline/nce/guest.h index b8283183..c49d9a90 100644 --- a/app/src/main/cpp/skyline/nce/guest.h +++ b/app/src/main/cpp/skyline/nce/guest.h @@ -105,7 +105,6 @@ namespace skyline { namespace guest { constexpr size_t SaveCtxSize{34}; //!< The size of the SaveCtx function in 32-bit ARMv8 instructions constexpr size_t LoadCtxSize{34}; //!< The size of the LoadCtx function in 32-bit ARMv8 instructions - constexpr size_t RescaleClockSize{16}; //!< The size of the RescaleClock function in 32-bit ARMv8 instructions /** * @brief Saves the context from CPU registers into TLS @@ -118,12 +117,6 @@ namespace skyline { * @note Assumes that 8B is reserved at an offset of 8B from SP */ extern "C" void LoadCtx(void); - - /** - * @brief Rescales the host clock to Tegra X1 levels - * @note Output is on stack with the stack pointer offset 32B from the initial point - */ - extern "C" __noreturn void RescaleClock(void); } } }