mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-11-22 00:59:18 +01:00
Make codebase more CPU-agnostic + MacOS disclaimer (#559)
This commit is contained in:
parent
445b0afa95
commit
2c81d240a5
80
dependencies/ih264d/CMakeLists.txt
vendored
80
dependencies/ih264d/CMakeLists.txt
vendored
@ -2,10 +2,6 @@
|
||||
|
||||
project ("ih264d")
|
||||
|
||||
set(LIBAVCDEC_X86_INCLUDES "common/x86" "decoder/x86")
|
||||
|
||||
include_directories("common/" "decoder/" ${LIBAVCDEC_X86_INCLUDES})
|
||||
|
||||
add_library (ih264d
|
||||
"common/ih264_buf_mgr.c"
|
||||
"common/ih264_buf_mgr.h"
|
||||
@ -53,21 +49,6 @@ add_library (ih264d
|
||||
"common/ih264_weighted_pred.h"
|
||||
"common/ithread.c"
|
||||
"common/ithread.h"
|
||||
"common/x86/ih264_chroma_intra_pred_filters_ssse3.c"
|
||||
"common/x86/ih264_deblk_chroma_ssse3.c"
|
||||
"common/x86/ih264_deblk_luma_ssse3.c"
|
||||
"common/x86/ih264_ihadamard_scaling_sse42.c"
|
||||
"common/x86/ih264_ihadamard_scaling_ssse3.c"
|
||||
"common/x86/ih264_inter_pred_filters_ssse3.c"
|
||||
"common/x86/ih264_iquant_itrans_recon_dc_ssse3.c"
|
||||
"common/x86/ih264_iquant_itrans_recon_sse42.c"
|
||||
"common/x86/ih264_iquant_itrans_recon_ssse3.c"
|
||||
"common/x86/ih264_luma_intra_pred_filters_ssse3.c"
|
||||
"common/x86/ih264_mem_fns_ssse3.c"
|
||||
"common/x86/ih264_padding_ssse3.c"
|
||||
"common/x86/ih264_platform_macros.h"
|
||||
"common/x86/ih264_resi_trans_quant_sse42.c"
|
||||
"common/x86/ih264_weighted_pred_sse42.c"
|
||||
"decoder/ih264d.h"
|
||||
"decoder/ih264d_api.c"
|
||||
"decoder/ih264d_bitstrm.c"
|
||||
@ -134,10 +115,71 @@ add_library (ih264d
|
||||
"decoder/ih264d_vui.h"
|
||||
"decoder/iv.h"
|
||||
"decoder/ivd.h"
|
||||
)
|
||||
|
||||
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64")
|
||||
set(LIBAVCDEC_X86_INCLUDES "common/x86" "decoder/x86")
|
||||
include_directories("common/" "decoder/" ${LIBAVCDEC_X86_INCLUDES})
|
||||
target_sources(ih264d PRIVATE
|
||||
"common/x86/ih264_chroma_intra_pred_filters_ssse3.c"
|
||||
"common/x86/ih264_deblk_chroma_ssse3.c"
|
||||
"common/x86/ih264_deblk_luma_ssse3.c"
|
||||
"common/x86/ih264_ihadamard_scaling_sse42.c"
|
||||
"common/x86/ih264_ihadamard_scaling_ssse3.c"
|
||||
"common/x86/ih264_inter_pred_filters_ssse3.c"
|
||||
"common/x86/ih264_iquant_itrans_recon_dc_ssse3.c"
|
||||
"common/x86/ih264_iquant_itrans_recon_sse42.c"
|
||||
"common/x86/ih264_iquant_itrans_recon_ssse3.c"
|
||||
"common/x86/ih264_luma_intra_pred_filters_ssse3.c"
|
||||
"common/x86/ih264_mem_fns_ssse3.c"
|
||||
"common/x86/ih264_padding_ssse3.c"
|
||||
"common/x86/ih264_platform_macros.h"
|
||||
"common/x86/ih264_resi_trans_quant_sse42.c"
|
||||
"common/x86/ih264_weighted_pred_sse42.c"
|
||||
"decoder/x86/ih264d_function_selector.c"
|
||||
"decoder/x86/ih264d_function_selector_sse42.c"
|
||||
"decoder/x86/ih264d_function_selector_ssse3.c"
|
||||
)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
|
||||
enable_language( C CXX ASM )
|
||||
set(LIBAVCDEC_ARM_INCLUDES "common/armv8" "decoder/arm")
|
||||
include_directories("common/" "decoder/" ${LIBAVCDEC_ARM_INCLUDES})
|
||||
target_sources(ih264d PRIVATE
|
||||
"common/armv8/ih264_deblk_chroma_av8.s"
|
||||
"common/armv8/ih264_deblk_luma_av8.s"
|
||||
"common/armv8/ih264_default_weighted_pred_av8.s"
|
||||
"common/armv8/ih264_ihadamard_scaling_av8.s"
|
||||
"common/armv8/ih264_inter_pred_chroma_av8.s"
|
||||
"common/armv8/ih264_inter_pred_filters_luma_horz_av8.s"
|
||||
"common/armv8/ih264_inter_pred_filters_luma_vert_av8.s"
|
||||
"common/armv8/ih264_inter_pred_luma_copy_av8.s"
|
||||
"common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s"
|
||||
"common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s"
|
||||
"common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s"
|
||||
"common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s"
|
||||
"common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s"
|
||||
"common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s"
|
||||
"common/armv8/ih264_intra_pred_chroma_av8.s"
|
||||
"common/armv8/ih264_intra_pred_luma_16x16_av8.s"
|
||||
"common/armv8/ih264_intra_pred_luma_4x4_av8.s"
|
||||
"common/armv8/ih264_intra_pred_luma_8x8_av8.s"
|
||||
"common/armv8/ih264_iquant_itrans_recon_av8.s"
|
||||
"common/armv8/ih264_iquant_itrans_recon_dc_av8.s"
|
||||
"common/armv8/ih264_mem_fns_neon_av8.s"
|
||||
"common/armv8/ih264_neon_macros.s"
|
||||
"common/armv8/ih264_padding_neon_av8.s"
|
||||
"common/armv8/ih264_platform_macros.h"
|
||||
"common/armv8/ih264_resi_trans_quant_av8.s"
|
||||
"common/armv8/ih264_weighted_bi_pred_av8.s"
|
||||
"common/armv8/ih264_weighted_pred_av8.s"
|
||||
"decoder/arm/ih264d_function_selector_a9q.c"
|
||||
"decoder/arm/ih264d_function_selector_av8.c"
|
||||
"decoder/arm/ih264d_function_selector.c"
|
||||
)
|
||||
target_compile_options(ih264d PRIVATE -DARMV8)
|
||||
else()
|
||||
message(FATAL_ERROR "ih264d unknown architecture: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set_property(TARGET ih264d PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
|
@ -203,7 +203,6 @@ extern uint64 ppcMainThreadDECCycleStart; // at which cycle the dec register was
|
||||
void PPCTimer_init();
|
||||
void PPCTimer_waitForInit();
|
||||
uint64 PPCTimer_getFromRDTSC();
|
||||
bool PPCTimer_hasInvariantRDTSCSupport();
|
||||
|
||||
uint64 PPCTimer_microsecondsToTsc(uint64 us);
|
||||
uint64 PPCTimer_tscToMicroseconds(uint64 us);
|
||||
|
@ -1,9 +1,14 @@
|
||||
#include "Cafe/HW/Espresso/Const.h"
|
||||
#include <immintrin.h>
|
||||
#include "asm/x64util.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
#include <immintrin.h>
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#endif
|
||||
|
||||
uint64 _rdtscLastMeasure = 0;
|
||||
uint64 _rdtscFrequency = 0;
|
||||
@ -18,8 +23,6 @@ static_assert(sizeof(uint128_t) == 16);
|
||||
|
||||
uint128_t _rdtscAcc{};
|
||||
|
||||
#pragma intrinsic(__rdtsc)
|
||||
|
||||
uint64 muldiv64(uint64 a, uint64 b, uint64 d)
|
||||
{
|
||||
uint64 diva = a / d;
|
||||
@ -29,17 +32,12 @@ uint64 muldiv64(uint64 a, uint64 b, uint64 d)
|
||||
return diva * b + moda * divb + moda * modb / d;
|
||||
}
|
||||
|
||||
bool PPCTimer_hasInvariantRDTSCSupport()
|
||||
{
|
||||
uint32 cpuv[4];
|
||||
cpuid((int*)cpuv, 0x80000007);
|
||||
return ((cpuv[3] >> 8) & 1);
|
||||
}
|
||||
|
||||
uint64 PPCTimer_estimateRDTSCFrequency()
|
||||
{
|
||||
if (PPCTimer_hasInvariantRDTSCSupport() == false)
|
||||
forceLog_printf("Invariant TSC not supported");
|
||||
#if defined(ARCH_X86_64)
|
||||
if (!g_CPUFeatures.x86.invariant_tsc)
|
||||
cemuLog_log(LogType::Force, "Invariant TSC not supported");
|
||||
#endif
|
||||
|
||||
_mm_mfence();
|
||||
uint64 tscStart = __rdtsc();
|
||||
|
@ -8,11 +8,10 @@
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "config/LaunchSettings.h"
|
||||
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#include "Common/ExceptionHandler/ExceptionHandler.h"
|
||||
#include "Common/cpu_features.h"
|
||||
#include "util/helpers/fspinlock.h"
|
||||
#include "util/helpers/helpers.h"
|
||||
|
||||
#include "util/MemMapper/MemMapper.h"
|
||||
|
||||
struct PPCInvalidationRange
|
||||
@ -461,6 +460,20 @@ void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr)
|
||||
PPCRecompilerState.recompilerSpinlock.unlock();
|
||||
}
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
void PPCRecompiler_initPlatform()
|
||||
{
|
||||
// mxcsr
|
||||
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
|
||||
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;
|
||||
}
|
||||
#else
|
||||
void PPCRecompiler_initPlatform()
|
||||
{
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
void PPCRecompiler_init()
|
||||
{
|
||||
if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter)
|
||||
@ -569,21 +582,9 @@ void PPCRecompiler_init()
|
||||
ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br;
|
||||
}
|
||||
|
||||
// mxcsr
|
||||
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000;
|
||||
ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80;
|
||||
PPCRecompiler_initPlatform();
|
||||
|
||||
// query processor extensions
|
||||
int cpuInfo[4];
|
||||
cpuid(cpuInfo, 0x80000001);
|
||||
hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0;
|
||||
cpuid(cpuInfo, 0x1);
|
||||
hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0;
|
||||
hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0;
|
||||
cpuidex(cpuInfo, 0x7, 0);
|
||||
hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0;
|
||||
|
||||
forceLog_printf("Recompiler initialized. CPU extensions: %s%s%s", hasLZCNTSupport ? "LZCNT " : "", hasMOVBESupport ? "MOVBE " : "", hasAVXSupport ? "AVX " : "");
|
||||
forceLog_printf("Recompiler initialized");
|
||||
|
||||
ppcRecompilerEnabled = true;
|
||||
|
||||
|
@ -384,12 +384,6 @@ extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)();
|
||||
|
||||
#define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1)
|
||||
|
||||
// CPUID
|
||||
extern bool hasLZCNTSupport;
|
||||
extern bool hasMOVBESupport;
|
||||
extern bool hasBMI2Support;
|
||||
extern bool hasAVXSupport;
|
||||
|
||||
// todo - move some of the stuff above into PPCRecompilerInternal.h
|
||||
|
||||
// recompiler interface
|
||||
|
@ -3,14 +3,6 @@
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "Cafe/GameProfile/GameProfile.h"
|
||||
|
||||
bool hasSSE1Support = true;
|
||||
bool hasSSE2Support = true;
|
||||
bool hasSSE3Support = true;
|
||||
bool hasLZCNTSupport = false;
|
||||
bool hasMOVBESupport = false;
|
||||
bool hasBMI2Support = false;
|
||||
bool hasAVXSupport = false;
|
||||
|
||||
void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER)
|
||||
{
|
||||
// load from memory
|
||||
@ -145,8 +137,6 @@ void PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext_t
|
||||
|
||||
bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE1Support == false )
|
||||
return false;
|
||||
sint32 rA, frD;
|
||||
uint32 imm;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm);
|
||||
@ -167,8 +157,6 @@ bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
|
||||
|
||||
bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE1Support == false )
|
||||
return false;
|
||||
sint32 rA, frD;
|
||||
uint32 imm;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm);
|
||||
@ -191,8 +179,6 @@ bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||
|
||||
bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE2Support == false )
|
||||
return false;
|
||||
sint32 rA, frD, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, frD, rA, rB);
|
||||
if( rA == 0 )
|
||||
@ -218,8 +204,6 @@ bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||
|
||||
bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE2Support == false )
|
||||
return false;
|
||||
sint32 rA, frD, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, frD, rA, rB);
|
||||
if( rA == 0 )
|
||||
@ -247,8 +231,6 @@ bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
|
||||
bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE1Support == false )
|
||||
return false;
|
||||
sint32 rA, frD;
|
||||
uint32 imm;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm);
|
||||
@ -266,8 +248,6 @@ bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode
|
||||
|
||||
bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE1Support == false )
|
||||
return false;
|
||||
sint32 rA, frD;
|
||||
uint32 imm;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm);
|
||||
@ -288,8 +268,6 @@ bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||
|
||||
bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE2Support == false )
|
||||
return false;
|
||||
sint32 rA, frD, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, frD, rA, rB);
|
||||
if( rA == 0 )
|
||||
@ -308,8 +286,6 @@ bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||
|
||||
bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE2Support == false )
|
||||
return false;
|
||||
sint32 rA, frD, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, frD, rA, rB);
|
||||
if( rA == 0 )
|
||||
@ -330,8 +306,6 @@ bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
|
||||
bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE1Support == false )
|
||||
return false;
|
||||
sint32 rA, frD;
|
||||
uint32 imm;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm);
|
||||
@ -346,8 +320,6 @@ bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||
|
||||
bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE1Support == false )
|
||||
return false;
|
||||
sint32 rA, frD;
|
||||
uint32 imm;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm);
|
||||
@ -364,8 +336,6 @@ bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
|
||||
bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE2Support == false )
|
||||
return false;
|
||||
sint32 rA, frS, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, frS, rA, rB);
|
||||
if( rA == 0 )
|
||||
@ -392,8 +362,6 @@ bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
|
||||
bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE2Support == false )
|
||||
return false;
|
||||
sint32 rA, frS, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, frS, rA, rB);
|
||||
if( rA == 0 )
|
||||
@ -415,8 +383,6 @@ bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
|
||||
|
||||
bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE1Support == false )
|
||||
return false;
|
||||
sint32 rA, frD;
|
||||
uint32 imm;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm);
|
||||
@ -435,8 +401,6 @@ bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod
|
||||
|
||||
bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE1Support == false )
|
||||
return false;
|
||||
sint32 rA, frD;
|
||||
uint32 imm;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm);
|
||||
@ -458,8 +422,6 @@ bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
|
||||
bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE2Support == false )
|
||||
return false;
|
||||
sint32 rA, frS, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, frS, rA, rB);
|
||||
if( rA == 0 )
|
||||
@ -485,8 +447,6 @@ bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
|
||||
bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if( hasSSE2Support == false )
|
||||
return false;
|
||||
sint32 rA, frS, rB;
|
||||
PPC_OPC_TEMPL_X(opcode, frS, rA, rB);
|
||||
// get memory gpr registers
|
||||
@ -959,10 +919,6 @@ bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
sint32 crfD, frA, frB;
|
||||
PPC_OPC_TEMPL_X(opcode, crfD, frA, frB);
|
||||
crfD >>= 2;
|
||||
if( hasSSE2Support == false )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA);
|
||||
uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
|
||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD);
|
||||
@ -974,10 +930,6 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
sint32 crfD, frA, frB;
|
||||
PPC_OPC_TEMPL_X(opcode, crfD, frA, frB);
|
||||
crfD >>= 2;
|
||||
if( hasSSE2Support == false )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA);
|
||||
uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB);
|
||||
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD);
|
||||
@ -1120,8 +1072,6 @@ bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
|
||||
|
||||
bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if (hasSSE2Support == false)
|
||||
return false;
|
||||
int rA, frD;
|
||||
uint32 immUnused;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused);
|
||||
@ -1146,8 +1096,6 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
|
||||
|
||||
bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
|
||||
{
|
||||
if (hasSSE2Support == false)
|
||||
return false;
|
||||
int rA, frD;
|
||||
uint32 immUnused;
|
||||
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused);
|
||||
|
@ -5,8 +5,8 @@
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
|
||||
|
||||
#include "util/MemMapper/MemMapper.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping
|
||||
{
|
||||
@ -381,7 +381,7 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
|
||||
{
|
||||
x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
if( hasMOVBESupport && switchEndian )
|
||||
if( g_CPUFeatures.x86.movbe && switchEndian )
|
||||
{
|
||||
if (indexed)
|
||||
{
|
||||
@ -419,7 +419,7 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
|
||||
{
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
if( hasMOVBESupport && switchEndian )
|
||||
if( g_CPUFeatures.x86.movbe && switchEndian )
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
@ -477,7 +477,7 @@ bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, p
|
||||
assert_dbg();
|
||||
if( indexed )
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move)
|
||||
if( hasMOVBESupport )
|
||||
if( g_CPUFeatures.x86.movbe )
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
if( indexed && realRegisterMem != realRegisterData )
|
||||
@ -537,7 +537,7 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
|
||||
if (indexed)
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
uint32 valueRegister;
|
||||
if ((swapEndian == false || hasMOVBESupport) && realRegisterMem != realRegisterData)
|
||||
if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData)
|
||||
{
|
||||
valueRegister = realRegisterData;
|
||||
}
|
||||
@ -546,11 +546,11 @@ bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction,
|
||||
x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData);
|
||||
valueRegister = REG_RESV_TEMP;
|
||||
}
|
||||
if (hasMOVBESupport == false && swapEndian)
|
||||
if (g_CPUFeatures.x86.movbe == false && swapEndian)
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister);
|
||||
if (indexed)
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
if (hasMOVBESupport && swapEndian)
|
||||
if (g_CPUFeatures.x86.movbe && swapEndian)
|
||||
x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
|
||||
else
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister);
|
||||
@ -802,8 +802,7 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, pp
|
||||
// count leading zeros
|
||||
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext);
|
||||
cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER);
|
||||
// LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5])
|
||||
if( hasLZCNTSupport )
|
||||
if( g_CPUFeatures.x86.lzcnt )
|
||||
{
|
||||
x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA));
|
||||
}
|
||||
@ -1521,12 +1520,12 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction,
|
||||
sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA);
|
||||
sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB);
|
||||
|
||||
if (hasBMI2Support && imlInstruction->operation == PPCREC_IML_OP_SRW)
|
||||
if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW)
|
||||
{
|
||||
// use BMI2 SHRX if available
|
||||
x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
|
||||
}
|
||||
else if (hasBMI2Support && imlInstruction->operation == PPCREC_IML_OP_SLW)
|
||||
else if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SLW)
|
||||
{
|
||||
// use BMI2 SHLX if available
|
||||
x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2);
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "PPCRecompilerIml.h"
|
||||
#include "PPCRecompilerX64.h"
|
||||
#include "asm/x64util.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction)
|
||||
{
|
||||
@ -86,7 +87,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
|
||||
{
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg);
|
||||
if (hasMOVBESupport)
|
||||
if (g_CPUFeatures.x86.movbe)
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32);
|
||||
}
|
||||
@ -98,7 +99,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
|
||||
}
|
||||
else
|
||||
{
|
||||
if (hasMOVBESupport)
|
||||
if (g_CPUFeatures.x86.movbe)
|
||||
{
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32);
|
||||
}
|
||||
@ -108,7 +109,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext,
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
}
|
||||
}
|
||||
if (hasAVXSupport)
|
||||
if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP);
|
||||
}
|
||||
@ -280,21 +281,21 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
||||
{
|
||||
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem);
|
||||
if( hasMOVBESupport )
|
||||
if( g_CPUFeatures.x86.movbe )
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
|
||||
else
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32);
|
||||
}
|
||||
else
|
||||
{
|
||||
if( hasMOVBESupport )
|
||||
if( g_CPUFeatures.x86.movbe )
|
||||
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
else
|
||||
x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32);
|
||||
}
|
||||
if( hasMOVBESupport == false )
|
||||
if( g_CPUFeatures.x86.movbe == false )
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
if( hasAVXSupport )
|
||||
if( g_CPUFeatures.x86.avx )
|
||||
{
|
||||
x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP);
|
||||
}
|
||||
@ -316,7 +317,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunctio
|
||||
}
|
||||
else if( mode == PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0 )
|
||||
{
|
||||
if( hasAVXSupport )
|
||||
if( g_CPUFeatures.x86.avx )
|
||||
{
|
||||
if( indexed )
|
||||
{
|
||||
@ -419,7 +420,7 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext
|
||||
if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0)
|
||||
{
|
||||
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM);
|
||||
if (hasAVXSupport)
|
||||
if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
@ -428,14 +429,14 @@ void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext
|
||||
x64Gen_movsd_memReg64_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
|
||||
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
|
||||
}
|
||||
if (hasMOVBESupport == false)
|
||||
if (g_CPUFeatures.x86.movbe == false)
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
if (indexed)
|
||||
{
|
||||
cemu_assert_debug(memReg != memRegEx);
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx);
|
||||
}
|
||||
if (hasMOVBESupport)
|
||||
if (g_CPUFeatures.x86.movbe)
|
||||
x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP);
|
||||
else
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP);
|
||||
@ -604,7 +605,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
||||
if (imlInstruction->op_storeLoad.flags2.notExpanded)
|
||||
{
|
||||
// value is already in single format
|
||||
if (hasAVXSupport)
|
||||
if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
|
||||
}
|
||||
@ -617,7 +618,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
||||
else
|
||||
{
|
||||
x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM);
|
||||
if (hasAVXSupport)
|
||||
if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP);
|
||||
}
|
||||
@ -627,7 +628,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
||||
x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR));
|
||||
}
|
||||
}
|
||||
if( hasMOVBESupport == false )
|
||||
if( g_CPUFeatures.x86.movbe == false )
|
||||
x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP);
|
||||
if( indexed )
|
||||
{
|
||||
@ -635,7 +636,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
||||
assert_dbg();
|
||||
x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2);
|
||||
}
|
||||
if( hasMOVBESupport )
|
||||
if( g_CPUFeatures.x86.movbe )
|
||||
x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
else
|
||||
x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP);
|
||||
@ -668,7 +669,7 @@ bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFuncti
|
||||
}
|
||||
else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 )
|
||||
{
|
||||
if( hasAVXSupport )
|
||||
if( g_CPUFeatures.x86.avx )
|
||||
{
|
||||
x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM);
|
||||
}
|
||||
@ -749,7 +750,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
|
||||
// unpack top to bottom and top
|
||||
x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand);
|
||||
}
|
||||
//else if ( hasAVXSupport )
|
||||
//else if ( g_CPUFeatures.x86.avx )
|
||||
//{
|
||||
// // unpack top to bottom and top with non-destructive destination
|
||||
// // update: On Ivy Bridge this causes weird stalls?
|
||||
@ -1056,7 +1057,7 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFuncti
|
||||
{
|
||||
x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB);
|
||||
}
|
||||
else if (hasAVXSupport)
|
||||
else if (g_CPUFeatures.x86.avx)
|
||||
{
|
||||
x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA, imlInstruction->op_fpr_r_r_r.registerOperandB);
|
||||
}
|
||||
|
@ -1,8 +1,9 @@
|
||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
|
||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
#if __GNUC__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
@ -14,6 +15,7 @@
|
||||
#define ATTRIBUTE_AVX2
|
||||
#define ATTRIBUTE_SSE41
|
||||
#endif
|
||||
#endif
|
||||
|
||||
struct
|
||||
{
|
||||
@ -292,6 +294,7 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun
|
||||
indexMax = std::max(count, 1u) - 1;
|
||||
}
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
ATTRIBUTE_AVX2
|
||||
void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
|
||||
{
|
||||
@ -487,6 +490,7 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat
|
||||
indexMax = std::max(indexMax, _maxIndex);
|
||||
indexMin = std::min(indexMin, _minIndex);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax)
|
||||
@ -669,19 +673,27 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
|
||||
{
|
||||
if (indexType == LatteIndexType::U16_BE)
|
||||
{
|
||||
if (_cpuExtension_AVX2)
|
||||
#if defined(ARCH_X86_64)
|
||||
if (g_CPUFeatures.x86.avx2)
|
||||
LatteIndices_fastConvertU16_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else if (_cpuExtension_SSE4_1 && _cpuExtension_SSSE3)
|
||||
else if (g_CPUFeatures.x86.sse4_1 && g_CPUFeatures.x86.ssse3)
|
||||
LatteIndices_fastConvertU16_SSE41(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else
|
||||
LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#endif
|
||||
}
|
||||
else if (indexType == LatteIndexType::U32_BE)
|
||||
{
|
||||
if (_cpuExtension_AVX2)
|
||||
#if defined(ARCH_X86_64)
|
||||
if (g_CPUFeatures.x86.avx2)
|
||||
LatteIndices_fastConvertU32_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else
|
||||
LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#endif
|
||||
}
|
||||
else if (indexType == LatteIndexType::U16_LE)
|
||||
{
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "Cafe/HW/Latte/Core/LatteDraw.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteTexture.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
std::unordered_set<LatteTexture*> g_allTextures;
|
||||
|
||||
@ -146,7 +147,7 @@ uint32 LatteTexture_CalculateTextureDataHash(LatteTexture* hostTexture)
|
||||
if( isCompressedFormat == false )
|
||||
{
|
||||
#if BOOST_OS_WINDOWS
|
||||
if (_cpuExtension_AVX2)
|
||||
if (g_CPUFeatures.x86.avx2)
|
||||
{
|
||||
__m256i h256 = { 0 };
|
||||
__m256i* readPtr = (__m256i*)texDataU32;
|
||||
|
@ -1075,7 +1075,9 @@ namespace coreinit
|
||||
{
|
||||
OSHostThread* hostThread = (OSHostThread*)_thread;
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
_mm_setcsr(_mm_getcsr() | 0x8000); // flush denormals to zero
|
||||
#endif
|
||||
|
||||
uint32 lehmer_lcg = 12345;
|
||||
|
||||
@ -1116,7 +1118,9 @@ namespace coreinit
|
||||
{
|
||||
SetThreadName(fmt::format("OSSchedulerThread[core={}]", (uintptr_t)_assignedCoreIndex).c_str());
|
||||
t_assignedCoreIndex = (sint32)(uintptr_t)_assignedCoreIndex;
|
||||
#if defined(ARCH_X86_64)
|
||||
_mm_setcsr(_mm_getcsr() | 0x8000); // flush denormals to zero
|
||||
#endif
|
||||
t_schedulerFiber = Fiber::PrepareCurrentThread();
|
||||
|
||||
// create scheduler idle fiber and switch to it
|
||||
|
@ -1,5 +1,7 @@
|
||||
add_library(CemuCommon
|
||||
betype.h
|
||||
cpu_features.cpp
|
||||
cpu_features.h
|
||||
enumFlags.h
|
||||
ExceptionHandler/ExceptionHandler.h
|
||||
FileStream.h
|
||||
|
101
src/Common/cpu_features.cpp
Normal file
101
src/Common/cpu_features.cpp
Normal file
@ -0,0 +1,101 @@
|
||||
#include "cpu_features.h"
|
||||
|
||||
// wrappers with uniform prototype for implementation-specific x86 CPU id
|
||||
#if defined(ARCH_X86_64)
|
||||
#ifdef __GNUC__
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
inline void cpuid(int cpuInfo[4], int functionId) {
|
||||
#if defined(_MSC_VER)
|
||||
__cpuid(cpuInfo, functionId);
|
||||
#elif defined(__GNUC__)
|
||||
__cpuid(functionId, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
|
||||
#else
|
||||
#error No definition for cpuid
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void cpuidex(int cpuInfo[4], int functionId, int subFunctionId) {
|
||||
#if defined(_MSC_VER)
|
||||
__cpuidex(cpuInfo, functionId, subFunctionId);
|
||||
#elif defined(__GNUC__)
|
||||
__cpuid_count(functionId, subFunctionId, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
|
||||
#else
|
||||
#error No definition for cpuidex
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
CPUFeaturesImpl::CPUFeaturesImpl()
|
||||
{
|
||||
#if defined(ARCH_X86_64)
|
||||
int cpuInfo[4];
|
||||
cpuid(cpuInfo, 0x80000001);
|
||||
x86.lzcnt = ((cpuInfo[2] >> 5) & 1) != 0;
|
||||
cpuid(cpuInfo, 0x1);
|
||||
x86.movbe = ((cpuInfo[2] >> 22) & 1) != 0;
|
||||
x86.avx = ((cpuInfo[2] >> 28) & 1) != 0;
|
||||
x86.aesni = ((cpuInfo[2] >> 25) & 1) != 0;
|
||||
x86.ssse3 = ((cpuInfo[2] >> 9) & 1) != 0;
|
||||
x86.sse4_1 = ((cpuInfo[2] >> 19) & 1) != 0;
|
||||
cpuidex(cpuInfo, 0x7, 0);
|
||||
x86.avx2 = ((cpuInfo[1] >> 5) & 1) != 0;
|
||||
x86.bmi2 = ((cpuInfo[1] >> 8) & 1) != 0;
|
||||
cpuid(cpuInfo, 0x80000007);
|
||||
x86.invariant_tsc = ((cpuInfo[3] >> 8) & 1);
|
||||
// get CPU brand name
|
||||
uint32_t nExIds, i = 0;
|
||||
memset(m_cpuBrandName, 0, sizeof(m_cpuBrandName));
|
||||
cpuid(cpuInfo, 0x80000000);
|
||||
nExIds = (uint32_t)cpuInfo[0];
|
||||
for (uint32_t i = 0x80000000; i <= nExIds; ++i)
|
||||
{
|
||||
cpuid(cpuInfo, i);
|
||||
if (i == 0x80000002)
|
||||
memcpy(m_cpuBrandName, cpuInfo, sizeof(cpuInfo));
|
||||
else if (i == 0x80000003)
|
||||
memcpy(m_cpuBrandName + 16, cpuInfo, sizeof(cpuInfo));
|
||||
else if (i == 0x80000004)
|
||||
memcpy(m_cpuBrandName + 32, cpuInfo, sizeof(cpuInfo));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string CPUFeaturesImpl::GetCPUName()
|
||||
{
|
||||
return { m_cpuBrandName };
|
||||
}
|
||||
|
||||
std::string CPUFeaturesImpl::GetCommaSeparatedExtensionList()
|
||||
{
|
||||
std::string tmp;
|
||||
auto appendExt = [&tmp](const char* str)
|
||||
{
|
||||
if (!tmp.empty())
|
||||
tmp.append(", ");
|
||||
tmp.append(str);
|
||||
};
|
||||
if (x86.ssse3)
|
||||
appendExt("SSSE3");
|
||||
if (x86.sse4_1)
|
||||
appendExt("SSE4.1");
|
||||
if (x86.avx)
|
||||
appendExt("AVX");
|
||||
if (x86.avx2)
|
||||
appendExt("AVX2");
|
||||
if (x86.lzcnt)
|
||||
appendExt("LZCNT");
|
||||
if (x86.movbe)
|
||||
appendExt("MOVBE");
|
||||
if (x86.bmi2)
|
||||
appendExt("BMI2");
|
||||
if (x86.aesni)
|
||||
appendExt("AES-NI");
|
||||
if(x86.invariant_tsc)
|
||||
appendExt("INVARIANT-TSC");
|
||||
return tmp;
|
||||
}
|
||||
|
||||
CPUFeaturesImpl g_CPUFeatures;
|
26
src/Common/cpu_features.h
Normal file
26
src/Common/cpu_features.h
Normal file
@ -0,0 +1,26 @@
|
||||
|
||||
class CPUFeaturesImpl
|
||||
{
|
||||
public:
|
||||
CPUFeaturesImpl();
|
||||
|
||||
std::string GetCPUName(); // empty if not available
|
||||
std::string GetCommaSeparatedExtensionList();
|
||||
|
||||
struct
|
||||
{
|
||||
bool ssse3{ false };
|
||||
bool sse4_1{ false };
|
||||
bool avx{ false };
|
||||
bool avx2{ false };
|
||||
bool lzcnt{ false };
|
||||
bool movbe{ false };
|
||||
bool bmi2{ false };
|
||||
bool aesni{ false };
|
||||
bool invariant_tsc{ false };
|
||||
}x86;
|
||||
private:
|
||||
char m_cpuBrandName[0x40]{ 0 };
|
||||
};
|
||||
|
||||
extern CPUFeaturesImpl g_CPUFeatures;
|
@ -24,13 +24,22 @@
|
||||
// }
|
||||
// #endif
|
||||
|
||||
// arch defines
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
|
||||
#define ARCH_X86_64
|
||||
#endif
|
||||
|
||||
// c includes
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include <cassert>
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
// c++ includes
|
||||
#include <string>
|
||||
@ -106,11 +115,6 @@ using uint8le = uint8_t;
|
||||
#include "Cemu/Logging/CemuDebugLogging.h"
|
||||
#include "Cemu/Logging/CemuLogging.h"
|
||||
|
||||
// CPU extensions
|
||||
extern bool _cpuExtension_SSSE3;
|
||||
extern bool _cpuExtension_SSE4_1;
|
||||
extern bool _cpuExtension_AVX2;
|
||||
|
||||
// manual endian-swapping
|
||||
|
||||
#if _MSC_VER
|
||||
@ -251,30 +255,35 @@ inline uint64 _udiv128(uint64 highDividend, uint64 lowDividend, uint64 divisor,
|
||||
#define NOEXPORT __attribute__ ((visibility ("hidden")))
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
// On aarch64 we handle some of the x86 intrinsics by implementing them as wrappers
|
||||
#if defined(__aarch64__)
|
||||
|
||||
inline void cpuid(int cpuInfo[4], int functionId) {
|
||||
#if defined(_MSC_VER)
|
||||
__cpuid(cpuInfo, functionId);
|
||||
#elif defined(__GNUC__)
|
||||
__cpuid(functionId, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
|
||||
#else
|
||||
#error No definition for cpuid
|
||||
#endif
|
||||
inline void _mm_pause()
|
||||
{
|
||||
asm volatile("yield");
|
||||
}
|
||||
|
||||
inline void cpuidex(int cpuInfo[4], int functionId, int subFunctionId) {
|
||||
#if defined(_MSC_VER)
|
||||
__cpuidex(cpuInfo, functionId, subFunctionId);
|
||||
#elif defined(__GNUC__)
|
||||
__cpuid_count(functionId, subFunctionId, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
|
||||
#else
|
||||
#error No definition for cpuidex
|
||||
#endif
|
||||
inline uint64 __rdtsc()
|
||||
{
|
||||
uint64 t;
|
||||
asm volatile("mrs %0, cntvct_el0" : "=r" (t));
|
||||
return t;
|
||||
}
|
||||
|
||||
inline void _mm_mfence()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
inline unsigned char _addcarry_u64(unsigned char carry, unsigned long long a, unsigned long long b, unsigned long long *result)
|
||||
{
|
||||
*result = a + b + (unsigned long long)carry;
|
||||
if (*result < a)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// MEMPTR
|
||||
#include "Common/MemPtr.h"
|
||||
|
@ -1,6 +1,8 @@
|
||||
project(CemuAsm C)
|
||||
|
||||
if (WIN32)
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
|
||||
|
||||
if (WIN32)
|
||||
|
||||
enable_language(C ASM_MASM)
|
||||
|
||||
@ -12,7 +14,9 @@ if (WIN32)
|
||||
# possibly related to https://gitlab.kitware.com/cmake/cmake/-/issues/18889
|
||||
set(CMAKE_ASM_MASM_CREATE_STATIC_LIBRARY "<CMAKE_AR> /OUT:<TARGET> <LINK_FLAGS> <OBJECTS>")
|
||||
|
||||
else()
|
||||
set_property(TARGET CemuAsm PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
|
||||
else()
|
||||
|
||||
# NASM
|
||||
if (APPLE)
|
||||
@ -34,6 +38,10 @@ else()
|
||||
endif()
|
||||
set_target_properties(CemuAsm PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_property(TARGET CemuAsm PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)")
|
||||
add_library(CemuAsm stub.cpp)
|
||||
else()
|
||||
message(STATUS "CemuAsm - Unsupported arch: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
|
1
src/asm/stub.cpp
Normal file
1
src/asm/stub.cpp
Normal file
@ -0,0 +1 @@
|
||||
|
@ -1,4 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
|
||||
extern "C" void recompiler_fres();
|
||||
extern "C" void recompiler_frsqrte();
|
||||
|
||||
#else
|
||||
|
||||
// stubbed on non-x86 for now
|
||||
static void recompiler_fres()
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
static void recompiler_frsqrte()
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -64,6 +64,7 @@ void CemuConfig::Load(XMLConfigParser& parser)
|
||||
save_screenshot = parser.get("save_screenshot", save_screenshot);
|
||||
did_show_vulkan_warning = parser.get("vk_warning", did_show_vulkan_warning);
|
||||
did_show_graphic_pack_download = parser.get("gp_download", did_show_graphic_pack_download);
|
||||
did_show_macos_disclaimer = parser.get("macos_disclaimer", did_show_macos_disclaimer);
|
||||
fullscreen = parser.get("fullscreen", fullscreen);
|
||||
proxy_server = parser.get("proxy_server", "");
|
||||
|
||||
@ -365,6 +366,7 @@ void CemuConfig::Save(XMLConfigParser& parser)
|
||||
config.set<bool>("save_screenshot", save_screenshot);
|
||||
config.set<bool>("vk_warning", did_show_vulkan_warning);
|
||||
config.set<bool>("gp_download", did_show_graphic_pack_download);
|
||||
config.set<bool>("macos_disclaimer", did_show_macos_disclaimer);
|
||||
config.set<bool>("fullscreen", fullscreen);
|
||||
config.set("proxy_server", proxy_server.GetValue().c_str());
|
||||
|
||||
|
@ -407,6 +407,7 @@ struct CemuConfig
|
||||
|
||||
ConfigValue<bool> did_show_vulkan_warning{false};
|
||||
ConfigValue<bool> did_show_graphic_pack_download{false};
|
||||
ConfigValue<bool> did_show_macos_disclaimer{false};
|
||||
|
||||
int game_list_style = 0;
|
||||
std::string game_list_column_order;
|
||||
|
@ -174,6 +174,23 @@ bool CemuApp::OnInit()
|
||||
|
||||
SetTopWindow(m_mainFrame);
|
||||
m_mainFrame->Show();
|
||||
|
||||
// show warning on macOS about state of builds
|
||||
#if BOOST_OS_MACOS
|
||||
if (!GetConfig().did_show_macos_disclaimer)
|
||||
{
|
||||
const auto message = _(
|
||||
"Thank you for testing the in-development build of Cemu for macOS.\n \n"
|
||||
"The macOS port is currently purely experimental and should not be considered stable or ready for issue-free gameplay. "
|
||||
"There are also known issues with degraded performance due to the use of MoltenVk and Rosetta for ARM Macs. We appreciate your patience while we improve Cemu for macOS.");
|
||||
wxMessageDialog dialog(nullptr, message, "Preview version", wxCENTRE | wxOK | wxICON_WARNING);
|
||||
dialog.SetOKLabel(_("I understand"));
|
||||
dialog.ShowModal();
|
||||
GetConfig().did_show_macos_disclaimer = true;
|
||||
g_config.Save();
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
80
src/main.cpp
80
src/main.cpp
@ -19,6 +19,7 @@
|
||||
#include "Cafe/TitleList/SaveList.h"
|
||||
|
||||
#include "Common/ExceptionHandler/ExceptionHandler.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
#include <wx/setup.h>
|
||||
#include "util/helpers/helpers.h"
|
||||
@ -37,8 +38,13 @@
|
||||
#define SDL_MAIN_HANDLED
|
||||
#include <SDL.h>
|
||||
|
||||
#if BOOST_OS_LINUX || BOOST_OS_MACOS
|
||||
#if BOOST_OS_LINUX
|
||||
#define _putenv(__s) putenv((char*)(__s))
|
||||
#include <sys/sysinfo.h>
|
||||
#elif BOOST_OS_MACOS
|
||||
#define _putenv(__s) putenv((char*)(__s))
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
#if BOOST_OS_WINDOWS
|
||||
@ -49,41 +55,32 @@ extern "C"
|
||||
}
|
||||
#endif
|
||||
|
||||
bool _cpuExtension_SSSE3 = false;
|
||||
bool _cpuExtension_SSE4_1 = false;
|
||||
bool _cpuExtension_AVX2 = false;
|
||||
|
||||
std::atomic_bool g_isGPUInitFinished = false;
|
||||
|
||||
std::wstring executablePath;
|
||||
|
||||
void logCPUAndMemoryInfo()
|
||||
{
|
||||
#if BOOST_OS_WINDOWS
|
||||
int CPUInfo[4] = { -1 };
|
||||
unsigned nExIds, i = 0;
|
||||
char CPUBrandString[0x40];
|
||||
// Get the information associated with each extended ID.
|
||||
cpuid(CPUInfo, 0x80000000);
|
||||
nExIds = CPUInfo[0];
|
||||
for (i = 0x80000000; i <= nExIds; ++i)
|
||||
{
|
||||
cpuid(CPUInfo, i);
|
||||
// Interpret CPU brand string
|
||||
if (i == 0x80000002)
|
||||
memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
|
||||
else if (i == 0x80000003)
|
||||
memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
|
||||
else if (i == 0x80000004)
|
||||
memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
|
||||
}
|
||||
forceLog_printf("CPU: %s", CPUBrandString);
|
||||
std::string cpuName = g_CPUFeatures.GetCPUName();
|
||||
if (!cpuName.empty())
|
||||
cemuLog_log(LogType::Force, "CPU: {}", cpuName);
|
||||
|
||||
#if BOOST_OS_WINDOWS
|
||||
MEMORYSTATUSEX statex;
|
||||
statex.dwLength = sizeof(statex);
|
||||
GlobalMemoryStatusEx(&statex);
|
||||
uint32 memoryInMB = (uint32)(statex.ullTotalPhys / 1024LL / 1024LL);
|
||||
forceLog_printf("RAM: %uMB", memoryInMB);
|
||||
#elif BOOST_OS_LINUX
|
||||
struct sysinfo info {};
|
||||
sysinfo(&info);
|
||||
cemuLog_log(LogType::Force, "RAM: {}MB", ((static_cast<uint64_t>(info.totalram) * info.mem_unit) / 1024LL / 1024LL));
|
||||
#elif BOOST_OS_MACOS
|
||||
int64_t totalRam;
|
||||
size_t size = sizeof(totalRam);
|
||||
int result = sysctlbyname("hw.memsize", &totalRam, &size, NULL, 0);
|
||||
if (result == 0)
|
||||
cemuLog_log(LogType::Force, "RAM: {}MB", (totalRam / 1024LL / 1024LL));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -120,32 +117,7 @@ void infoLog_cemuStartup()
|
||||
checkForWine();
|
||||
// CPU and RAM info
|
||||
logCPUAndMemoryInfo();
|
||||
// extensions that Cemu uses
|
||||
char cpuExtensionStr[256];
|
||||
strcpy(cpuExtensionStr, "");
|
||||
if (_cpuExtension_SSSE3)
|
||||
{
|
||||
strcat(cpuExtensionStr, "SSSE3");
|
||||
}
|
||||
if (_cpuExtension_SSE4_1)
|
||||
{
|
||||
if (cpuExtensionStr[0] != '\0')
|
||||
strcat(cpuExtensionStr, ", ");
|
||||
strcat(cpuExtensionStr, "SSE4.1");
|
||||
}
|
||||
if (_cpuExtension_AVX2)
|
||||
{
|
||||
if (cpuExtensionStr[0] != '\0')
|
||||
strcat(cpuExtensionStr, ", ");
|
||||
strcat(cpuExtensionStr, "AVX2");
|
||||
}
|
||||
if (AES128_useAESNI())
|
||||
{
|
||||
if (cpuExtensionStr[0] != '\0')
|
||||
strcat(cpuExtensionStr, ", ");
|
||||
strcat(cpuExtensionStr, "AES-NI");
|
||||
}
|
||||
cemuLog_force("Used CPU extensions: {}", cpuExtensionStr);
|
||||
cemuLog_log(LogType::Force, "Used CPU extensions: {}", g_CPUFeatures.GetCommaSeparatedExtensionList());
|
||||
}
|
||||
|
||||
// some implementations of _putenv dont copy the string and instead only store a pointer
|
||||
@ -194,14 +166,6 @@ void mainEmulatorCommonInit()
|
||||
AES128_init();
|
||||
// init PPC timer (call this as early as possible because it measures frequency of RDTSC using an asynchronous thread over 3 seconds)
|
||||
PPCTimer_init();
|
||||
// check available CPU extensions
|
||||
int cpuInfo[4];
|
||||
cpuid(cpuInfo, 0x1);
|
||||
_cpuExtension_SSSE3 = ((cpuInfo[2] >> 9) & 1) != 0;
|
||||
_cpuExtension_SSE4_1 = ((cpuInfo[2] >> 19) & 1) != 0;
|
||||
|
||||
cpuidex(cpuInfo, 0x7, 0);
|
||||
_cpuExtension_AVX2 = ((cpuInfo[1] >> 5) & 1) != 0;
|
||||
|
||||
#if BOOST_OS_WINDOWS
|
||||
executablePath.resize(4096);
|
||||
|
@ -10,6 +10,7 @@
|
||||
/* Includes: */
|
||||
/*****************************************************************************/
|
||||
#include "aes128.h"
|
||||
#include "Common/cpu_features.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Defines: */
|
||||
@ -23,8 +24,6 @@
|
||||
// The number of rounds in AES Cipher.
|
||||
#define Nr 10
|
||||
|
||||
bool useAESNI = false;
|
||||
|
||||
typedef uint8 state_t[4][4];
|
||||
|
||||
typedef struct
|
||||
@ -601,6 +600,7 @@ void AES128_CBC_decrypt_updateIV(uint8* output, uint8* input, uint32 length, con
|
||||
memcpy(iv, newIv, KEYLEN);
|
||||
}
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
inline __m128i AESNI128_ASSIST(
|
||||
__m128i temp1,
|
||||
__m128i temp2)
|
||||
@ -792,6 +792,7 @@ void __aesni__AES128_ECB_encrypt(uint8* input, const uint8* key, uint8* output)
|
||||
feedback = _mm_aesenclast_si128(feedback, ((__m128i*)expandedKey)[10]);
|
||||
_mm_storeu_si128(&((__m128i*)output)[0], feedback);
|
||||
}
|
||||
#endif
|
||||
|
||||
void(*AES128_ECB_encrypt)(uint8* input, const uint8* key, uint8* output);
|
||||
void (*AES128_CBC_decrypt)(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv) = nullptr;
|
||||
@ -836,10 +837,8 @@ void AES128_init()
|
||||
lookupTable_multiply[i] = (vE << 0) | (v9 << 8) | (vD << 16) | (vB << 24);
|
||||
}
|
||||
// check if AES-NI is available
|
||||
int v[4];
|
||||
cpuid(v, 1);
|
||||
useAESNI = (v[2] & 0x2000000) != 0;
|
||||
if (useAESNI)
|
||||
#if defined(ARCH_X86_64)
|
||||
if (g_CPUFeatures.x86.aesni)
|
||||
{
|
||||
// AES-NI implementation
|
||||
AES128_CBC_decrypt = __aesni__AES128_CBC_decrypt;
|
||||
@ -851,9 +850,8 @@ void AES128_init()
|
||||
AES128_CBC_decrypt = __soft__AES128_CBC_decrypt;
|
||||
AES128_ECB_encrypt = __soft__AES128_ECB_encrypt;
|
||||
}
|
||||
}
|
||||
|
||||
bool AES128_useAESNI()
|
||||
{
|
||||
return useAESNI;
|
||||
#else
|
||||
AES128_CBC_decrypt = __soft__AES128_CBC_decrypt;
|
||||
AES128_ECB_encrypt = __soft__AES128_ECB_encrypt;
|
||||
#endif
|
||||
}
|
@ -2,7 +2,6 @@
|
||||
#define _AES_H_
|
||||
|
||||
void AES128_init();
|
||||
bool AES128_useAESNI();
|
||||
|
||||
extern void(*AES128_ECB_encrypt)(uint8* input, const uint8* key, uint8* output);
|
||||
|
||||
|
@ -19,7 +19,8 @@ public:
|
||||
{
|
||||
if (!m_lockBool.exchange(true, std::memory_order_acquire))
|
||||
break;
|
||||
while (m_lockBool.load(std::memory_order_relaxed)) _mm_pause();
|
||||
while (m_lockBool.load(std::memory_order_relaxed))
|
||||
_mm_pause();
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user