diff --git a/dependencies/ih264d/CMakeLists.txt b/dependencies/ih264d/CMakeLists.txt
index 212cf346..d97d6dda 100644
--- a/dependencies/ih264d/CMakeLists.txt
+++ b/dependencies/ih264d/CMakeLists.txt
@@ -183,4 +183,11 @@ endif()
 
 if(MSVC)
 set_property(TARGET ih264d PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
+
+# tune settings for slightly better performance
+# NOTE(review): the configuration list for /Oi and /Ot is assumed to be Release,RelWithDebInfo - confirm
+target_compile_options(ih264d PRIVATE $<$<CONFIG:Release,RelWithDebInfo>:/Oi>) # enable intrinsic functions
+target_compile_options(ih264d PRIVATE $<$<CONFIG:Release,RelWithDebInfo>:/Ot>) # favor speed
+target_compile_options(ih264d PRIVATE "/GS-") # disable buffer security checks (stack cookies)
+
 endif()
diff --git a/dependencies/ih264d/common/ithread.c b/dependencies/ih264d/common/ithread.c
index d710e323..2c25bdb0 100644
--- a/dependencies/ih264d/common/ithread.c
+++ b/dependencies/ih264d/common/ithread.c
@@ -85,28 +85,71 @@ UWORD32 ithread_get_mutex_lock_size(void)
     return sizeof(CRITICAL_SECTION);
 }
 
+/* Start routine and argument handed from ithread_create to the new thread.
+ * Allocated by ithread_create, released by _ithread_WinThreadStartRoutine. */
+struct _ithread_launch_param
+{
+    void* (*startFunc)(void* argument);
+    void* argument;
+};
+
+/* Win32 thread entry point: copies the launch parameters out, frees them and
+ * runs the caller's routine. The routine's return value is discarded. */
+static DWORD WINAPI _ithread_WinThreadStartRoutine(LPVOID lpThreadParameter)
+{
+    struct _ithread_launch_param* param = (struct _ithread_launch_param*)lpThreadParameter;
+    void* (*pfnThreadRoutine)(void*) = param->startFunc;
+    void* arg = param->argument;
+    free(param);
+    pfnThreadRoutine(arg);
+    return 0;
+}
+
 WORD32 ithread_create(void* thread_handle, void* attribute, void* strt, void* argument)
 {
-    //UNUSED(attribute);
-    //return pthread_create((pthread_t*)thread_handle, NULL, (void* (*)(void*)) strt, argument);
-    __debugbreak();
+    UNUSED(attribute);
+    /* the parameters must outlive this call, so they are passed on the heap */
+    struct _ithread_launch_param* param = malloc(sizeof(struct _ithread_launch_param));
+    if(param == NULL)
+    {
+        return -1;
+    }
+    /* strt is a pthread-style routine, as in the pthread_create call this replaces */
+    param->startFunc = (void* (*)(void*))strt;
+    param->argument = argument;
+    HANDLE *handle = (HANDLE*)thread_handle;
+    *handle = CreateThread(NULL, 0, _ithread_WinThreadStartRoutine, param, 0, NULL);
+    if(*handle == NULL)
+    {
+        /* the thread never ran, so the start routine will not free param */
+        free(param);
+        return -1;
+    }
     return 0;
 }
 
 WORD32 ithread_join(void* thread_handle, void** val_ptr)
 {
     //UNUSED(val_ptr);
-    //pthread_t* pthread_handle = (pthread_t*)thread_handle;
-    //return pthread_join(*pthread_handle, NULL);
-
-    __debugbreak();
-    return 0;
+    /* block until the thread exits; the handle is closed only after a successful wait */
+    HANDLE *handle = (HANDLE*)thread_handle;
+    DWORD result = WaitForSingleObject(*handle, INFINITE);
+    if(result == WAIT_OBJECT_0)
+    {
+        CloseHandle(*handle);
+        return 0;
+    }
+    else
+    {
+        return -1;
+    }
 }
 
 WORD32 ithread_get_mutex_struct_size(void)
 {
     return sizeof(CRITICAL_SECTION);
 }
+
 WORD32 ithread_mutex_init(void* mutex)
 {
     InitializeCriticalSection((LPCRITICAL_SECTION)mutex);
@@ -153,7 +196,6 @@ UWORD32 ithread_get_sem_struct_size(void)
     //return(sizeof(sem_t));
 }
 
-
 WORD32 ithread_sem_init(void* sem, WORD32 pshared, UWORD32 value)
 {
     __debugbreak();
@@ -168,7 +210,6 @@ WORD32 ithread_sem_post(void* sem)
     //return sem_post((sem_t*)sem);
 }
 
-
 WORD32 ithread_sem_wait(void* sem)
 {
     __debugbreak();
@@ -176,7 +217,6 @@ WORD32 ithread_sem_wait(void* sem)
     //return sem_wait((sem_t*)sem);
 }
 
-
 WORD32 ithread_sem_destroy(void* sem)
 {
     __debugbreak();
diff --git a/dependencies/ih264d/common/x86/ih264_platform_macros.h b/dependencies/ih264d/common/x86/ih264_platform_macros.h
index ebc1b106..22de33d6 100644
--- a/dependencies/ih264d/common/x86/ih264_platform_macros.h
+++ b/dependencies/ih264d/common/x86/ih264_platform_macros.h
@@ -79,10 +79,10 @@
 static inline int __builtin_clz(unsigned x)
 {
     unsigned long n;
-    if (x == 0)
-        return 32;
+    /* x must be non-zero, as for GCC's __builtin_clz: _BitScanReverse does not
+     * define n when no bit is set */
     _BitScanReverse(&n, x);
-    return 31 - n;
+    return n ^ 31; /* same as 31 - n for n in 0..31 */
 }
 static inline int __builtin_ctz(unsigned x)
 {
diff --git a/src/Cafe/OS/libs/h264_avc/H264Dec.cpp b/src/Cafe/OS/libs/h264_avc/H264Dec.cpp
index 88ce272a..d88a29d4 100644
--- a/src/Cafe/OS/libs/h264_avc/H264Dec.cpp
+++ b/src/Cafe/OS/libs/h264_avc/H264Dec.cpp
@@ -254,6 +254,8 @@ namespace H264
 			m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
 			m_codecCtx->u4_size = sizeof(iv_obj_t);
 
+			SetDecoderCoreCount(1);
+
 			m_isBufferedMode = isBufferedMode;
 
 			UpdateParameters(false);
@@ -278,6 +280,21 @@ namespace H264
 			m_codecCtx = nullptr;
 		}
 
+		// Sends IH264D_CMD_CTL_SET_NUM_CORES to the decoder behind m_codecCtx so that it
+		// uses coreCount cores, and asserts that the command succeeded.
+		void SetDecoderCoreCount(uint32 coreCount)
+		{
+			ih264d_ctl_set_num_cores_ip_t s_set_cores_ip;
+			ih264d_ctl_set_num_cores_op_t s_set_cores_op;
+			s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+			s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES;
+			s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4
+			s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
+			s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
+			IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op);
+			cemu_assert(status == IV_SUCCESS);
+		}
+
 		static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight)
 		{
 			// create temporary decoder
@@ -702,7 +719,6 @@ namespace H264
 			decodeResult = m_bufferedResults.front();
 			m_bufferedResults.erase(m_bufferedResults.begin());
 		}
-
 	private:
 		iv_obj_t* m_codecCtx{nullptr};
 		bool m_hasBufferSizeInfo{ false };