ih264d: Small optimizations and experiments with multi-threading

Using the multi-threaded decoder doesn't seem to be worth it, but at least we have a way to enable it now.
This commit is contained in:
Exzap 2023-09-10 08:13:53 +02:00
parent f04c7575d7
commit fda5ec2697
4 changed files with 61 additions and 15 deletions

View File

@ -183,4 +183,10 @@ endif()
if(MSVC) if(MSVC)
set_property(TARGET ih264d PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") set_property(TARGET ih264d PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
# tune settings for slightly better performance (this sits inside the if(MSVC) branch, so the flags are MSVC-specific)
target_compile_options(ih264d PRIVATE $<$<CONFIG:Release,RelWithDebInfo>:/Oi>) # enable intrinsic functions
target_compile_options(ih264d PRIVATE $<$<CONFIG:Release,RelWithDebInfo>:/Ot>) # favor speed
# /GS- turns off MSVC's buffer security check (stack cookies), not the /RTC runtime checks.
# NOTE(review): unlike the two options above, this one has no CONFIG generator expression, so it applies to every configuration.
target_compile_options(ih264d PRIVATE "/GS-") # disable buffer security checks
endif() endif()

View File

@ -85,28 +85,59 @@ UWORD32 ithread_get_mutex_lock_size(void)
return sizeof(CRITICAL_SECTION); return sizeof(CRITICAL_SECTION);
} }
/* Launch parameters for a new thread: ithread_create allocates and fills one of these,
 * and _ithread_WinThreadStartRoutine reads both fields and frees it. */
struct _ithread_launch_param
{
/* start routine supplied by the caller of ithread_create */
void (*startFunc)(void* argument);
/* opaque value passed unchanged to the start routine */
void* argument;
};
/*
 * Win32 thread entry point used by ithread_create.
 * lpThreadParameter points to a malloc'd struct _ithread_launch_param; this function
 * takes ownership of it, frees it, and then runs the stored start routine.
 * The routine's return value is discarded; the thread always exits with code 0.
 */
DWORD WINAPI _ithread_WinThreadStartRoutine(LPVOID lpThreadParameter)
{
    typedef void *(*start_routine_fn)(void *);
    struct _ithread_launch_param* launch = (struct _ithread_launch_param*)lpThreadParameter;

    /* copy both fields out first so the block can be released before the routine runs */
    start_routine_fn entry = (start_routine_fn)launch->startFunc;
    void* entryArg = launch->argument;
    free(launch);

    entry(entryArg);
    return 0;
}
WORD32 ithread_create(void* thread_handle, void* attribute, void* strt, void* argument) WORD32 ithread_create(void* thread_handle, void* attribute, void* strt, void* argument)
{ {
//UNUSED(attribute); UNUSED(attribute);
//return pthread_create((pthread_t*)thread_handle, NULL, (void* (*)(void*)) strt, argument); struct _ithread_launch_param* param = malloc(sizeof(struct _ithread_launch_param));
__debugbreak(); param->startFunc = (void (*)(void*))strt;
param->argument = argument;
HANDLE *handle = (HANDLE*)thread_handle;
*handle = CreateThread(NULL, 0, _ithread_WinThreadStartRoutine, param, 0, NULL);
if(*handle == NULL)
{
return -1;
}
return 0; return 0;
} }
WORD32 ithread_join(void* thread_handle, void** val_ptr) WORD32 ithread_join(void* thread_handle, void** val_ptr)
{ {
//UNUSED(val_ptr); //UNUSED(val_ptr);
//pthread_t* pthread_handle = (pthread_t*)thread_handle; HANDLE *handle = (HANDLE*)thread_handle;
//return pthread_join(*pthread_handle, NULL); DWORD result = WaitForSingleObject(*handle, INFINITE);
if(result == WAIT_OBJECT_0)
__debugbreak(); {
CloseHandle(*handle);
return 0; return 0;
} }
else
{
return -1;
}
}
WORD32 ithread_get_mutex_struct_size(void) WORD32 ithread_get_mutex_struct_size(void)
{ {
return sizeof(CRITICAL_SECTION); return sizeof(CRITICAL_SECTION);
} }
WORD32 ithread_mutex_init(void* mutex) WORD32 ithread_mutex_init(void* mutex)
{ {
InitializeCriticalSection((LPCRITICAL_SECTION)mutex); InitializeCriticalSection((LPCRITICAL_SECTION)mutex);
@ -153,7 +184,6 @@ UWORD32 ithread_get_sem_struct_size(void)
//return(sizeof(sem_t)); //return(sizeof(sem_t));
} }
WORD32 ithread_sem_init(void* sem, WORD32 pshared, UWORD32 value) WORD32 ithread_sem_init(void* sem, WORD32 pshared, UWORD32 value)
{ {
__debugbreak(); __debugbreak();
@ -168,7 +198,6 @@ WORD32 ithread_sem_post(void* sem)
//return sem_post((sem_t*)sem); //return sem_post((sem_t*)sem);
} }
WORD32 ithread_sem_wait(void* sem) WORD32 ithread_sem_wait(void* sem)
{ {
__debugbreak(); __debugbreak();
@ -176,7 +205,6 @@ WORD32 ithread_sem_wait(void* sem)
//return sem_wait((sem_t*)sem); //return sem_wait((sem_t*)sem);
} }
WORD32 ithread_sem_destroy(void* sem) WORD32 ithread_sem_destroy(void* sem)
{ {
__debugbreak(); __debugbreak();

View File

@ -79,10 +79,8 @@
/*
 * Replacement for the GCC-style __builtin_clz built on _BitScanReverse:
 * returns the number of leading zero bits in x.
 * x must be non-zero; there is no zero check here, and for 0 the bit index is
 * never written, so the result would be unspecified.
 */
static inline int __builtin_clz(unsigned x)
{
    unsigned long highestSetBit;
    _BitScanReverse(&highestSetBit, x);
    /* for a bit index in 0..31, XOR with 31 gives the same value as 31 - index */
    return highestSetBit ^ 31;
}
static inline int __builtin_ctz(unsigned x) { static inline int __builtin_ctz(unsigned x) {

View File

@ -254,6 +254,8 @@ namespace H264
m_codecCtx->pv_fxns = (void*)&ih264d_api_function; m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
m_codecCtx->u4_size = sizeof(iv_obj_t); m_codecCtx->u4_size = sizeof(iv_obj_t);
SetDecoderCoreCount(1);
m_isBufferedMode = isBufferedMode; m_isBufferedMode = isBufferedMode;
UpdateParameters(false); UpdateParameters(false);
@ -278,6 +280,19 @@ namespace H264
m_codecCtx = nullptr; m_codecCtx = nullptr;
} }
// Sets the number of cores the ih264d decoder instance may use.
// Issues the IH264D_CMD_CTL_SET_NUM_CORES control command on m_codecCtx
// and asserts that the API call reported success.
void SetDecoderCoreCount(uint32 coreCount)
{
    ih264d_ctl_set_num_cores_ip_t coreCtlIn;
    coreCtlIn.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
    coreCtlIn.e_cmd = IVD_CMD_VIDEO_CTL;
    coreCtlIn.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES;
    coreCtlIn.u4_num_cores = coreCount; // valid numbers are 1-4

    ih264d_ctl_set_num_cores_op_t coreCtlOut;
    coreCtlOut.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);

    const IV_API_CALL_STATUS_T callStatus = ih264d_api_function(m_codecCtx, (void*)&coreCtlIn, (void*)&coreCtlOut);
    cemu_assert(callStatus == IV_SUCCESS);
}
static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight) static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight)
{ {
// create temporary decoder // create temporary decoder
@ -702,7 +717,6 @@ namespace H264
decodeResult = m_bufferedResults.front(); decodeResult = m_bufferedResults.front();
m_bufferedResults.erase(m_bufferedResults.begin()); m_bufferedResults.erase(m_bufferedResults.begin());
} }
private: private:
iv_obj_t* m_codecCtx{nullptr}; iv_obj_t* m_codecCtx{nullptr};
bool m_hasBufferSizeInfo{ false }; bool m_hasBufferSizeInfo{ false };