diff --git a/include/SDL_hints.h b/include/SDL_hints.h
index 0302b8d8f..bac14a736 100644
--- a/include/SDL_hints.h
+++ b/include/SDL_hints.h
@@ -1210,6 +1210,22 @@ extern "C" {
  */
 #define SDL_HINT_WINDOWS_FORCE_MUTEX_CRITICAL_SECTIONS "SDL_WINDOWS_FORCE_MUTEX_CRITICAL_SECTIONS"
 
+/**
+ * \brief Force SDL to use Kernel Semaphores on Windows.
+ *        Kernel Semaphores are inter-process and require a context
+ *        switch on every interaction. On Windows 8 and newer, the
+ *        WaitOnAddress API is available. Using that and atomics to
+ *        implement semaphores increases performance.
+ *        SDL will fall back to Kernel Objects on older OS versions
+ *        or if forced to by this hint.
+ *
+ * This variable can be set to the following values:
+ *   "0"       - Use Atomics and WaitOnAddress API when available. If not, fall back to Kernel Objects. (default)
+ *   "1"       - Force the use of Kernel Objects in all cases.
+ *
+ */
+#define SDL_HINT_WINDOWS_FORCE_SEMAPHORE_KERNEL "SDL_WINDOWS_FORCE_SEMAPHORE_KERNEL"
+
 /**
  * \brief Tell SDL which Dispmanx layer to use on a Raspberry PI
  *
diff --git a/src/thread/windows/SDL_syssem.c b/src/thread/windows/SDL_syssem.c
index 217ea32c7..ed0e78b2c 100644
--- a/src/thread/windows/SDL_syssem.c
+++ b/src/thread/windows/SDL_syssem.c
@@ -22,27 +22,251 @@
 
 #if SDL_THREAD_WINDOWS
 
-/* Semaphore functions using the Win32 API */
+/**
+ * Semaphore functions using the Win32 API
+ * There are two implementations available based on:
+ * - Kernel Semaphores. Available on all OS versions. (kern)
+ *   Heavy-weight inter-process kernel objects.
+ * - Atomics and WaitOnAddress API. (atom)
+ *   Faster due to significantly fewer context switches.
+ *   Requires Windows 8 or newer.
+ * which are chosen at runtime.
+ */
 
 #include "../../core/windows/SDL_windows.h"
 
+#include "SDL_hints.h"
 #include "SDL_thread.h"
+#include "SDL_timer.h"
 
-struct SDL_semaphore
+typedef SDL_sem * (*pfnSDL_CreateSemaphore)(Uint32);
+typedef void (*pfnSDL_DestroySemaphore)(SDL_sem *);
+typedef int (*pfnSDL_SemWaitTimeout)(SDL_sem *, Uint32);
+typedef int (*pfnSDL_SemTryWait)(SDL_sem *);
+typedef int (*pfnSDL_SemWait)(SDL_sem *);
+typedef Uint32 (*pfnSDL_SemValue)(SDL_sem *);
+typedef int (*pfnSDL_SemPost)(SDL_sem *);
+
+/* Dispatch table with one entry per public semaphore function */
+typedef struct SDL_semaphore_impl_t
+{
+    pfnSDL_CreateSemaphore  Create;
+    pfnSDL_DestroySemaphore Destroy;
+    pfnSDL_SemWaitTimeout   WaitTimeout;
+    pfnSDL_SemTryWait       TryWait;
+    pfnSDL_SemWait          Wait;
+    pfnSDL_SemValue         Value;
+    pfnSDL_SemPost          Post;
+} SDL_sem_impl_t;
+
+/* Implementation will be chosen at runtime based on available Kernel features */
+static SDL_sem_impl_t SDL_sem_impl_active = {0};
+
+
+/**
+ * Atomic + WaitOnAddress implementation
+ */
+
+typedef BOOL(WINAPI *pfnWaitOnAddress)(volatile VOID*, PVOID, SIZE_T, DWORD);
+typedef VOID(WINAPI *pfnWakeByAddressSingle)(PVOID);
+
+/* Looked up at runtime in SDL_CreateSemaphore(); used only by the atom implementation */
+static pfnWaitOnAddress pWaitOnAddress = NULL;
+static pfnWakeByAddressSingle pWakeByAddressSingle = NULL;
+
+typedef struct SDL_semaphore_atom
 {
-    HANDLE id;
     LONG count;
+} SDL_sem_atom;
+
+/* Allocate a semaphore whose count starts at initial_value; NULL on out of memory */
+static SDL_sem *
+SDL_CreateSemaphore_atom(Uint32 initial_value)
+{
+    SDL_sem_atom *sem;
+
+    sem = (SDL_sem_atom *) SDL_malloc(sizeof(*sem));
+    if (sem) {
+        sem->count = initial_value;
+    } else {
+        SDL_OutOfMemory();
+    }
+    return (SDL_sem *)sem;
+}
+
+/* Free the semaphore; a NULL sem is ignored */
+static void
+SDL_DestroySemaphore_atom(SDL_sem * sem)
+{
+    if (sem) {
+        SDL_free(sem);
+    }
+}
+
+/* Take the semaphore without blocking; SDL_MUTEX_TIMEDOUT if the count is zero */
+static int
+SDL_SemTryWait_atom(SDL_sem * _sem)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+    LONG count;
+
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+
+    /* Only report SDL_MUTEX_TIMEDOUT when the count really is zero. A lost
+     * compare-exchange just means another thread changed the count, so
+     * retry with the value it observed instead of failing spuriously.
+     */
+    count = sem->count;
+    while (count != 0) {
+        const LONG seen = InterlockedCompareExchange(&sem->count, count - 1, count);
+        if (seen == count) {
+            return 0;
+        }
+        count = seen;
+    }
+
+    return SDL_MUTEX_TIMEDOUT;
+}
+
+/* Block until the semaphore can be taken */
+static int
+SDL_SemWait_atom(SDL_sem * _sem)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+    LONG count;
+
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+
+    for (;;) {
+        count = sem->count;
+        while (count == 0) {
+            if (pWaitOnAddress(&sem->count, &count, sizeof(sem->count), INFINITE) == FALSE) {
+                return SDL_SetError("WaitOnAddress() failed");
+            }
+            count = sem->count;
+        }
+
+        if (InterlockedCompareExchange(&sem->count, count - 1, count) == count) {
+            return 0;
+        }
+    }
+}
+
+/* Like SDL_SemWait_atom() but gives up after timeout milliseconds */
+static int
+SDL_SemWaitTimeout_atom(SDL_sem * _sem, Uint32 timeout)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+    LONG count;
+    Uint32 start;
+    Uint32 elapsed;
+
+    if (timeout == SDL_MUTEX_MAXWAIT) {
+        return SDL_SemWait_atom(_sem);
+    }
+
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+
+    /**
+     * WaitOnAddress is subject to spurious and stolen wakeups so we
+     * need to recalculate the effective timeout before every wait.
+     * Measure elapsed time with unsigned subtraction instead of comparing
+     * against an absolute deadline: now + timeout can overflow Uint32, which
+     * made large timeouts expire immediately.
+     */
+    start = SDL_GetTicks();
+
+    for (;;) {
+        count = sem->count;
+        /* If no semaphore is available we need to wait */
+        while (count == 0) {
+            elapsed = SDL_GetTicks() - start;
+            if (elapsed >= timeout) {
+                return SDL_MUTEX_TIMEDOUT;
+            }
+            if (pWaitOnAddress(&sem->count, &count, sizeof(sem->count), (DWORD) (timeout - elapsed)) == FALSE) {
+                if (GetLastError() == ERROR_TIMEOUT) {
+                    return SDL_MUTEX_TIMEDOUT;
+                }
+                return SDL_SetError("WaitOnAddress() failed");
+            }
+            count = sem->count;
+        }
+
+        /* Actually the semaphore is only consumed if this succeeds */
+        /* If it doesn't we need to do everything again */
+        if (InterlockedCompareExchange(&sem->count, count - 1, count) == count) {
+            return 0;
+        }
+    }
+}
+
+/* Returns the current count of the semaphore */
+static Uint32
+SDL_SemValue_atom(SDL_sem * _sem)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+
+    if (!sem) {
+        SDL_SetError("Passed a NULL sem");
+        return 0;
+    }
+
+    return (Uint32)sem->count;
+}
+
+/* Increment the count and wake one thread waiting on it */
+static int
+SDL_SemPost_atom(SDL_sem * _sem)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+
+    InterlockedIncrement(&sem->count);
+    pWakeByAddressSingle(&sem->count);
+
+    return 0;
+}
+
+static const SDL_sem_impl_t SDL_sem_impl_atom =
+{
+    &SDL_CreateSemaphore_atom,
+    &SDL_DestroySemaphore_atom,
+    &SDL_SemWaitTimeout_atom,
+    &SDL_SemTryWait_atom,
+    &SDL_SemWait_atom,
+    &SDL_SemValue_atom,
+    &SDL_SemPost_atom,
 };
 
 
-/* Create a semaphore */
-SDL_sem *
-SDL_CreateSemaphore(Uint32 initial_value)
+/**
+ * Fallback Semaphore implementation using Kernel Semaphores
+ */
+
+typedef struct SDL_semaphore_kern
 {
-    SDL_sem *sem;
+    HANDLE id;
+    LONG count;
+} SDL_sem_kern;
+
+/* Create a semaphore */
+static SDL_sem *
+SDL_CreateSemaphore_kern(Uint32 initial_value)
+{
+    SDL_sem_kern *sem;
 
     /* Allocate sem memory */
-    sem = (SDL_sem *) SDL_malloc(sizeof(*sem));
+    sem = (SDL_sem_kern *) SDL_malloc(sizeof(*sem));
     if (sem) {
         /* Create the semaphore, with max value 32K */
 #if __WINRT__
@@ -59,13 +283,14 @@ SDL_CreateSemaphore(Uint32 initial_value)
     } else {
         SDL_OutOfMemory();
     }
-    return (sem);
+    return (SDL_sem *)sem;
 }
 
 /* Free the semaphore */
-void
-SDL_DestroySemaphore(SDL_sem * sem)
+static void
+SDL_DestroySemaphore_kern(SDL_sem * _sem)
 {
+    SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
     if (sem) {
         if (sem->id) {
             CloseHandle(sem->id);
@@ -75,9 +300,10 @@ SDL_DestroySemaphore(SDL_sem * sem)
     }
 }
 
-int
-SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
+static int
+SDL_SemWaitTimeout_kern(SDL_sem * _sem, Uint32 timeout)
 {
+    SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
     int retval;
     DWORD dwMilliseconds;
 
@@ -105,22 +331,23 @@ SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
     return retval;
 }
 
-int
-SDL_SemTryWait(SDL_sem * sem)
+static int
+SDL_SemTryWait_kern(SDL_sem * sem)
 {
-    return SDL_SemWaitTimeout(sem, 0);
+    return SDL_SemWaitTimeout_kern(sem, 0);
 }
 
-int
-SDL_SemWait(SDL_sem * sem)
+static int
+SDL_SemWait_kern(SDL_sem * sem)
 {
-    return SDL_SemWaitTimeout(sem, SDL_MUTEX_MAXWAIT);
+    return SDL_SemWaitTimeout_kern(sem, SDL_MUTEX_MAXWAIT);
 }
 
 /* Returns the current count of the semaphore */
-Uint32
-SDL_SemValue(SDL_sem * sem)
+static Uint32
+SDL_SemValue_kern(SDL_sem * _sem)
 {
+    SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
     if (!sem) {
         SDL_SetError("Passed a NULL sem");
         return 0;
@@ -128,9 +355,10 @@ SDL_SemValue(SDL_sem * sem)
     return (Uint32)sem->count;
 }
 
-int
-SDL_SemPost(SDL_sem * sem)
+static int
+SDL_SemPost_kern(SDL_sem * _sem)
 {
+    SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
     if (!sem) {
         return SDL_SetError("Passed a NULL sem");
     }
@@ -147,6 +375,115 @@ SDL_SemPost(SDL_sem * sem)
     return 0;
 }
 
+static const SDL_sem_impl_t SDL_sem_impl_kern =
+{
+    &SDL_CreateSemaphore_kern,
+    &SDL_DestroySemaphore_kern,
+    &SDL_SemWaitTimeout_kern,
+    &SDL_SemTryWait_kern,
+    &SDL_SemWait_kern,
+    &SDL_SemValue_kern,
+    &SDL_SemPost_kern,
+};
+
+
+/**
+ * Runtime selection and redirection
+ */
+
+/* Pick the implementation on first use, then create the semaphore.
+ * NOTE(review): this first-use initialization is not synchronized.
+ */
+SDL_sem *
+SDL_CreateSemaphore(Uint32 initial_value)
+{
+    if (SDL_sem_impl_active.Create == NULL) {
+        /* Default to fallback implementation */
+        const SDL_sem_impl_t * impl = &SDL_sem_impl_kern;
+
+        if (!SDL_GetHintBoolean(SDL_HINT_WINDOWS_FORCE_SEMAPHORE_KERNEL, SDL_FALSE)) {
+            /* We already statically link to features from this Api
+             * Set (e.g. WaitForSingleObject). Dynamically loading
+             * API Sets is not explicitly documented but according to
+             * Microsoft our specific use case is legal and correct:
+             * https://github.com/microsoft/STL/pull/593#issuecomment-655799859
+             */
+            HMODULE synch120 = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
+            if (synch120) {
+                /* Try to load required functions provided by Win 8 or newer */
+                pWaitOnAddress = (pfnWaitOnAddress) GetProcAddress(synch120, "WaitOnAddress");
+                pWakeByAddressSingle = (pfnWakeByAddressSingle) GetProcAddress(synch120, "WakeByAddressSingle");
+
+                if (pWaitOnAddress && pWakeByAddressSingle) {
+                    impl = &SDL_sem_impl_atom;
+                }
+            }
+        }
+
+        /* Copy instead of using pointer to save one level of indirection */
+        SDL_memcpy(&SDL_sem_impl_active, impl, sizeof(SDL_sem_impl_active));
+    }
+    return SDL_sem_impl_active.Create(initial_value);
+}
+
+/* The dispatch table is only filled in by SDL_CreateSemaphore(), so the
+ * wrappers below reject a NULL sem themselves instead of calling through a
+ * table that may still be empty.
+ */
+void
+SDL_DestroySemaphore(SDL_sem * sem)
+{
+    if (sem) {
+        SDL_sem_impl_active.Destroy(sem);
+    }
+}
+
+int
+SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
+{
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+    return SDL_sem_impl_active.WaitTimeout(sem, timeout);
+}
+
+int
+SDL_SemTryWait(SDL_sem * sem)
+{
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+    return SDL_sem_impl_active.TryWait(sem);
+}
+
+int
+SDL_SemWait(SDL_sem * sem)
+{
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+    return SDL_sem_impl_active.Wait(sem);
+}
+
+Uint32
+SDL_SemValue(SDL_sem * sem)
+{
+    if (!sem) {
+        SDL_SetError("Passed a NULL sem");
+        return 0;
+    }
+    return SDL_sem_impl_active.Value(sem);
+}
+
+int
+SDL_SemPost(SDL_sem * sem)
+{
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+    return SDL_sem_impl_active.Post(sem);
+}
+
 #endif /* SDL_THREAD_WINDOWS */
 
 /* vi: set ts=4 sw=4 expandtab: */