Add SDL_sem implementation using Atomics and WaitOnAddress API.

Keep Semaphore Kernel Object impl for Windows 7 and older - choose at runtime

v2: - Fix mixed int/LONG types
    - Reorder definitions
    - Add missing include

v3: - Use `GetModuleHandle()` to load the API Set
This commit is contained in:
Joel Linn 2020-12-23 13:36:46 -08:00
parent 2b040cebbe
commit d0b8295c0d
2 changed files with 339 additions and 23 deletions

View File

@ -1210,6 +1210,22 @@ extern "C" {
*/
#define SDL_HINT_WINDOWS_FORCE_MUTEX_CRITICAL_SECTIONS "SDL_WINDOWS_FORCE_MUTEX_CRITICAL_SECTIONS"
/**
* \brief Force SDL to use Kernel Semaphores on Windows.
* Kernel Semaphores are inter-process and require a context
* switch on every interaction. On Windows 8 and newer, the
* WaitOnAddress API is available. Using that and atomics to
* implement semaphores increases performance.
* SDL will fall back to Kernel Objects on older OS versions
* or if forced to by this hint.
*
* The hint is evaluated when the first semaphore is created and the
* selected implementation is kept from then on, so it has to be set
* before that first SDL_CreateSemaphore() call to take effect.
*
* This variable can be set to the following values:
* "0" - Use Atomics and WaitOnAddress API when available. If not, fall back to Kernel Objects. (default)
* "1" - Force the use of Kernel Objects in all cases.
*
*/
#define SDL_HINT_WINDOWS_FORCE_SEMAPHORE_KERNEL "SDL_WINDOWS_FORCE_SEMAPHORE_KERNEL"
/**
* \brief Tell SDL which Dispmanx layer to use on a Raspberry PI
*

View File

@ -22,27 +22,239 @@
#if SDL_THREAD_WINDOWS
/* Semaphore functions using the Win32 API */
/**
* Semaphore functions using the Win32 API
* There are two implementations available based on:
* - Kernel Semaphores. Available on all OS versions. (kern)
* Heavy-weight inter-process kernel objects.
* - Atomics and WaitOnAddress API. (atom)
* Faster due to significantly fewer context switches.
* Requires Windows 8 or newer.
* which are chosen at runtime.
*/
#include "../../core/windows/SDL_windows.h"
#include "SDL_hints.h"
#include "SDL_thread.h"
#include "SDL_timer.h"
struct SDL_semaphore
typedef SDL_sem * (*pfnSDL_CreateSemaphore)(Uint32);
typedef void (*pfnSDL_DestroySemaphore)(SDL_sem *);
typedef int (*pfnSDL_SemWaitTimeout)(SDL_sem *, Uint32);
typedef int (*pfnSDL_SemTryWait)(SDL_sem *);
typedef int (*pfnSDL_SemWait)(SDL_sem *);
typedef Uint32 (*pfnSDL_SemValue)(SDL_sem *);
typedef int (*pfnSDL_SemPost)(SDL_sem *);
typedef struct SDL_semaphore_impl_t
{
pfnSDL_CreateSemaphore Create;
pfnSDL_DestroySemaphore Destroy;
pfnSDL_SemWaitTimeout WaitTimeout;
pfnSDL_SemTryWait TryWait;
pfnSDL_SemWait Wait;
pfnSDL_SemValue Value;
pfnSDL_SemPost Post;
} SDL_sem_impl_t;
/* Implementation will be chosen at runtime based on available Kernel features */
static SDL_sem_impl_t SDL_sem_impl_active = {0};
/**
* Atomic + WaitOnAddress implementation
*/
/* Signatures of the two synchronization entry points resolved at runtime */
typedef BOOL (WINAPI *pfnWaitOnAddress)(volatile VOID *Address,
                                        PVOID CompareAddress,
                                        SIZE_T AddressSize,
                                        DWORD dwMilliseconds);
typedef VOID (WINAPI *pfnWakeByAddressSingle)(PVOID Address);

/* Both stay NULL until SDL_CreateSemaphore() looks them up successfully */
static pfnWaitOnAddress       pWaitOnAddress       = NULL;
static pfnWakeByAddressSingle pWakeByAddressSingle = NULL;
/* State of one semaphore in the Atomics + WaitOnAddress implementation */
typedef struct SDL_semaphore_atom
{
/* NOTE(review): no *_atom function visible here reads or writes this field - confirm whether it is needed */
HANDLE id;
/* Number of units currently available. Set once at creation, afterwards
 * changed only through Interlocked* calls; its address is also the one
 * passed to WaitOnAddress / WakeByAddressSingle. */
LONG count;
} SDL_sem_atom;
/**
 * Allocate a semaphore for the atomics based implementation.
 *
 * \param initial_value number of units the semaphore starts with
 * \return the new semaphore, or NULL (after SDL_OutOfMemory()) if the
 *         allocation failed
 */
static SDL_sem *
SDL_CreateSemaphore_atom(Uint32 initial_value)
{
    SDL_sem_atom *atom = (SDL_sem_atom *) SDL_malloc(sizeof(SDL_sem_atom));

    if (atom == NULL) {
        SDL_OutOfMemory();
        return NULL;
    }

    atom->count = initial_value;
    return (SDL_sem *)atom;
}
/**
 * Release the memory of a semaphore made by SDL_CreateSemaphore_atom.
 * A NULL pointer is accepted and ignored.
 */
static void
SDL_DestroySemaphore_atom(SDL_sem * sem)
{
    if (sem == NULL) {
        return;
    }

    SDL_free(sem);
}
/**
 * Try to take one unit of the semaphore without blocking.
 *
 * The attempt only fails when the count is actually zero. Losing a
 * compare-exchange race against another thread is not a reason to report
 * SDL_MUTEX_TIMEDOUT, so the exchange is retried with the freshly observed
 * count until it either succeeds or the count turns out to be zero.
 *
 * \param _sem semaphore created by SDL_CreateSemaphore_atom
 * \return 0 if a unit was taken, SDL_MUTEX_TIMEDOUT if none was available,
 *         or the SDL_SetError() result if `_sem` is NULL
 */
static int
SDL_SemTryWait_atom(SDL_sem * _sem)
{
    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
    LONG count;
    LONG seen;

    if (!sem) {
        return SDL_SetError("Passed a NULL sem");
    }

    count = sem->count;
    while (count != 0) {
        seen = InterlockedCompareExchange(&sem->count, count - 1, count);
        if (seen == count) {
            return 0;
        }
        /* Another thread changed the count first; the exchange reported
         * the value it found, so retry against that value. */
        count = seen;
    }

    return SDL_MUTEX_TIMEDOUT;
}
/**
 * Block until one unit of the semaphore could be taken.
 *
 * \param _sem semaphore created by SDL_CreateSemaphore_atom
 * \return 0 once a unit was taken, or the SDL_SetError() result if `_sem`
 *         is NULL or WaitOnAddress() reports a failure
 */
static int
SDL_SemWait_atom(SDL_sem * _sem)
{
    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
    LONG observed;

    if (sem == NULL) {
        return SDL_SetError("Passed a NULL sem");
    }

    for (;;) {
        observed = sem->count;

        if (observed == 0) {
            /* Nothing available: sleep until the count no longer equals 0,
             * then start over with a fresh read. */
            if (pWaitOnAddress(&sem->count, &observed, sizeof(sem->count), INFINITE) == FALSE) {
                return SDL_SetError("WaitOnAddress() failed");
            }
            continue;
        }

        /* The unit is only ours if nobody changed the count in between */
        if (InterlockedCompareExchange(&sem->count, observed - 1, observed) == observed) {
            return 0;
        }
    }
}
/**
 * Take one unit of the semaphore, giving up once `timeout` has passed.
 *
 * \param _sem    semaphore created by SDL_CreateSemaphore_atom
 * \param timeout maximum time to wait, in the units of SDL_GetTicks();
 *                SDL_MUTEX_MAXWAIT is forwarded to SDL_SemWait_atom
 * \return 0 if a unit was taken, SDL_MUTEX_TIMEDOUT if the time ran out
 *         first, or the SDL_SetError() result if `_sem` is NULL or
 *         WaitOnAddress() fails for another reason
 */
static int
SDL_SemWaitTimeout_atom(SDL_sem * _sem, Uint32 timeout)
{
    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
    LONG count;
    Uint32 start;
    Uint32 elapsed;
    DWORD timeout_eff;

    if (timeout == SDL_MUTEX_MAXWAIT) {
        return SDL_SemWait_atom(_sem);
    }

    if (!sem) {
        return SDL_SetError("Passed a NULL sem");
    }

    /**
     * WaitOnAddress is subject to spurious and stolen wakeups so we
     * need to recalculate the effective timeout before every wait.
     *
     * The remaining time is derived from the time elapsed since `start`.
     * Unsigned subtraction keeps that value correct even when the 32-bit
     * tick counter wraps around during the wait, whereas comparing against
     * an absolute `start + timeout` deadline would time out immediately.
     */
    start = SDL_GetTicks();

    for (;;) {
        count = sem->count;

        /* If no semaphore is available we need to wait */
        while (count == 0) {
            elapsed = SDL_GetTicks() - start;
            if (elapsed >= timeout) {
                return SDL_MUTEX_TIMEDOUT;
            }
            timeout_eff = (DWORD)(timeout - elapsed);

            if (pWaitOnAddress(&sem->count, &count, sizeof(count), timeout_eff) == FALSE) {
                if (GetLastError() == ERROR_TIMEOUT) {
                    return SDL_MUTEX_TIMEDOUT;
                }
                return SDL_SetError("WaitOnAddress() failed");
            }
            count = sem->count;
        }

        /* Actually the semaphore is only consumed if this succeeds */
        /* If it doesn't we need to do everything again */
        if (InterlockedCompareExchange(&sem->count, count - 1, count) == count) {
            return 0;
        }
    }
}
/**
 * Report the current count of the semaphore.
 *
 * \param _sem semaphore created by SDL_CreateSemaphore_atom
 * \return the count, or 0 (with an SDL error set) if `_sem` is NULL
 */
static Uint32
SDL_SemValue_atom(SDL_sem * _sem)
{
    const SDL_sem_atom *atom = (const SDL_sem_atom *)_sem;

    if (atom != NULL) {
        return (Uint32)atom->count;
    }

    SDL_SetError("Passed a NULL sem");
    return 0;
}
/**
 * Add one unit to the semaphore and wake a single waiter, if any.
 *
 * \param _sem semaphore created by SDL_CreateSemaphore_atom
 * \return 0 on success, or the SDL_SetError() result if `_sem` is NULL
 */
static int
SDL_SemPost_atom(SDL_sem * _sem)
{
    SDL_sem_atom *atom = (SDL_sem_atom *)_sem;

    if (atom == NULL) {
        return SDL_SetError("Passed a NULL sem");
    }

    /* Publish the new unit first, then notify one thread blocked on the count */
    InterlockedIncrement(&atom->count);
    pWakeByAddressSingle(&atom->count);

    return 0;
}
/* Dispatch table for the Atomics + WaitOnAddress implementation.
 * Entries are positional and must follow the member order of SDL_sem_impl_t. */
static const SDL_sem_impl_t SDL_sem_impl_atom =
{
    SDL_CreateSemaphore_atom,   /* Create */
    SDL_DestroySemaphore_atom,  /* Destroy */
    SDL_SemWaitTimeout_atom,    /* WaitTimeout */
    SDL_SemTryWait_atom,        /* TryWait */
    SDL_SemWait_atom,           /* Wait */
    SDL_SemValue_atom,          /* Value */
    SDL_SemPost_atom,           /* Post */
};
/* Create a semaphore */
SDL_sem *
SDL_CreateSemaphore(Uint32 initial_value)
/**
* Fallback Semaphore implementation using Kernel Semaphores
*/
/* State of one semaphore in the kernel-object implementation */
typedef struct SDL_semaphore_kern
{
/* NOTE(review): not referenced by the *_kern code visible here - confirm this member belongs in the struct */
SDL_sem *sem;
/* Handle that SDL_DestroySemaphore_kern releases with CloseHandle() */
HANDLE id;
/* Value that SDL_SemValue_kern reports as the current count */
LONG count;
} SDL_sem_kern;
/* Create a semaphore */
static SDL_sem *
SDL_CreateSemaphore_kern(Uint32 initial_value)
{
SDL_sem_kern *sem;
/* Allocate sem memory */
sem = (SDL_sem *) SDL_malloc(sizeof(*sem));
sem = (SDL_sem_kern *) SDL_malloc(sizeof(*sem));
if (sem) {
/* Create the semaphore, with max value 32K */
#if __WINRT__
@ -59,13 +271,14 @@ SDL_CreateSemaphore(Uint32 initial_value)
} else {
SDL_OutOfMemory();
}
return (sem);
return (SDL_sem *)sem;
}
/* Free the semaphore */
void
SDL_DestroySemaphore(SDL_sem * sem)
static void
SDL_DestroySemaphore_kern(SDL_sem * _sem)
{
SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
if (sem) {
if (sem->id) {
CloseHandle(sem->id);
@ -75,9 +288,10 @@ SDL_DestroySemaphore(SDL_sem * sem)
}
}
int
SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
static int
SDL_SemWaitTimeout_kern(SDL_sem * _sem, Uint32 timeout)
{
SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
int retval;
DWORD dwMilliseconds;
@ -105,22 +319,23 @@ SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
return retval;
}
int
SDL_SemTryWait(SDL_sem * sem)
/* Non-blocking acquire: forwards to the timed wait with a timeout of 0. */
/* NOTE(review): two return statements follow each other below, so only the
 * first one can ever execute - confirm which of the two is intended. */
static int
SDL_SemTryWait_kern(SDL_sem * sem)
{
return SDL_SemWaitTimeout(sem, 0);
return SDL_SemWaitTimeout_kern(sem, 0);
}
int
SDL_SemWait(SDL_sem * sem)
/* Blocking acquire: forwards to the timed wait with SDL_MUTEX_MAXWAIT. */
/* NOTE(review): two return statements follow each other below, so only the
 * first one can ever execute - confirm which of the two is intended. */
static int
SDL_SemWait_kern(SDL_sem * sem)
{
return SDL_SemWaitTimeout(sem, SDL_MUTEX_MAXWAIT);
return SDL_SemWaitTimeout_kern(sem, SDL_MUTEX_MAXWAIT);
}
/* Returns the current count of the semaphore */
Uint32
SDL_SemValue(SDL_sem * sem)
static Uint32
SDL_SemValue_kern(SDL_sem * _sem)
{
SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
if (!sem) {
SDL_SetError("Passed a NULL sem");
return 0;
@ -128,9 +343,10 @@ SDL_SemValue(SDL_sem * sem)
return (Uint32)sem->count;
}
int
SDL_SemPost(SDL_sem * sem)
static int
SDL_SemPost_kern(SDL_sem * _sem)
{
SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
if (!sem) {
return SDL_SetError("Passed a NULL sem");
}
@ -147,6 +363,90 @@ SDL_SemPost(SDL_sem * sem)
return 0;
}
/* Dispatch table for the kernel-object (fallback) implementation.
 * Entries are positional and must follow the member order of SDL_sem_impl_t. */
static const SDL_sem_impl_t SDL_sem_impl_kern =
{
    SDL_CreateSemaphore_kern,   /* Create */
    SDL_DestroySemaphore_kern,  /* Destroy */
    SDL_SemWaitTimeout_kern,    /* WaitTimeout */
    SDL_SemTryWait_kern,        /* TryWait */
    SDL_SemWait_kern,           /* Wait */
    SDL_SemValue_kern,          /* Value */
    SDL_SemPost_kern,           /* Post */
};
/**
* Runtime selection and redirection
*/
/**
 * Create a semaphore, selecting the implementation on the first call.
 *
 * While the active dispatch table is still empty, the kernel-object
 * implementation is taken as the default and replaced by the atomics
 * implementation when the hint does not forbid it and both WaitOnAddress
 * and WakeByAddressSingle can be resolved.
 *
 * \param initial_value number of units the semaphore starts with
 * \return whatever the selected implementation's Create function returns
 */
SDL_sem *
SDL_CreateSemaphore(Uint32 initial_value)
{
    if (SDL_sem_impl_active.Create == NULL) {
        /* Default to fallback implementation */
        const SDL_sem_impl_t *chosen = &SDL_sem_impl_kern;

        if (SDL_GetHintBoolean(SDL_HINT_WINDOWS_FORCE_SEMAPHORE_KERNEL, SDL_FALSE) == SDL_FALSE) {
            /* We already statically link to features from this Api
             * Set (e.g. WaitForSingleObject). Dynamically loading
             * API Sets is not explicitly documented but according to
             * Microsoft our specific use case is legal and correct:
             * https://github.com/microsoft/STL/pull/593#issuecomment-655799859
             */
            HMODULE api_set = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");

            if (api_set != NULL) {
                /* Try to load required functions provided by Win 8 or newer */
                pWaitOnAddress = (pfnWaitOnAddress) GetProcAddress(api_set, "WaitOnAddress");
                pWakeByAddressSingle = (pfnWakeByAddressSingle) GetProcAddress(api_set, "WakeByAddressSingle");

                if (pWaitOnAddress != NULL && pWakeByAddressSingle != NULL) {
                    chosen = &SDL_sem_impl_atom;
                }
            }
        }

        /* Copy instead of using pointer to save one level of indirection */
        SDL_sem_impl_active = *chosen;
    }

    return SDL_sem_impl_active.Create(initial_value);
}
/**
 * Destroy a semaphore through the active implementation.
 *
 * \param sem semaphore to destroy; NULL is ignored
 */
void
SDL_DestroySemaphore(SDL_sem * sem)
{
    /* Nothing to release for NULL. Returning here also keeps the call from
     * going through a still-NULL Destroy slot when no semaphore has been
     * created yet and therefore no implementation has been selected. */
    if (sem) {
        SDL_sem_impl_active.Destroy(sem);
    }
}
/**
 * Wait on a semaphore with a timeout through the active implementation.
 *
 * \param sem     semaphore to wait on
 * \param timeout maximum wait, passed unchanged to the implementation
 * \return the implementation's result, or the SDL_SetError() result if
 *         `sem` is NULL
 */
int
SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
{
    /* Reject NULL before dispatching: if no semaphore has been created yet
     * the WaitTimeout slot is still NULL and must not be called. */
    if (!sem) {
        return SDL_SetError("Passed a NULL sem");
    }
    return SDL_sem_impl_active.WaitTimeout(sem, timeout);
}
/**
 * Try to take a semaphore without blocking, through the active implementation.
 *
 * \param sem semaphore to try
 * \return the implementation's result, or the SDL_SetError() result if
 *         `sem` is NULL
 */
int
SDL_SemTryWait(SDL_sem * sem)
{
    /* Reject NULL before dispatching: if no semaphore has been created yet
     * the TryWait slot is still NULL and must not be called. */
    if (!sem) {
        return SDL_SetError("Passed a NULL sem");
    }
    return SDL_sem_impl_active.TryWait(sem);
}
/**
 * Block on a semaphore through the active implementation.
 *
 * \param sem semaphore to wait on
 * \return the implementation's result, or the SDL_SetError() result if
 *         `sem` is NULL
 */
int
SDL_SemWait(SDL_sem * sem)
{
    /* Reject NULL before dispatching: if no semaphore has been created yet
     * the Wait slot is still NULL and must not be called. */
    if (!sem) {
        return SDL_SetError("Passed a NULL sem");
    }
    return SDL_sem_impl_active.Wait(sem);
}
/**
 * Query the current count of a semaphore through the active implementation.
 *
 * \param sem semaphore to query
 * \return the implementation's result, or 0 (with an SDL error set) if
 *         `sem` is NULL
 */
Uint32
SDL_SemValue(SDL_sem * sem)
{
    /* Reject NULL before dispatching: if no semaphore has been created yet
     * the Value slot is still NULL and must not be called. */
    if (!sem) {
        SDL_SetError("Passed a NULL sem");
        return 0;
    }
    return SDL_sem_impl_active.Value(sem);
}
/**
 * Release one unit of a semaphore through the active implementation.
 *
 * \param sem semaphore to post to
 * \return the implementation's result, or the SDL_SetError() result if
 *         `sem` is NULL
 */
int
SDL_SemPost(SDL_sem * sem)
{
    /* Reject NULL before dispatching: if no semaphore has been created yet
     * the Post slot is still NULL and must not be called. */
    if (!sem) {
        return SDL_SetError("Passed a NULL sem");
    }
    return SDL_sem_impl_active.Post(sem);
}
#endif /* SDL_THREAD_WINDOWS */
/* vi: set ts=4 sw=4 expandtab: */