mirror of
https://github.com/ekeeke/Genesis-Plus-GX.git
synced 2025-01-16 05:09:09 +01:00
408 lines
12 KiB
C
408 lines
12 KiB
C
|
/*
|
||
|
libco.ppc (2010-10-17)
|
||
|
author: blargg
|
||
|
license: public domain
|
||
|
*/
|
||
|
|
||
|
/* PowerPC 32/64 using embedded or external asm, with optional
|
||
|
floating-point and AltiVec save/restore */
|
||
|
|
||
|
#define LIBCO_C
|
||
|
#include <libco.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <stdint.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
/* LIBCO_MPROTECT is 1 when the swap code embedded in this file must be made
   executable at run time with mprotect(), and 0 otherwise. That is the case
   on Unix-like hosts when the swap routine is NOT supplied by external
   assembly (LIBCO_PPC_ASM).

   LIBCO_PPC_ASM is tested with defined() because the rest of this file
   selects the external-asm path with #ifdef. Testing its value here would
   break an empty definition and would disagree with #ifdef for a definition
   of 0. */
#if defined(__unix__) && !defined(LIBCO_PPC_ASM)
	#define LIBCO_MPROTECT 1
#else
	#define LIBCO_MPROTECT 0
#endif

#if LIBCO_MPROTECT
	#include <unistd.h>
	#include <sys/mman.h>
#endif
|
||
|
|
||
|
/* State format (offsets in 32-bit words)
|
||
|
|
||
|
+0 Pointer to swap code
|
||
|
Rest of function descriptor for entry function
|
||
|
+8 PC
|
||
|
+10 SP
|
||
|
Special regs
|
||
|
GPRs
|
||
|
FPRs
|
||
|
VRs
|
||
|
stack
|
||
|
*/
|
||
|
|
||
|
/* Sizes, in bytes, used when laying out a cothread's memory block */
enum {
	state_size  = 1024, /* size of the saved-state area; also the whole
	                       allocation made for the main thread */
	above_stack = 2048, /* distance from the end of the block down to the
	                       initial stack pointer (before alignment) */
	stack_align = 256   /* initial stack pointer is rounded down to a
	                       multiple of this */
};
|
||
|
|
||
|
/* Handle of the cothread currently running. Stays 0 until co_active()
   creates the main thread's state block on first use.
   NOTE(review): thread_local is expected to come from libco.h; confirm
   whether it really gives per-thread storage on every target. */
static thread_local cothread_t co_active_handle = 0;
|
||
|
|
||
|
/**** Determine environment ****/
|
||
|
|
||
|
/* Nonzero (in #if context) when any of the usual 64-bit PowerPC predefined
   macros is set. Only meaningful inside #if, where undefined names read
   as 0. */
#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__)

/* LIBCO_PPCDESC selects how functions are called:
     1         - a function pointer addresses a descriptor, and the call goes
                 indirectly through it
     undefined - a function pointer addresses the code itself
   A value supplied by the build is respected. Otherwise descriptors are
   assumed for non-SysV targets that are AIX-style or 64-bit. */
#if !defined(LIBCO_PPCDESC) && \
    !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64)
	#define LIBCO_PPCDESC 1
#endif
|
||
|
|
||
|
#ifdef LIBCO_PPC_ASM
|
||
|
|
||
|
/* Swap code is in ppc.S. The routine is declared with C linkage so that it
   links the same way when this file is built as C++. */
#ifdef __cplusplus
extern "C"
#endif
void co_swap_asm( cothread_t, cothread_t );

/* Switch contexts by calling the external assembly routine directly */
#define CO_SWAP_ASM( x, y ) co_swap_asm( x, y )
|
||
|
|
||
|
#else
|
||
|
|
||
|
/* Swap code is here in array. Please leave disassembly comments,
|
||
|
as they make it easy to see what it does, and reorder instructions
|
||
|
if one wants to see whether that improves performance. */
|
||
|
/* Pre-assembled PowerPC context-switch routine, one instruction per word.

   As the disassembly shows, it takes two state blocks: registers are stored
   to the block addressed by r4 and then loaded from the block addressed by
   r3. Control leaves through CTR, which is loaded from the PC slot (byte
   offset 32) of the r3 block.

   Sections are selected at compile time:
     - general-purpose registers, CR, LR and SP (64-bit or 32-bit forms)
     - f14-f31, unless LIBCO_PPC_NOFP is defined
     - v20-v31 and VRSAVE, when __ALTIVEC__ is defined; the vector
       save/restore is skipped when the low 12 bits of VRSAVE are zero */
static const uint32_t libco_ppc_code [] = {
#if LIBCO_PPC64
	/* ---- 64-bit: save to r4 block ---- */
	0x7d000026, /* mfcr    r8 */
	0xf8240028, /* std     r1,40(r4) */
	0x7d2802a6, /* mflr    r9 */
	0xf9c40048, /* std     r14,72(r4) */
	0xf9e40050, /* std     r15,80(r4) */
	0xfa040058, /* std     r16,88(r4) */
	0xfa240060, /* std     r17,96(r4) */
	0xfa440068, /* std     r18,104(r4) */
	0xfa640070, /* std     r19,112(r4) */
	0xfa840078, /* std     r20,120(r4) */
	0xfaa40080, /* std     r21,128(r4) */
	0xfac40088, /* std     r22,136(r4) */
	0xfae40090, /* std     r23,144(r4) */
	0xfb040098, /* std     r24,152(r4) */
	0xfb2400a0, /* std     r25,160(r4) */
	0xfb4400a8, /* std     r26,168(r4) */
	0xfb6400b0, /* std     r27,176(r4) */
	0xfb8400b8, /* std     r28,184(r4) */
	0xfba400c0, /* std     r29,192(r4) */
	0xfbc400c8, /* std     r30,200(r4) */
	0xfbe400d0, /* std     r31,208(r4) */
	0xf9240020, /* std     r9,32(r4) */
	/* ---- 64-bit: load from r3 block ---- */
	0xe8e30020, /* ld      r7,32(r3) */
	0xe8230028, /* ld      r1,40(r3) */
	0x48000009, /* bl      1 */
	0x7fe00008, /* trap */
	0x91040030,/*1:stw     r8,48(r4) */
	0x80c30030, /* lwz     r6,48(r3) */
	0x7ce903a6, /* mtctr   r7 */
	0xe9c30048, /* ld      r14,72(r3) */
	0xe9e30050, /* ld      r15,80(r3) */
	0xea030058, /* ld      r16,88(r3) */
	0xea230060, /* ld      r17,96(r3) */
	0xea430068, /* ld      r18,104(r3) */
	0xea630070, /* ld      r19,112(r3) */
	0xea830078, /* ld      r20,120(r3) */
	0xeaa30080, /* ld      r21,128(r3) */
	0xeac30088, /* ld      r22,136(r3) */
	0xeae30090, /* ld      r23,144(r3) */
	0xeb030098, /* ld      r24,152(r3) */
	0xeb2300a0, /* ld      r25,160(r3) */
	0xeb4300a8, /* ld      r26,168(r3) */
	0xeb6300b0, /* ld      r27,176(r3) */
	0xeb8300b8, /* ld      r28,184(r3) */
	0xeba300c0, /* ld      r29,192(r3) */
	0xebc300c8, /* ld      r30,200(r3) */
	0xebe300d0, /* ld      r31,208(r3) */
	0x7ccff120, /* mtcr    r6 */
#else
	/* ---- 32-bit: save to r4 block ---- */
	0x7d000026, /* mfcr    r8 */
	0x90240028, /* stw     r1,40(r4) */
	0x7d2802a6, /* mflr    r9 */
	0x91a4003c, /* stw     r13,60(r4) */
	0x91c40040, /* stw     r14,64(r4) */
	0x91e40044, /* stw     r15,68(r4) */
	0x92040048, /* stw     r16,72(r4) */
	0x9224004c, /* stw     r17,76(r4) */
	0x92440050, /* stw     r18,80(r4) */
	0x92640054, /* stw     r19,84(r4) */
	0x92840058, /* stw     r20,88(r4) */
	0x92a4005c, /* stw     r21,92(r4) */
	0x92c40060, /* stw     r22,96(r4) */
	0x92e40064, /* stw     r23,100(r4) */
	0x93040068, /* stw     r24,104(r4) */
	0x9324006c, /* stw     r25,108(r4) */
	0x93440070, /* stw     r26,112(r4) */
	0x93640074, /* stw     r27,116(r4) */
	0x93840078, /* stw     r28,120(r4) */
	0x93a4007c, /* stw     r29,124(r4) */
	0x93c40080, /* stw     r30,128(r4) */
	0x93e40084, /* stw     r31,132(r4) */
	0x91240020, /* stw     r9,32(r4) */
	/* ---- 32-bit: load from r3 block ---- */
	0x80e30020, /* lwz     r7,32(r3) */
	0x80230028, /* lwz     r1,40(r3) */
	0x48000009, /* bl      1 */
	0x7fe00008, /* trap */
	0x91040030,/*1:stw     r8,48(r4) */
	0x80c30030, /* lwz     r6,48(r3) */
	0x7ce903a6, /* mtctr   r7 */
	0x81a3003c, /* lwz     r13,60(r3) */
	0x81c30040, /* lwz     r14,64(r3) */
	0x81e30044, /* lwz     r15,68(r3) */
	0x82030048, /* lwz     r16,72(r3) */
	0x8223004c, /* lwz     r17,76(r3) */
	0x82430050, /* lwz     r18,80(r3) */
	0x82630054, /* lwz     r19,84(r3) */
	0x82830058, /* lwz     r20,88(r3) */
	0x82a3005c, /* lwz     r21,92(r3) */
	0x82c30060, /* lwz     r22,96(r3) */
	0x82e30064, /* lwz     r23,100(r3) */
	0x83030068, /* lwz     r24,104(r3) */
	0x8323006c, /* lwz     r25,108(r3) */
	0x83430070, /* lwz     r26,112(r3) */
	0x83630074, /* lwz     r27,116(r3) */
	0x83830078, /* lwz     r28,120(r3) */
	0x83a3007c, /* lwz     r29,124(r3) */
	0x83c30080, /* lwz     r30,128(r3) */
	0x83e30084, /* lwz     r31,132(r3) */
	0x7ccff120, /* mtcr    r6 */
#endif

#ifndef LIBCO_PPC_NOFP
	/* ---- floating point: save f14-f31 to r4 block ---- */
	0xd9c400e0, /* stfd    f14,224(r4) */
	0xd9e400e8, /* stfd    f15,232(r4) */
	0xda0400f0, /* stfd    f16,240(r4) */
	0xda2400f8, /* stfd    f17,248(r4) */
	0xda440100, /* stfd    f18,256(r4) */
	0xda640108, /* stfd    f19,264(r4) */
	0xda840110, /* stfd    f20,272(r4) */
	0xdaa40118, /* stfd    f21,280(r4) */
	0xdac40120, /* stfd    f22,288(r4) */
	0xdae40128, /* stfd    f23,296(r4) */
	0xdb040130, /* stfd    f24,304(r4) */
	0xdb240138, /* stfd    f25,312(r4) */
	0xdb440140, /* stfd    f26,320(r4) */
	0xdb640148, /* stfd    f27,328(r4) */
	0xdb840150, /* stfd    f28,336(r4) */
	0xdba40158, /* stfd    f29,344(r4) */
	0xdbc40160, /* stfd    f30,352(r4) */
	0xdbe40168, /* stfd    f31,360(r4) */
	/* ---- floating point: load f14-f31 from r3 block ---- */
	0xc9c300e0, /* lfd     f14,224(r3) */
	0xc9e300e8, /* lfd     f15,232(r3) */
	0xca0300f0, /* lfd     f16,240(r3) */
	0xca2300f8, /* lfd     f17,248(r3) */
	0xca430100, /* lfd     f18,256(r3) */
	0xca630108, /* lfd     f19,264(r3) */
	0xca830110, /* lfd     f20,272(r3) */
	0xcaa30118, /* lfd     f21,280(r3) */
	0xcac30120, /* lfd     f22,288(r3) */
	0xcae30128, /* lfd     f23,296(r3) */
	0xcb030130, /* lfd     f24,304(r3) */
	0xcb230138, /* lfd     f25,312(r3) */
	0xcb430140, /* lfd     f26,320(r3) */
	0xcb630148, /* lfd     f27,328(r3) */
	0xcb830150, /* lfd     f28,336(r3) */
	0xcba30158, /* lfd     f29,344(r3) */
	0xcbc30160, /* lfd     f30,352(r3) */
	0xcbe30168, /* lfd     f31,360(r3) */
#endif

#ifdef __ALTIVEC__
	/* ---- AltiVec: save VRSAVE and v20-v31 to r4 block ---- */
	0x7ca042a6, /* mfvrsave r5 */
	0x39040180, /* addi    r8,r4,384 */
	0x39240190, /* addi    r9,r4,400 */
	0x70a00fff, /* andi.   r0,r5,4095 */
	0x90a40034, /* stw     r5,52(r4) */
	0x4182005c, /* beq-    2 */
	0x7e8041ce, /* stvx    v20,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7ea049ce, /* stvx    v21,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7ec041ce, /* stvx    v22,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7ee049ce, /* stvx    v23,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f0041ce, /* stvx    v24,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7f2049ce, /* stvx    v25,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f4041ce, /* stvx    v26,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7f6049ce, /* stvx    v27,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f8041ce, /* stvx    v28,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7fa049ce, /* stvx    v29,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7fc041ce, /* stvx    v30,r0,r8 */
	0x7fe049ce, /* stvx    v31,r0,r9 */
	/* ---- AltiVec: load VRSAVE and v20-v31 from r3 block ---- */
	0x80a30034,/*2:lwz     r5,52(r3) */
	0x39030180, /* addi    r8,r3,384 */
	0x39230190, /* addi    r9,r3,400 */
	0x70a00fff, /* andi.   r0,r5,4095 */
	0x7ca043a6, /* mtvrsave r5 */
	0x4d820420, /* beqctr */
	0x7e8040ce, /* lvx     v20,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7ea048ce, /* lvx     v21,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7ec040ce, /* lvx     v22,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7ee048ce, /* lvx     v23,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f0040ce, /* lvx     v24,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7f2048ce, /* lvx     v25,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f4040ce, /* lvx     v26,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7f6048ce, /* lvx     v27,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f8040ce, /* lvx     v28,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7fa048ce, /* lvx     v29,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7fc040ce, /* lvx     v30,r0,r8 */
	0x7fe048ce, /* lvx     v31,r0,r9 */
#endif

	/* ---- continue in the restored context ---- */
	0x4e800420, /* bctr */
};
|
||
|
|
||
|
/* CO_SWAP_ASM( x, y ) runs the embedded swap code with x as its first
   argument and y as its second.

   Every use of a macro parameter is parenthesized so that an argument which
   is an expression (not just a plain identifier) is cast and passed as a
   whole. */
#if LIBCO_PPCDESC
	/* Function call goes through indirect descriptor: the state block x
	   itself is called as a function. Note that x is evaluated twice, so
	   it must not have side effects. */
	#define CO_SWAP_ASM( x, y ) \
		((void (*)( cothread_t, cothread_t )) (uintptr_t) (x))( (x), (y) )
#else
	/* Function call goes directly to code */
	#define CO_SWAP_ASM( x, y ) \
		((void (*)( cothread_t, cothread_t )) (uintptr_t) libco_ppc_code)( (x), (y) )
#endif
|
||
|
|
||
|
#endif
|
||
|
|
||
|
/* Allocates a raw cothread block of `size` bytes.

   When function descriptors are in use (LIBCO_PPCDESC), the start of the
   block is made into a descriptor: three pointers are copied from the
   descriptor addressed by `entry`, and the first one is then replaced by
   the address of the swap routine. Without descriptors `entry` is unused
   and the block is returned as malloc() left it.

   Returns the block, or NULL when the allocation fails. */
static uint32_t* co_create_( unsigned size, uintptr_t entry )
{
	uint32_t* state = (uint32_t*) malloc( size );

#if LIBCO_PPCDESC
	if ( state != NULL )
	{
		/* Copy entry's descriptor */
		memcpy( state, (void*) entry, sizeof (void*) * 3 );

		/* Set function pointer to swap routine */
	#ifdef LIBCO_PPC_ASM
		*(const void**) state = *(void**) &co_swap_asm;
	#else
		*(const void**) state = libco_ppc_code;
	#endif
	}
#else
	/* Only needed when building a descriptor */
	(void) entry;
#endif

	return state;
}
|
||
|
|
||
|
cothread_t co_create( unsigned int size, void (*entry_)( void ) )
|
||
|
{
|
||
|
uintptr_t entry = (uintptr_t) entry_;
|
||
|
uint32_t* t = NULL;
|
||
|
|
||
|
/* Be sure main thread was successfully allocated */
|
||
|
if ( co_active() )
|
||
|
{
|
||
|
size += state_size + above_stack + stack_align;
|
||
|
t = co_create_( size, entry );
|
||
|
}
|
||
|
|
||
|
if ( t )
|
||
|
{
|
||
|
uintptr_t sp;
|
||
|
int shift;
|
||
|
|
||
|
/* Save current registers into new thread, so that any special ones will
|
||
|
have proper values when thread is begun */
|
||
|
CO_SWAP_ASM( t, t );
|
||
|
|
||
|
#if LIBCO_PPCDESC
|
||
|
/* Get real address */
|
||
|
entry = (uintptr_t) *(void**) entry;
|
||
|
#endif
|
||
|
|
||
|
/* Put stack near end of block, and align */
|
||
|
sp = (uintptr_t) t + size - above_stack;
|
||
|
sp -= sp % stack_align;
|
||
|
|
||
|
/* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we
|
||
|
save and restore them as 64 bits, regardless of the size the ABI
|
||
|
uses. So, we manually write pointers at the proper size. We always
|
||
|
save and restore at the same address, and since PPC is big-endian,
|
||
|
we must put the low byte first on PPC32. */
|
||
|
|
||
|
/* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts
|
||
|
and don't have to care how many bits uintptr_t is. */
|
||
|
#if LIBCO_PPC64
|
||
|
shift = 16;
|
||
|
#else
|
||
|
shift = 0;
|
||
|
#endif
|
||
|
|
||
|
/* Set up so entry will be called on next swap */
|
||
|
t [8] = (uint32_t) (entry >> shift >> shift);
|
||
|
t [9] = (uint32_t) entry;
|
||
|
|
||
|
t [10] = (uint32_t) (sp >> shift >> shift);
|
||
|
t [11] = (uint32_t) sp;
|
||
|
}
|
||
|
|
||
|
return t;
|
||
|
}
|
||
|
|
||
|
/* Releases a cothread's block. The whole cothread (state and stack) is a
   single heap allocation, so one free() is all that is needed. */
void co_delete( cothread_t t )
{
	free( t );
}
|
||
|
|
||
|
static void co_init_( void )
|
||
|
{
|
||
|
#if LIBCO_MPROTECT
|
||
|
/* TODO: pre- and post-pad PPC code so that this doesn't make other
|
||
|
data executable and writable */
|
||
|
long page_size = sysconf( _SC_PAGESIZE );
|
||
|
if ( page_size > 0 )
|
||
|
{
|
||
|
uintptr_t align = page_size;
|
||
|
uintptr_t begin = (uintptr_t) libco_ppc_code;
|
||
|
uintptr_t end = begin + sizeof libco_ppc_code;
|
||
|
|
||
|
/* Align beginning and end */
|
||
|
end += align - 1;
|
||
|
end -= end % align;
|
||
|
begin -= begin % align;
|
||
|
|
||
|
mprotect( (void*) begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC );
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
co_active_handle = co_create_( state_size, (uintptr_t) &co_switch );
|
||
|
}
|
||
|
|
||
|
cothread_t co_active(void)
|
||
|
{
|
||
|
if (!co_active_handle)
|
||
|
co_init_();
|
||
|
|
||
|
return co_active_handle;
|
||
|
}
|
||
|
|
||
|
/* Transfers control to cothread t. The active handle is updated before the
   swap, so t already sees itself as active when it resumes. The swap
   routine receives the target first and the cothread being left second. */
void co_switch(cothread_t t)
{
	cothread_t previous = co_active_handle;

	co_active_handle = t;
	CO_SWAP_ASM( t, previous );
}
|