/*
  libco.ppc (2010-10-17)
  author: blargg
  license: public domain
*/

/* PowerPC 32/64 using embedded or external asm, with optional
floating-point and AltiVec save/restore */
 | 
						|
#define LIBCO_C
#include <libco.h>
#include <limits.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#define LIBCO_MPROTECT (__unix__ && !LIBCO_PPC_ASM)

#if LIBCO_MPROTECT
	#include <unistd.h>
	#include <sys/mman.h>
#endif
 | 
						|
 | 
						|
/* State format (offsets in 32-bit words)

+0	Pointer to swap code
	Rest of function descriptor for entry function
+8	PC
+10	SP
	Special regs
	GPRs
	FPRs
	VRs
	stack
*/
 | 
						|
 | 
						|
/* Bytes at the start of every thread block that hold the saved register
state; the main thread's block is exactly this large */
enum { state_size  = 1024 };

/* Bytes left unused between the initial stack pointer and the end of the
block allocated for a new thread */
enum { above_stack = 2048 };

/* The initial stack pointer is rounded down to a multiple of this */
enum { stack_align = 256  };
 | 
						|
 | 
						|
/* Handle of the cothread currently considered active. It starts out as 0 and
is filled in by co_init_() the first time co_active() is called. */
static thread_local cothread_t co_active_handle = 0;
 | 
						|
 | 
						|
/**** Determine environment ****/

/* Nonzero when any of the listed 64-bit PowerPC predefined macros is set */
#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__)

/* Whether function calls are indirect through a descriptor,
or are directly to function. A LIBCO_PPCDESC already defined by the build
is left untouched; otherwise descriptors are enabled for the AIX calling
conventions and for 64-bit targets, unless the SysV convention is in use. */
#ifndef LIBCO_PPCDESC
	#if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64)
		#define LIBCO_PPCDESC 1
	#endif
#endif
 | 
						|
 | 
						|
#ifdef LIBCO_PPC_ASM
 | 
						|
 | 
						|
	#ifdef __cplusplus
 | 
						|
		extern "C"
 | 
						|
	#endif
 | 
						|
	
 | 
						|
	/* Swap code is in ppc.S */
 | 
						|
	void co_swap_asm( cothread_t, cothread_t );
 | 
						|
	#define CO_SWAP_ASM( x, y ) co_swap_asm( x, y )
 | 
						|
 | 
						|
#else
 | 
						|
 | 
						|
/* Swap code is here in array. Please leave disassembly comments,
as they make it easy to see what it does, and reorder instructions
if one wants to see whether that improves performance.

The routine takes two state blocks: it stores the running context into the
block addressed by r4 and loads the next context from the block addressed
by r3 (callers invoke it as CO_SWAP_ASM( to, from )). Byte offsets used
within a state block:

	32   PC (LR on save, moved to CTR on load)
	40   SP (r1)
	48   CR
	52   VRSAVE
	60+  GPRs r13-r31 (32-bit variant), or
	72+  GPRs r14-r31 (64-bit variant)
	224+ FPRs f14-f31, unless LIBCO_PPC_NOFP is defined
	384+ VRs v20-v31, only when built with __ALTIVEC__

The "bl 1" leaves LR pointing at the trap word that follows it; presumably
this makes an entry function that returns hit the trap. Vector registers
are stored only when the low 12 bits of the current VRSAVE are nonzero, and
loaded only when the low 12 bits of the incoming VRSAVE are nonzero. The
final bctr (or beqctr) jumps to the PC loaded from the incoming state. */
static const uint32_t libco_ppc_code [] = {
#if LIBCO_PPC64
	0x7d000026, /* mfcr    r8 */
	0xf8240028, /* std     r1,40(r4) */
	0x7d2802a6, /* mflr    r9 */
	0xf9c40048, /* std     r14,72(r4) */
	0xf9e40050, /* std     r15,80(r4) */
	0xfa040058, /* std     r16,88(r4) */
	0xfa240060, /* std     r17,96(r4) */
	0xfa440068, /* std     r18,104(r4) */
	0xfa640070, /* std     r19,112(r4) */
	0xfa840078, /* std     r20,120(r4) */
	0xfaa40080, /* std     r21,128(r4) */
	0xfac40088, /* std     r22,136(r4) */
	0xfae40090, /* std     r23,144(r4) */
	0xfb040098, /* std     r24,152(r4) */
	0xfb2400a0, /* std     r25,160(r4) */
	0xfb4400a8, /* std     r26,168(r4) */
	0xfb6400b0, /* std     r27,176(r4) */
	0xfb8400b8, /* std     r28,184(r4) */
	0xfba400c0, /* std     r29,192(r4) */
	0xfbc400c8, /* std     r30,200(r4) */
	0xfbe400d0, /* std     r31,208(r4) */
	0xf9240020, /* std     r9,32(r4) */
	0xe8e30020, /* ld      r7,32(r3) */
	0xe8230028, /* ld      r1,40(r3) */
	0x48000009, /* bl      1 */
	0x7fe00008, /* trap */
	0x91040030,/*1:stw     r8,48(r4) */
	0x80c30030, /* lwz     r6,48(r3) */
	0x7ce903a6, /* mtctr   r7 */
	0xe9c30048, /* ld      r14,72(r3) */
	0xe9e30050, /* ld      r15,80(r3) */
	0xea030058, /* ld      r16,88(r3) */
	0xea230060, /* ld      r17,96(r3) */
	0xea430068, /* ld      r18,104(r3) */
	0xea630070, /* ld      r19,112(r3) */
	0xea830078, /* ld      r20,120(r3) */
	0xeaa30080, /* ld      r21,128(r3) */
	0xeac30088, /* ld      r22,136(r3) */
	0xeae30090, /* ld      r23,144(r3) */
	0xeb030098, /* ld      r24,152(r3) */
	0xeb2300a0, /* ld      r25,160(r3) */
	0xeb4300a8, /* ld      r26,168(r3) */
	0xeb6300b0, /* ld      r27,176(r3) */
	0xeb8300b8, /* ld      r28,184(r3) */
	0xeba300c0, /* ld      r29,192(r3) */
	0xebc300c8, /* ld      r30,200(r3) */
	0xebe300d0, /* ld      r31,208(r3) */
	0x7ccff120, /* mtcr    r6 */
#else
	0x7d000026, /* mfcr    r8 */
	0x90240028, /* stw     r1,40(r4) */
	0x7d2802a6, /* mflr    r9 */
	0x91a4003c, /* stw     r13,60(r4) */
	0x91c40040, /* stw     r14,64(r4) */
	0x91e40044, /* stw     r15,68(r4) */
	0x92040048, /* stw     r16,72(r4) */
	0x9224004c, /* stw     r17,76(r4) */
	0x92440050, /* stw     r18,80(r4) */
	0x92640054, /* stw     r19,84(r4) */
	0x92840058, /* stw     r20,88(r4) */
	0x92a4005c, /* stw     r21,92(r4) */
	0x92c40060, /* stw     r22,96(r4) */
	0x92e40064, /* stw     r23,100(r4) */
	0x93040068, /* stw     r24,104(r4) */
	0x9324006c, /* stw     r25,108(r4) */
	0x93440070, /* stw     r26,112(r4) */
	0x93640074, /* stw     r27,116(r4) */
	0x93840078, /* stw     r28,120(r4) */
	0x93a4007c, /* stw     r29,124(r4) */
	0x93c40080, /* stw     r30,128(r4) */
	0x93e40084, /* stw     r31,132(r4) */
	0x91240020, /* stw     r9,32(r4) */
	0x80e30020, /* lwz     r7,32(r3) */
	0x80230028, /* lwz     r1,40(r3) */
	0x48000009, /* bl      1 */
	0x7fe00008, /* trap */
	0x91040030,/*1:stw     r8,48(r4) */
	0x80c30030, /* lwz     r6,48(r3) */
	0x7ce903a6, /* mtctr   r7 */
	0x81a3003c, /* lwz     r13,60(r3) */
	0x81c30040, /* lwz     r14,64(r3) */
	0x81e30044, /* lwz     r15,68(r3) */
	0x82030048, /* lwz     r16,72(r3) */
	0x8223004c, /* lwz     r17,76(r3) */
	0x82430050, /* lwz     r18,80(r3) */
	0x82630054, /* lwz     r19,84(r3) */
	0x82830058, /* lwz     r20,88(r3) */
	0x82a3005c, /* lwz     r21,92(r3) */
	0x82c30060, /* lwz     r22,96(r3) */
	0x82e30064, /* lwz     r23,100(r3) */
	0x83030068, /* lwz     r24,104(r3) */
	0x8323006c, /* lwz     r25,108(r3) */
	0x83430070, /* lwz     r26,112(r3) */
	0x83630074, /* lwz     r27,116(r3) */
	0x83830078, /* lwz     r28,120(r3) */
	0x83a3007c, /* lwz     r29,124(r3) */
	0x83c30080, /* lwz     r30,128(r3) */
	0x83e30084, /* lwz     r31,132(r3) */
	0x7ccff120, /* mtcr    r6 */
#endif

#ifndef LIBCO_PPC_NOFP
	0xd9c400e0, /* stfd    f14,224(r4) */
	0xd9e400e8, /* stfd    f15,232(r4) */
	0xda0400f0, /* stfd    f16,240(r4) */
	0xda2400f8, /* stfd    f17,248(r4) */
	0xda440100, /* stfd    f18,256(r4) */
	0xda640108, /* stfd    f19,264(r4) */
	0xda840110, /* stfd    f20,272(r4) */
	0xdaa40118, /* stfd    f21,280(r4) */
	0xdac40120, /* stfd    f22,288(r4) */
	0xdae40128, /* stfd    f23,296(r4) */
	0xdb040130, /* stfd    f24,304(r4) */
	0xdb240138, /* stfd    f25,312(r4) */
	0xdb440140, /* stfd    f26,320(r4) */
	0xdb640148, /* stfd    f27,328(r4) */
	0xdb840150, /* stfd    f28,336(r4) */
	0xdba40158, /* stfd    f29,344(r4) */
	0xdbc40160, /* stfd    f30,352(r4) */
	0xdbe40168, /* stfd    f31,360(r4) */
	0xc9c300e0, /* lfd     f14,224(r3) */
	0xc9e300e8, /* lfd     f15,232(r3) */
	0xca0300f0, /* lfd     f16,240(r3) */
	0xca2300f8, /* lfd     f17,248(r3) */
	0xca430100, /* lfd     f18,256(r3) */
	0xca630108, /* lfd     f19,264(r3) */
	0xca830110, /* lfd     f20,272(r3) */
	0xcaa30118, /* lfd     f21,280(r3) */
	0xcac30120, /* lfd     f22,288(r3) */
	0xcae30128, /* lfd     f23,296(r3) */
	0xcb030130, /* lfd     f24,304(r3) */
	0xcb230138, /* lfd     f25,312(r3) */
	0xcb430140, /* lfd     f26,320(r3) */
	0xcb630148, /* lfd     f27,328(r3) */
	0xcb830150, /* lfd     f28,336(r3) */
	0xcba30158, /* lfd     f29,344(r3) */
	0xcbc30160, /* lfd     f30,352(r3) */
	0xcbe30168, /* lfd     f31,360(r3) */
#endif

#ifdef __ALTIVEC__
	0x7ca042a6, /* mfvrsave r5 */
	0x39040180, /* addi    r8,r4,384 */
	0x39240190, /* addi    r9,r4,400 */
	0x70a00fff, /* andi.   r0,r5,4095 */
	0x90a40034, /* stw     r5,52(r4) */
	0x4182005c, /* beq-    2 */
	0x7e8041ce, /* stvx    v20,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7ea049ce, /* stvx    v21,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7ec041ce, /* stvx    v22,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7ee049ce, /* stvx    v23,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f0041ce, /* stvx    v24,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7f2049ce, /* stvx    v25,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f4041ce, /* stvx    v26,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7f6049ce, /* stvx    v27,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f8041ce, /* stvx    v28,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7fa049ce, /* stvx    v29,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7fc041ce, /* stvx    v30,r0,r8 */
	0x7fe049ce, /* stvx    v31,r0,r9 */
	0x80a30034,/*2:lwz     r5,52(r3) */
	0x39030180, /* addi    r8,r3,384 */
	0x39230190, /* addi    r9,r3,400 */
	0x70a00fff, /* andi.   r0,r5,4095 */
	0x7ca043a6, /* mtvrsave r5 */
	0x4d820420, /* beqctr   */
	0x7e8040ce, /* lvx     v20,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7ea048ce, /* lvx     v21,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7ec040ce, /* lvx     v22,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7ee048ce, /* lvx     v23,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f0040ce, /* lvx     v24,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7f2048ce, /* lvx     v25,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f4040ce, /* lvx     v26,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7f6048ce, /* lvx     v27,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7f8040ce, /* lvx     v28,r0,r8 */
	0x39080020, /* addi    r8,r8,32 */
	0x7fa048ce, /* lvx     v29,r0,r9 */
	0x39290020, /* addi    r9,r9,32 */
	0x7fc040ce, /* lvx     v30,r0,r8 */
	0x7fe048ce, /* lvx     v31,r0,r9 */
#endif

	0x4e800420, /* bctr */
};
 | 
						|
 | 
						|
	/* CO_SWAP_ASM( x, y ) runs the embedded swap code with x as its first
	argument and y as its second. Note that the descriptor variant expands
	x twice. */
	#if LIBCO_PPCDESC
		/* Function call goes through indirect descriptor */
		#define CO_SWAP_ASM( x, y ) \
			((void (*)( cothread_t, cothread_t )) (uintptr_t) (x))( x, y )
	#else
		/* Function call goes directly to code */
		#define CO_SWAP_ASM( x, y ) \
			((void (*)( cothread_t, cothread_t )) (uintptr_t) libco_ppc_code)( x, y )
	#endif
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
/* Allocates a raw thread block of `size` bytes.

When calls go through function descriptors (LIBCO_PPCDESC), the first three
pointer-sized words of the block are filled with a copy of the descriptor
that `entry` points to, and the first word is then replaced with the address
of the swap code. Otherwise `entry` is not used here and the block is left
uninitialized.

Returns the block, or NULL if malloc fails. The caller owns the block. */
static uint32_t* co_create_( unsigned size, uintptr_t entry )
{
	uint32_t* t = (uint32_t*) malloc( size );
	
	/* Unused when descriptors are not in use */
	(void) entry;
	
	#if LIBCO_PPCDESC
		if ( t )
		{
			/* Copy entry's descriptor */
			memcpy( t, (void*) entry, sizeof (void*) * 3 );
			
			/* Set function pointer to swap routine */
			#ifdef LIBCO_PPC_ASM
				*(const void**) t = *(void**) &co_swap_asm;
			#else
				*(const void**) t = libco_ppc_code;
			#endif
		}
	#endif
	
	return t;
}
 | 
						|
 | 
						|
cothread_t co_create( unsigned int size, void (*entry_)( void ) )
 | 
						|
{
 | 
						|
	uintptr_t entry = (uintptr_t) entry_;
 | 
						|
	uint32_t* t = NULL;
 | 
						|
	
 | 
						|
	/* Be sure main thread was successfully allocated */
 | 
						|
	if ( co_active() )
 | 
						|
	{
 | 
						|
		size += state_size + above_stack + stack_align;
 | 
						|
		t = co_create_( size, entry );
 | 
						|
	}
 | 
						|
	
 | 
						|
	if ( t )
 | 
						|
	{
 | 
						|
		uintptr_t sp;
 | 
						|
		int shift;
 | 
						|
		
 | 
						|
		/* Save current registers into new thread, so that any special ones will
 | 
						|
		have proper values when thread is begun */
 | 
						|
		CO_SWAP_ASM( t, t );
 | 
						|
		
 | 
						|
		#if LIBCO_PPCDESC
 | 
						|
			/* Get real address */
 | 
						|
			entry = (uintptr_t) *(void**) entry;
 | 
						|
		#endif
 | 
						|
		
 | 
						|
		/* Put stack near end of block, and align */
 | 
						|
		sp = (uintptr_t) t + size - above_stack;
 | 
						|
		sp -= sp % stack_align;
 | 
						|
		
 | 
						|
		/* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we
 | 
						|
		save and restore them as 64 bits, regardless of the size the ABI
 | 
						|
		uses. So, we manually write pointers at the proper size. We always
 | 
						|
		save and restore at the same address, and since PPC is big-endian,
 | 
						|
		we must put the low byte first on PPC32. */
 | 
						|
		
 | 
						|
		/* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts
 | 
						|
		and don't have to care how many bits uintptr_t is. */
 | 
						|
		#if LIBCO_PPC64
 | 
						|
			shift = 16;
 | 
						|
		#else
 | 
						|
			shift = 0;
 | 
						|
		#endif
 | 
						|
		
 | 
						|
		/* Set up so entry will be called on next swap */
 | 
						|
		t [8] = (uint32_t) (entry >> shift >> shift);
 | 
						|
		t [9] = (uint32_t) entry;
 | 
						|
		
 | 
						|
		t [10] = (uint32_t) (sp >> shift >> shift); 
 | 
						|
		t [11] = (uint32_t) sp;
 | 
						|
	}
 | 
						|
	
 | 
						|
	return t;
 | 
						|
}
 | 
						|
 | 
						|
/* Releases the block of a cothread obtained from co_create(). Passing NULL
is harmless because free() ignores it. */
void co_delete( cothread_t t )
{
	free( t );
}
 | 
						|
 | 
						|
static void co_init_( void )
 | 
						|
{
 | 
						|
#if LIBCO_MPROTECT
 | 
						|
   /* TODO: pre- and post-pad PPC code so that this doesn't make other
 | 
						|
      data executable and writable */
 | 
						|
   long page_size = sysconf( _SC_PAGESIZE );
 | 
						|
   if ( page_size > 0 )
 | 
						|
   {
 | 
						|
      uintptr_t align = page_size;
 | 
						|
      uintptr_t begin = (uintptr_t) libco_ppc_code;
 | 
						|
      uintptr_t end   = begin + sizeof libco_ppc_code;
 | 
						|
 | 
						|
      /* Align beginning and end */
 | 
						|
      end   += align - 1;
 | 
						|
      end   -= end   % align;
 | 
						|
      begin -= begin % align;
 | 
						|
 | 
						|
      mprotect( (void*) begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC );
 | 
						|
   }
 | 
						|
#endif
 | 
						|
 | 
						|
   co_active_handle = co_create_( state_size, (uintptr_t) &co_switch );
 | 
						|
}
 | 
						|
 | 
						|
cothread_t co_active(void)
 | 
						|
{
 | 
						|
   if (!co_active_handle)
 | 
						|
      co_init_();
 | 
						|
 | 
						|
   return co_active_handle;
 | 
						|
}
 | 
						|
 | 
						|
/* Switches execution to cothread `t`. The active handle is updated first,
then the swap code saves the current context into the previously active
thread's block and loads the context stored in `t`. */
void co_switch( cothread_t t )
{
	cothread_t old = co_active_handle;
	co_active_handle = t;
	
	CO_SWAP_ASM( t, old );
}
 |