From 15d98ead96e9c6453ff04515c748186bfbeb1bf1 Mon Sep 17 00:00:00 2001 From: David Rose Date: Tue, 27 Apr 2010 21:30:08 +0000 Subject: [PATCH] experiment with WIN_THREAD_CONTEXT --- panda/src/pipeline/Sources.pp | 2 + panda/src/pipeline/contextSwitch.c | 426 +----------------- panda/src/pipeline/contextSwitch.h | 35 +- .../src/pipeline/contextSwitch_longjmp_src.c | 308 +++++++++++++ .../src/pipeline/contextSwitch_ucontext_src.c | 141 ++++++ .../src/pipeline/contextSwitch_windows_src.c | 153 +++++++ panda/src/pipeline/threadSimpleImpl.I | 18 + panda/src/pipeline/threadSimpleImpl.cxx | 25 +- panda/src/pipeline/threadSimpleImpl.h | 16 + panda/src/pipeline/threadSimpleManager.I | 12 +- panda/src/pipeline/threadSimpleManager.cxx | 15 +- panda/src/pipeline/threadSimpleManager.h | 18 +- 12 files changed, 711 insertions(+), 458 deletions(-) create mode 100755 panda/src/pipeline/contextSwitch_longjmp_src.c create mode 100755 panda/src/pipeline/contextSwitch_ucontext_src.c create mode 100755 panda/src/pipeline/contextSwitch_windows_src.c diff --git a/panda/src/pipeline/Sources.pp b/panda/src/pipeline/Sources.pp index 09ae37c91c..67ab20a59a 100644 --- a/panda/src/pipeline/Sources.pp +++ b/panda/src/pipeline/Sources.pp @@ -85,6 +85,8 @@ conditionVarSimpleImpl.cxx \ conditionVarSpinlockImpl.cxx \ config_pipeline.cxx \ + contextSwitch_longjmp_src.c contextSwitch_ucontext_src.c \ + contextSwitch_windows_src.c \ cycleData.cxx \ cycleDataLockedReader.cxx \ cycleDataLockedStageReader.cxx \ diff --git a/panda/src/pipeline/contextSwitch.c b/panda/src/pipeline/contextSwitch.c index 570bf276e1..548f95a102 100644 --- a/panda/src/pipeline/contextSwitch.c +++ b/panda/src/pipeline/contextSwitch.c @@ -17,418 +17,26 @@ #include #include -#ifdef THREAD_SIMPLE_IMPL +#if defined(THREAD_SIMPLE_IMPL) && !defined(CPPPARSER) -#if defined(PHAVE_UCONTEXT_H) +#ifdef WIN32 +/* Define this macro to use native Windows threading constructs to + switch contexts. 
*/ +#define WIN_THREAD_CONTEXT +#endif -#else /* PHAVE_UCONTEXT_H */ +#if defined(WIN_THREAD_CONTEXT) + +#include "contextSwitch_windows_src.c" + +#elif defined(PHAVE_UCONTEXT_H) + +#include "contextSwitch_ucontext_src.c" + +#else + +#include "contextSwitch_longjmp_src.c" #endif /* PHAVE_UCONTEXT_H */ -#if defined(PHAVE_UCONTEXT_H) - -/* We'd prefer to use getcontext() / setcontext() to portably change - execution contexts within C code. That's what these library - functions are designed for. */ -#ifdef __APPLE__ -#include -#else -#include -#endif - -struct ThreadContext { - ucontext_t _ucontext; -#ifdef __APPLE__ - // Due to a bug in OSX 10.5, the system ucontext_t declaration - // doesn't reserve enough space, and we need to reserve some - // additional space to make room. -#define EXTRA_PADDING_SIZE 4096 - char _extra_padding[EXTRA_PADDING_SIZE]; -#endif -}; - -static void -begin_context(ContextFunction *thread_func, void *data) { - (*thread_func)(data); -} - -void -init_thread_context(struct ThreadContext *context, - unsigned char *stack, size_t stack_size, - ContextFunction *thread_func, void *data) { - if (getcontext(&context->_ucontext) != 0) { - fprintf(stderr, "getcontext failed in init_thread_context!\n"); - // Too bad for you. - abort(); - } - - context->_ucontext.uc_stack.ss_sp = stack; - context->_ucontext.uc_stack.ss_size = stack_size; - context->_ucontext.uc_stack.ss_flags = 0; - context->_ucontext.uc_link = NULL; - - makecontext(&context->_ucontext, (void (*)())&begin_context, 2, thread_func, data); -} - -void -save_thread_context(struct ThreadContext *context, - ContextFunction *next_context, void *data) { - /* getcontext requires us to use a volatile auto variable to - differentiate between pass 1 (immediate return) and pass 2 - (return from setcontext). */ - volatile int context_return = 0; - - if (getcontext(&context->_ucontext) != 0) { - fprintf(stderr, "getcontext failed!\n"); - // Nothing to do here. 
- abort(); - } - - if (context_return) { - /* We have just returned from setcontext. In this case, return - from the function. The stack is still good. */ - return; - } - - context_return = 1; - - /* We are still in the calling thread. In this case, we cannot - return from the function without damaging the stack. Insted, - call next_context() and trust the caller to call - switch_to_thread_context() in there somewhere. */ - - (*next_context)(data); - - /* We shouldn't get here. */ - abort(); -} - -void -switch_to_thread_context(struct ThreadContext *context) { - setcontext(&context->_ucontext); - - /* Shouldn't get here. */ - abort(); -} - -#else - -/* Unfortunately, setcontext() is not defined everywhere (even though - it claims to be adopted by Posix). So we have to fall back to - setjmp() / longjmp() in its absence. This is a hackier solution. */ - -#if defined(_M_IX86) || defined(__i386__) -/* Maybe we can implement our own setjmp/longjmp in assembly code. - This will be safer than the system version, since who knows what - that one's really doing? */ - -typedef int cs_jmp_buf[33]; - -#define CS_JB_SP 4 - -#else - -/* Fall back to the system implmentation of setjmp/longjmp. */ -#include - -typedef jmp_buf cs_jmp_buf; -#define cs_setjmp setjmp -#define cs_longjmp(buf) longjmp(buf, 1) - -#ifdef JB_SP -#define CS_JB_SP JB_SP - -#elif defined(__ppc__) - /* This was determined experimentally through test_setjmp. */ -#define CS_JB_SP 0 - -#endif - -#endif /* __i386__ */ - -struct ThreadContext { - cs_jmp_buf _jmp_context; -}; - -/* The approach is: hack our way onto the new stack pointer right now, - then call setjmp() to record that stack pointer in the - _jmp_context. Then restore back to the original stack pointer. */ - -#if defined(_M_IX86) -/* Here is our own implementation of setjmp and longjmp for I386, via - Windows syntax. 
*/ - -/* warning C4731: frame pointer register 'ebp' modified by inline assembly code */ -#pragma warning(disable:4731) - -int -cs_setjmp(cs_jmp_buf env) { - __asm { - pop ebp; /* Restore the frame pointer that the compiler pushed */ - - pop edx; /* edx = return address */ - pop eax; /* eax = &env */ - push eax; /* keep &env on the stack; the caller will remove it */ - - mov [eax + 0], ebx; - mov [eax + 4], edi; - mov [eax + 8], esi; - mov [eax + 12], ebp; - mov [eax + 16], esp; - mov [eax + 20], edx; - - fnsave [eax + 24]; /* save floating-point state */ - - xor eax,eax; /* return 0: pass 1 return */ - jmp edx; /* this works like ret */ - } -} - -void -cs_longjmp(cs_jmp_buf env) { - _asm { - mov eax, env; - - mov ebx, [eax + 0]; - mov edi, [eax + 4]; - mov esi, [eax + 8]; - mov ebp, [eax + 12]; - mov esp, [eax + 16]; - mov edx, [eax + 20]; - - frstor [eax + 24]; /* restore floating-point state */ - - mov eax, 1; /* return 1 from setjmp: pass 2 return */ - jmp edx; /* return from above setjmp call */ - } -} - - -#elif defined(__i386__) -/* Here is our own implementation of setjmp and longjmp for I386, via - GNU syntax. */ - -#if defined(IS_LINUX) -/* On Linux, the leading underscores are not implicitly added for C - function names. 
*/ -#define cs_setjmp _cs_setjmp -#define cs_longjmp _cs_longjmp -#endif - -int cs_setjmp(cs_jmp_buf env); -void cs_longjmp(cs_jmp_buf env); - -__asm__ -("_cs_setjmp:\n" - "popl %edx\n" - "popl %eax\n" - "pushl %eax\n" - - "movl %ebx, 0(%eax)\n" - "movl %edi, 4(%eax)\n" - "movl %esi, 8(%eax)\n" - "movl %ebp, 12(%eax)\n" - "movl %esp, 16(%eax)\n" - "movl %edx, 20(%eax)\n" - - "fnsave 24(%eax)\n" - - "xorl %eax, %eax\n" - "jmp *%edx\n"); - -__asm__ -("_cs_longjmp:\n" - "popl %edx\n" - "popl %eax\n" - - "movl 0(%eax), %ebx\n" - "movl 4(%eax), %edi\n" - "movl 8(%eax), %esi\n" - "movl 12(%eax), %ebp\n" - "movl 16(%eax), %esp\n" - "movl 20(%eax), %edx\n" - - "frstor 24(%eax)\n" - - "mov $1,%eax\n" - "jmp *%edx\n"); - -#endif /* __i386__ */ - -/* Ideally, including setjmp.h would have defined JB_SP, which will - tell us where in the context structure we can muck with the stack - pointer. If it didn't define this symbol, we have to guess it. */ -#ifndef CS_JB_SP - -#if defined(IS_OSX) && defined(__i386__) -/* We have determined this value empirically, via test_setjmp.cxx in - this directory. */ -#define CS_JB_SP 9 - -#endif - -#endif /* CS_JB_SP */ - -static struct ThreadContext *st_context; -static unsigned char *st_stack; -static size_t st_stack_size; -static ContextFunction *st_thread_func; -static void *st_data; - -static cs_jmp_buf orig_stack; - -/* We can't declare this function static--gcc might want to inline it - in that case, and then the code crashes. I hope this doesn't mean - that the stack is still not getting restored correctly in the above - assembly code. */ -void -setup_context_2(void) { - /* Here we are running on the new stack. Copy the key data onto our - new stack. */ - ContextFunction *volatile thread_func = st_thread_func; - void *volatile data = st_data; - - if (cs_setjmp(st_context->_jmp_context) == 0) { - /* The _jmp_context is set up and ready to run. Now restore the - original stack and return. 
We can't simply return from this - function, since it might overwrite some of the stack data on - the way out. */ - cs_longjmp(orig_stack); - - /* Shouldn't get here. */ - abort(); - } - - /* We come here the first time the thread starts. */ - (*thread_func)(data); - - /* We shouldn't get here, since we don't expect the thread_func to - return. */ - abort(); -} - -static void -setup_context_1(void) { - /* Save the current stack frame so we can return to it (at the end - of setup_context_2()). */ - if (cs_setjmp(orig_stack) == 0) { - /* First, switch to the new stack. Save the current context using - setjmp(). This saves out all of the processor register values, - though it doesn't muck with the stack. */ - static cs_jmp_buf temp; - if (cs_setjmp(temp) == 0) { - /* This is the initial return from setjmp. Still the original - stack. */ - - /* Now we overwrite the stack pointer value in the saved - register context. This doesn't work with all implementations - of setjmp/longjmp. */ - - /* We give ourselves a small buffer of unused space at the top - of the stack, to allow for the stack frame and such that this - code might be assuming is there. */ - (*(void **)&temp[CS_JB_SP]) = (st_stack + st_stack_size - 0x100); - - /* And finally, we place ourselves on the new stack by using - longjmp() to reload the modified context. */ - cs_longjmp(temp); - - /* Shouldn't get here. */ - abort(); - } - - /* This is the second return from setjmp. Now we're on the new - stack. */ - setup_context_2(); - - /* Shouldn't get here. */ - abort(); - } - - /* By now we are back to the original stack. */ -} - -void -init_thread_context(struct ThreadContext *context, - unsigned char *stack, size_t stack_size, - ContextFunction *thread_func, void *data) { - /* Copy all of the input parameters to static variables, then begin - the stack-switching process. 
*/ - st_context = context; - st_stack = stack; - st_stack_size = stack_size; - st_thread_func = thread_func; - st_data = data; - - setup_context_1(); -} - -void -save_thread_context(struct ThreadContext *context, - ContextFunction *next_context, void *data) { - if (cs_setjmp(context->_jmp_context) != 0) { - /* We have just returned from longjmp. In this case, return from - the function. The stack is still good. */ - return; - } - - /* We are still in the calling thread. In this case, we cannot - return from the function without damaging the stack. Insted, - call next_context() and trust the caller to call - switch_to_thread_context() in there somewhere. */ - - (*next_context)(data); - - /* We shouldn't get here. */ - abort(); -} - -void -switch_to_thread_context(struct ThreadContext *context) { - cs_longjmp(context->_jmp_context); - - /* Shouldn't get here. */ - abort(); -} - -#endif /* PHAVE_UCONTEXT_H */ - -struct ThreadContext * -alloc_thread_context() { - struct ThreadContext *context = - (struct ThreadContext *)malloc(sizeof(struct ThreadContext)); - -#if defined(__APPLE__) && defined(_DEBUG) - { - int p; - // Pre-fill the extra_padding with bytes that we can recognize - // later. - for (p = 0; p < EXTRA_PADDING_SIZE; ++p) { - context->_extra_padding[p] = (p & 0xff); - } - } -#endif // __APPLE__ - - return context; -} - -void -free_thread_context(struct ThreadContext *context) { -#if defined(__APPLE__) && defined(_DEBUG) - { - // Because of the OSX 10.5 bug, we anticipate that the extra_padding - // may have been filled in with junk. Confirm this. 
- int p = EXTRA_PADDING_SIZE; - while (p > 0) { - --p; - if (context->_extra_padding[p] != (char)(p & 0xff)) { - fprintf(stderr, "Context was mangled at byte %d: %d!\n", p, context->_extra_padding[p]); - break; - } - } - } -#endif // __APPLE__ - free(context); -} - #endif /* THREAD_SIMPLE_IMPL */ diff --git a/panda/src/pipeline/contextSwitch.h b/panda/src/pipeline/contextSwitch.h index bacfaf4daf..5262a5f8a2 100644 --- a/panda/src/pipeline/contextSwitch.h +++ b/panda/src/pipeline/contextSwitch.h @@ -36,16 +36,21 @@ struct ThreadContext; extern "C" { #endif -typedef void ContextFunction(void *); +typedef void ContextFunction(struct ThreadContext *from_context, void *); +typedef void ThreadFunction(void *); -/* Call this to fill in the appropriate values in context. The stack - must already have been allocated. The context will be initialized - so that when switch_to_thread_context() is called, it will begin - executing thread_func(data), which should not return. This function - will return normally. */ +extern const int needs_stack_prealloc; + +/* Call this to fill in the appropriate values in context. If + needs_stack_prealloc (above) is true, the stack must already have + been allocated; if needs_stack_prealloc is false, the stack pointer + is not used and may be NULL. The context will be initialized so + that when switch_to_thread_context() is called, it will begin + executing thread_func(data), which should not return. This + function will return normally. */ void init_thread_context(struct ThreadContext *context, unsigned char *stack, size_t stack_size, - ContextFunction *thread_func, void *data); + ThreadFunction *thread_func, void *data); /* Call this to save the current thread context. This function does not return until switch_to_thread_context() is called. 
Instead it @@ -53,15 +58,19 @@ void init_thread_context(struct ThreadContext *context, void save_thread_context(struct ThreadContext *context, ContextFunction *next_context, void *data); -/* Call this to resume executing a previously saved context. When - called, it will return from save_thread_context() in the saved - stack (or begin executing thread_func()). */ -void switch_to_thread_context(struct ThreadContext *context); +/* Call this to resume executing a previously saved context. + from_context must be the currently-executing context, and + to_context is the context to resume. When called, it will return + from save_thread_context() in the saved stack (or begin executing + thread_func()). */ +void switch_to_thread_context(struct ThreadContext *from_context, + struct ThreadContext *to_context); /* Use this pair of functions to transparently allocate and destroy an opaque ThreadContext object of the appropriate size. These - functions only allocate memory; they do not initialize the values - of the context (see init_thread_context(), above, for that). */ + functions allocate memory, and initialize the context as + appropriate for the main thread. See init_thread_context() to finish + the initialization for a new thread. */ struct ThreadContext *alloc_thread_context(); void free_thread_context(struct ThreadContext *context); diff --git a/panda/src/pipeline/contextSwitch_longjmp_src.c b/panda/src/pipeline/contextSwitch_longjmp_src.c new file mode 100755 index 0000000000..3dd444b198 --- /dev/null +++ b/panda/src/pipeline/contextSwitch_longjmp_src.c @@ -0,0 +1,308 @@ +/* Filename: contextSwitch_longjmp_src.c + * Created by: drose (15Apr10) + * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. 
You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* This is the implementation of user-space context switching using + setjmp() / longjmp(). This is the hackier implementation, + which is necessary if setcontext() is not available. */ + +const int needs_stack_prealloc = 1; + +#if defined(_M_IX86) || defined(__i386__) +/* Maybe we can implement our own setjmp/longjmp in assembly code. + This will be safer than the system version, since who knows what + that one's really doing? */ + +typedef int cs_jmp_buf[33]; + +#define CS_JB_SP 4 + +#else + +/* Fall back to the system implementation of setjmp/longjmp. */ +#include + +typedef jmp_buf cs_jmp_buf; +#define cs_setjmp setjmp +#define cs_longjmp(buf) longjmp(buf, 1) + +#ifdef JB_SP +#define CS_JB_SP JB_SP + +#elif defined(__ppc__) + /* This was determined experimentally through test_setjmp. */ +#define CS_JB_SP 0 + +#endif + +#endif /* __i386__ */ + +struct ThreadContext { + cs_jmp_buf _jmp_context; +}; + +/* The approach is: hack our way onto the new stack pointer right now, + then call setjmp() to record that stack pointer in the + _jmp_context. Then restore back to the original stack pointer. */ + +#if defined(_M_IX86) +/* Here is our own implementation of setjmp and longjmp for I386, via + Windows syntax. 
*/ + +/* warning C4731: frame pointer register 'ebp' modified by inline assembly code */ +#pragma warning(disable:4731) + +int +cs_setjmp(cs_jmp_buf env) { + __asm { + pop ebp; /* Restore the frame pointer that the compiler pushed */ + + pop edx; /* edx = return address */ + pop eax; /* eax = &env */ + push eax; /* keep &env on the stack; the caller will remove it */ + + mov [eax + 0], ebx; + mov [eax + 4], edi; + mov [eax + 8], esi; + mov [eax + 12], ebp; + mov [eax + 16], esp; + mov [eax + 20], edx; + + fnsave [eax + 24]; /* save floating-point state */ + + xor eax,eax; /* return 0: pass 1 return */ + jmp edx; /* this works like ret */ + } +} + +void +cs_longjmp(cs_jmp_buf env) { + _asm { + mov eax, env; + + mov ebx, [eax + 0]; + mov edi, [eax + 4]; + mov esi, [eax + 8]; + mov ebp, [eax + 12]; + mov esp, [eax + 16]; + mov edx, [eax + 20]; + + frstor [eax + 24]; /* restore floating-point state */ + + mov eax, 1; /* return 1 from setjmp: pass 2 return */ + jmp edx; /* return from above setjmp call */ + } +} + + +#elif defined(__i386__) +/* Here is our own implementation of setjmp and longjmp for I386, via + GNU syntax. */ + +#if defined(IS_LINUX) +/* On Linux, the leading underscores are not implicitly added for C + function names. 
*/ +#define cs_setjmp _cs_setjmp +#define cs_longjmp _cs_longjmp +#endif + +int cs_setjmp(cs_jmp_buf env); +void cs_longjmp(cs_jmp_buf env); + +__asm__ +("_cs_setjmp:\n" + "popl %edx\n" + "popl %eax\n" + "pushl %eax\n" + + "movl %ebx, 0(%eax)\n" + "movl %edi, 4(%eax)\n" + "movl %esi, 8(%eax)\n" + "movl %ebp, 12(%eax)\n" + "movl %esp, 16(%eax)\n" + "movl %edx, 20(%eax)\n" + + "fnsave 24(%eax)\n" + + "xorl %eax, %eax\n" + "jmp *%edx\n"); + +__asm__ +("_cs_longjmp:\n" + "popl %edx\n" + "popl %eax\n" + + "movl 0(%eax), %ebx\n" + "movl 4(%eax), %edi\n" + "movl 8(%eax), %esi\n" + "movl 12(%eax), %ebp\n" + "movl 16(%eax), %esp\n" + "movl 20(%eax), %edx\n" + + "frstor 24(%eax)\n" + + "mov $1,%eax\n" + "jmp *%edx\n"); + +#endif /* __i386__ */ + +/* Ideally, including setjmp.h would have defined JB_SP, which will + tell us where in the context structure we can muck with the stack + pointer. If it didn't define this symbol, we have to guess it. */ +#ifndef CS_JB_SP + +#if defined(IS_OSX) && defined(__i386__) +/* We have determined this value empirically, via test_setjmp.cxx in + this directory. */ +#define CS_JB_SP 9 + +#endif + +#endif /* CS_JB_SP */ + +static struct ThreadContext *st_context; +static unsigned char *st_stack; +static size_t st_stack_size; +static ThreadFunction *st_thread_func; +static void *st_data; + +static cs_jmp_buf orig_stack; + +/* We can't declare this function static--gcc might want to inline it + in that case, and then the code crashes. I hope this doesn't mean + that the stack is still not getting restored correctly in the above + assembly code. */ +void +setup_context_2(void) { + /* Here we are running on the new stack. Copy the key data onto our + new stack. */ + ThreadFunction *volatile thread_func = st_thread_func; + void *volatile data = st_data; + + if (cs_setjmp(st_context->_jmp_context) == 0) { + /* The _jmp_context is set up and ready to run. Now restore the + original stack and return. 
We can't simply return from this + function, since it might overwrite some of the stack data on + the way out. */ + cs_longjmp(orig_stack); + + /* Shouldn't get here. */ + abort(); + } + + /* We come here the first time the thread starts. */ + (*thread_func)(data); + + /* We shouldn't get here, since we don't expect the thread_func to + return. */ + abort(); +} + +static void +setup_context_1(void) { + /* Save the current stack frame so we can return to it (at the end + of setup_context_2()). */ + if (cs_setjmp(orig_stack) == 0) { + /* First, switch to the new stack. Save the current context using + setjmp(). This saves out all of the processor register values, + though it doesn't muck with the stack. */ + static cs_jmp_buf temp; + if (cs_setjmp(temp) == 0) { + /* This is the initial return from setjmp. Still the original + stack. */ + + /* Now we overwrite the stack pointer value in the saved + register context. This doesn't work with all implementations + of setjmp/longjmp. */ + + /* We give ourselves a small buffer of unused space at the top + of the stack, to allow for the stack frame and such that this + code might be assuming is there. */ + (*(void **)&temp[CS_JB_SP]) = (st_stack + st_stack_size - 0x100); + + /* And finally, we place ourselves on the new stack by using + longjmp() to reload the modified context. */ + cs_longjmp(temp); + + /* Shouldn't get here. */ + abort(); + } + + /* This is the second return from setjmp. Now we're on the new + stack. */ + setup_context_2(); + + /* Shouldn't get here. */ + abort(); + } + + /* By now we are back to the original stack. */ +} + +void +init_thread_context(struct ThreadContext *context, + unsigned char *stack, size_t stack_size, + ThreadFunction *thread_func, void *data) { + /* Copy all of the input parameters to static variables, then begin + the stack-switching process. 
*/ + st_context = context; + st_stack = stack; + st_stack_size = stack_size; + st_thread_func = thread_func; + st_data = data; + + setup_context_1(); +} + +void +save_thread_context(struct ThreadContext *context, + ContextFunction *next_context, void *data) { + if (cs_setjmp(context->_jmp_context) != 0) { + /* We have just returned from longjmp. In this case, return from + the function. The stack is still good. */ + return; + } + + /* We are still in the calling thread. In this case, we cannot + return from the function without damaging the stack. Instead, + call next_context() and trust the caller to call + switch_to_thread_context() in there somewhere. */ + + (*next_context)(context, data); + + /* We shouldn't get here. */ + abort(); +} + +void +switch_to_thread_context(struct ThreadContext *from_context, + struct ThreadContext *to_context) { + cs_longjmp(to_context->_jmp_context); + + /* Shouldn't get here. */ + abort(); +} + +struct ThreadContext * +alloc_thread_context() { + struct ThreadContext *context = + (struct ThreadContext *)malloc(sizeof(struct ThreadContext)); + memset(context, 0, sizeof(struct ThreadContext)); + + return context; +} + +void +free_thread_context(struct ThreadContext *context) { + free(context); +} diff --git a/panda/src/pipeline/contextSwitch_ucontext_src.c b/panda/src/pipeline/contextSwitch_ucontext_src.c new file mode 100755 index 0000000000..fdbfe84436 --- /dev/null +++ b/panda/src/pipeline/contextSwitch_ucontext_src.c @@ -0,0 +1,141 @@ +/* Filename: contextSwitch_ucontext_src.c + * Created by: drose (15Apr10) + * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." 
+ * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* This is the implementation of user-space context switching using + getcontext() / setcontext(). This is the preferred implementation, + if these library functions are available; that's what they are + designed for. */ + +#ifdef __APPLE__ +#include +#else +#include +#endif + +const int needs_stack_prealloc = 1; + +struct ThreadContext { + ucontext_t _ucontext; +#ifdef __APPLE__ + // Due to a bug in OSX 10.5, the system ucontext_t declaration + // doesn't reserve enough space, and we need to reserve some + // additional space to make room. +#define EXTRA_PADDING_SIZE 4096 + char _extra_padding[EXTRA_PADDING_SIZE]; +#endif +}; + +static void +begin_context(ThreadFunction *thread_func, void *data) { + (*thread_func)(data); +} + +void +init_thread_context(struct ThreadContext *context, + unsigned char *stack, size_t stack_size, + ThreadFunction *thread_func, void *data) { + if (getcontext(&context->_ucontext) != 0) { + fprintf(stderr, "getcontext failed in init_thread_context!\n"); + // Too bad for you. + abort(); + } + + context->_ucontext.uc_stack.ss_sp = stack; + context->_ucontext.uc_stack.ss_size = stack_size; + context->_ucontext.uc_stack.ss_flags = 0; + context->_ucontext.uc_link = NULL; + + makecontext(&context->_ucontext, (void (*)())&begin_context, 2, thread_func, data); +} + +void +save_thread_context(struct ThreadContext *context, + ContextFunction *next_context, void *data) { + /* getcontext requires us to use a volatile auto variable to + differentiate between pass 1 (immediate return) and pass 2 + (return from setcontext). */ + volatile int context_return = 0; + + if (getcontext(&context->_ucontext) != 0) { + fprintf(stderr, "getcontext failed!\n"); + // Nothing to do here. + abort(); + } + + if (context_return) { + /* We have just returned from setcontext. In this case, return + from the function. The stack is still good. 
*/ + return; + } + + context_return = 1; + + /* We are still in the calling thread. In this case, we cannot + return from the function without damaging the stack. Instead, + call next_context() and trust the caller to call + switch_to_thread_context() in there somewhere. */ + + (*next_context)(context, data); + + /* We shouldn't get here. */ + abort(); +} + +void +switch_to_thread_context(struct ThreadContext *from_context, + struct ThreadContext *to_context) { + setcontext(&to_context->_ucontext); + + /* Shouldn't get here. */ + abort(); +} + +struct ThreadContext * +alloc_thread_context() { + struct ThreadContext *context = + (struct ThreadContext *)malloc(sizeof(struct ThreadContext)); + memset(context, 0, sizeof(struct ThreadContext)); + +#if defined(__APPLE__) && defined(_DEBUG) + { + int p; + // Pre-fill the extra_padding with bytes that we can recognize + // later. + for (p = 0; p < EXTRA_PADDING_SIZE; ++p) { + context->_extra_padding[p] = (p & 0xff); + } + } +#endif // __APPLE__ + + return context; +} + +void +free_thread_context(struct ThreadContext *context) { +#if defined(__APPLE__) && defined(_DEBUG) + { + // Because of the OSX 10.5 bug, we anticipate that the extra_padding + // may have been filled in with junk. Confirm this. + int p = EXTRA_PADDING_SIZE; + while (p > 0) { + --p; + if (context->_extra_padding[p] != (char)(p & 0xff)) { + fprintf(stderr, "Context was mangled at byte %d: %d!\n", p, context->_extra_padding[p]); + break; + } + } + } +#endif // __APPLE__ + free(context); +} diff --git a/panda/src/pipeline/contextSwitch_windows_src.c b/panda/src/pipeline/contextSwitch_windows_src.c new file mode 100755 index 0000000000..c38a543461 --- /dev/null +++ b/panda/src/pipeline/contextSwitch_windows_src.c @@ -0,0 +1,153 @@ +/* Filename: contextSwitch_windows_src.c + * Created by: drose (15Apr10) + * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. 
All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license. You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/* This is the implementation of user-space context switching using
+   native Windows threading constructs to manage the different
+   execution contexts. This isn't strictly user-space, since every
+   context is backed by a real OS thread, but control is handed from
+   one thread to the next through per-context events, so that only one
+   thread at a time is meant to be active. Thus, we still don't have
+   to defend the code against critical sections globally, so we still
+   get the low-overhead benefit of SIMPLE_THREADS; this is just a
+   simple, reliable way to manage context switches. */
+
+#include <windows.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* The Windows implementation doesn't use the stack pointer. */
+const int needs_stack_prealloc = 0;
+
+/* The context most recently passed to save_thread_context(); used
+   only to sanity-check switch_to_thread_context(). */
+static struct ThreadContext *current_context = NULL;
+
+struct ThreadContext {
+  /* Each context is really its own thread. This stays NULL for a
+     context that was never passed to init_thread_context(). */
+  HANDLE _thread;
+
+  /* This event is in the signaled state when the thread is ready to
+     roll. */
+  HANDLE _ready;
+
+  /* This is FALSE while the thread is alive, and TRUE if the thread
+     is to be terminated when it next wakes up. */
+  int _terminated;
+
+  /* The startup parameters, stored by init_thread_context() and read
+     by thread_main() when the thread first runs. */
+  ThreadFunction *_thread_func;
+  void *_data;
+
+  /* We use setjmp()/longjmp() to manage the detail of returning from
+     save_thread_context() when we call switch_to_thread_context(). */
+  jmp_buf _jmp_context;
+};
+
+/* Entry point of the OS thread behind a context. Blocks until the
+   context's _ready event is signaled, then either exits (when the
+   context has been marked terminated) or runs the startup function. */
+static DWORD WINAPI
+thread_main(LPVOID data) {
+  struct ThreadContext *context = (struct ThreadContext *)data;
+
+  /* Wait for the thread to be awoken. */
+  WaitForSingleObject(context->_ready, INFINITE);
+
+  if (context->_terminated) {
+    /* We've been rudely terminated. Exit gracefully. */
+    ExitThread(1);
+  }
+
+  /* Now we can begin. */
+  (*context->_thread_func)(context->_data);
+
+  return 0;
+}
+
+/* Prepares the context to run thread_func(data) on a new OS thread.
+   The thread is created immediately but stays blocked on the
+   context's _ready event until something switches to it. The stack
+   pointer is unused by this implementation; stack_size is handed to
+   CreateThread(). Aborts if the thread cannot be created. */
+void
+init_thread_context(struct ThreadContext *context,
+                    unsigned char *stack, size_t stack_size,
+                    ThreadFunction *thread_func, void *data) {
+  (void)stack;
+
+  context->_thread_func = thread_func;
+  context->_data = data;
+
+  context->_thread = CreateThread(NULL, stack_size,
+                                  thread_main, context, 0, NULL);
+  if (context->_thread == NULL) {
+    /* With no thread behind it, a switch to this context would leave
+       the caller waiting forever. Fail loudly instead. */
+    fprintf(stderr, "CreateThread failed in init_thread_context!\n");
+    abort();
+  }
+}
+
+/* Records a resume point in the context, then calls
+   next_context(context, data), which is expected to switch away and
+   never return. This function returns only later, when
+   switch_to_thread_context() longjmps back to the resume point. */
+void
+save_thread_context(struct ThreadContext *context,
+                    ContextFunction *next_context, void *data) {
+  /* Save the current context so we can return here when the thread is
+     awoken. */
+  if (setjmp(context->_jmp_context) != 0) {
+    /* We have just returned from longjmp. In this case, return from
+       the function. */
+    return;
+  }
+
+  current_context = context;
+  (*next_context)(context, data);
+
+  /* Should not get here. */
+  assert(FALSE);
+  abort();
+}
+
+/* Wakes the thread behind to_context and puts the calling thread
+   (which must own from_context) to sleep. When the caller is woken
+   again it resumes at the point saved by save_thread_context(), so
+   this function never returns normally. */
+void
+switch_to_thread_context(struct ThreadContext *from_context,
+                         struct ThreadContext *to_context) {
+  /* Pause the current thread, and switch to the indicated context.
+     This function should not return. */
+  assert(from_context == current_context);
+
+  /* Wake up the target thread. */
+  SetEvent(to_context->_ready);
+
+  /* And now put the from thread to sleep until it is again awoken. */
+  WaitForSingleObject(from_context->_ready, INFINITE);
+
+  if (from_context->_terminated) {
+    /* We've been rudely terminated. Exit gracefully. */
+    ExitThread(1);
+  }
+
+  /* Now we have been signaled again, and we're ready to resume the
+     thread. */
+  longjmp(from_context->_jmp_context, 1);
+
+  /* Should not get here. */
+  assert(FALSE);
+  abort();
+}
+
+/* Allocates a zero-filled context together with its auto-reset,
+   initially unsignaled _ready event. Never returns NULL: aborts if
+   either the memory or the event cannot be obtained. */
+struct ThreadContext *
+alloc_thread_context() {
+  struct ThreadContext *context =
+    (struct ThreadContext *)calloc(1, sizeof(struct ThreadContext));
+  if (context == NULL) {
+    fprintf(stderr, "Out of memory in alloc_thread_context!\n");
+    abort();
+  }
+
+  context->_ready = CreateEvent(NULL, FALSE, FALSE, NULL);
+  if (context->_ready == NULL) {
+    /* Waits on a NULL event fail immediately, which would let this
+       context's thread run without any synchronization. */
+    fprintf(stderr, "CreateEvent failed in alloc_thread_context!\n");
+    abort();
+  }
+
+  return context;
+}
+
+/* Releases a context obtained from alloc_thread_context(). If a
+   thread was created for it, that thread is asked to terminate and is
+   joined before its handle is closed. Passing NULL is a no-op. */
+void
+free_thread_context(struct ThreadContext *context) {
+  if (context == NULL) {
+    return;
+  }
+
+  if (context->_thread != NULL) {
+    /* Make sure the thread wakes and exits gracefully. */
+    context->_terminated = TRUE;
+    SetEvent(context->_ready);
+    WaitForSingleObject(context->_thread, INFINITE);
+    CloseHandle(context->_thread);
+  }
+
+  if (context->_ready != NULL) {
+    CloseHandle(context->_ready);
+  }
+
+  free(context);
+}
diff --git a/panda/src/pipeline/threadSimpleImpl.I b/panda/src/pipeline/threadSimpleImpl.I index e0421dbc98..3bedc3d74c 100644 --- a/panda/src/pipeline/threadSimpleImpl.I +++ b/panda/src/pipeline/threadSimpleImpl.I @@ -23,6 +23,24 @@ get_current_thread() { return ThreadSimpleManager::get_global_ptr()->get_current_thread()->_parent_obj; } +//////////////////////////////////////////////////////////////////// +// Function: ThreadSimpleImpl::is_same_system_thread +// Access: Public +// Description: Returns true if we are still running within the same +// OS-level thread that this thread begin in, or false +// if this appears to be running in a different thread. +//////////////////////////////////////////////////////////////////// +INLINE bool ThreadSimpleImpl:: +is_same_system_thread() const { +#ifdef HAVE_POSIX_THREADS + return pthread_equal(_posix_system_thread_id, pthread_self()); +#endif +#ifdef WIN32 + return (_win32_system_thread_id == GetCurrentThreadId()); +#endif + return true; +} + //////////////////////////////////////////////////////////////////// // Function: ThreadSimpleImpl::bind_thread // Access: Public, Static diff --git a/panda/src/pipeline/threadSimpleImpl.cxx b/panda/src/pipeline/threadSimpleImpl.cxx index 2000f69f5d..2496160bd9 100644 --- a/panda/src/pipeline/threadSimpleImpl.cxx +++ b/panda/src/pipeline/threadSimpleImpl.cxx @@ -51,6 +51,13 @@ ThreadSimpleImpl(Thread *parent_obj) : // Save this pointer for convenience.
_manager = ThreadSimpleManager::get_global_ptr(); + +#ifdef HAVE_POSIX_THREADS + _posix_system_thread_id = -1; +#endif +#ifdef WIN32 + _win32_system_thread_id = 0; +#endif } //////////////////////////////////////////////////////////////////// @@ -87,6 +94,13 @@ setup_main_thread() { _priority = TP_normal; _priority_weight = _manager->_simple_thread_normal_weight; +#ifdef HAVE_POSIX_THREADS + _posix_system_thread_id = pthread_self(); +#endif +#ifdef WIN32 + _win32_system_thread_id = GetCurrentThreadId(); +#endif + _manager->set_current_thread(this); } @@ -105,7 +119,9 @@ start(ThreadPriority priority, bool joinable) { nassertr(_stack == NULL, false); _stack_size = memory_hook->round_up_to_page_size((size_t)thread_stack_size); - _stack = (unsigned char *)memory_hook->mmap_alloc(_stack_size, true); + if (needs_stack_prealloc) { + _stack = (unsigned char *)memory_hook->mmap_alloc(_stack_size, true); + } _joinable = joinable; _status = TS_running; @@ -254,6 +270,13 @@ begin_thread() { PyThreadState_Swap(_python_state); #endif // HAVE_PYTHON +#ifdef HAVE_POSIX_THREADS + _posix_system_thread_id = pthread_self(); +#endif +#ifdef WIN32 + _win32_system_thread_id = GetCurrentThreadId(); +#endif + // Here we are executing within the thread. Run the thread_main // function defined for this thread. 
_parent_obj->thread_main(); diff --git a/panda/src/pipeline/threadSimpleImpl.h b/panda/src/pipeline/threadSimpleImpl.h index 051a086e31..1f0203d99e 100644 --- a/panda/src/pipeline/threadSimpleImpl.h +++ b/panda/src/pipeline/threadSimpleImpl.h @@ -73,6 +73,8 @@ public: static void prepare_for_exit(); INLINE static Thread *get_current_thread(); + INLINE bool is_same_system_thread() const; + INLINE static void bind_thread(Thread *thread); INLINE static bool is_threading_supported(); INLINE static bool is_true_threads(); @@ -142,6 +144,20 @@ private: ThreadSimpleManager *_manager; static ThreadSimpleImpl *volatile _st_this; + // We may not mix-and-match OS threads with Panda's SIMPLE_THREADS. + // If we ever get a Panda context switch request from a different OS + // thread than the thread we think we should be in, that's a serious + // error that may cause major consequences. For this reason, we + // store the OS thread's current thread ID here when the thread is + // constructed, and insist that it never changes during the lifetime + // of the thread. +#ifdef HAVE_POSIX_THREADS + pthread_t _posix_system_thread_id; +#endif +#ifdef WIN32 + DWORD _win32_system_thread_id; +#endif + friend class ThreadSimpleManager; }; diff --git a/panda/src/pipeline/threadSimpleManager.I b/panda/src/pipeline/threadSimpleManager.I index 0818ca071f..abd8cbbf82 100644 --- a/panda/src/pipeline/threadSimpleManager.I +++ b/panda/src/pipeline/threadSimpleManager.I @@ -27,19 +27,13 @@ get_current_thread() { // Function: ThreadSimpleManager::is_same_system_thread // Access: Public // Description: Returns true if we are still running within the same -// OS-level thread that created the ThreadSimpleManager, -// or false if this appears to be running in a different +// OS-level thread we think we should be running in, or +// false if this appears to be running in a different // thread. 
//////////////////////////////////////////////////////////////////// INLINE bool ThreadSimpleManager:: is_same_system_thread() const { -#ifdef HAVE_POSIX_THREADS - return pthread_equal(_posix_system_thread_id, pthread_self()); -#endif -#ifdef WIN32 - return (_win32_system_thread_id == GetCurrentThreadId()); -#endif - return true; + return _current_thread->is_same_system_thread(); } //////////////////////////////////////////////////////////////////// diff --git a/panda/src/pipeline/threadSimpleManager.cxx b/panda/src/pipeline/threadSimpleManager.cxx index ce3e94b575..4265a815d9 100644 --- a/panda/src/pipeline/threadSimpleManager.cxx +++ b/panda/src/pipeline/threadSimpleManager.cxx @@ -78,13 +78,6 @@ ThreadSimpleManager() : _clock = TrueClock::get_global_ptr(); _waiting_for_exit = NULL; -#ifdef HAVE_POSIX_THREADS - _posix_system_thread_id = pthread_self(); -#endif -#ifdef WIN32 - _win32_system_thread_id = GetCurrentThreadId(); -#endif - // Install these global pointers so very low-level code (code // defined before the pipeline directory) can yield when necessary. global_thread_yield = &Thread::force_yield; @@ -542,9 +535,9 @@ init_pointers() { // of next_context(). //////////////////////////////////////////////////////////////////// void ThreadSimpleManager:: -st_choose_next_context(void *data) { +st_choose_next_context(struct ThreadContext *from_context, void *data) { ThreadSimpleManager *self = (ThreadSimpleManager *)data; - self->choose_next_context(); + self->choose_next_context(from_context); } //////////////////////////////////////////////////////////////////// @@ -554,7 +547,7 @@ st_choose_next_context(void *data) { // of next_context(). 
//////////////////////////////////////////////////////////////////// void ThreadSimpleManager:: -choose_next_context() { +choose_next_context(struct ThreadContext *from_context) { double now = get_current_time(); do_timeslice_accounting(_current_thread, now); @@ -696,7 +689,7 @@ choose_next_context() { << " blocked, " << _sleeping.size() << " sleeping)\n"; } - switch_to_thread_context(_current_thread->_context); + switch_to_thread_context(from_context, _current_thread->_context); // Shouldn't get here. nassertv(false); diff --git a/panda/src/pipeline/threadSimpleManager.h b/panda/src/pipeline/threadSimpleManager.h index eb86c31d7b..e839ac2f5a 100644 --- a/panda/src/pipeline/threadSimpleManager.h +++ b/panda/src/pipeline/threadSimpleManager.h @@ -38,6 +38,7 @@ class Thread; class ThreadSimpleImpl; class BlockerSimple; +struct ThreadContext; //////////////////////////////////////////////////////////////////// // Class : ThreadSimpleManager @@ -87,8 +88,8 @@ private: typedef pdeque FifoThreads; typedef pvector Sleeping; - static void st_choose_next_context(void *data); - void choose_next_context(); + static void st_choose_next_context(struct ThreadContext *from_context, void *data); + void choose_next_context(struct ThreadContext *from_context); void do_timeslice_accounting(ThreadSimpleImpl *thread, double now); void wake_sleepers(Sleeping &sleepers, double now); void wake_all_sleepers(Sleeping &sleepers); @@ -159,19 +160,6 @@ private: TickRecords _tick_records; unsigned int _total_ticks; - // We may not mix-and-match OS threads with Panda's SIMPLE_THREADS. - // If we ever get a Panda context switch request from a different OS - // thread than the original thread, that's a serious error that may - // cause major consequences. For this reason, we store the OS - // thread's current thread ID here when the manager is constructed, - // and insist that it never changes. 
-#ifdef HAVE_POSIX_THREADS - pthread_t _posix_system_thread_id; -#endif -#ifdef WIN32 - DWORD _win32_system_thread_id; -#endif - static bool _pointers_initialized; static ThreadSimpleManager *_global_ptr; };