experiment with WIN_THREAD_CONTEXT

2025-10-03 02:15:43 -04:00 · 2010-04-27 21:30:08 +00:00 · 2010-04-27 21:30:08 +00:00 · 15d98ead96
commit 15d98ead96
parent 33276672f3
12 changed files with 711 additions and 458 deletions
--- a/panda/src/pipeline/Sources.pp
+++ b/panda/src/pipeline/Sources.pp
@ -85,6 +85,8 @@
    conditionVarSimpleImpl.cxx \
    conditionVarSpinlockImpl.cxx \
    config_pipeline.cxx \
+    contextSwitch_longjmp_src.c contextSwitch_ucontext_src.c \
+    contextSwitch_windows_src.c \
    cycleData.cxx \
    cycleDataLockedReader.cxx \
    cycleDataLockedStageReader.cxx \
--- a/panda/src/pipeline/contextSwitch.c
+++ b/panda/src/pipeline/contextSwitch.c
@ -17,418 +17,26 @@
 #include <stdlib.h>
 #include <stdio.h>

-#ifdef THREAD_SIMPLE_IMPL
+#if defined(THREAD_SIMPLE_IMPL) && !defined(CPPPARSER)

-#if defined(PHAVE_UCONTEXT_H)
+#ifdef WIN32
+/* Define this macro to use native Windows threading constructs to
+   switch contexts. */
+#define WIN_THREAD_CONTEXT
+#endif

-#else  /* PHAVE_UCONTEXT_H */
+#if defined(WIN_THREAD_CONTEXT)
+
+#include "contextSwitch_windows_src.c"
+
+#elif defined(PHAVE_UCONTEXT_H)
+
+#include "contextSwitch_ucontext_src.c"
+
+#else
+
+#include "contextSwitch_longjmp_src.c"

 #endif  /* PHAVE_UCONTEXT_H */

-#if defined(PHAVE_UCONTEXT_H)
-
-/* We'd prefer to use getcontext() / setcontext() to portably change
-   execution contexts within C code.  That's what these library
-   functions are designed for. */
-#ifdef __APPLE__
-#include <sys/ucontext.h>
-#else
-#include <ucontext.h>
-#endif
-
-struct ThreadContext {
-  ucontext_t _ucontext;
-#ifdef __APPLE__
-  // Due to a bug in OSX 10.5, the system ucontext_t declaration
-  // doesn't reserve enough space, and we need to reserve some
-  // additional space to make room.
-#define EXTRA_PADDING_SIZE 4096
-  char _extra_padding[EXTRA_PADDING_SIZE];
-#endif
-};
-
-static void
-begin_context(ContextFunction *thread_func, void *data) {
-  (*thread_func)(data);
-}
-
-void
-init_thread_context(struct ThreadContext *context, 
-                    unsigned char *stack, size_t stack_size,
-                    ContextFunction *thread_func, void *data) {
-  if (getcontext(&context->_ucontext) != 0) {
-    fprintf(stderr, "getcontext failed in init_thread_context!\n");
-    // Too bad for you.
-    abort();
-  }
-
-  context->_ucontext.uc_stack.ss_sp = stack;
-  context->_ucontext.uc_stack.ss_size = stack_size;
-  context->_ucontext.uc_stack.ss_flags = 0;
-  context->_ucontext.uc_link = NULL;
-
-  makecontext(&context->_ucontext, (void (*)())&begin_context, 2, thread_func, data);
-}
-
-void
-save_thread_context(struct ThreadContext *context,
-                    ContextFunction *next_context, void *data) {
-  /* getcontext requires us to use a volatile auto variable to
-     differentiate between pass 1 (immediate return) and pass 2
-     (return from setcontext). */
-  volatile int context_return = 0;
-
-  if (getcontext(&context->_ucontext) != 0) {
-    fprintf(stderr, "getcontext failed!\n");
-    // Nothing to do here.
-    abort();
-  }
-
-  if (context_return) {
-    /* We have just returned from setcontext.  In this case, return
-       from the function.  The stack is still good. */
-    return;
-  }
-
-  context_return = 1;
-
-  /* We are still in the calling thread.  In this case, we cannot
-     return from the function without damaging the stack.  Insted,
-     call next_context() and trust the caller to call
-     switch_to_thread_context() in there somewhere. */
-
-  (*next_context)(data);
-
-  /* We shouldn't get here. */
-  abort();
-}
-
-void
-switch_to_thread_context(struct ThreadContext *context) {
-  setcontext(&context->_ucontext);
-
-  /* Shouldn't get here. */
-  abort();
-}
-
-#else
-
-/* Unfortunately, setcontext() is not defined everywhere (even though
-   it claims to be adopted by Posix).  So we have to fall back to
-   setjmp() / longjmp() in its absence.  This is a hackier solution. */
-
-#if defined(_M_IX86) || defined(__i386__)
-/* Maybe we can implement our own setjmp/longjmp in assembly code.
-   This will be safer than the system version, since who knows what
-   that one's really doing? */
-
-typedef int cs_jmp_buf[33];
-
-#define CS_JB_SP 4
-
-#else
-
-/* Fall back to the system implmentation of setjmp/longjmp. */
-#include <setjmp.h>
-
-typedef jmp_buf cs_jmp_buf;
-#define cs_setjmp setjmp
-#define cs_longjmp(buf) longjmp(buf, 1)
-
-#ifdef JB_SP
-#define CS_JB_SP JB_SP
-
-#elif defined(__ppc__)
-  /* This was determined experimentally through test_setjmp. */
-#define CS_JB_SP 0
-
-#endif
-
-#endif  /* __i386__ */
-
-struct ThreadContext {
-  cs_jmp_buf _jmp_context;
-};
-
-/* The approach is: hack our way onto the new stack pointer right now,
-   then call setjmp() to record that stack pointer in the
-   _jmp_context.  Then restore back to the original stack pointer. */
-
-#if defined(_M_IX86)
-/* Here is our own implementation of setjmp and longjmp for I386, via
-   Windows syntax. */
-
-/* warning C4731: frame pointer register 'ebp' modified by inline assembly code */
-#pragma warning(disable:4731)
-
-int
-cs_setjmp(cs_jmp_buf env) {
-  __asm {
-    pop ebp;  /* Restore the frame pointer that the compiler pushed */
-
-    pop edx;  /* edx = return address */
-    pop eax;  /* eax = &env */
-    push eax; /* keep &env on the stack; the caller will remove it */
-
-    mov [eax + 0], ebx;
-    mov [eax + 4], edi;
-    mov [eax + 8], esi;
-    mov [eax + 12], ebp;
-    mov [eax + 16], esp;
-    mov [eax + 20], edx;
-
-    fnsave [eax + 24];  /* save floating-point state */
-
-    xor eax,eax;  /* return 0: pass 1 return */
-    jmp edx;      /* this works like ret */
-  }
-}
-
-void
-cs_longjmp(cs_jmp_buf env) {
-  _asm {
-    mov eax, env;
-    
-    mov ebx, [eax + 0];
-    mov edi, [eax + 4];
-    mov esi, [eax + 8];
-    mov ebp, [eax + 12];
-    mov esp, [eax + 16];
-    mov edx, [eax + 20];
-    
-    frstor [eax + 24];  /* restore floating-point state */
-
-    mov eax, 1;   /* return 1 from setjmp: pass 2 return */
-    jmp edx;      /* return from above setjmp call */
-  }
-}
-
-
-#elif defined(__i386__)
-/* Here is our own implementation of setjmp and longjmp for I386, via
-   GNU syntax. */
-
-#if defined(IS_LINUX)
-/* On Linux, the leading underscores are not implicitly added for C
-   function names. */
-#define cs_setjmp _cs_setjmp
-#define cs_longjmp _cs_longjmp
-#endif
-
-int cs_setjmp(cs_jmp_buf env);
-void cs_longjmp(cs_jmp_buf env);
-
-__asm__
-("_cs_setjmp:\n"
- "popl %edx\n"
- "popl %eax\n"
- "pushl %eax\n"
-
- "movl %ebx, 0(%eax)\n"
- "movl %edi, 4(%eax)\n"
- "movl %esi, 8(%eax)\n"
- "movl %ebp, 12(%eax)\n"
- "movl %esp, 16(%eax)\n"
- "movl %edx, 20(%eax)\n"
-
- "fnsave 24(%eax)\n"
-
- "xorl %eax, %eax\n"
- "jmp *%edx\n");
-
-__asm__
-("_cs_longjmp:\n"
- "popl %edx\n"
- "popl %eax\n"
-
- "movl 0(%eax), %ebx\n"
- "movl 4(%eax), %edi\n"
- "movl 8(%eax), %esi\n"
- "movl 12(%eax), %ebp\n"
- "movl 16(%eax), %esp\n"
- "movl 20(%eax), %edx\n"
-
- "frstor 24(%eax)\n"
-
- "mov $1,%eax\n"
- "jmp *%edx\n");
-
-#endif  /* __i386__ */
-
-/* Ideally, including setjmp.h would have defined JB_SP, which will
-   tell us where in the context structure we can muck with the stack
-   pointer.  If it didn't define this symbol, we have to guess it. */
-#ifndef CS_JB_SP
-
-#if defined(IS_OSX) && defined(__i386__)
-/* We have determined this value empirically, via test_setjmp.cxx in
-   this directory. */
-#define CS_JB_SP 9
-
-#endif
-
-#endif  /* CS_JB_SP */
-
-static struct ThreadContext *st_context;
-static unsigned char *st_stack;
-static size_t st_stack_size;
-static ContextFunction *st_thread_func;
-static void *st_data;
-
-static cs_jmp_buf orig_stack;
-
-/* We can't declare this function static--gcc might want to inline it
-   in that case, and then the code crashes.  I hope this doesn't mean
-   that the stack is still not getting restored correctly in the above
-   assembly code. */
-void
-setup_context_2(void) {
-  /* Here we are running on the new stack.  Copy the key data onto our
-     new stack. */
-  ContextFunction *volatile thread_func = st_thread_func;
-  void *volatile data = st_data;
-
-  if (cs_setjmp(st_context->_jmp_context) == 0) {
-    /* The _jmp_context is set up and ready to run.  Now restore the
-       original stack and return.  We can't simply return from this
-       function, since it might overwrite some of the stack data on
-       the way out. */
-    cs_longjmp(orig_stack);
-
-    /* Shouldn't get here. */
-    abort();
-  }
-
-  /* We come here the first time the thread starts. */
-  (*thread_func)(data);
-
-  /* We shouldn't get here, since we don't expect the thread_func to
-     return. */
-  abort();
-}
-
-static void
-setup_context_1(void) {
-  /* Save the current stack frame so we can return to it (at the end
-     of setup_context_2()). */
-  if (cs_setjmp(orig_stack) == 0) {
-    /* First, switch to the new stack.  Save the current context using
-       setjmp().  This saves out all of the processor register values,
-       though it doesn't muck with the stack. */
-    static cs_jmp_buf temp;
-    if (cs_setjmp(temp) == 0) {
-      /* This is the initial return from setjmp.  Still the original
-         stack. */
-
-      /* Now we overwrite the stack pointer value in the saved
-         register context.  This doesn't work with all implementations
-         of setjmp/longjmp. */
-
-      /* We give ourselves a small buffer of unused space at the top
-         of the stack, to allow for the stack frame and such that this
-         code might be assuming is there. */
-      (*(void **)&temp[CS_JB_SP]) = (st_stack + st_stack_size - 0x100);
-
-      /* And finally, we place ourselves on the new stack by using
-         longjmp() to reload the modified context. */
-      cs_longjmp(temp);
-
-      /* Shouldn't get here. */
-      abort();
-    }
-
-    /* This is the second return from setjmp.  Now we're on the new
-       stack. */
-    setup_context_2();
-
-    /* Shouldn't get here. */
-    abort();
-  }
-
-  /* By now we are back to the original stack. */
-}
-
-void
-init_thread_context(struct ThreadContext *context, 
-                    unsigned char *stack, size_t stack_size,
-                    ContextFunction *thread_func, void *data) {
-  /* Copy all of the input parameters to static variables, then begin
-     the stack-switching process. */
-  st_context = context;
-  st_stack = stack;
-  st_stack_size = stack_size;
-  st_thread_func = thread_func;
-  st_data = data;
-
-  setup_context_1();
-}  
-
-void
-save_thread_context(struct ThreadContext *context,
-                    ContextFunction *next_context, void *data) {
-  if (cs_setjmp(context->_jmp_context) != 0) {
-    /* We have just returned from longjmp.  In this case, return from
-       the function.  The stack is still good. */
-    return;
-  }
-
-  /* We are still in the calling thread.  In this case, we cannot
-     return from the function without damaging the stack.  Insted,
-     call next_context() and trust the caller to call
-     switch_to_thread_context() in there somewhere. */
-
-  (*next_context)(data);
-
-  /* We shouldn't get here. */
-  abort();
-}
-
-void
-switch_to_thread_context(struct ThreadContext *context) {
-  cs_longjmp(context->_jmp_context);
-
-  /* Shouldn't get here. */
-  abort();
-}
-
-#endif  /* PHAVE_UCONTEXT_H */
-
-struct ThreadContext *
-alloc_thread_context() {
-  struct ThreadContext *context =
-    (struct ThreadContext *)malloc(sizeof(struct ThreadContext));
-
-#if defined(__APPLE__) && defined(_DEBUG)
-  {
-    int p;
-    // Pre-fill the extra_padding with bytes that we can recognize
-    // later.
-    for (p = 0; p < EXTRA_PADDING_SIZE; ++p) {
-      context->_extra_padding[p] = (p & 0xff);
-    }
-  }
-#endif  // __APPLE__
-
-  return context;
-}
-
-void
-free_thread_context(struct ThreadContext *context) {
-#if defined(__APPLE__) && defined(_DEBUG)
-  {
-    // Because of the OSX 10.5 bug, we anticipate that the extra_padding
-    // may have been filled in with junk.  Confirm this.
-    int p = EXTRA_PADDING_SIZE;
-    while (p > 0) {
-      --p;
-      if (context->_extra_padding[p] != (char)(p & 0xff)) {
-        fprintf(stderr, "Context was mangled at byte %d: %d!\n", p, context->_extra_padding[p]);
-        break;
-      }
-    }
-  }
-#endif  // __APPLE__
-  free(context);
-}
-
 #endif  /* THREAD_SIMPLE_IMPL */
--- a/panda/src/pipeline/contextSwitch.h
+++ b/panda/src/pipeline/contextSwitch.h
@ -36,16 +36,21 @@ struct ThreadContext;
 extern "C" {
 #endif 

-typedef void ContextFunction(void *);
+typedef void ContextFunction(struct ThreadContext *from_context, void *);
+typedef void ThreadFunction(void *);

-/* Call this to fill in the appropriate values in context.  The stack
-   must already have been allocated.  The context will be initialized
-   so that when switch_to_thread_context() is called, it will begin
-   executing thread_func(data), which should not return.  This function
-   will return normally. */
+extern const int needs_stack_prealloc;
+
+/* Call this to fill in the appropriate values in context.  If
+   needs_stack_prealloc (above) is true, the stack must already have
+   been allocated; if needs_stack_prealloc is false, the stack pointer
+   is not used and may be NULL.  The context will be initialized so
+   that when switch_to_thread_context() is called, it will begin
+   executing thread_func(data), which should not return.  This
+   function will return normally. */
 void init_thread_context(struct ThreadContext *context, 
                         unsigned char *stack, size_t stack_size,
-                         ContextFunction *thread_func, void *data);
+                         ThreadFunction *thread_func, void *data);

 /* Call this to save the current thread context.  This function does
   not return until switch_to_thread_context() is called.  Instead it
@ -53,15 +58,19 @@ void init_thread_context(struct ThreadContext *context,
 void save_thread_context(struct ThreadContext *context,
                         ContextFunction *next_context, void *data);

-/* Call this to resume executing a previously saved context.  When
-   called, it will return from save_thread_context() in the saved
-   stack (or begin executing thread_func()). */
-void switch_to_thread_context(struct ThreadContext *context);
+/* Call this to resume executing a previously saved context.
+   from_context must be the currently-executing context, and
+   to_context is the context to resume.  When called, it will return
+   from save_thread_context() in the saved stack (or begin executing
+   thread_func()). */
+void switch_to_thread_context(struct ThreadContext *from_context,
+                              struct ThreadContext *to_context);

 /* Use this pair of functions to transparently allocate and destroy an
   opaque ThreadContext object of the appropriate size.  These
-   functions only allocate memory; they do not initialize the values
-   of the context (see init_thread_context(), above, for that). */
+   functions allocate memory, and initialize the context as
+   appropriate for the main thread.  See init_thread_context() to
+   finish the initialization for a new thread. */
 struct ThreadContext *alloc_thread_context();
 void free_thread_context(struct ThreadContext *context);

--- a/panda/src/pipeline/contextSwitch_longjmp_src.c
+++ b/panda/src/pipeline/contextSwitch_longjmp_src.c
@ -0,0 +1,308 @@
+/* Filename: contextSwitch_longjmp_src.c
+ * Created by:  drose (15Apr10)
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/* This is the implementation of user-space context switching using
+   setjmp() / longjmp().  This is the hackier implementation,
+   which is necessary if setcontext() is not available. */
+
+const int needs_stack_prealloc = 1;
+
+#if defined(_M_IX86) || defined(__i386__)
+/* Maybe we can implement our own setjmp/longjmp in assembly code.
+   This will be safer than the system version, since who knows what
+   that one's really doing? */
+
+typedef int cs_jmp_buf[33];
+
+#define CS_JB_SP 4
+
+#else
+
+/* Fall back to the system implementation of setjmp/longjmp. */
+#include <setjmp.h>
+
+typedef jmp_buf cs_jmp_buf;
+#define cs_setjmp setjmp
+#define cs_longjmp(buf) longjmp(buf, 1)
+
+#ifdef JB_SP
+#define CS_JB_SP JB_SP
+
+#elif defined(__ppc__)
+  /* This was determined experimentally through test_setjmp. */
+#define CS_JB_SP 0
+
+#endif
+
+#endif  /* __i386__ */
+
+struct ThreadContext {
+  cs_jmp_buf _jmp_context;
+};
+
+/* The approach is: hack our way onto the new stack pointer right now,
+   then call setjmp() to record that stack pointer in the
+   _jmp_context.  Then restore back to the original stack pointer. */
+
+#if defined(_M_IX86)
+/* Here is our own implementation of setjmp and longjmp for I386, via
+   Windows syntax. */
+
+/* warning C4731: frame pointer register 'ebp' modified by inline assembly code */
+#pragma warning(disable:4731)
+
+/* Hand-rolled setjmp for 32-bit MSVC.  Stores ebx, edi, esi, ebp, esp
+   and the return address into env at byte offsets 0..20, then the x87
+   state at offset 24, and returns 0.  A later cs_longjmp(env) makes
+   this call appear to return a second time with the value 1.
+
+   The esp slot is byte offset 16, i.e. int index 4, which is what
+   CS_JB_SP refers to.
+
+   NOTE(review): the leading "pop ebp" assumes the compiler emitted
+   exactly a "push ebp" prologue for this function and nothing else --
+   confirm for the compiler flags in use.
+   NOTE(review): Intel documents FNSAVE as re-initializing the FPU
+   after storing its state, so the pass-1 return presumably leaves the
+   x87 control word at its default -- confirm whether callers care. */
+int
+cs_setjmp(cs_jmp_buf env) {
+  __asm {
+    pop ebp;  /* Restore the frame pointer that the compiler pushed */
+
+    pop edx;  /* edx = return address */
+    pop eax;  /* eax = &env */
+    push eax; /* keep &env on the stack; the caller will remove it */
+
+    mov [eax + 0], ebx;
+    mov [eax + 4], edi;
+    mov [eax + 8], esi;
+    mov [eax + 12], ebp;
+    mov [eax + 16], esp;
+    mov [eax + 20], edx;
+
+    fnsave [eax + 24];  /* save floating-point state */
+
+    xor eax,eax;  /* return 0: pass 1 return */
+    jmp edx;      /* this works like ret */
+  }
+}
+
+/* Hand-rolled longjmp for 32-bit MSVC.  Reloads ebx, edi, esi, ebp,
+   esp and the x87 state from env (the layout written by cs_setjmp),
+   then jumps to the saved return address with eax = 1, so that the
+   matching cs_setjmp() call appears to return 1.  Control never comes
+   back to the caller of cs_longjmp(). */
+void
+cs_longjmp(cs_jmp_buf env) {
+  _asm {
+    mov eax, env;
+    
+    mov ebx, [eax + 0];
+    mov edi, [eax + 4];
+    mov esi, [eax + 8];
+    mov ebp, [eax + 12];
+    mov esp, [eax + 16];
+    mov edx, [eax + 20];
+    
+    frstor [eax + 24];  /* restore floating-point state */
+
+    mov eax, 1;   /* return 1 from setjmp: pass 2 return */
+    jmp edx;      /* return from above setjmp call */
+  }
+}
+
+
+#elif defined(__i386__)
+/* Here is our own implementation of setjmp and longjmp for I386, via
+   GNU syntax. */
+
+#if defined(IS_LINUX)
+/* On Linux, the leading underscores are not implicitly added for C
+   function names. */
+#define cs_setjmp _cs_setjmp
+#define cs_longjmp _cs_longjmp
+#endif
+
+int cs_setjmp(cs_jmp_buf env);
+void cs_longjmp(cs_jmp_buf env);
+
+/* GNU-syntax cs_setjmp.  Pops the return address into edx, reads the
+   env argument into eax (pushing it back so the caller can remove it),
+   stores ebx, edi, esi, ebp, esp and the return address at byte
+   offsets 0..20 of env and the x87 state at offset 24, then returns 0
+   by jumping to the return address. */
+__asm__
+("_cs_setjmp:\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "pushl %eax\n"
+
+ "movl %ebx, 0(%eax)\n"
+ "movl %edi, 4(%eax)\n"
+ "movl %esi, 8(%eax)\n"
+ "movl %ebp, 12(%eax)\n"
+ "movl %esp, 16(%eax)\n"
+ "movl %edx, 20(%eax)\n"
+
+ "fnsave 24(%eax)\n"
+
+ "xorl %eax, %eax\n"
+ "jmp *%edx\n");
+
+/* GNU-syntax cs_longjmp.  Pops its own return address (discarded when
+   edx is reloaded) and the env argument, restores ebx, edi, esi, ebp,
+   esp and the x87 state from env, then jumps to the return address
+   saved by _cs_setjmp with eax = 1. */
+__asm__
+("_cs_longjmp:\n"
+ "popl %edx\n"
+ "popl %eax\n"
+
+ "movl 0(%eax), %ebx\n"
+ "movl 4(%eax), %edi\n"
+ "movl 8(%eax), %esi\n"
+ "movl 12(%eax), %ebp\n"
+ "movl 16(%eax), %esp\n"
+ "movl 20(%eax), %edx\n"
+
+ "frstor 24(%eax)\n"
+
+ "mov $1,%eax\n"
+ "jmp *%edx\n");
+
+#endif  /* __i386__ */
+
+/* Ideally, including setjmp.h would have defined JB_SP, which will
+   tell us where in the context structure we can muck with the stack
+   pointer.  If it didn't define this symbol, we have to guess it. */
+#ifndef CS_JB_SP
+
+#if defined(IS_OSX) && defined(__i386__)
+/* We have determined this value empirically, via test_setjmp.cxx in
+   this directory. */
+#define CS_JB_SP 9
+
+#endif
+
+#endif  /* CS_JB_SP */
+
+static struct ThreadContext *st_context;
+static unsigned char *st_stack;
+static size_t st_stack_size;
+static ThreadFunction *st_thread_func;
+static void *st_data;
+
+static cs_jmp_buf orig_stack;
+
+/* Second stage of init_thread_context(): called by setup_context_1()
+   once execution has been moved onto the new stack.  It records the
+   new-stack context into st_context->_jmp_context and then jumps back
+   to orig_stack.  When that saved context is later resumed, the
+   thread function is invoked with its data.
+
+   We can't declare this function static--gcc might want to inline it
+   in that case, and then the code crashes.  I hope this doesn't mean
+   that the stack is still not getting restored correctly in the above
+   assembly code. */
+void
+setup_context_2(void) {
+  /* Here we are running on the new stack.  Copy the key data onto our
+     new stack.  The st_* statics are overwritten by the next
+     init_thread_context() call, so they cannot be read later. */
+  ThreadFunction *volatile thread_func = st_thread_func;
+  void *volatile data = st_data;
+
+  if (cs_setjmp(st_context->_jmp_context) == 0) {
+    /* The _jmp_context is set up and ready to run.  Now restore the
+       original stack and return.  We can't simply return from this
+       function, since it might overwrite some of the stack data on
+       the way out. */
+    cs_longjmp(orig_stack);
+
+    /* Shouldn't get here. */
+    abort();
+  }
+
+  /* We come here the first time the thread starts. */
+  (*thread_func)(data);
+
+  /* We shouldn't get here, since we don't expect the thread_func to
+     return. */
+  abort();
+}
+
+/* First stage of init_thread_context().  Saves the current context in
+   orig_stack, then captures a scratch context, patches its saved
+   stack pointer to point near the top of st_stack, and long-jumps into
+   it so that setup_context_2() runs on the new stack.  Returns
+   normally once setup_context_2() has jumped back to orig_stack.
+
+   This relies on CS_JB_SP naming the stack-pointer slot of
+   cs_jmp_buf.  In this file CS_JB_SP is only defined for i386, for
+   platforms whose setjmp.h provides JB_SP, and for __ppc__; on any
+   other platform the assignment below will not compile. */
+static void
+setup_context_1(void) {
+  /* Save the current stack frame so we can return to it (at the end
+     of setup_context_2()). */
+  if (cs_setjmp(orig_stack) == 0) {
+    /* First, switch to the new stack.  Save the current context using
+       setjmp().  This saves out all of the processor register values,
+       though it doesn't muck with the stack. */
+    static cs_jmp_buf temp;
+    if (cs_setjmp(temp) == 0) {
+      /* This is the initial return from setjmp.  Still the original
+         stack. */
+
+      /* Now we overwrite the stack pointer value in the saved
+         register context.  This doesn't work with all implementations
+         of setjmp/longjmp. */
+
+      /* We give ourselves a small buffer of unused space at the top
+         of the stack, to allow for the stack frame and such that this
+         code might be assuming is there. */
+      (*(void **)&temp[CS_JB_SP]) = (st_stack + st_stack_size - 0x100);
+
+      /* And finally, we place ourselves on the new stack by using
+         longjmp() to reload the modified context. */
+      cs_longjmp(temp);
+
+      /* Shouldn't get here. */
+      abort();
+    }
+
+    /* This is the second return from setjmp.  Now we're on the new
+       stack. */
+    setup_context_2();
+
+    /* Shouldn't get here. */
+    abort();
+  }
+
+  /* By now we are back to the original stack. */
+}
+
+/* Prepares context so that switching to it later begins executing
+   thread_func(data) on the supplied stack.  The arguments are handed
+   to the setup_context_*() helpers through file-scope statics, because
+   locals do not survive the stack switch performed there. */
+void
+init_thread_context(struct ThreadContext *context, 
+                    unsigned char *stack, size_t stack_size,
+                    ThreadFunction *thread_func, void *data) {
+  /* What to run once the new context is first resumed. */
+  st_thread_func = thread_func;
+  st_data = data;
+
+  /* Where the new context lives and which memory backs its stack. */
+  st_stack = stack;
+  st_stack_size = stack_size;
+  st_context = context;
+
+  /* Everything is staged; perform the stack switch and record the
+     context.  This returns on the original stack. */
+  setup_context_1();
+}
+
+/* Records the current execution state in context and then calls
+   next_context(context, data), which is expected to switch to some
+   other context and never return.  This function returns to its
+   caller only when context is later resumed via
+   switch_to_thread_context(). */
+void
+save_thread_context(struct ThreadContext *context,
+                    ContextFunction *next_context, void *data) {
+  if (cs_setjmp(context->_jmp_context) != 0) {
+    /* We have just returned from longjmp.  In this case, return from
+       the function.  The stack is still good. */
+    return;
+  }
+
+  /* We are still in the calling thread.  In this case, we cannot
+     return from the function without damaging the stack.  Instead,
+     call next_context() and trust the caller to call
+     switch_to_thread_context() in there somewhere. */
+
+  (*next_context)(context, data);
+
+  /* We shouldn't get here. */
+  abort();
+}
+
+/* Resumes to_context by reloading the register set stored in its
+   _jmp_context.  Control does not come back to the caller. */
+void
+switch_to_thread_context(struct ThreadContext *from_context,
+                         struct ThreadContext *to_context) {
+  /* This implementation has no use for the outgoing context. */
+  (void)from_context;
+
+  cs_longjmp(to_context->_jmp_context);
+
+  /* cs_longjmp() transfers control elsewhere; arriving here is a
+     bug. */
+  abort();
+}
+
+/* Allocates a zero-filled ThreadContext.  The result must be released
+   with free_thread_context().  Allocation failure is fatal: a message
+   is printed and the process aborts, matching how the other
+   unrecoverable errors in the context-switch code are handled, so the
+   return value is never NULL. */
+struct ThreadContext *
+alloc_thread_context() {
+  /* calloc() gives zeroed storage in one step and, unlike the previous
+     malloc()+memset() pair, cannot hand a NULL pointer to memset(). */
+  struct ThreadContext *context =
+    (struct ThreadContext *)calloc(1, sizeof(struct ThreadContext));
+  if (context == NULL) {
+    fprintf(stderr, "out of memory in alloc_thread_context!\n");
+    abort();
+  }
+
+  return context;
+}
+
+/* Releases a context obtained from alloc_thread_context().  Only the
+   ThreadContext object itself is freed here. */
+void
+free_thread_context(struct ThreadContext *context) {
+  free(context);
+}
--- a/panda/src/pipeline/contextSwitch_ucontext_src.c
+++ b/panda/src/pipeline/contextSwitch_ucontext_src.c
@ -0,0 +1,141 @@
+/* Filename: contextSwitch_ucontext_src.c
+ * Created by:  drose (15Apr10)
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/* This is the implementation of user-space context switching using
+   getcontext() / setcontext().  This is the preferred implementation,
+   if these library functions are available; that's what they are
+   designed for. */
+
+#ifdef __APPLE__
+#include <sys/ucontext.h>
+#else
+#include <ucontext.h>
+#endif
+
+const int needs_stack_prealloc = 1;
+
+struct ThreadContext {
+  ucontext_t _ucontext;
+#ifdef __APPLE__
+  // Due to a bug in OSX 10.5, the system ucontext_t declaration
+  // doesn't reserve enough space, and we need to reserve some
+  // additional space to make room.
+#define EXTRA_PADDING_SIZE 4096
+  char _extra_padding[EXTRA_PADDING_SIZE];
+#endif
+};
+
+/* Entry point handed to makecontext() by init_thread_context(); it
+   simply forwards to thread_func(data). */
+static void
+begin_context(ThreadFunction *thread_func, void *data) {
+  (*thread_func)(data);
+}
+
+/* Initializes context so that resuming it starts
+   begin_context(thread_func, data) on the caller-supplied stack.
+   Aborts if getcontext() fails.  uc_link is NULL, so there is no
+   successor context if the thread function ever returns. */
+void
+init_thread_context(struct ThreadContext *context, 
+                    unsigned char *stack, size_t stack_size,
+                    ThreadFunction *thread_func, void *data) {
+  if (getcontext(&context->_ucontext) != 0) {
+    fprintf(stderr, "getcontext failed in init_thread_context!\n");
+    // Too bad for you.
+    abort();
+  }
+
+  context->_ucontext.uc_stack.ss_sp = stack;
+  context->_ucontext.uc_stack.ss_size = stack_size;
+  context->_ucontext.uc_stack.ss_flags = 0;
+  context->_ucontext.uc_link = NULL;
+
+  /* NOTE(review): POSIX specifies the extra makecontext() arguments as
+     int; passing two pointers relies on the platform accepting
+     pointer-sized arguments here -- confirm on 64-bit targets. */
+  makecontext(&context->_ucontext, (void (*)())&begin_context, 2, thread_func, data);
+}
+
+/* Records the current execution state in context and then calls
+   next_context(context, data), which is expected to switch to some
+   other context and never return.  This function returns to its
+   caller only when context is later resumed via
+   switch_to_thread_context().  Aborts if getcontext() fails. */
+void
+save_thread_context(struct ThreadContext *context,
+                    ContextFunction *next_context, void *data) {
+  /* getcontext requires us to use a volatile auto variable to
+     differentiate between pass 1 (immediate return) and pass 2
+     (return from setcontext). */
+  volatile int context_return = 0;
+
+  if (getcontext(&context->_ucontext) != 0) {
+    fprintf(stderr, "getcontext failed!\n");
+    // Nothing to do here.
+    abort();
+  }
+
+  if (context_return) {
+    /* We have just returned from setcontext.  In this case, return
+       from the function.  The stack is still good. */
+    return;
+  }
+
+  context_return = 1;
+
+  /* We are still in the calling thread.  In this case, we cannot
+     return from the function without damaging the stack.  Instead,
+     call next_context() and trust the caller to call
+     switch_to_thread_context() in there somewhere. */
+
+  (*next_context)(context, data);
+
+  /* We shouldn't get here. */
+  abort();
+}
+
+/* Resumes to_context via setcontext().  Control does not come back to
+   the caller; if setcontext() returns at all, the process aborts. */
+void
+switch_to_thread_context(struct ThreadContext *from_context,
+                         struct ThreadContext *to_context) {
+  /* This implementation has no use for the outgoing context. */
+  (void)from_context;
+
+  setcontext(&to_context->_ucontext);
+
+  /* setcontext() only returns on failure. */
+  abort();
+}
+
+/* Allocates a zero-filled ThreadContext.  The result must be released
+   with free_thread_context().  Allocation failure is fatal (message on
+   stderr, then abort()), consistent with the getcontext() failure
+   handling in this file, so the return value is never NULL.
+
+   In Apple debug builds the _extra_padding area is filled with a
+   recognizable byte pattern that free_thread_context() checks. */
+struct ThreadContext *
+alloc_thread_context() {
+  /* calloc() gives zeroed storage in one step and, unlike the previous
+     malloc()+memset() pair, cannot hand a NULL pointer to memset(). */
+  struct ThreadContext *context =
+    (struct ThreadContext *)calloc(1, sizeof(struct ThreadContext));
+  if (context == NULL) {
+    fprintf(stderr, "out of memory in alloc_thread_context!\n");
+    abort();
+  }
+
+#if defined(__APPLE__) && defined(_DEBUG)
+  {
+    int p;
+    /* Pre-fill the extra_padding with bytes that we can recognize
+       later. */
+    for (p = 0; p < EXTRA_PADDING_SIZE; ++p) {
+      context->_extra_padding[p] = (char)(p & 0xff);
+    }
+  }
+#endif  /* __APPLE__ && _DEBUG */
+
+  return context;
+}
+
+/* Releases a context obtained from alloc_thread_context().  In Apple
+   debug builds, first scans the padding area from its last byte
+   downward and reports the highest byte that no longer holds the
+   pattern written at allocation time. */
+void
+free_thread_context(struct ThreadContext *context) {
+#if defined(__APPLE__) && defined(_DEBUG)
+  {
+    /* Because of the OSX 10.5 bug, the padding may have been
+       overwritten; report the first mismatch found from the top. */
+    int idx;
+    for (idx = EXTRA_PADDING_SIZE - 1; idx >= 0; --idx) {
+      char expected = (char)(idx & 0xff);
+      if (context->_extra_padding[idx] != expected) {
+        fprintf(stderr, "Context was mangled at byte %d: %d!\n", idx, context->_extra_padding[idx]);
+        break;
+      }
+    }
+  }
+#endif  /* __APPLE__ && _DEBUG */
+  free(context);
+}
--- a/panda/src/pipeline/contextSwitch_windows_src.c
+++ b/panda/src/pipeline/contextSwitch_windows_src.c
@ -0,0 +1,153 @@
+/* Filename: contextSwitch_windows_src.c
+ * Created by:  drose (15Apr10)
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/* This is the implementation of user-space context switching using
+   native Windows threading constructs to manage the different
+   execution contexts.  This isn't strictly user-space, since we use
+   OS threading constructs, but we use a global lock to ensure that
+   only one thread at a time is active.  Thus, we still don't have to
+   defend the code against critical sections globally, so we still get
+   the low-overhead benefit of SIMPLE_THREADS; this is just a simple,
+   reliable way to manage context switches. */
+
+#include <windows.h>
+#include <setjmp.h>
+
+/* The Windows implementation doesn't use the stack pointer. */
+const int needs_stack_prealloc = 0;
+
+static struct ThreadContext *current_context = NULL;
+
+struct ThreadContext {
+  /* Each context is really its own thread.  This stays NULL until
+     init_thread_context() creates the thread. */
+  HANDLE _thread;
+
+  /* This event is in the signaled state when the thread is ready to
+     roll.  It is created auto-reset and initially unsignaled by
+     alloc_thread_context(). */
+  HANDLE _ready;
+  
+  /* This is FALSE while the thread is alive, and set TRUE by
+     free_thread_context() if the thread is to be terminated when it
+     next wakes up. */
+  int _terminated;
+
+  /* Startup parameters, stored by init_thread_context() and read once
+     by thread_main().
+     NOTE(review): nothing visible in this file clears them to NULL
+     afterwards, although an earlier comment said so. */
+  ThreadFunction *_thread_func;
+  void *_data;
+
+  /* We use setjmp()/longjmp() to manage the detail of returning from
+     save_thread_context() when we call switch_to_thread_context(). */
+  jmp_buf _jmp_context;
+};
+
+/* Start routine of every OS thread created by init_thread_context().
+   The thread parks on its own _ready event until it is first switched
+   to, then runs the stored thread function.  If the context was
+   flagged as terminated while parked, the thread exits with code 1
+   without running the thread function. */
+static DWORD WINAPI
+thread_main(LPVOID data) {
+  struct ThreadContext *self = (struct ThreadContext *)data;
+
+  /* Block until some other context hands control to us. */
+  WaitForSingleObject(self->_ready, INFINITE);
+
+  if (self->_terminated) {
+    /* Woken only so that we can go away. */
+    ExitThread(1);
+  }
+
+  /* First real activation: run the thread body. */
+  self->_thread_func(self->_data);
+
+  return 0;
+}
+
+/* Creates the OS thread that backs context.  The thread starts in
+   thread_main(), where it waits on context->_ready before calling
+   thread_func(data).
+
+   stack is ignored by this implementation (needs_stack_prealloc is 0);
+   stack_size is passed to CreateThread() as the requested stack size.
+   If the thread cannot be created, a message is printed and the
+   process aborts; otherwise a later switch to this context would wait
+   forever on a thread that does not exist. */
+void
+init_thread_context(struct ThreadContext *context, 
+                    unsigned char *stack, size_t stack_size,
+                    ThreadFunction *thread_func, void *data) {
+  (void)stack;
+
+  /* These must be in place before the thread exists, since
+     thread_main() reads them. */
+  context->_thread_func = thread_func;
+  context->_data = data;
+
+  context->_thread = CreateThread(NULL, stack_size, 
+                                  thread_main, context, 0, NULL);
+  if (context->_thread == NULL) {
+    fprintf(stderr, "CreateThread failed in init_thread_context! (error %lu)\n",
+            (unsigned long)GetLastError());
+    abort();
+  }
+}
+
+/* Records a setjmp() return point in context, marks context as the
+   current one, and calls next_context(context, data), which is
+   expected to call switch_to_thread_context() and not return.  This
+   function returns to its caller only after a longjmp() to
+   context->_jmp_context, which switch_to_thread_context() performs
+   when this context is woken again. */
+void
+save_thread_context(struct ThreadContext *context,
+                    ContextFunction *next_context, void *data) {
+  /* Save the current context so we can return here when the thread is
+     awoken. */
+  if (setjmp(context->_jmp_context) != 0) {
+    /* We have just returned from longjmp.  In this case, return from
+       the function. */
+    return;
+  }
+
+  /* Remember who is running, so switch_to_thread_context() can check
+     its from_context argument. */
+  current_context = context;
+  (*next_context)(context, data);
+
+  /* Should not get here. */
+  assert(FALSE);
+  abort();
+}
+
+/* Hands control from from_context to to_context.  The target's _ready
+   event is signaled, then the calling thread blocks on its own _ready
+   event.  When it is signaled again it either exits (if the context
+   was flagged as terminated) or long-jumps to the point recorded by
+   save_thread_context(), so this function never returns normally.
+
+   NOTE(review): the longjmp() target is a save_thread_context() frame
+   further up this thread's stack; this presumably relies on this
+   function being reached from within that call's next_context() --
+   confirm against the callers. */
+void
+switch_to_thread_context(struct ThreadContext *from_context,
+                         struct ThreadContext *to_context) {
+  /* Pause the current thread, and switch to the indicated context.
+     This function should not return. */
+  assert(from_context == current_context);
+
+  /* Wake up the target thread. */
+  SetEvent(to_context->_ready);
+
+  /* And now put the from thread to sleep until it is again awoken. */
+  WaitForSingleObject(from_context->_ready, INFINITE);
+
+  if (from_context->_terminated) {
+    /* We've been rudely terminated.  Exit gracefully. */
+    ExitThread(1);
+  }
+  
+  /* Now we have been signaled again, and we're ready to resume the
+     thread. */
+  longjmp(from_context->_jmp_context, 1);
+
+  /* Should not get here. */
+  assert(FALSE);
+  abort();
+}
+
+/* Allocates a zero-filled ThreadContext and creates its _ready event
+   (auto-reset, initially unsignaled).  No thread is created here;
+   _thread stays NULL until init_thread_context() is called.  The
+   result must be released with free_thread_context().
+
+   Failure to allocate the context or to create the event is fatal
+   (message on stderr, then abort()): without the event the context
+   could never be woken, so the return value is never NULL. */
+struct ThreadContext *
+alloc_thread_context() {
+  /* calloc() gives zeroed storage in one step and, unlike the previous
+     malloc()+memset() pair, cannot hand a NULL pointer to memset(). */
+  struct ThreadContext *context =
+    (struct ThreadContext *)calloc(1, sizeof(struct ThreadContext));
+  if (context == NULL) {
+    fprintf(stderr, "out of memory in alloc_thread_context!\n");
+    abort();
+  }
+
+  context->_ready = CreateEvent(NULL, FALSE, FALSE, NULL);
+  if (context->_ready == NULL) {
+    fprintf(stderr, "CreateEvent failed in alloc_thread_context! (error %lu)\n",
+            (unsigned long)GetLastError());
+    abort();
+  }
+
+  return context;
+}
+
+/* Releases a context obtained from alloc_thread_context().
+
+   If the context owns an OS thread, the thread is flagged as
+   terminated, woken through its _ready event and joined before its
+   handle is closed.  A context that never had a thread created
+   (_thread is NULL, as left by alloc_thread_context()) skips that
+   step; previously the join was attempted on the NULL handle before
+   the NULL check.  A NULL context is ignored.
+
+   NOTE(review): calling this for the context of the currently running
+   thread would presumably wait on itself -- confirm callers never do
+   so. */
+void
+free_thread_context(struct ThreadContext *context) {
+  if (context == NULL) {
+    return;
+  }
+
+  if (context->_thread != NULL) {
+    /* Make sure the thread wakes and exits gracefully. */
+    context->_terminated = TRUE;
+    SetEvent(context->_ready);
+    WaitForSingleObject(context->_thread, INFINITE);
+    CloseHandle(context->_thread);
+  }
+
+  if (context->_ready != NULL) {
+    CloseHandle(context->_ready);
+  }
+
+  free(context);
+}
--- a/panda/src/pipeline/threadSimpleImpl.I
+++ b/panda/src/pipeline/threadSimpleImpl.I
@ -23,6 +23,24 @@ get_current_thread() {
  return ThreadSimpleManager::get_global_ptr()->get_current_thread()->_parent_obj;
 }

+////////////////////////////////////////////////////////////////////
+//     Function: ThreadSimpleImpl::is_same_system_thread
+//       Access: Public
+//  Description: Compares the calling OS-level thread against the
+//               system thread ID stored in this object.  Returns
+//               true when they match, and also when neither POSIX
+//               threads nor WIN32 is available to make the
+//               comparison; returns false otherwise.
+////////////////////////////////////////////////////////////////////
+INLINE bool ThreadSimpleImpl::
+is_same_system_thread() const {
+#if defined(HAVE_POSIX_THREADS)
+  return pthread_equal(_posix_system_thread_id, pthread_self()) != 0;
+#elif defined(WIN32)
+  const DWORD calling_thread_id = GetCurrentThreadId();
+  return (calling_thread_id == _win32_system_thread_id);
+#else
+  // No way to identify the OS thread on this platform.
+  return true;
+#endif
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: ThreadSimpleImpl::bind_thread
 //       Access: Public, Static
--- a/panda/src/pipeline/threadSimpleImpl.cxx
+++ b/panda/src/pipeline/threadSimpleImpl.cxx
@ -51,6 +51,13 @@ ThreadSimpleImpl(Thread *parent_obj) :

  // Save this pointer for convenience.
  _manager = ThreadSimpleManager::get_global_ptr();
+
+#ifdef HAVE_POSIX_THREADS
+  _posix_system_thread_id = -1;
+#endif
+#ifdef WIN32
+  _win32_system_thread_id = 0;
+#endif
 }

 ////////////////////////////////////////////////////////////////////
@ -87,6 +94,13 @@ setup_main_thread() {
  _priority = TP_normal;
  _priority_weight = _manager->_simple_thread_normal_weight;

+#ifdef HAVE_POSIX_THREADS
+  _posix_system_thread_id = pthread_self();
+#endif
+#ifdef WIN32
+  _win32_system_thread_id = GetCurrentThreadId();
+#endif
+
  _manager->set_current_thread(this);
 }

@ -105,7 +119,9 @@ start(ThreadPriority priority, bool joinable) {

  nassertr(_stack == NULL, false);
  _stack_size = memory_hook->round_up_to_page_size((size_t)thread_stack_size);
-  _stack = (unsigned char *)memory_hook->mmap_alloc(_stack_size, true);
+  if (needs_stack_prealloc) {
+    _stack = (unsigned char *)memory_hook->mmap_alloc(_stack_size, true);
+  }

  _joinable = joinable;
  _status = TS_running;
@ -254,6 +270,13 @@ begin_thread() {
  PyThreadState_Swap(_python_state);
 #endif  // HAVE_PYTHON

+#ifdef HAVE_POSIX_THREADS
+  _posix_system_thread_id = pthread_self();
+#endif
+#ifdef WIN32
+  _win32_system_thread_id = GetCurrentThreadId();
+#endif
+
  // Here we are executing within the thread.  Run the thread_main
  // function defined for this thread.
  _parent_obj->thread_main();
--- a/panda/src/pipeline/threadSimpleImpl.h
+++ b/panda/src/pipeline/threadSimpleImpl.h
@ -73,6 +73,8 @@ public:
  static void prepare_for_exit();

  INLINE static Thread *get_current_thread();
+  INLINE bool is_same_system_thread() const;
+
  INLINE static void bind_thread(Thread *thread);
  INLINE static bool is_threading_supported();
  INLINE static bool is_true_threads();
@ -142,6 +144,20 @@ private:
  ThreadSimpleManager *_manager;
  static ThreadSimpleImpl *volatile _st_this;

+  // We may not mix-and-match OS threads with Panda's SIMPLE_THREADS.
+  // If we ever get a Panda context switch request from a different OS
+  // thread than the thread we think we should be in, that's a serious
+  // error that may cause major consequences.  For this reason, we
+  // store the OS thread's ID here once the thread starts running (or
+  // is set up as the main thread), and insist that it never changes
+  // afterwards during the lifetime of the thread.
+#ifdef HAVE_POSIX_THREADS
+  pthread_t _posix_system_thread_id;
+#endif
+#ifdef WIN32
+  DWORD _win32_system_thread_id;
+#endif
+
  friend class ThreadSimpleManager;
 };

--- a/panda/src/pipeline/threadSimpleManager.I
+++ b/panda/src/pipeline/threadSimpleManager.I
@ -27,19 +27,13 @@ get_current_thread() {
 //     Function: ThreadSimpleManager::is_same_system_thread
 //       Access: Public
 //  Description: Returns true if we are still running within the same
-//               OS-level thread that created the ThreadSimpleManager,
-//               or false if this appears to be running in a different
+//               OS-level thread we think we should be running in, or
+//               false if this appears to be running in a different
 //               thread.
 ////////////////////////////////////////////////////////////////////
 INLINE bool ThreadSimpleManager::
 is_same_system_thread() const {
-#ifdef HAVE_POSIX_THREADS
-  return pthread_equal(_posix_system_thread_id, pthread_self());
-#endif
-#ifdef WIN32
-  return (_win32_system_thread_id == GetCurrentThreadId());
-#endif
-  return true;
+  return _current_thread->is_same_system_thread();  // NOTE(review): assumes _current_thread is non-NULL -- confirm
 }

 ////////////////////////////////////////////////////////////////////
--- a/panda/src/pipeline/threadSimpleManager.cxx
+++ b/panda/src/pipeline/threadSimpleManager.cxx
@ -78,13 +78,6 @@ ThreadSimpleManager() :
  _clock = TrueClock::get_global_ptr();
  _waiting_for_exit = NULL;

-#ifdef HAVE_POSIX_THREADS
-  _posix_system_thread_id = pthread_self();
-#endif
-#ifdef WIN32
-  _win32_system_thread_id = GetCurrentThreadId();
-#endif
-
  // Install these global pointers so very low-level code (code
  // defined before the pipeline directory) can yield when necessary.
  global_thread_yield = &Thread::force_yield;
@ -542,9 +535,9 @@ init_pointers() {
 //               of next_context().
 ////////////////////////////////////////////////////////////////////
 void ThreadSimpleManager::
-st_choose_next_context(void *data) {
+st_choose_next_context(struct ThreadContext *from_context, void *data) {
   ThreadSimpleManager *self = (ThreadSimpleManager *)data;
-  self->choose_next_context();
+  self->choose_next_context(from_context);  // data is the manager instance; hand from_context on unchanged
 }

 ////////////////////////////////////////////////////////////////////
@ -554,7 +547,7 @@ st_choose_next_context(void *data) {
 //               of next_context().
 ////////////////////////////////////////////////////////////////////
 void ThreadSimpleManager::
-choose_next_context() {
+choose_next_context(struct ThreadContext *from_context) {
  double now = get_current_time();

  do_timeslice_accounting(_current_thread, now);
@ -696,7 +689,7 @@ choose_next_context() {
      << " blocked, " << _sleeping.size() << " sleeping)\n";
  }

-  switch_to_thread_context(_current_thread->_context);
+  switch_to_thread_context(from_context, _current_thread->_context);

  // Shouldn't get here.
  nassertv(false);
--- a/panda/src/pipeline/threadSimpleManager.h
+++ b/panda/src/pipeline/threadSimpleManager.h
@ -38,6 +38,7 @@
 class Thread;
 class ThreadSimpleImpl;
 class BlockerSimple;
+struct ThreadContext;

 ////////////////////////////////////////////////////////////////////
 //       Class : ThreadSimpleManager
@ -87,8 +88,8 @@ private:
  typedef pdeque<ThreadSimpleImpl *> FifoThreads;
  typedef pvector<ThreadSimpleImpl *> Sleeping;

-  static void st_choose_next_context(void *data);
-  void choose_next_context();
+  static void st_choose_next_context(struct ThreadContext *from_context, void *data);
+  void choose_next_context(struct ThreadContext *from_context);
  void do_timeslice_accounting(ThreadSimpleImpl *thread, double now);
  void wake_sleepers(Sleeping &sleepers, double now);
  void wake_all_sleepers(Sleeping &sleepers);
@ -159,19 +160,6 @@ private:
  TickRecords _tick_records;
  unsigned int _total_ticks;

-  // We may not mix-and-match OS threads with Panda's SIMPLE_THREADS.
-  // If we ever get a Panda context switch request from a different OS
-  // thread than the original thread, that's a serious error that may
-  // cause major consequences.  For this reason, we store the OS
-  // thread's current thread ID here when the manager is constructed,
-  // and insist that it never changes.
-#ifdef HAVE_POSIX_THREADS
-  pthread_t _posix_system_thread_id;
-#endif
-#ifdef WIN32
-  DWORD _win32_system_thread_id;
-#endif
-
  static bool _pointers_initialized;
  static ThreadSimpleManager *_global_ptr;
 };