found a reason to use dlmalloc if 16-byte alignment is required

2025-09-30 08:44:19 -04:00 · 2011-12-22 22:44:31 +00:00 · 2011-12-22 22:44:31 +00:00 · a06fd5521f
commit a06fd5521f
parent 3138076bc4
6 changed files with 36 additions and 8 deletions
--- a/dtool/Config.pp
+++ b/dtool/Config.pp
@ -388,6 +388,13 @@
 #define USE_MEMORY_DLMALLOC
 #define USE_MEMORY_PTMALLOC2

+// Set this true if you prefer to use the system malloc library even
+// if 16-byte alignment must be performed on top of it, which can waste
+// up to 30% of memory.  If you do not set this, and 16-byte alignment
+// is required but not provided by the system malloc library, then an
+// alternative malloc system (above) will be used instead.
+#define MEMORY_HOOK_DO_ALIGN
+
 // Panda contains some experimental code to compile for IPhone.  This
 // requires the Apple IPhone SDK, which is currently only available
 // for OS X platforms.  Set this to either "iPhoneSimulator" or
--- a/dtool/src/dtoolbase/dtoolbase.h
+++ b/dtool/src/dtoolbase/dtoolbase.h
@ -360,9 +360,15 @@
 // alignment.
 #undef MEMORY_HOOK_DO_ALIGN

+#elif defined(MEMORY_HOOK_DO_ALIGN)
+// We need memory alignment, and we're willing to provide it ourselves.
+
 #else
-// Whoops, we need memory alignment, and we have to provide it ourselves.
-#define MEMORY_HOOK_DO_ALIGN 1
+// We need memory alignment, but the build has not specified whether
+// it should be provided on top of the existing malloc library or by
+// some other means.  Rely on dlmalloc to provide it, since it seems
+// to be the most memory-efficient option.
+#define USE_MEMORY_DLMALLOC 1

 #endif

--- a/dtool/src/dtoolutil/pandaSystem.cxx
+++ b/dtool/src/dtoolutil/pandaSystem.cxx
@ -44,12 +44,20 @@ PandaSystem() :

 #ifdef HAVE_EIGEN
  add_system("eigen");
-#ifdef LINMATH_VECTORIZE
+#ifdef LINMATH_ALIGN
  set_system_tag("eigen", "vectorize", "1");
 #else
  set_system_tag("eigen", "vectorize", "0");
 #endif
 #endif  // HAVE_EIGEN
+
+#ifdef USE_MEMORY_DLMALLOC
+  set_system_tag("system", "malloc", "dlmalloc");
+#elif defined(USE_MEMORY_PTMALLOC2)
+  set_system_tag("system", "malloc", "ptmalloc2");
+#else
+  set_system_tag("system", "malloc", "malloc");
+#endif
 }

 ////////////////////////////////////////////////////////////////////
--- a/makepanda/confauto.in
+++ b/makepanda/confauto.in
@ -104,7 +104,9 @@ cull-bin gui-popup 60 unsorted
 # The following two lines are a fix for flaky hardware clocks.
 # Nowadays, lock-to-one-cpu is probably more trouble than it's worth.
 #lock-to-one-cpu #t
-paranoid-clock 1
+
+# paranoid-clock is likewise probably more trouble than it's worth.
+#paranoid-clock 1

 # This default only comes into play if you try to load a model
 # and don't specify an extension.
--- a/makepanda/makepanda.py
+++ b/makepanda/makepanda.py
@ -814,6 +814,8 @@ def CompileCxx(obj,src,opts):
                cmd += "/DPANDA_WIN7 /DWINVER=0x601 "

        cmd += "/Fo" + obj + " /nologo /c"
+        if (not is_64):
+            cmd += " /arch:SSE2"
        for x in ipath: cmd += " /I" + x
        for (opt,dir) in INCDIRECTORIES:
            if (opt=="ALWAYS") or (opt in opts): cmd += " /I" + BracketNameWithQuotes(dir)
@ -823,10 +825,10 @@ def CompileCxx(obj,src,opts):
        if (opts.count('MSFORSCOPE')): cmd += ' /Zc:forScope-'
        optlevel = GetOptimizeOption(opts)
        if (optlevel==1): cmd += " /MDd /Zi /RTCs /GS"
-        if (optlevel==2): cmd += " /MDd /Zi /arch:SSE2"
-        if (optlevel==3): cmd += " /MD /Zi /O2 /Ob2 /Oi /Ot /arch:SSE2 /fp:fast /DFORCE_INLINING"
+        if (optlevel==2): cmd += " /MDd /Zi"
+        if (optlevel==3): cmd += " /MD /Zi /O2 /Ob2 /Oi /Ot /fp:fast /DFORCE_INLINING"
        if (optlevel==4): 
-           cmd += " /MD /Zi /Ox /Ob2 /Oi /Ot /arch:SSE2 /fp:fast /DFORCE_INLINING /DNDEBUG /GL"
+           cmd += " /MD /Zi /Ox /Ob2 /Oi /Ot /fp:fast /DFORCE_INLINING /DNDEBUG /GL"
           cmd += " /Oy"                # jcr add
           cmd += " /Zp16"              # jcr add # Is this necessary with /Ox?

@ -1533,6 +1535,9 @@ DTOOL_CONFIG=[
    ("GLOBAL_OPERATOR_NEW_EXCEPTIONS", 'UNDEF',                  '1'),
    ("HAVE_EIGEN",                     'UNDEF',                  'UNDEF'),
    ("LINMATH_ALIGN",                  '1',                      '1'),
+    ("MEMORY_HOOK_DO_ALIGN",           'UNDEF',                  'UNDEF'),
+    ("USE_MEMORY_DLMALLOC",            'UNDEF',                  'UNDEF'),
+    ("USE_MEMORY_PTMALLOC2",           'UNDEF',                  'UNDEF'),
    ("HAVE_ZLIB",                      'UNDEF',                  'UNDEF'),
    ("HAVE_PNG",                       'UNDEF',                  'UNDEF'),
    ("HAVE_JPEG",                      'UNDEF',                  'UNDEF'),
--- a/panda/src/putil/clockObject.cxx
+++ b/panda/src/putil/clockObject.cxx
@ -31,7 +31,7 @@ TypeHandle ClockObject::_type_handle;
 //  Description:
 ////////////////////////////////////////////////////////////////////
 ClockObject::
-ClockObject() {
+ClockObject() : _ticks(get_class_type()) {
  _true_clock = TrueClock::get_global_ptr();

  // Each clock except for the application global clock is created in