diff --git a/dtool/src/dtoolbase/deletedChain.h b/dtool/src/dtoolbase/deletedChain.h index 17a723a89b..a25be5d429 100644 --- a/dtool/src/dtoolbase/deletedChain.h +++ b/dtool/src/dtoolbase/deletedChain.h @@ -77,7 +77,7 @@ public: // Place this macro within a class definition to define appropriate operator // new and delete methods that take advantage of DeletedChain. #define ALLOC_DELETED_CHAIN(Type) \ - inline void *operator new(size_t size) { \ + inline void *operator new(size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT) { \ return (void *)StaticDeletedChain< Type >::allocate(size, get_type_handle(Type)); \ } \ inline void *operator new(size_t size, void *ptr) { \ @@ -96,7 +96,7 @@ public: // Use this variant of the above macro in cases in which the compiler fails to // unify the static template pointers properly, to prevent leaks. #define ALLOC_DELETED_CHAIN_DECL(Type) \ - inline void *operator new(size_t size) { \ + inline void *operator new(size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT) { \ return (void *)_deleted_chain.allocate(size, get_type_handle(Type)); \ } \ inline void *operator new(size_t size, void *ptr) { \ diff --git a/dtool/src/dtoolbase/dtoolbase.h b/dtool/src/dtoolbase/dtoolbase.h index 8dd2ec3cf6..965069614e 100644 --- a/dtool/src/dtoolbase/dtoolbase.h +++ b/dtool/src/dtoolbase/dtoolbase.h @@ -76,6 +76,10 @@ #define __has_builtin(x) 0 #endif +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif + // Use NODEFAULT to optimize a switch() stmt to tell MSVC to automatically go // to the final untested case after it has failed all the other cases (i.e. 
// 'assume at least one of the cases is always true') @@ -96,6 +100,12 @@ #define ASSUME_ALIGNED(x, y) (x) #endif +#if __has_attribute(assume_aligned) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9) +#define RETURNS_ALIGNED(x) __attribute__((assume_aligned(x))) +#else +#define RETURNS_ALIGNED(x) +#endif + /* include win32 defns for everything up to WinServer2003, and assume I'm smart enough to use GetProcAddress for backward compat on @@ -400,6 +410,35 @@ typedef struct _object PyObject; #endif +#ifdef LINMATH_ALIGN +/* We require 16-byte alignment of certain structures, to support SSE2. We + don't strictly have to align everything, but it's just easier to do so. */ +#if defined(HAVE_EIGEN) && defined(__AVX__) && defined(STDFLOAT_DOUBLE) +/* Eigen uses AVX instructions, but let's only enable this when compiling with + double precision, so that we can keep our ABI a bit more stable. */ +#define MEMORY_HOOK_ALIGNMENT 32 +#else +#define MEMORY_HOOK_ALIGNMENT 16 +#endif +/* Otherwise, align to two words. This seems to be pretty standard to the + point where some code may rely on this being the case. */ +#elif defined(IS_OSX) || NATIVE_WORDSIZE >= 64 +#define MEMORY_HOOK_ALIGNMENT 16 +#else +#define MEMORY_HOOK_ALIGNMENT 8 +#endif + +#ifdef HAVE_EIGEN +/* Make sure that Eigen doesn't assume alignment guarantees we don't offer. */ +#define EIGEN_MAX_ALIGN_BYTES MEMORY_HOOK_ALIGNMENT +#ifndef EIGEN_MPL2_ONLY +#define EIGEN_MPL2_ONLY 1 +#endif +#if !defined(_DEBUG) && !defined(EIGEN_NO_DEBUG) +#define EIGEN_NO_DEBUG 1 +#endif +#endif + /* Determine our memory-allocation requirements. */ #if defined(USE_MEMORY_PTMALLOC2) || defined(USE_MEMORY_DLMALLOC) || defined(DO_MEMORY_USAGE) || defined(MEMORY_HOOK_DO_ALIGN) /* In this case we have some custom memory management requirements. 
*/ diff --git a/dtool/src/dtoolbase/memoryBase.h b/dtool/src/dtoolbase/memoryBase.h index 2f361f1bb1..6d755e695b 100644 --- a/dtool/src/dtoolbase/memoryBase.h +++ b/dtool/src/dtoolbase/memoryBase.h @@ -26,7 +26,7 @@ #ifndef USE_MEMORY_NOWRAPPERS #define ALLOC_MEMORY_BASE \ - inline void *operator new(size_t size) { \ + inline void *operator new(size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT) { \ return PANDA_MALLOC_SINGLE(size); \ } \ inline void *operator new(size_t size, void *ptr) { \ @@ -38,7 +38,7 @@ } \ inline void operator delete(void *, void *) { \ } \ - inline void *operator new[](size_t size) { \ + inline void *operator new[](size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT) { \ return PANDA_MALLOC_ARRAY(size); \ } \ inline void *operator new[](size_t size, void *ptr) { \ diff --git a/dtool/src/dtoolbase/memoryHook.I b/dtool/src/dtoolbase/memoryHook.I index c89fe1908b..bd5a55af79 100644 --- a/dtool/src/dtoolbase/memoryHook.I +++ b/dtool/src/dtoolbase/memoryHook.I @@ -63,14 +63,24 @@ round_up_to_page_size(size_t size) const { /** * Given a pointer that was returned by a MemoryHook allocation, returns the - * number of bytes that were allocated for it. Returns 0 if not compiling - * with DO_MEMORY_USAGE. + * number of bytes that were allocated for it. This may be slightly larger + * than the number of bytes requested. + * The behavior of this function is undefined if the given pointer was not + * returned by the MemoryHook allocator or was already freed. + * May return 0 if not compiling with DO_MEMORY_USAGE. + * + * This is only defined publicly so TypeHandle can get at it; it really + * shouldn't be used outside of dtoolbase. */ INLINE size_t MemoryHook:: get_ptr_size(void *ptr) { #if defined(MEMORY_HOOK_DO_ALIGN) uintptr_t *root = (uintptr_t *)ptr; return (size_t)root[-2]; +#elif defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2) + // If we are using dlmalloc, we know how it stores the size. 
+ size_t *root = (size_t *)ptr; + return (root[-1] & ~0x7) - sizeof(size_t); #elif defined(DO_MEMORY_USAGE) size_t *root = (size_t *)((char *)ptr - MEMORY_HOOK_ALIGNMENT); return *root; @@ -78,68 +88,3 @@ get_ptr_size(void *ptr) { return 0; #endif // DO_MEMORY_USAGE } - -/** - * Increments the amount of requested size as necessary to accommodate the - * extra data we might piggyback on each allocated block. - */ -INLINE size_t MemoryHook:: -inflate_size(size_t size) { -#if defined(MEMORY_HOOK_DO_ALIGN) - // If we're aligning, we need to request the header size, plus extra bytes - // to give us wiggle room to adjust the pointer. - return size + sizeof(uintptr_t) * 2 + MEMORY_HOOK_ALIGNMENT - 1; -#elif defined(DO_MEMORY_USAGE) - // If we're not aligning, but we're tracking memory allocations, we just - // need the header size extra (this gives us a place to store the size of - // the allocated block). However, we do need to make sure that any - // alignment guarantee is kept. - return size + MEMORY_HOOK_ALIGNMENT; -#else - // If we're not doing any of that, we can just allocate the precise - // requested amount. - return size; -#endif // DO_MEMORY_USAGE -} - -/** - * Converts an allocated pointer to a pointer returnable to the application. - * Stuffs size in the first n bytes of the allocated space. - */ -INLINE void *MemoryHook:: -alloc_to_ptr(void *alloc, size_t size) { -#if defined(MEMORY_HOOK_DO_ALIGN) - // Add room for two uintptr_t values. - uintptr_t *root = (uintptr_t *)((char *)alloc + sizeof(uintptr_t) * 2); - // Align this to the requested boundary. - root = (uintptr_t *)(((uintptr_t)root + MEMORY_HOOK_ALIGNMENT - 1) & ~(MEMORY_HOOK_ALIGNMENT - 1)); - root[-2] = size; - root[-1] = (uintptr_t)alloc; // Save the pointer we originally allocated. 
- return (void *)root; -#elif defined(DO_MEMORY_USAGE) - size_t *root = (size_t *)alloc; - root[0] = size; - return (void *)((char *)root + MEMORY_HOOK_ALIGNMENT); -#else - return alloc; -#endif // DO_MEMORY_USAGE -} - -/** - * Converts an application pointer back to the original allocated pointer. - * Extracts size from the first n bytes of the allocated space. - */ -INLINE void *MemoryHook:: -ptr_to_alloc(void *ptr, size_t &size) { -#if defined(MEMORY_HOOK_DO_ALIGN) - uintptr_t *root = (uintptr_t *)ptr; - size = root[-2]; - return (void *)root[-1]; // Get the pointer we originally allocated. -#elif defined(DO_MEMORY_USAGE) - size_t *root = (size_t *)((char *)ptr - MEMORY_HOOK_ALIGNMENT); - size = root[0]; - return (void *)root; -#else - return ptr; -#endif // DO_MEMORY_USAGE -} diff --git a/dtool/src/dtoolbase/memoryHook.cxx b/dtool/src/dtoolbase/memoryHook.cxx index 506ff51005..224fd3107b 100644 --- a/dtool/src/dtoolbase/memoryHook.cxx +++ b/dtool/src/dtoolbase/memoryHook.cxx @@ -14,6 +14,7 @@ #include "memoryHook.h" #include "deletedBufferChain.h" #include +#include "typeRegistry.h" #ifdef WIN32 @@ -104,6 +105,83 @@ static_assert((MEMORY_HOOK_ALIGNMENT & (MEMORY_HOOK_ALIGNMENT - 1)) == 0, #endif // USE_MEMORY_* +/** + * Increments the amount of requested size as necessary to accommodate the + * extra data we might piggyback on each allocated block. + */ +INLINE static size_t +inflate_size(size_t size) { +#if defined(MEMORY_HOOK_DO_ALIGN) + // If we're aligning, we need to request the header size, plus extra bytes + // to give us wiggle room to adjust the pointer. + return size + sizeof(uintptr_t) * 2 + MEMORY_HOOK_ALIGNMENT - 1; +#elif defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2) + // If we can access the allocator's bookkeeping to figure out how many + // bytes were allocated, we don't need to add our own information. 
+ return size; +#elif defined(DO_MEMORY_USAGE) + // If we're not aligning, but we're tracking memory allocations, we just + // need the header size extra (this gives us a place to store the size of + // the allocated block). However, we do need to make sure that any + // alignment guarantee is kept. + return size + MEMORY_HOOK_ALIGNMENT; +#else + // If we're not doing any of that, we can just allocate the precise + // requested amount. + return size; +#endif // DO_MEMORY_USAGE +} + +/** + * Converts an allocated pointer to a pointer returnable to the application. + * Stuffs size in the first n bytes of the allocated space. + */ +INLINE static void * +alloc_to_ptr(void *alloc, size_t size) { +#if defined(MEMORY_HOOK_DO_ALIGN) + // Add room for two uintptr_t values. + uintptr_t *root = (uintptr_t *)((char *)alloc + sizeof(uintptr_t) * 2); + // Align this to the requested boundary. + root = (uintptr_t *)(((uintptr_t)root + MEMORY_HOOK_ALIGNMENT - 1) & ~(MEMORY_HOOK_ALIGNMENT - 1)); + root[-2] = size; + root[-1] = (uintptr_t)alloc; // Save the pointer we originally allocated. + return (void *)root; +#elif defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2) + return alloc; +#elif defined(DO_MEMORY_USAGE) + size_t *root = (size_t *)alloc; + root[0] = size; + return (void *)((char *)root + MEMORY_HOOK_ALIGNMENT); +#else + return alloc; +#endif // DO_MEMORY_USAGE +} + +/** + * Converts an application pointer back to the original allocated pointer. + * Extracts size from the first n bytes of the allocated space, but only if + * DO_MEMORY_USAGE is defined. + */ +INLINE static void * +ptr_to_alloc(void *ptr, size_t &size) { +#if defined(MEMORY_HOOK_DO_ALIGN) + uintptr_t *root = (uintptr_t *)ptr; + size = root[-2]; + return (void *)root[-1]; // Get the pointer we originally allocated. 
+#elif defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2) +#ifdef DO_MEMORY_USAGE + size = MemoryHook::get_ptr_size(ptr); +#endif + return ptr; +#elif defined(DO_MEMORY_USAGE) + size_t *root = (size_t *)((char *)ptr - MEMORY_HOOK_ALIGNMENT); + size = root[0]; + return (void *)root; +#else + return ptr; +#endif // DO_MEMORY_USAGE +} + /** * */ @@ -195,6 +273,11 @@ heap_alloc_single(size_t size) { #ifdef DO_MEMORY_USAGE // In the DO_MEMORY_USAGE case, we want to track the total size of allocated // bytes on the heap. +#if defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2) + // dlmalloc may slightly overallocate, however. + size = get_ptr_size(alloc); + inflated_size = size; +#endif AtomicAdjust::add(_total_heap_single_size, (AtomicAdjust::Integer)size); if ((size_t)AtomicAdjust::get(_total_heap_single_size) + (size_t)AtomicAdjust::get(_total_heap_array_size) > @@ -204,8 +287,10 @@ heap_alloc_single(size_t size) { #endif // DO_MEMORY_USAGE void *ptr = alloc_to_ptr(alloc, size); +#ifdef _DEBUG assert(((uintptr_t)ptr % MEMORY_HOOK_ALIGNMENT) == 0); assert(ptr >= alloc && (char *)ptr + size <= (char *)alloc + inflated_size); +#endif return ptr; } @@ -265,6 +350,11 @@ heap_alloc_array(size_t size) { #ifdef DO_MEMORY_USAGE // In the DO_MEMORY_USAGE case, we want to track the total size of allocated // bytes on the heap. +#if defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2) + // dlmalloc may slightly overallocate, however. 
+ size = get_ptr_size(alloc); + inflated_size = size; +#endif AtomicAdjust::add(_total_heap_array_size, (AtomicAdjust::Integer)size); if ((size_t)AtomicAdjust::get(_total_heap_single_size) + (size_t)AtomicAdjust::get(_total_heap_array_size) > @@ -274,8 +364,10 @@ heap_alloc_array(size_t size) { #endif // DO_MEMORY_USAGE void *ptr = alloc_to_ptr(alloc, size); +#ifdef _DEBUG assert(((uintptr_t)ptr % MEMORY_HOOK_ALIGNMENT) == 0); assert(ptr >= alloc && (char *)ptr + size <= (char *)alloc + inflated_size); +#endif return ptr; } @@ -287,11 +379,6 @@ heap_realloc_array(void *ptr, size_t size) { size_t orig_size; void *alloc = ptr_to_alloc(ptr, orig_size); -#ifdef DO_MEMORY_USAGE - assert((AtomicAdjust::Integer)orig_size <= _total_heap_array_size); - AtomicAdjust::add(_total_heap_array_size, (AtomicAdjust::Integer)size-(AtomicAdjust::Integer)orig_size); -#endif // DO_MEMORY_USAGE - size_t inflated_size = inflate_size(size); void *alloc1 = alloc; @@ -318,6 +405,16 @@ heap_realloc_array(void *ptr, size_t size) { #endif } +#ifdef DO_MEMORY_USAGE +#if defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2) + // dlmalloc may slightly overallocate, however. + size = get_ptr_size(alloc1); + inflated_size = size; +#endif + assert((AtomicAdjust::Integer)orig_size <= _total_heap_array_size); + AtomicAdjust::add(_total_heap_array_size, (AtomicAdjust::Integer)size-(AtomicAdjust::Integer)orig_size); +#endif // DO_MEMORY_USAGE + // Align this to the requested boundary. 
#ifdef MEMORY_HOOK_DO_ALIGN // This copies the code from alloc_to_ptr, since we can't write the size and @@ -337,8 +434,11 @@ heap_realloc_array(void *ptr, size_t size) { #else void *ptr1 = alloc_to_ptr(alloc1, size); #endif + +#ifdef _DEBUG assert(ptr1 >= alloc1 && (char *)ptr1 + size <= (char *)alloc1 + inflated_size); assert(((uintptr_t)ptr1 % MEMORY_HOOK_ALIGNMENT) == 0); +#endif return ptr1; } diff --git a/dtool/src/dtoolbase/memoryHook.h b/dtool/src/dtoolbase/memoryHook.h index b8eac26f54..687e348aef 100644 --- a/dtool/src/dtoolbase/memoryHook.h +++ b/dtool/src/dtoolbase/memoryHook.h @@ -20,22 +20,6 @@ #include "mutexImpl.h" #include -#ifdef LINMATH_ALIGN -// We require 16-byte alignment of certain structures, to support SSE2. We -// don't strictly have to align *everything*, but it's just easier to do so. -#ifdef __AVX__ -#define MEMORY_HOOK_ALIGNMENT 32 -#else -#define MEMORY_HOOK_ALIGNMENT 16 -#endif -// Otherwise, align to two words. This seems to be pretty standard to the -// point where some code may rely on this being the case. 
-#elif defined(IS_OSX) || NATIVE_WORDSIZE >= 64 -#define MEMORY_HOOK_ALIGNMENT 16 -#else -#define MEMORY_HOOK_ALIGNMENT 8 -#endif - class DeletedBufferChain; /** @@ -83,11 +67,6 @@ public: INLINE static size_t get_ptr_size(void *ptr); -private: - INLINE static size_t inflate_size(size_t size); - INLINE static void *alloc_to_ptr(void *alloc, size_t size); - INLINE static void *ptr_to_alloc(void *ptr, size_t &size); - #ifdef DO_MEMORY_USAGE protected: TVOLATILE AtomicAdjust::Integer _total_heap_single_size; diff --git a/dtool/src/dtoolbase/pallocator.T b/dtool/src/dtoolbase/pallocator.T index 38bc87135c..e9bfd3f98e 100644 --- a/dtool/src/dtoolbase/pallocator.T +++ b/dtool/src/dtoolbase/pallocator.T @@ -19,7 +19,7 @@ pallocator_single(TypeHandle type_handle) NOEXCEPT : } template -INLINE TYPENAME pallocator_single::pointer pallocator_single:: +INLINE Type *pallocator_single:: allocate(TYPENAME pallocator_single::size_type n, TYPENAME allocator::const_pointer) { TAU_PROFILE("pallocator_single:allocate()", " ", TAU_USER); // This doesn't support allocating arrays. 
@@ -43,34 +43,14 @@ pallocator_array(TypeHandle type_handle) NOEXCEPT : } template -INLINE TYPENAME pallocator_array::pointer pallocator_array:: +INLINE Type *pallocator_array:: allocate(TYPENAME pallocator_array::size_type n, TYPENAME allocator::const_pointer) { - TAU_PROFILE("pallocator_array:allocate()", " ", TAU_USER); -#ifdef DO_MEMORY_USAGE - size_t alloc_size = n * sizeof(Type); - void *ptr = (TYPENAME pallocator_array::pointer)PANDA_MALLOC_ARRAY(alloc_size); -#ifdef _DEBUG - assert(alloc_size == MemoryHook::get_ptr_size(ptr)); -#endif - _type_handle.inc_memory_usage(TypeHandle::MC_array, alloc_size); - return (TYPENAME pallocator_array::pointer)ASSUME_ALIGNED(ptr, MEMORY_HOOK_ALIGNMENT); -#else - return (TYPENAME pallocator_array::pointer)PANDA_MALLOC_ARRAY(n * sizeof(Type)); -#endif // DO_MEMORY_USAGE + return (TYPENAME pallocator_array::pointer) + ASSUME_ALIGNED(_type_handle.allocate_array(n * sizeof(Type)), MEMORY_HOOK_ALIGNMENT); } template INLINE void pallocator_array:: deallocate(TYPENAME pallocator_array::pointer p, TYPENAME pallocator_array::size_type) { - TAU_PROFILE("pallocator_array:deallocate()", " ", TAU_USER); -#ifdef DO_MEMORY_USAGE - // Now we need to recover the total number of bytes. Fortunately, in the - // case of DO_MEMORY_USAGE, MemoryHook already keeps track of this. 
- void *ptr = (void *)p; - size_t alloc_size = MemoryHook::get_ptr_size(ptr); - _type_handle.dec_memory_usage(TypeHandle::MC_array, alloc_size); - PANDA_FREE_ARRAY(ptr); -#else - PANDA_FREE_ARRAY(p); -#endif // DO_MEMORY_USAGE + _type_handle.deallocate_array((void *)p); } diff --git a/dtool/src/dtoolbase/pallocator.h b/dtool/src/dtoolbase/pallocator.h index b735bb7b3e..3c91b6cc7e 100644 --- a/dtool/src/dtoolbase/pallocator.h +++ b/dtool/src/dtoolbase/pallocator.h @@ -59,7 +59,8 @@ public: INLINE pallocator_single(const pallocator_single ©) NOEXCEPT : _type_handle(copy._type_handle) { } - INLINE pointer allocate(size_type n, allocator::const_pointer hint = 0); + INLINE Type *allocate(size_type n, allocator::const_pointer hint = 0) + RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT); INLINE void deallocate(pointer p, size_type n); template struct rebind { @@ -87,7 +88,8 @@ public: INLINE pallocator_array(const pallocator_array ©) NOEXCEPT : _type_handle(copy._type_handle) { } - INLINE pointer allocate(size_type n, allocator::const_pointer hint = 0); + INLINE Type *allocate(size_type n, allocator::const_pointer hint = 0) + RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT); INLINE void deallocate(pointer p, size_type n); template struct rebind { diff --git a/dtool/src/dtoolbase/typeHandle.cxx b/dtool/src/dtoolbase/typeHandle.cxx index ea3a51d6a1..6f4d7bc1d1 100644 --- a/dtool/src/dtoolbase/typeHandle.cxx +++ b/dtool/src/dtoolbase/typeHandle.cxx @@ -18,7 +18,6 @@ // This is initialized to zero by static initialization. TypeHandle TypeHandle::_none; -#ifdef DO_MEMORY_USAGE /** * Returns the total allocated memory used by objects of this type, for the * indicated memory class. 
This is only updated if track-memory-usage is set @@ -26,6 +25,7 @@ TypeHandle TypeHandle::_none; */ size_t TypeHandle:: get_memory_usage(MemoryClass memory_class) const { +#ifdef DO_MEMORY_USAGE assert((int)memory_class >= 0 && (int)memory_class < (int)MC_limit); if ((*this) == TypeHandle::none()) { return 0; @@ -34,16 +34,17 @@ get_memory_usage(MemoryClass memory_class) const { assert(rnode != (TypeRegistryNode *)NULL); return (size_t)AtomicAdjust::get(rnode->_memory_usage[memory_class]); } -} #endif // DO_MEMORY_USAGE + return 0; +} -#ifdef DO_MEMORY_USAGE /** * Adds the indicated amount to the record for the total allocated memory for * objects of this type. */ void TypeHandle:: inc_memory_usage(MemoryClass memory_class, size_t size) { +#ifdef DO_MEMORY_USAGE assert((int)memory_class >= 0 && (int)memory_class < (int)MC_limit); if ((*this) != TypeHandle::none()) { TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL); @@ -56,16 +57,16 @@ inc_memory_usage(MemoryClass memory_class, size_t size) { abort(); } } -} #endif // DO_MEMORY_USAGE +} -#ifdef DO_MEMORY_USAGE /** * Subtracts the indicated amount from the record for the total allocated * memory for objects of this type. */ void TypeHandle:: dec_memory_usage(MemoryClass memory_class, size_t size) { +#ifdef DO_MEMORY_USAGE assert((int)memory_class >= 0 && (int)memory_class < (int)MC_limit); if ((*this) != TypeHandle::none()) { TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL); @@ -75,8 +76,81 @@ dec_memory_usage(MemoryClass memory_class, size_t size) { // rnode->_memory_usage[memory_class] << "\n"; assert(rnode->_memory_usage[memory_class] >= 0); } -} #endif // DO_MEMORY_USAGE +} + +/** + * Allocates memory, adding it to the total amount of memory allocated for + * this type. 
+ */ +void *TypeHandle:: +allocate_array(size_t size) { + TAU_PROFILE("TypeHandle:allocate_array()", " ", TAU_USER); + + void *ptr = PANDA_MALLOC_ARRAY(size); +#ifdef DO_MEMORY_USAGE + if ((*this) != TypeHandle::none()) { + size_t alloc_size = MemoryHook::get_ptr_size(ptr); +#ifdef _DEBUG + assert(size <= alloc_size); +#endif + TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL); + assert(rnode != (TypeRegistryNode *)NULL); + AtomicAdjust::add(rnode->_memory_usage[MC_array], (AtomicAdjust::Integer)alloc_size); + if (rnode->_memory_usage[MC_array] < 0) { + cerr << "Memory usage overflow for type " << *this << ".\n"; + abort(); + } + } +#endif // DO_MEMORY_USAGE + return ptr; +} + +/** + * Reallocates memory, adjusting the total amount of memory allocated for this + * type. + */ +void *TypeHandle:: +reallocate_array(void *old_ptr, size_t size) { + TAU_PROFILE("TypeHandle:reallocate_array()", " ", TAU_USER); + +#ifdef DO_MEMORY_USAGE + size_t old_size = MemoryHook::get_ptr_size(old_ptr); + void *new_ptr = PANDA_REALLOC_ARRAY(old_ptr, size); + + if ((*this) != TypeHandle::none()) { + size_t new_size = MemoryHook::get_ptr_size(new_ptr); + + TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL); + assert(rnode != (TypeRegistryNode *)NULL); + AtomicAdjust::add(rnode->_memory_usage[MC_array], (AtomicAdjust::Integer)new_size - (AtomicAdjust::Integer)old_size); + assert(rnode->_memory_usage[MC_array] >= 0); + } +#else + void *new_ptr = PANDA_REALLOC_ARRAY(old_ptr, size); +#endif + return new_ptr; +} + +/** + * Deallocates memory, subtracting it from the total amount of memory + * allocated for this type. 
+ */ +void TypeHandle:: +deallocate_array(void *ptr) { + TAU_PROFILE("TypeHandle:deallocate_array()", " ", TAU_USER); + +#ifdef DO_MEMORY_USAGE + size_t alloc_size = MemoryHook::get_ptr_size(ptr); + if ((*this) != TypeHandle::none()) { + TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL); + assert(rnode != (TypeRegistryNode *)NULL); + AtomicAdjust::add(rnode->_memory_usage[MC_array], -(AtomicAdjust::Integer)alloc_size); + assert(rnode->_memory_usage[MC_array] >= 0); + } +#endif // DO_MEMORY_USAGE + PANDA_FREE_ARRAY(ptr); +} /** * Return the Index of the BEst fit Classs from a set diff --git a/dtool/src/dtoolbase/typeHandle.h b/dtool/src/dtoolbase/typeHandle.h index fe611c2fd1..f770b1810b 100644 --- a/dtool/src/dtoolbase/typeHandle.h +++ b/dtool/src/dtoolbase/typeHandle.h @@ -18,25 +18,49 @@ #include -// The following illustrates the convention for declaring a type that uses -// TypeHandle. In this example, ThisThingie inherits from TypedObject, which -// automatically supplies some type-differentiation functions at the cost of -// one virtual function, get_type(); however, this inheritance is optional, -// and may be omitted to avoid the virtual function pointer overhead. (If you -// do use TypedObject, be sure to consider whether your destructor should also -// be virtual.) 
- -/* - * class ThatThingie : public SimpleTypedObject { public: static TypeHandle - * get_class_type() { return _type_handle; } static void init_type() { - * register_type(_type_handle, "ThatThingie"); } private: static TypeHandle - * _type_handle; }; class ThisThingie : public ThatThingie, publid TypedObject - * { public: static TypeHandle get_class_type() { return _type_handle; } - * static void init_type() { ThatThingie::init_type(); - * TypedObject::init_type(); register_type(_type_handle, "ThisThingie", - * ThatThingie::get_class_type(), TypedObject::get_class_type()); } virtual - * TypeHandle get_type() const { return get_class_type(); } private: static - * TypeHandle _type_handle; }; +/** + * The following illustrates the convention for declaring a type that uses + * TypeHandle. In this example, ThisThingie inherits from TypedObject, which + * automatically supplies some type-differentiation functions at the cost of + * one virtual function, get_type(); however, this inheritance is optional, + * and may be omitted to avoid the virtual function pointer overhead. (If you + * do use TypedObject, be sure to consider whether your destructor should also + * be virtual.) 
+ * + * @code + * class ThatThingie : public SimpleTypedObject { + * public: + * static TypeHandle get_class_type() { + * return _type_handle; + * } + * static void init_type() { + * register_type(_type_handle, "ThatThingie"); + * } + * + * private: + * static TypeHandle _type_handle; + * }; + * + * class ThisThingie : public ThatThingie, public TypedObject { + * public: + * static TypeHandle get_class_type() { + * return _type_handle; + * } + * static void init_type() { + * ThatThingie::init_type(); + * TypedObject::init_type(); + * register_type(_type_handle, "ThisThingie", + * ThatThingie::get_class_type(), + * TypedObject::get_class_type()); + * } + * virtual TypeHandle get_type() const { + * return get_class_type(); + * } + * + * private: + * static TypeHandle _type_handle; + * }; + * @endcode */ class TypedObject; @@ -97,15 +121,9 @@ PUBLISHED: int get_best_parent_from_Set(const std::set< int > &legal_vals) const; -#ifdef DO_MEMORY_USAGE size_t get_memory_usage(MemoryClass memory_class) const; void inc_memory_usage(MemoryClass memory_class, size_t size); void dec_memory_usage(MemoryClass memory_class, size_t size); -#else - static CONSTEXPR size_t get_memory_usage(MemoryClass) { return 0; } - INLINE void inc_memory_usage(MemoryClass, size_t) { } - INLINE void dec_memory_usage(MemoryClass, size_t) { } -#endif // DO_MEMORY_USAGE INLINE int get_index() const; INLINE void output(ostream &out) const; @@ -118,6 +136,10 @@ PUBLISHED: MAKE_SEQ_PROPERTY(child_classes, get_num_child_classes, get_child_class); public: + void *allocate_array(size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT); + void *reallocate_array(void *ptr, size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT); + void deallocate_array(void *ptr); + INLINE static TypeHandle from_index(int index); private: diff --git a/dtool/src/dtoolutil/pandaSystem.cxx b/dtool/src/dtoolutil/pandaSystem.cxx index cef04eb37b..79e5aa7762 100644 --- a/dtool/src/dtoolutil/pandaSystem.cxx +++ 
b/dtool/src/dtoolutil/pandaSystem.cxx @@ -45,6 +45,11 @@ PandaSystem() : #else set_system_tag("eigen", "vectorize", "0"); #endif +#ifdef __AVX__ + set_system_tag("eigen", "avx", "1"); +#else + set_system_tag("eigen", "avx", "0"); +#endif #endif // HAVE_EIGEN #ifdef USE_MEMORY_DLMALLOC @@ -189,6 +194,14 @@ is_official_version() { #endif } +/** + * Returns the memory alignment that Panda's allocators are using. + */ +int PandaSystem:: +get_memory_alignment() { + return MEMORY_HOOK_ALIGNMENT; +} + /** * Returns the string defined by the distributor of this version of Panda, or * "homebuilt" if this version was built directly from the sources by the end- diff --git a/dtool/src/dtoolutil/pandaSystem.h b/dtool/src/dtoolutil/pandaSystem.h index d88e3b92a3..bed5f2c073 100644 --- a/dtool/src/dtoolutil/pandaSystem.h +++ b/dtool/src/dtoolutil/pandaSystem.h @@ -39,6 +39,8 @@ PUBLISHED: static int get_sequence_version(); static bool is_official_version(); + static int get_memory_alignment(); + static string get_distributor(); static string get_compiler(); static string get_build_date(); diff --git a/makepanda/makepanda.py b/makepanda/makepanda.py index 564ee8e4e9..e159e864d8 100755 --- a/makepanda/makepanda.py +++ b/makepanda/makepanda.py @@ -973,9 +973,7 @@ if GetTarget() == 'android': DefSymbol("ALWAYS", "ANDROID") if not PkgSkip("EIGEN"): - DefSymbol("ALWAYS", "EIGEN_MPL2_ONLY") if GetOptimize() >= 3: - DefSymbol("ALWAYS", "EIGEN_NO_DEBUG") if COMPILER == "MSVC": # Squeeze out a bit more performance on MSVC builds... 
# Only do this if EIGEN_NO_DEBUG is also set, otherwise it diff --git a/panda/src/gobj/geomVertexArrayData.h b/panda/src/gobj/geomVertexArrayData.h index e52876d376..9766a9cbd1 100644 --- a/panda/src/gobj/geomVertexArrayData.h +++ b/panda/src/gobj/geomVertexArrayData.h @@ -261,8 +261,8 @@ public: INLINE Thread *get_current_thread() const; - INLINE const unsigned char *get_read_pointer(bool force) const; - unsigned char *get_write_pointer(); + INLINE const unsigned char *get_read_pointer(bool force) const RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT); + unsigned char *get_write_pointer() RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT); PUBLISHED: INLINE const GeomVertexArrayData *get_object() const; diff --git a/panda/src/gobj/vertexDataBuffer.cxx b/panda/src/gobj/vertexDataBuffer.cxx index 1e3b884893..19f50b3013 100644 --- a/panda/src/gobj/vertexDataBuffer.cxx +++ b/panda/src/gobj/vertexDataBuffer.cxx @@ -27,15 +27,13 @@ operator = (const VertexDataBuffer ©) { if (_resident_data != (unsigned char *)NULL) { nassertv(_reserved_size != 0); - get_class_type().dec_memory_usage(TypeHandle::MC_array, (int)_reserved_size); - PANDA_FREE_ARRAY(_resident_data); + get_class_type().deallocate_array(_resident_data); _resident_data = NULL; } if (copy._resident_data != (unsigned char *)NULL && copy._size != 0) { // We only allocate _size bytes, not the full _reserved_size allocated by // the original copy. 
- get_class_type().inc_memory_usage(TypeHandle::MC_array, (int)copy._size); - _resident_data = (unsigned char *)PANDA_MALLOC_ARRAY(copy._size); + _resident_data = (unsigned char *)get_class_type().allocate_array(copy._size); memcpy(_resident_data, copy._resident_data, copy._size); } _size = copy._size; @@ -55,17 +53,16 @@ swap(VertexDataBuffer &other) { unsigned char *resident_data = _resident_data; size_t size = _size; size_t reserved_size = _reserved_size; - PT(VertexDataBlock) block = _block; + + _block.swap(other._block); _resident_data = other._resident_data; _size = other._size; _reserved_size = other._reserved_size; - _block = other._block; other._resident_data = resident_data; other._size = size; other._reserved_size = reserved_size; - other._block = block; nassertv(_reserved_size >= _size); } @@ -94,13 +91,12 @@ do_clean_realloc(size_t reserved_size) { do_page_in(); } - get_class_type().inc_memory_usage(TypeHandle::MC_array, (int)reserved_size - (int)_reserved_size); if (_reserved_size == 0) { nassertv(_resident_data == (unsigned char *)NULL); - _resident_data = (unsigned char *)PANDA_MALLOC_ARRAY(reserved_size); + _resident_data = (unsigned char *)get_class_type().allocate_array(reserved_size); } else { nassertv(_resident_data != (unsigned char *)NULL); - _resident_data = (unsigned char *)PANDA_REALLOC_ARRAY(_resident_data, reserved_size); + _resident_data = (unsigned char *)get_class_type().reallocate_array(_resident_data, reserved_size); } nassertv(_resident_data != (unsigned char *)NULL); _reserved_size = reserved_size; @@ -129,16 +125,14 @@ do_unclean_realloc(size_t reserved_size) { if (_resident_data != (unsigned char *)NULL) { nassertv(_reserved_size != 0); - get_class_type().dec_memory_usage(TypeHandle::MC_array, (int)_reserved_size); - PANDA_FREE_ARRAY(_resident_data); + get_class_type().deallocate_array(_resident_data); _resident_data = NULL; _reserved_size = 0; } if (reserved_size != 0) { - get_class_type().inc_memory_usage(TypeHandle::MC_array, 
(int)reserved_size); nassertv(_resident_data == (unsigned char *)NULL); - _resident_data = (unsigned char *)PANDA_MALLOC_ARRAY(reserved_size); + _resident_data = (unsigned char *)get_class_type().allocate_array(reserved_size); } _reserved_size = reserved_size; @@ -166,8 +160,7 @@ do_page_out(VertexDataBook &book) { if (_size == 0) { // It's an empty buffer. Just deallocate it; don't bother to create a // block. - get_class_type().dec_memory_usage(TypeHandle::MC_array, (int)_reserved_size); - PANDA_FREE_ARRAY(_resident_data); + get_class_type().deallocate_array(_resident_data); _resident_data = NULL; _reserved_size = 0; @@ -180,8 +173,7 @@ do_page_out(VertexDataBook &book) { nassertv(pointer != (unsigned char *)NULL); memcpy(pointer, _resident_data, _size); - get_class_type().dec_memory_usage(TypeHandle::MC_array, (int)_reserved_size); - PANDA_FREE_ARRAY(_resident_data); + get_class_type().deallocate_array(_resident_data); _resident_data = NULL; _reserved_size = _size; @@ -205,8 +197,7 @@ do_page_in() { nassertv(_block != (VertexDataBlock *)NULL); nassertv(_reserved_size == _size); - get_class_type().inc_memory_usage(TypeHandle::MC_array, (int)_size); - _resident_data = (unsigned char *)PANDA_MALLOC_ARRAY(_size); + _resident_data = (unsigned char *)get_class_type().allocate_array(_size); nassertv(_resident_data != (unsigned char *)NULL); memcpy(_resident_data, _block->get_pointer(true), _size); diff --git a/panda/src/gobj/vertexDataBuffer.h b/panda/src/gobj/vertexDataBuffer.h index 2a18aeda45..f698669f52 100644 --- a/panda/src/gobj/vertexDataBuffer.h +++ b/panda/src/gobj/vertexDataBuffer.h @@ -57,8 +57,8 @@ public: void operator = (const VertexDataBuffer ©); INLINE ~VertexDataBuffer(); - INLINE const unsigned char *get_read_pointer(bool force) const; - INLINE unsigned char *get_write_pointer(); + INLINE const unsigned char *get_read_pointer(bool force) const RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT); + INLINE unsigned char *get_write_pointer() 
RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT); INLINE size_t get_size() const; INLINE size_t get_reserved_size() const; diff --git a/panda/src/linmath/lsimpleMatrix.h b/panda/src/linmath/lsimpleMatrix.h index 5669e18be3..3228caf5bb 100644 --- a/panda/src/linmath/lsimpleMatrix.h +++ b/panda/src/linmath/lsimpleMatrix.h @@ -58,7 +58,7 @@ private: #endif // HAVE_EIGEN // This is as good a place as any to define this alignment macro. -#if defined(LINMATH_ALIGN) && defined(HAVE_EIGEN) && defined(__AVX__) +#if defined(LINMATH_ALIGN) && defined(HAVE_EIGEN) && defined(__AVX__) && defined(STDFLOAT_DOUBLE) #define ALIGN_LINMATH ALIGN_32BYTE #elif defined(LINMATH_ALIGN) #define ALIGN_LINMATH ALIGN_16BYTE diff --git a/panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx b/panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx index 6818ea65a0..db600e14de 100644 --- a/panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx +++ b/panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx @@ -2612,12 +2612,10 @@ setup_gltex(GLTexture *gltex, int x_size, int y_size, int num_levels) { if (gltex->total_bytecount != total_bytecount) { if (gltex->allocated_buffer != NULL) { - PANDA_FREE_ARRAY(gltex->allocated_buffer); - TinyTextureContext::get_class_type().dec_memory_usage(TypeHandle::MC_array, gltex->total_bytecount); + TinyTextureContext::get_class_type().deallocate_array(gltex->allocated_buffer); } - gltex->allocated_buffer = PANDA_MALLOC_ARRAY(total_bytecount); + gltex->allocated_buffer = TinyTextureContext::get_class_type().allocate_array(total_bytecount); gltex->total_bytecount = total_bytecount; - TinyTextureContext::get_class_type().inc_memory_usage(TypeHandle::MC_array, total_bytecount); } char *next_buffer = (char *)gltex->allocated_buffer; diff --git a/panda/src/tinydisplay/tinyTextureContext.cxx b/panda/src/tinydisplay/tinyTextureContext.cxx index 87427ac365..5a3ae3414d 100644 --- a/panda/src/tinydisplay/tinyTextureContext.cxx +++ b/panda/src/tinydisplay/tinyTextureContext.cxx @@ -24,8 +24,7 @@ 
TinyTextureContext:: GLTexture *gltex = &_gltex; if (gltex->allocated_buffer != NULL) { nassertv(gltex->num_levels != 0); - TinyTextureContext::get_class_type().dec_memory_usage(TypeHandle::MC_array, gltex->total_bytecount); - PANDA_FREE_ARRAY(gltex->allocated_buffer); + get_class_type().deallocate_array(gltex->allocated_buffer); gltex->allocated_buffer = NULL; gltex->total_bytecount = 0; gltex->num_levels = 0; @@ -51,8 +50,7 @@ evict_lru() { GLTexture *gltex = &_gltex; if (gltex->allocated_buffer != NULL) { nassertv(gltex->num_levels != 0); - TinyTextureContext::get_class_type().dec_memory_usage(TypeHandle::MC_array, gltex->total_bytecount); - PANDA_FREE_ARRAY(gltex->allocated_buffer); + get_class_type().deallocate_array(gltex->allocated_buffer); gltex->allocated_buffer = NULL; gltex->total_bytecount = 0; gltex->num_levels = 0;