From f5827963a3ac4c84187fbef6f671bc0f12787a97 Mon Sep 17 00:00:00 2001 From: rdb Date: Wed, 26 Jul 2023 19:17:07 +0200 Subject: [PATCH] pgraph: Refactor RenderAttrib return_new code a little bit - Allows the compiler to devirtualize the get_hash_impl call, in theory - Reduces the overhead of checking the configuration variables every time in return_new - Avoids recalculating the hash on calling get_unique --- panda/src/pgraph/config_pgraph.cxx | 4 +- panda/src/pgraph/config_pgraph.h | 4 +- panda/src/pgraph/renderAttrib.I | 47 +++++++++++++++++++++++- panda/src/pgraph/renderAttrib.cxx | 59 ++++++++---------------------- panda/src/pgraph/renderAttrib.h | 10 +++-- panda/src/pgraph/renderState.cxx | 6 +-- 6 files changed, 75 insertions(+), 55 deletions(-) diff --git a/panda/src/pgraph/config_pgraph.cxx b/panda/src/pgraph/config_pgraph.cxx index 7b175f15d0..c736829d39 100644 --- a/panda/src/pgraph/config_pgraph.cxx +++ b/panda/src/pgraph/config_pgraph.cxx @@ -214,7 +214,7 @@ ConfigVariableBool transform_cache "transforms, but imposes some overhead for maintaining the " "cache itself.")); -ConfigVariableBool state_cache +ALIGN_16BYTE ConfigVariableBool state_cache ("state-cache", true, PRC_DESC("Set this true to enable the cache of RenderState objects, " "similar to the TransformState cache controlled via " @@ -236,7 +236,7 @@ ConfigVariableBool uniquify_states "including the need to check for a composition cycle in " "the cache. It is highly recommended to keep this on.")); -ConfigVariableBool uniquify_attribs +ALIGN_16BYTE ConfigVariableBool uniquify_attribs ("uniquify-attribs", true, PRC_DESC("Set this true to ensure that equivalent RenderAttribs " "are pointerwise equal. This may improve caching performance, " diff --git a/panda/src/pgraph/config_pgraph.h b/panda/src/pgraph/config_pgraph.h index 0cba3cf971..97ff9682c2 100644 --- a/panda/src/pgraph/config_pgraph.h +++ b/panda/src/pgraph/config_pgraph.h @@ -46,10 +46,10 @@ extern ConfigVariableBool auto_break_cycles; extern EXPCL_PANDA_PGRAPH ConfigVariableBool garbage_collect_states; extern ConfigVariableDouble garbage_collect_states_rate; extern ConfigVariableBool transform_cache; -extern ConfigVariableBool state_cache; +extern ALIGN_16BYTE EXPCL_PANDA_PGRAPH ConfigVariableBool state_cache; extern ConfigVariableBool uniquify_transforms; extern EXPCL_PANDA_PGRAPH ConfigVariableBool uniquify_states; -extern ConfigVariableBool uniquify_attribs; +extern ALIGN_16BYTE EXPCL_PANDA_PGRAPH ConfigVariableBool uniquify_attribs; extern ConfigVariableBool retransform_sprites; extern ConfigVariableBool depth_offset_decals; extern ConfigVariableInt max_collect_vertices; diff --git a/panda/src/pgraph/renderAttrib.I b/panda/src/pgraph/renderAttrib.I index 5df0d0774e..e92d16bd3b 100644 --- a/panda/src/pgraph/renderAttrib.I +++ b/panda/src/pgraph/renderAttrib.I @@ -79,7 +79,11 @@ get_hash() const { */ INLINE CPT(RenderAttrib) RenderAttrib:: get_unique() const { - return return_unique((RenderAttrib *)this); + if (state_cache) { + return do_uniquify(this); + } else { + return this; + } } /** @@ -93,6 +97,47 @@ calc_hash() { _hash = int_hash::add_hash(hash, get_type().get_index()); } +/** + * This function is used by derived RenderAttrib types to share a common + * RenderAttrib pointer for all equivalent RenderAttrib objects. + * + * This is different from return_unique() in that it does not actually + * guarantee a unique pointer, unless uniquify-attribs is set. + */ +INLINE CPT(RenderAttrib) RenderAttrib:: +return_new(RenderAttrib *attrib) { + nassertr(attrib != nullptr, attrib); + attrib->calc_hash(); + + if (_uniquify_attribs) { + return do_uniquify(attrib); + } else { + return attrib; + } +} + +/** + * This function is used by derived RenderAttrib types to share a common + * RenderAttrib pointer for all equivalent RenderAttrib objects. + * + * The make() function of the derived type should create a new RenderAttrib + * and pass it through return_new(), which will either save the pointer and + * return it unchanged (if this is the first similar such object) or delete it + * and return an equivalent pointer (if there was already a similar object + * saved). + */ +INLINE CPT(RenderAttrib) RenderAttrib:: +return_unique(RenderAttrib *attrib) { + nassertr(attrib != nullptr, attrib); + attrib->calc_hash(); + + if (state_cache) { + return do_uniquify(attrib); + } else { + return attrib; + } +} + /** * Adds the indicated TypeHandle to the registry, if it is not there already, * and returns a unique slot number. See RenderAttribRegistry. diff --git a/panda/src/pgraph/renderAttrib.cxx b/panda/src/pgraph/renderAttrib.cxx index 62a2d0c38b..0992dcb79c 100644 --- a/panda/src/pgraph/renderAttrib.cxx +++ b/panda/src/pgraph/renderAttrib.cxx @@ -14,7 +14,6 @@ #include "renderAttrib.h" #include "bamReader.h" #include "indent.h" -#include "config_pgraph.h" #include "lightReMutexHolder.h" #include "pStatTimer.h" @@ -22,6 +21,7 @@ using std::ostream; LightReMutex *RenderAttrib::_attribs_lock = nullptr; RenderAttrib::Attribs RenderAttrib::_attribs; +bool RenderAttrib::_uniquify_attribs = false; TypeHandle RenderAttrib::_type_handle; size_t RenderAttrib::_garbage_index = 0; @@ -180,6 +180,10 @@ list_attribs(ostream &out) { */ int RenderAttrib:: garbage_collect() { + // This gets called periodically, so use this opportunity to reload the value + // from the Config.prc file. + _uniquify_attribs = uniquify_attribs && state_cache; + if (!garbage_collect_states) { return 0; } @@ -305,54 +309,15 @@ validate_attribs() { } /** - * This function is used by derived RenderAttrib types to share a common - * RenderAttrib pointer for all equivalent RenderAttrib objects. - * - * This is different from return_unique() in that it does not actually - * guarantee a unique pointer, unless uniquify-attribs is set. + * Private implementation of return_new, return_unique, and get_unique. */ CPT(RenderAttrib) RenderAttrib:: -return_new(RenderAttrib *attrib) { - nassertr(attrib != nullptr, attrib); - if (!uniquify_attribs) { - attrib->calc_hash(); - return attrib; - } - - return return_unique(attrib); -} - -/** - * This function is used by derived RenderAttrib types to share a common - * RenderAttrib pointer for all equivalent RenderAttrib objects. - * - * The make() function of the derived type should create a new RenderAttrib - * and pass it through return_new(), which will either save the pointer and - * return it unchanged (if this is the first similar such object) or delete it - * and return an equivalent pointer (if there was already a similar object - * saved). - */ -CPT(RenderAttrib) RenderAttrib:: -return_unique(RenderAttrib *attrib) { - nassertr(attrib != nullptr, attrib); - - attrib->calc_hash(); - - if (!state_cache) { - return attrib; - } - -#ifndef NDEBUG - if (paranoid_const) { - nassertr(validate_attribs(), attrib); - } -#endif - +do_uniquify(const RenderAttrib *attrib) { LightReMutexHolder holder(*_attribs_lock); if (attrib->_saved_entry != -1) { - // This attrib is already in the cache. nassertr(_attribs.find(attrib) - // == attrib->_saved_entry, attrib); + // This attrib is already in the cache. + //nassertr(_attribs.find(attrib) == attrib->_saved_entry, attrib); return attrib; } @@ -503,6 +468,12 @@ release_new() { */ void RenderAttrib:: init_attribs() { + // These are copied here so that we can be sure that they are constructed. + ALIGN_16BYTE ConfigVariableBool uniquify_attribs("uniquify-attribs", true); + ALIGN_16BYTE ConfigVariableBool state_cache("state-cache", true); + + _uniquify_attribs = uniquify_attribs && state_cache; + // TODO: we should have a global Panda mutex to allow us to safely create // _attribs_lock without a startup race condition. For the meantime, this // is OK because we guarantee that this method is called at static init diff --git a/panda/src/pgraph/renderAttrib.h b/panda/src/pgraph/renderAttrib.h index d9e9262429..a287c95f57 100644 --- a/panda/src/pgraph/renderAttrib.h +++ b/panda/src/pgraph/renderAttrib.h @@ -16,6 +16,7 @@ #include "pandabase.h" +#include "config_pgraph.h" #include "typedWritableReferenceCount.h" #include "renderAttribRegistry.h" #include "pointerTo.h" @@ -161,8 +162,9 @@ PUBLISHED: protected: INLINE void calc_hash(); - static CPT(RenderAttrib) return_new(RenderAttrib *attrib); - static CPT(RenderAttrib) return_unique(RenderAttrib *attrib); + INLINE static CPT(RenderAttrib) return_new(RenderAttrib *attrib); + INLINE static CPT(RenderAttrib) return_unique(RenderAttrib *attrib); + static CPT(RenderAttrib) do_uniquify(const RenderAttrib *attrib); virtual int compare_to_impl(const RenderAttrib *other) const; virtual size_t get_hash_impl() const; virtual CPT(RenderAttrib) compose_impl(const RenderAttrib *other) const; @@ -184,8 +186,9 @@ private: static LightReMutex *_attribs_lock; typedef SimpleHashMap > Attribs; static Attribs _attribs; + static bool _uniquify_attribs; - int _saved_entry; + mutable int _saved_entry; size_t _hash; // This keeps track of our current position through the garbage collection @@ -195,6 +198,7 @@ private: static PStatCollector _garbage_collect_pcollector; friend class RenderAttribRegistry; + friend class RenderState; public: virtual void write_datagram(BamWriter *manager, Datagram &dg); diff --git a/panda/src/pgraph/renderState.cxx b/panda/src/pgraph/renderState.cxx index da9d1bca97..235075cb1d 100644 --- a/panda/src/pgraph/renderState.cxx +++ b/panda/src/pgraph/renderState.cxx @@ -1287,8 +1287,8 @@ return_unique(RenderState *state) { LightReMutexHolder holder(*_states_lock); if (state->_saved_entry != -1) { - // This state is already in the cache. nassertr(_states.find(state) == - // state->_saved_entry, pt_state); + // This state is already in the cache. + //nassertr(_states.find(state) == state->_saved_entry, pt_state); return state; } @@ -1300,7 +1300,7 @@ return_unique(RenderState *state) { while (slot >= 0) { Attribute &attrib = state->_attributes[slot]; nassertd(attrib._attrib != nullptr) continue; - attrib._attrib = attrib._attrib->get_unique(); + attrib._attrib = RenderAttrib::do_uniquify(attrib._attrib); mask.clear_bit(slot); slot = mask.get_lowest_on_bit(); }