From 410049fd4baeb5aa665548cc0e60f7770618d5dc Mon Sep 17 00:00:00 2001 From: rdb Date: Wed, 18 Oct 2017 11:10:11 +0200 Subject: [PATCH] Improvements for ShaderGenerator-based programs: * Disable state munger, which isn't needed * Reflect updates to TextureStage properties (Fixes #178) * Also respect combine mode and rgb scale changes * Allow rehashing and clearing generated shader cache State munger is now disabled whenever any shader is applied. Panda assumes that any custom shader will apply color scale by itself. --- panda/src/display/graphicsStateGuardian.cxx | 50 +++ panda/src/display/graphicsStateGuardian.h | 3 + panda/src/display/standardMunger.cxx | 51 +-- panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx | 2 +- .../glstuff/glGraphicsStateGuardian_src.cxx | 9 +- panda/src/gobj/textureStage.I | 73 +++- panda/src/gobj/textureStage.cxx | 4 +- panda/src/gobj/textureStage.h | 7 +- .../src/gsgbase/graphicsStateGuardianBase.cxx | 1 + panda/src/gsgbase/graphicsStateGuardianBase.h | 11 + panda/src/pgraph/config_pgraph.cxx | 2 +- panda/src/pgraph/cullableObject.cxx | 11 +- panda/src/pgraph/renderState.h | 2 + panda/src/pgraph/stateMunger.I | 11 +- panda/src/pgraph/stateMunger.h | 3 + panda/src/pgraphnodes/shaderGenerator.cxx | 327 +++++++++++------- panda/src/pgraphnodes/shaderGenerator.h | 54 +-- 17 files changed, 421 insertions(+), 200 deletions(-) diff --git a/panda/src/display/graphicsStateGuardian.cxx b/panda/src/display/graphicsStateGuardian.cxx index 9d368467f8..f4672a0198 100644 --- a/panda/src/display/graphicsStateGuardian.cxx +++ b/panda/src/display/graphicsStateGuardian.cxx @@ -3039,6 +3039,19 @@ determine_target_texture() { nassertv(_target_texture->get_num_on_stages() <= max_texture_stages); } +/** + * Assigns _target_shader based on the _target_rs. 
+ */ +void GraphicsStateGuardian:: +determine_target_shader() { + if (_target_rs->_generated_shader != nullptr) { + _target_shader = (const ShaderAttrib *)_target_rs->_generated_shader.p(); + } else { + _target_shader = (const ShaderAttrib *) + _target_rs->get_attrib_def(ShaderAttrib::get_class_slot()); + } +} + /** * Frees some memory that was explicitly allocated within the glgsg. */ @@ -3382,6 +3395,43 @@ make_shadow_buffer(const NodePath &light_np, GraphicsOutputBase *host) { return tex; } +/** + * Ensures that an appropriate shader has been generated for the given state. + * This is stored in the _generated_shader field on the RenderState. + */ +void GraphicsStateGuardian:: +ensure_generated_shader(const RenderState *state) { +#ifdef HAVE_CG + const ShaderAttrib *shader_attrib; + state->get_attrib_def(shader_attrib); + + if (shader_attrib->auto_shader()) { + if (_shader_generator == nullptr) { + if (!_supports_basic_shaders) { + return; + } + _shader_generator = new ShaderGenerator(this); + } + if (state->_generated_shader == nullptr || + state->_generated_shader_seq != _generated_shader_seq) { + GeomVertexAnimationSpec spec; + + // Currently we overload this flag to request vertex animation for the + // shader generator. + const ShaderAttrib *sattr; + state->get_attrib_def(sattr); + if (sattr->get_flag(ShaderAttrib::F_hardware_skinning)) { + spec.set_hardware(4, true); + } + + // Cache the generated ShaderAttrib on the shader state. + state->_generated_shader = _shader_generator->synthesize_shader(state, spec); + state->_generated_shader_seq = _generated_shader_seq; + } + } +#endif +} + /** * Returns true if the GSG implements the extension identified by the given * string. This currently is only implemented by the OpenGL back-end. 
diff --git a/panda/src/display/graphicsStateGuardian.h b/panda/src/display/graphicsStateGuardian.h index 305a8d62e0..52cae8ff28 100644 --- a/panda/src/display/graphicsStateGuardian.h +++ b/panda/src/display/graphicsStateGuardian.h @@ -426,6 +426,8 @@ public: PT(Texture) get_dummy_shadow_map(Texture::TextureType texture_type) const; PT(Texture) make_shadow_buffer(const NodePath &light_np, GraphicsOutputBase *host); + virtual void ensure_generated_shader(const RenderState *state); + #ifdef DO_PSTATS static void init_frame_pstats(); #endif @@ -446,6 +448,7 @@ protected: virtual void end_bind_clip_planes(); void determine_target_texture(); + void determine_target_shader(); virtual void free_pointers(); virtual void close_gsg(); diff --git a/panda/src/display/standardMunger.cxx b/panda/src/display/standardMunger.cxx index 1cb64189da..cdc2f0bda3 100644 --- a/panda/src/display/standardMunger.cxx +++ b/panda/src/display/standardMunger.cxx @@ -38,7 +38,17 @@ StandardMunger(GraphicsStateGuardianBase *gsg, const RenderState *state, _auto_shader(false), _shader_skinning(false) { - if (!get_gsg()->get_runtime_color_scale()) { + const ShaderAttrib *shader_attrib; + state->get_attrib_def(shader_attrib); +#ifdef HAVE_CG + _auto_shader = shader_attrib->auto_shader(); +#endif + if (shader_attrib->get_flag(ShaderAttrib::F_hardware_skinning)) { + _shader_skinning = true; + } + + if (!get_gsg()->get_runtime_color_scale() && !_auto_shader && + shader_attrib->get_shader() == nullptr) { // We might need to munge the colors. 
const ColorAttrib *color_attrib; const ColorScaleAttrib *color_scale_attrib; @@ -60,6 +70,7 @@ StandardMunger(GraphicsStateGuardianBase *gsg, const RenderState *state, _color[3] * cs[3]); } _munge_color = true; + _should_munge_state = true; } } else if (state->get_attrib(color_scale_attrib) && @@ -74,6 +85,7 @@ StandardMunger(GraphicsStateGuardianBase *gsg, const RenderState *state, if ((color_scale_attrib->has_rgb_scale() && !get_gsg()->get_color_scale_via_lighting()) || (color_scale_attrib->has_alpha_scale() && !get_gsg()->get_alpha_scale_via_texture(tex_attrib))) { _munge_color_scale = true; + _should_munge_state = true; } // Known bug: if there is a material on an object that would obscure the @@ -82,15 +94,6 @@ StandardMunger(GraphicsStateGuardianBase *gsg, const RenderState *state, // effort to detect this contrived situation and handle it correctly. } } - - const ShaderAttrib *shader_attrib = (const ShaderAttrib *) - state->get_attrib_def(ShaderAttrib::get_class_slot()); - if (shader_attrib->auto_shader()) { - _auto_shader = true; - } - if (shader_attrib->get_flag(ShaderAttrib::F_hardware_skinning)) { - _shader_skinning = true; - } } /** @@ -341,33 +344,5 @@ munge_state_impl(const RenderState *state) { munged_state = munged_state->remove_attrib(ColorScaleAttrib::get_class_slot()); } -#ifdef HAVE_CG - if (_auto_shader) { - GraphicsStateGuardian *gsg = get_gsg(); - ShaderGenerator *shader_generator = gsg->get_shader_generator(); - if (shader_generator == nullptr) { - shader_generator = new ShaderGenerator(gsg); - gsg->set_shader_generator(shader_generator); - } - if (munged_state->_generated_shader == nullptr) { - // Cache the generated ShaderAttrib on the shader state. - GeomVertexAnimationSpec spec; - - // Currently we overload this flag to request vertex animation for the - // shader generator. 
- const ShaderAttrib *sattr; - munged_state->get_attrib_def(sattr); - if (sattr->get_flag(ShaderAttrib::F_hardware_skinning)) { - spec.set_hardware(4, true); - } - - munged_state->_generated_shader = shader_generator->synthesize_shader(munged_state, spec); - } - if (munged_state->_generated_shader != nullptr) { - munged_state = munged_state->set_attrib(munged_state->_generated_shader); - } - } -#endif - return munged_state; } diff --git a/panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx b/panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx index 5b7ffce964..b51d67c564 100644 --- a/panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx +++ b/panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx @@ -3074,7 +3074,7 @@ set_state_and_transform(const RenderState *target, } _target_rs = target; - _target_shader = DCAST(ShaderAttrib, _target_rs->get_attrib_def(ShaderAttrib::get_class_slot())); + determine_target_shader(); int alpha_test_slot = AlphaTestAttrib::get_class_slot(); if (_target_rs->get_attrib(alpha_test_slot) != _state_rs->get_attrib(alpha_test_slot) || diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx index 305f7e3aab..45450ae98d 100644 --- a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx +++ b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx @@ -174,7 +174,7 @@ static const string default_vshader = "void main(void) {\n" " gl_Position = p3d_ModelViewProjectionMatrix * p3d_Vertex;\n" " texcoord = p3d_MultiTexCoord0;\n" - " color = p3d_Color;\n" + " color = p3d_Color * p3d_ColorScale;\n" "}\n"; static const string default_fshader = @@ -3127,6 +3127,10 @@ reset() { } #endif + // Do we guarantee that we can apply the color scale via a shader? We set + // this false if there is a chance that the fixed-function pipeline is used. 
+ _runtime_color_scale = !has_fixed_function_pipeline(); + #ifndef OPENGLES if (_gl_shadlang_ver_major >= 4 || has_extension("GL_NV_gpu_program5")) { // gp5fp - OpenGL fragment profile for GeForce 400 Series and up @@ -10341,8 +10345,7 @@ set_state_and_transform(const RenderState *target, _target_rs = target; #ifndef OPENGLES_1 - _target_shader = (const ShaderAttrib *) - _target_rs->get_attrib_def(ShaderAttrib::get_class_slot()); + determine_target_shader(); _instance_count = _target_shader->get_instance_count(); if (_target_shader != _state_shader) { diff --git a/panda/src/gobj/textureStage.I b/panda/src/gobj/textureStage.I index c73c2d5338..cc977c99c1 100644 --- a/panda/src/gobj/textureStage.I +++ b/panda/src/gobj/textureStage.I @@ -15,7 +15,7 @@ * Initialize the texture stage from other */ INLINE TextureStage:: -TextureStage(TextureStage &copy) { +TextureStage(const TextureStage &copy) { (*this) = copy; } @@ -52,6 +52,10 @@ set_sort(int sort) { // Update the global flag to indicate that all TextureAttribs in the world // must now re-sort their lists. _sort_seq++; + + if (_used_by_auto_shader) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } } /** @@ -80,6 +84,10 @@ set_priority(int priority) { // Update the global flag to indicate that all TextureAttribs in the world // must now re-sort their lists. 
_sort_seq++; + + if (_used_by_auto_shader) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } } /** @@ -99,7 +107,13 @@ get_priority() const { */ INLINE void TextureStage:: set_texcoord_name(InternalName *name) { - _texcoord_name = name; + if (name != _texcoord_name) { + _texcoord_name = name; + + if (_used_by_auto_shader) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } + } } /** @@ -108,7 +122,7 @@ set_texcoord_name(InternalName *name) { */ INLINE void TextureStage:: set_texcoord_name(const string &name) { - _texcoord_name = InternalName::get_texcoord_name(name); + set_texcoord_name(InternalName::get_texcoord_name(name)); } /** @@ -150,13 +164,16 @@ get_binormal_name() const { */ INLINE void TextureStage:: set_mode(TextureStage::Mode mode) { - _mode = mode; + if (mode != _mode) { + _mode = mode; - if (_mode != M_combine) { - _num_combine_rgb_operands = 0; - _num_combine_alpha_operands = 0; + if (_mode != M_combine) { + _num_combine_rgb_operands = 0; + _num_combine_alpha_operands = 0; + } + + update_color_flags(); } - update_color_flags(); } /** @@ -202,8 +219,14 @@ get_color() const { */ INLINE void TextureStage:: set_rgb_scale(int rgb_scale) { - nassertv(rgb_scale == 1 || rgb_scale == 2 || rgb_scale == 4); - _rgb_scale = rgb_scale; + if (rgb_scale != _rgb_scale) { + nassertv(rgb_scale == 1 || rgb_scale == 2 || rgb_scale == 4); + _rgb_scale = rgb_scale; + + if (_used_by_auto_shader) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } + } } /** @@ -222,8 +245,14 @@ get_rgb_scale() const { */ INLINE void TextureStage:: set_alpha_scale(int alpha_scale) { - nassertv(alpha_scale == 1 || alpha_scale == 2 || alpha_scale == 4); - _alpha_scale = alpha_scale; + if (alpha_scale != _alpha_scale) { + nassertv(alpha_scale == 1 || alpha_scale == 2 || alpha_scale == 4); + _alpha_scale = alpha_scale; + + if (_used_by_auto_shader) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } + } } /** @@ -247,7 +276,13 @@ 
get_alpha_scale() const { */ INLINE void TextureStage:: set_saved_result(bool saved_result) { - _saved_result = saved_result; + if (saved_result != _saved_result) { + _saved_result = saved_result; + + if (_used_by_auto_shader) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } + } } /** @@ -641,6 +676,14 @@ get_sort_seq() { return _sort_seq; } +/** + * Marks this TextureStage as having been used by the auto shader. + */ +INLINE void TextureStage:: +mark_used_by_auto_shader() const { + _used_by_auto_shader = true; +} + /** * Updates _uses_color, _involves_color_scale, _uses_primary_color and * _uses_last_saved_result appropriately. @@ -684,6 +727,10 @@ update_color_flags() { _combine_alpha_source0 == CS_last_saved_result || _combine_alpha_source1 == CS_last_saved_result || _combine_alpha_source2 == CS_last_saved_result)); + + if (_used_by_auto_shader) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } } INLINE ostream & diff --git a/panda/src/gobj/textureStage.cxx b/panda/src/gobj/textureStage.cxx index 708b6d140d..120bb4feb3 100644 --- a/panda/src/gobj/textureStage.cxx +++ b/panda/src/gobj/textureStage.cxx @@ -25,7 +25,7 @@ TypeHandle TextureStage::_type_handle; * Initialize the texture stage at construction */ TextureStage:: -TextureStage(const string &name) { +TextureStage(const string &name) : _used_by_auto_shader(false) { _name = name; _sort = 0; _priority = 0; @@ -90,6 +90,8 @@ operator = (const TextureStage &other) { _uses_color = other._uses_color; _involves_color_scale = other._involves_color_scale; + + _used_by_auto_shader = false; } /** diff --git a/panda/src/gobj/textureStage.h b/panda/src/gobj/textureStage.h index ccaa32e5d0..2414e72991 100644 --- a/panda/src/gobj/textureStage.h +++ b/panda/src/gobj/textureStage.h @@ -21,6 +21,7 @@ #include "typedWritableReferenceCount.h" #include "updateSeq.h" #include "luse.h" +#include "graphicsStateGuardianBase.h" class FactoryParams; @@ -34,7 +35,7 @@ class FactoryParams; class 
EXPCL_PANDA_GOBJ TextureStage : public TypedWritableReferenceCount { PUBLISHED: explicit TextureStage(const string &name); - INLINE TextureStage(TextureStage &copy); + INLINE TextureStage(const TextureStage &copy); void operator = (const TextureStage &copy); virtual ~TextureStage(); @@ -206,6 +207,8 @@ PUBLISHED: public: INLINE static UpdateSeq get_sort_seq(); + INLINE void mark_used_by_auto_shader() const; + private: INLINE void update_color_flags(); @@ -249,6 +252,8 @@ private: static PT(TextureStage) _default_stage; static UpdateSeq _sort_seq; + mutable bool _used_by_auto_shader; + public: // Datagram stuff static void register_with_read_factory(); diff --git a/panda/src/gsgbase/graphicsStateGuardianBase.cxx b/panda/src/gsgbase/graphicsStateGuardianBase.cxx index aa23375418..fefd8037f2 100644 --- a/panda/src/gsgbase/graphicsStateGuardianBase.cxx +++ b/panda/src/gsgbase/graphicsStateGuardianBase.cxx @@ -16,6 +16,7 @@ #include AtomicAdjust::Pointer GraphicsStateGuardianBase::_gsg_list; +UpdateSeq GraphicsStateGuardianBase::_generated_shader_seq; TypeHandle GraphicsStateGuardianBase::_type_handle; /** diff --git a/panda/src/gsgbase/graphicsStateGuardianBase.h b/panda/src/gsgbase/graphicsStateGuardianBase.h index f86d39a8a3..45bf35b958 100644 --- a/panda/src/gsgbase/graphicsStateGuardianBase.h +++ b/panda/src/gsgbase/graphicsStateGuardianBase.h @@ -223,6 +223,14 @@ public: virtual void bind_light(Spotlight *light_obj, const NodePath &light, int light_id) { } + virtual void ensure_generated_shader(const RenderState *state)=0; + + static void mark_rehash_generated_shaders() { +#ifdef HAVE_CG + ++_generated_shader_seq; +#endif + } + PUBLISHED: static GraphicsStateGuardianBase *get_default_gsg(); static void set_default_gsg(GraphicsStateGuardianBase *default_gsg); @@ -247,6 +255,9 @@ private: }; static AtomicAdjust::Pointer _gsg_list; +protected: + static UpdateSeq _generated_shader_seq; + public: static TypeHandle get_class_type() { return _type_handle; diff --git 
a/panda/src/pgraph/config_pgraph.cxx b/panda/src/pgraph/config_pgraph.cxx index f481a2aeb1..237c1d0011 100644 --- a/panda/src/pgraph/config_pgraph.cxx +++ b/panda/src/pgraph/config_pgraph.cxx @@ -227,7 +227,7 @@ ConfigVariableBool uniquify_states "are pointerwise equal. This may improve caching performance, " "but also adds additional overhead to maintain the cache, " "including the need to check for a composition cycle in " - "the cache.")); + "the cache. It is highly recommended to keep this on.")); ConfigVariableBool uniquify_attribs ("uniquify-attribs", true, diff --git a/panda/src/pgraph/cullableObject.cxx b/panda/src/pgraph/cullableObject.cxx index 0630513696..d38fc4f5a4 100644 --- a/panda/src/pgraph/cullableObject.cxx +++ b/panda/src/pgraph/cullableObject.cxx @@ -142,10 +142,15 @@ munge_geom(GraphicsStateGuardianBase *gsg, GeomMunger *munger, DCAST(ShaderAttrib, ShaderAttrib::make())->set_flag(ShaderAttrib::F_hardware_skinning, true)); _state = _state->compose(state); } - } - StateMunger *state_munger = (StateMunger *)munger; - _state = state_munger->munge_state(_state); + gsg->ensure_generated_shader(_state); + } else { + // We may need to munge the state for the fixed-function pipeline. + StateMunger *state_munger = (StateMunger *)munger; + if (state_munger->should_munge_state()) { + _state = state_munger->munge_state(_state); + } + } // If there is any animation left in the vertex data after it has been // munged--that is, we couldn't arrange to handle the animation in diff --git a/panda/src/pgraph/renderState.h b/panda/src/pgraph/renderState.h index 4e2b2e3384..1052687314 100644 --- a/panda/src/pgraph/renderState.h +++ b/panda/src/pgraph/renderState.h @@ -219,6 +219,7 @@ public: // declare this as a ShaderAttrib because that would create a circular // include-file dependency problem. Aaargh. mutable CPT(RenderAttrib) _generated_shader; + mutable UpdateSeq _generated_shader_seq; private: // This mutex protects _states. 
It also protects any modification to the @@ -363,6 +364,7 @@ private: friend class GraphicsStateGuardian; friend class RenderAttribRegistry; friend class Extension; + friend class ShaderGenerator; friend class StateMunger; }; diff --git a/panda/src/pgraph/stateMunger.I b/panda/src/pgraph/stateMunger.I index edd3f6669b..7e35a2450c 100644 --- a/panda/src/pgraph/stateMunger.I +++ b/panda/src/pgraph/stateMunger.I @@ -16,6 +16,15 @@ */ INLINE StateMunger:: StateMunger(GraphicsStateGuardianBase *gsg) : - GeomMunger(gsg) + GeomMunger(gsg), + _should_munge_state(false) { } + +/** + * Returns true if this munger has something interesting to do to the state. + */ +INLINE bool StateMunger:: +should_munge_state() const { + return _should_munge_state; +} diff --git a/panda/src/pgraph/stateMunger.h b/panda/src/pgraph/stateMunger.h index 467eaaf604..c7fa6ef862 100644 --- a/panda/src/pgraph/stateMunger.h +++ b/panda/src/pgraph/stateMunger.h @@ -30,9 +30,12 @@ public: virtual ~StateMunger(); CPT(RenderState) munge_state(const RenderState *state); + INLINE bool should_munge_state() const; + protected: virtual CPT(RenderState) munge_state_impl(const RenderState *state); + bool _should_munge_state; public: static TypeHandle get_class_type() { diff --git a/panda/src/pgraphnodes/shaderGenerator.cxx b/panda/src/pgraphnodes/shaderGenerator.cxx index 0b4dc68a62..b25eb338fb 100644 --- a/panda/src/pgraphnodes/shaderGenerator.cxx +++ b/panda/src/pgraphnodes/shaderGenerator.cxx @@ -51,6 +51,14 @@ TypeHandle ShaderGenerator::_type_handle; #ifdef HAVE_CG +#define PACK_COMBINE(src0, op0, src1, op1, src2, op2) ( \ + ((uint16_t)src0) | ((((uint16_t)op0 - 1u) & 3u) << 3u) | \ + ((uint16_t)src1 << 5u) | ((((uint16_t)op1 - 1u) & 3u) << 8u) | \ + ((uint16_t)src2 << 10u) | ((((uint16_t)op2 - 1u) & 3u) << 13u)) + +#define UNPACK_COMBINE_SRC(from, n) (TextureStage::CombineSource)((from >> ((uint16_t)n * 5u)) & 7u) +#define UNPACK_COMBINE_OP(from, n) (TextureStage::CombineOperand)(((from >> (((uint16_t)n * 
5u) + 3u)) & 3u) + 1u) + static PStatCollector lookup_collector("*:Munge:ShaderGen:Lookup"); static PStatCollector synthesize_collector("*:Munge:ShaderGen:Synthesize"); @@ -60,7 +68,7 @@ static PStatCollector synthesize_collector("*:Munge:ShaderGen:Synthesize"); * shader generator belongs. */ ShaderGenerator:: -ShaderGenerator(GraphicsStateGuardianBase *gsg) { +ShaderGenerator(const GraphicsStateGuardianBase *gsg) { // The ATTR# input semantics seem to map to generic vertex attributes in // both arbvp1 and glslv, which behave more consistently. However, they // don't exist in Direct3D 9. Use this silly little check for now. @@ -298,10 +306,17 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) { Texture *tex = texture->get_on_texture(stage); nassertd(tex != nullptr) continue; + // Mark this TextureStage as having been used by the shader generator, so + // that the next time its properties change, it will cause the state to be + // rehashed to ensure that the shader is regenerated if needed. + stage->mark_used_by_auto_shader(); + ShaderKey::TextureInfo info; info._type = tex->get_texture_type(); info._mode = stage->get_mode(); info._flags = 0; + info._combine_rgb = 0u; + info._combine_alpha = 0u; // While we look at the mode, determine whether we need to change the mode // in order to reflect disabled features. @@ -357,6 +372,40 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) { info._flags = ShaderKey::TF_map_normal | ShaderKey::TF_map_gloss; } break; + + case TextureStage::M_combine: + // If we have this rare, special mode, we encode all these extra + // parameters as flags to prevent bloating the shader key. 
+ info._flags |= (uint32_t)stage->get_combine_rgb_mode() << ShaderKey::TF_COMBINE_RGB_MODE_SHIFT; + info._flags |= (uint32_t)stage->get_combine_alpha_mode() << ShaderKey::TF_COMBINE_ALPHA_MODE_SHIFT; + if (stage->get_rgb_scale() == 2) { + info._flags |= ShaderKey::TF_rgb_scale_2; + } + if (stage->get_rgb_scale() == 4) { + info._flags |= ShaderKey::TF_rgb_scale_4; + } + if (stage->get_alpha_scale() == 2) { + info._flags |= ShaderKey::TF_alpha_scale_2; + } + if (stage->get_alpha_scale() == 4) { + info._flags |= ShaderKey::TF_alpha_scale_4; + } + info._combine_rgb = PACK_COMBINE( + stage->get_combine_rgb_source0(), stage->get_combine_rgb_operand0(), + stage->get_combine_rgb_source1(), stage->get_combine_rgb_operand1(), + stage->get_combine_rgb_source2(), stage->get_combine_rgb_operand2()); + info._combine_alpha = PACK_COMBINE( + stage->get_combine_alpha_source0(), stage->get_combine_alpha_operand0(), + stage->get_combine_alpha_source1(), stage->get_combine_alpha_operand1(), + stage->get_combine_alpha_source2(), stage->get_combine_alpha_operand2()); + + if (stage->uses_primary_color()) { + info._flags |= ShaderKey::TF_uses_primary_color; + } + if (stage->uses_last_saved_result()) { + info._flags |= ShaderKey::TF_uses_last_saved_result; + } + break; } // In fact, perhaps this stage should be disabled altogether? @@ -417,11 +466,9 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) { info._gen_mode = TexGenAttrib::M_off; } - // If we have this rare, special mode, just include a pointer to the - // TextureStage object, because I can't be bothered to bloat the shader - // key with all these extra relevant properties. - if (stage->get_mode() == TextureStage::M_combine) { - info._stage = stage; + // Does this stage require saving its result? + if (stage->get_saved_result()) { + info._flags |= ShaderKey::TF_saved_result; } // Does this stage need a texcolor_# input? 
@@ -433,6 +480,17 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) { key._texture_flags |= info._flags; } + // Does nothing use the saved result? If so, don't bother saving it. + if ((key._texture_flags & ShaderKey::TF_uses_last_saved_result) == 0 && + (key._texture_flags & ShaderKey::TF_saved_result) != 0) { + + pvector<ShaderKey::TextureInfo>::iterator it; + for (it = key._textures.begin(); it != key._textures.end(); ++it) { + (*it)._flags &= ~ShaderKey::TF_saved_result; + } + key._texture_flags &= ~ShaderKey::TF_saved_result; + } + // Decide whether to separate ambient and diffuse calculations. if (have_ambient) { if (key._material_flags & Material::F_ambient) { @@ -468,6 +526,74 @@ analyze_renderstate(ShaderKey &key, const RenderState *rs) { } } +/** + * Rehashes all the states with generated shaders, removing the ones that are + * no longer fresh. + * + * Call this if certain state has changed in such a way as to require a rerun + * of the shader generator. This should be rare because in most cases, the + * shader generator will automatically regenerate shaders as necessary. + */ +INLINE void ShaderGenerator:: +rehash_generated_shaders() { + LightReMutexHolder holder(*RenderState::_states_lock); + + // With uniquify-states turned on, we can actually go through all the states + // and check whether their generated shader is still OK. + size_t size = RenderState::_states->get_num_entries(); + for (size_t si = 0; si < size; ++si) { + const RenderState *state = RenderState::_states->get_key(si); + + if (state->_generated_shader != nullptr) { + ShaderKey key; + analyze_renderstate(key, state); + + GeneratedShaders::const_iterator si; + si = _generated_shaders.find(key); + if (si != _generated_shaders.end()) { + if (si->second != state->_generated_shader) { + state->_generated_shader = si->second; + state->_munged_states.clear(); + } + } else { + // We have not yet generated a shader for this modified state. 
+ state->_generated_shader.clear(); + state->_munged_states.clear(); + } + } + } + + // If we don't have uniquify-states, however, the above list won't contain + // all the state. We can change a global seq value to require Panda to + // rehash the states the next time it tries to render an object with it. + if (!uniquify_states) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } +} + +/** + * Removes all previously generated shaders, requiring all shaders to be + * regenerated. Does not clear cache of compiled shaders. + */ +INLINE void ShaderGenerator:: +clear_generated_shaders() { + LightReMutexHolder holder(*RenderState::_states_lock); + + size_t size = RenderState::_states->get_num_entries(); + for (size_t si = 0; si < size; ++si) { + const RenderState *state = RenderState::_states->get_key(si); + state->_generated_shader.clear(); + } + + _generated_shaders.clear(); + + // If we don't have uniquify-states, we can't clear all the ShaderAttribs + // that are cached on the states, but we can simulate the effect of that. + if (!uniquify_states) { + GraphicsStateGuardianBase::mark_rehash_generated_shaders(); + } +} + /** * This is the routine that implements the next-gen fixed function pipeline by * synthesizing a shader. It also takes care of setting up any buffers needed @@ -1256,24 +1382,12 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) { } } - // Loop first to see if something is using primary_color or - // last_saved_result. 
- bool have_saved_result = false; - bool have_primary_color = false; - for (size_t i = 0; i < key._textures.size(); ++i) { - const ShaderKey::TextureInfo &tex = key._textures[i]; - if (tex._stage == nullptr) { - continue; - } - - if (tex._stage->uses_primary_color() && !have_primary_color) { - text << "\t float4 primary_color = result;\n"; - have_primary_color = true; - } - if (tex._stage->uses_last_saved_result() && !have_saved_result) { - text << "\t float4 last_saved_result = result;\n"; - have_saved_result = true; - } + // Store these if any stages will use it. + if (key._texture_flags & ShaderKey::TF_uses_primary_color) { + text << "\t float4 primary_color = result;\n"; + } + if (key._texture_flags & ShaderKey::TF_uses_last_saved_result) { + text << "\t float4 last_saved_result = result;\n"; } // Now loop through the textures to compose our magic blending formulas. @@ -1315,24 +1429,21 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) { } break; case TextureStage::M_combine: - // Only in the case of M_combine have we filled in the _stage pointer. 
text << "\t result.rgb = "; - if (tex._stage->get_combine_rgb_mode() != TextureStage::CM_undefined) { - text << combine_mode_as_string(tex._stage, tex._stage->get_combine_rgb_mode(), false, i); - } else { - text << "tex" << i << ".rgb"; + text << combine_mode_as_string(tex, (TextureStage::CombineMode)((tex._flags & ShaderKey::TF_COMBINE_RGB_MODE_MASK) >> ShaderKey::TF_COMBINE_RGB_MODE_SHIFT), false, i); + if (tex._flags & ShaderKey::TF_rgb_scale_2) { + text << " * 2"; } - if (tex._stage->get_rgb_scale() != 1) { - text << " * " << tex._stage->get_rgb_scale(); + if (tex._flags & ShaderKey::TF_rgb_scale_4) { + text << " * 4"; } text << ";\n\t result.a = "; - if (tex._stage->get_combine_alpha_mode() != TextureStage::CM_undefined) { - text << combine_mode_as_string(tex._stage, tex._stage->get_combine_alpha_mode(), true, i); - } else { - text << "tex" << i << ".a"; + text << combine_mode_as_string(tex, (TextureStage::CombineMode)((tex._flags & ShaderKey::TF_COMBINE_ALPHA_MODE_MASK) >> ShaderKey::TF_COMBINE_ALPHA_MODE_SHIFT), false, i); + if (tex._flags & ShaderKey::TF_alpha_scale_2) { + text << " * 2"; } - if (tex._stage->get_alpha_scale() != 1) { - text << " * " << tex._stage->get_alpha_scale(); + if (tex._flags & ShaderKey::TF_alpha_scale_4) { + text << " * 4"; } text << ";\n"; break; @@ -1342,7 +1453,7 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) { default: break; } - if ((tex._flags & ShaderKey::TF_saved_result) != 0 && have_saved_result) { + if (tex._flags & ShaderKey::TF_saved_result) { text << "\t last_saved_result = result;\n"; } } @@ -1469,53 +1580,53 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) { * This 'synthesizes' a combine mode into a string. 
*/
 const string ShaderGenerator::
-combine_mode_as_string(CPT(TextureStage) stage, TextureStage::CombineMode c_mode, bool alpha, short texindex) {
+combine_mode_as_string(const ShaderKey::TextureInfo &info, TextureStage::CombineMode c_mode, bool alpha, short texindex) {
   ostringstream text;
   switch (c_mode) {
-    case TextureStage::CM_modulate:
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << " * ";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      break;
-    case TextureStage::CM_add:
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << " + ";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      break;
-    case TextureStage::CM_add_signed:
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << " + ";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      if (alpha) {
-        text << " - 0.5";
-      } else {
-        text << " - float3(0.5, 0.5, 0.5)";
-      }
-      break;
-    case TextureStage::CM_interpolate:
-      text << "lerp(";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      text << ", ";
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << ", ";
-      text << combine_source_as_string(stage, 2, alpha, true, texindex);
-      text << ")";
-      break;
-    case TextureStage::CM_subtract:
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      text << " + ";
-      text << combine_source_as_string(stage, 1, alpha, alpha, texindex);
-      break;
-    case TextureStage::CM_dot3_rgb:
-      pgraphnodes_cat.error() << "TextureStage::CombineMode DOT3_RGB not yet supported in per-pixel mode.\n";
-      break;
-    case TextureStage::CM_dot3_rgba:
-      pgraphnodes_cat.error() << "TextureStage::CombineMode DOT3_RGBA not yet supported in per-pixel mode.\n";
-      break;
-    case TextureStage::CM_replace:
-    default: // Not sure if this is correct as default value.
-      text << combine_source_as_string(stage, 0, alpha, alpha, texindex);
-      break;
+  case TextureStage::CM_modulate:  // arg0 * arg1
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << " * ";
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    break;
+  case TextureStage::CM_add:  // arg0 + arg1
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << " + ";
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    break;
+  case TextureStage::CM_add_signed:  // arg0 + arg1 - 0.5
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << " + ";
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    if (alpha) {
+      text << " - 0.5";
+    } else {
+      text << " - float3(0.5, 0.5, 0.5)";
+    }
+    break;
+  case TextureStage::CM_interpolate:  // lerp(arg1, arg0, arg2) == arg0 * arg2 + arg1 * (1 - arg2)
+    text << "lerp(";
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    text << ", ";
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << ", ";
+    text << combine_source_as_string(info, 2, alpha, true, texindex);  // blend weight: single_value is forced to true
+    text << ")";
+    break;
+  case TextureStage::CM_subtract:  // arg0 - arg1
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    text << " - ";  // was " + ", which made CM_subtract behave exactly like CM_add
+    text << combine_source_as_string(info, 1, alpha, alpha, texindex);
+    break;
+  case TextureStage::CM_dot3_rgb:  // unsupported: an error is logged and no expression is emitted
+    pgraphnodes_cat.error() << "TextureStage::CombineMode DOT3_RGB not yet supported in per-pixel mode.\n";
+    break;
+  case TextureStage::CM_dot3_rgba:  // unsupported: an error is logged and no expression is emitted
+    pgraphnodes_cat.error() << "TextureStage::CombineMode DOT3_RGBA not yet supported in per-pixel mode.\n";
+    break;
+  case TextureStage::CM_replace:
+  default: // Unrecognised modes are handled like CM_replace: arg0 is passed through.
+    text << combine_source_as_string(info, 0, alpha, alpha, texindex);
+    break;
   }
   return text.str();
 }
@@ -1524,39 +1635,15 @@ combine_mode_as_string(CPT(TextureStage) stage, TextureStage::CombineMode c_mode
  * This 'synthesizes' a combine source into a string.
  */
 const string ShaderGenerator::
-combine_source_as_string(CPT(TextureStage) stage, short num, bool alpha, bool single_value, short texindex) {
-  TextureStage::CombineSource c_src = TextureStage::CS_undefined;
-  TextureStage::CombineOperand c_op = TextureStage::CO_undefined;
-  if (alpha) {
-    switch (num) {
-      case 0:
-        c_src = stage->get_combine_alpha_source0();
-        c_op = stage->get_combine_alpha_operand0();
-        break;
-      case 1:
-        c_src = stage->get_combine_alpha_source1();
-        c_op = stage->get_combine_alpha_operand1();
-        break;
-      case 2:
-        c_src = stage->get_combine_alpha_source2();
-        c_op = stage->get_combine_alpha_operand2();
-        break;
-    }
+combine_source_as_string(const ShaderKey::TextureInfo &info, short num, bool alpha, bool single_value, short texindex) {
+  TextureStage::CombineSource c_src;  // assigned on both branches of the if/else below
+  TextureStage::CombineOperand c_op;  // assigned on both branches of the if/else below
+  if (!alpha) {  // RGB: source/operand number 'num' is read from info._combine_rgb
+    c_src = UNPACK_COMBINE_SRC(info._combine_rgb, num);
+    c_op = UNPACK_COMBINE_OP(info._combine_rgb, num);
   } else {
-    switch (num) {
-      case 0:
-        c_src = stage->get_combine_rgb_source0();
-        c_op = stage->get_combine_rgb_operand0();
-        break;
-      case 1:
-        c_src = stage->get_combine_rgb_source1();
-        c_op = stage->get_combine_rgb_operand1();
-        break;
-      case 2:
-        c_src = stage->get_combine_rgb_source2();
-        c_op = stage->get_combine_rgb_operand2();
-        break;
-    }
+    c_src = UNPACK_COMBINE_SRC(info._combine_alpha, num);  // alpha: same lookup on info._combine_alpha
+    c_op = UNPACK_COMBINE_OP(info._combine_alpha, num);
   }
   ostringstream csource;
   if (c_op == TextureStage::CO_one_minus_src_color ||
@@ -1688,8 +1775,11 @@ operator < (const ShaderKey &other) const {
     if (tex._flags != other_tex._flags) {
       return tex._flags < other_tex._flags;
     }
-    if (tex._stage != other_tex._stage) {
-      return tex._stage < other_tex._stage;
+    if (tex._combine_rgb != other_tex._combine_rgb) {  // order by the stored combine values, RGB first
+      return tex._combine_rgb < other_tex._combine_rgb;
+    }
+    if (tex._combine_alpha != other_tex._combine_alpha) {
+      return tex._combine_alpha < other_tex._combine_alpha;
     }
   }
   if (_lights.size() != other._lights.size()) {
@@ -1763,7 +1853,8 @@ operator == (const ShaderKey &other) const {
         tex._mode != other_tex._mode ||
         tex._gen_mode != other_tex._gen_mode ||
         tex._flags != other_tex._flags ||
-        tex._stage != other_tex._stage) {
+        tex._combine_rgb != other_tex._combine_rgb ||  // same two fields that operator < orders on
+        tex._combine_alpha != other_tex._combine_alpha) {
       return false;
     }
   }
diff --git a/panda/src/pgraphnodes/shaderGenerator.h b/panda/src/pgraphnodes/shaderGenerator.h
index dc68d90d66..28a563cc25 100644
--- a/panda/src/pgraphnodes/shaderGenerator.h
+++ b/panda/src/pgraphnodes/shaderGenerator.h
@@ -65,18 +65,15 @@ class GeomVertexAnimationSpec;
  */
 class EXPCL_PANDA_PGRAPHNODES ShaderGenerator : public TypedReferenceCount {
 PUBLISHED:
-  ShaderGenerator(GraphicsStateGuardianBase *gsg);
+  ShaderGenerator(const GraphicsStateGuardianBase *gsg);
   virtual ~ShaderGenerator();
   virtual CPT(ShaderAttrib) synthesize_shader(const RenderState *rs,
                                               const GeomVertexAnimationSpec &anim);

-protected:
-  static const string combine_mode_as_string(CPT(TextureStage) stage,
-                      TextureStage::CombineMode c_mode, bool alpha, short texindex);
-  static const string combine_source_as_string(CPT(TextureStage) stage,
-                         short num, bool alpha, bool single_value, short texindex);
-  static const string texture_type_as_string(Texture::TextureType ttype);
+  INLINE void rehash_generated_shaders();
+  INLINE void clear_generated_shaders();

+protected:
   // Shader register allocation:

   bool _use_generic_attr;
@@ -101,16 +98,28 @@ protected:
     GeomVertexAnimationSpec _anim_spec;

     enum TextureFlags {
-      TF_has_rgb = 1,
-      TF_has_alpha = 2,
-      TF_has_texscale = 4,
-      TF_has_texmat = 8,
-      TF_saved_result = 16,
-      TF_map_normal = 32,
-      TF_map_height = 64,
-      TF_map_glow = 128,
-      TF_map_gloss = 256,
-      TF_uses_color = 512,
+      TF_has_rgb = 0x001,
+      TF_has_alpha = 0x002,
+      TF_has_texscale = 0x004,
+      TF_has_texmat = 0x008,
+      TF_saved_result = 0x010,
+      TF_map_normal = 0x020,
+      TF_map_height = 0x040,
+      TF_map_glow = 0x080,
+      TF_map_gloss = 0x100,
+      TF_uses_color = 0x200,
+      TF_uses_primary_color = 0x400,
+      TF_uses_last_saved_result = 0x800,
+
+      TF_rgb_scale_2 = 0x1000,
+      TF_rgb_scale_4 = 0x2000,
+      TF_alpha_scale_2 = 0x4000,
+      TF_alpha_scale_4 = 0x8000,
+
+      TF_COMBINE_RGB_MODE_SHIFT = 16,  // 4-bit field, bits 16-19 (see the mask on the next line)
+      TF_COMBINE_RGB_MODE_MASK = 0x0000f0000,
+      TF_COMBINE_ALPHA_MODE_SHIFT = 20,  // 4-bit field, bits 20-23 (see the mask on the next line)
+      TF_COMBINE_ALPHA_MODE_MASK = 0x000f00000,
     };

     ColorAttrib::Type _color_type;
@@ -123,9 +132,8 @@ protected:
       TextureStage::Mode _mode;
       TexGenAttrib::Mode _gen_mode;
       int _flags;
-
-      // Stored only if combine modes / blend color is used
-      CPT(TextureStage) _stage;
+      uint16_t _combine_rgb;  // RGB combine sources/operands; read with UNPACK_COMBINE_SRC / UNPACK_COMBINE_OP
+      uint16_t _combine_alpha;  // alpha combine sources/operands; read with the same two macros
     };
     pvector _textures;

@@ -160,6 +168,12 @@ protected:

   void analyze_renderstate(ShaderKey &key, const RenderState *rs);

+  static const string combine_mode_as_string(const ShaderKey::TextureInfo &info,
+                      TextureStage::CombineMode c_mode, bool alpha, short texindex);
+  static const string combine_source_as_string(const ShaderKey::TextureInfo &info,
+                         short num, bool alpha, bool single_value, short texindex);
+  static const string texture_type_as_string(Texture::TextureType ttype);
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;