From b781547956f7a576ee4af896fb0728d4005f9b95 Mon Sep 17 00:00:00 2001 From: rdb Date: Sat, 11 Jan 2014 15:06:22 +0000 Subject: [PATCH] a tiny optimisation to use less registers --- panda/src/pgraphnodes/shaderGenerator.cxx | 142 ++++++++++------------ 1 file changed, 64 insertions(+), 78 deletions(-) diff --git a/panda/src/pgraphnodes/shaderGenerator.cxx b/panda/src/pgraphnodes/shaderGenerator.cxx index 9f9218ed46..2ff2b5e3ce 100644 --- a/panda/src/pgraphnodes/shaderGenerator.cxx +++ b/panda/src/pgraphnodes/shaderGenerator.cxx @@ -95,6 +95,14 @@ alloc_vreg() { case 5: _vtregs_used += 1; return (char*)"TEXCOORD5"; case 6: _vtregs_used += 1; return (char*)"TEXCOORD6"; case 7: _vtregs_used += 1; return (char*)"TEXCOORD7"; + } + switch (_vcregs_used) { + case 0: _vcregs_used += 1; return (char*)"COLOR0"; + case 1: _vcregs_used += 1; return (char*)"COLOR1"; + } + // These don't exist in arbvp1, though they're reportedly + // supported by other profiles. + switch (_vtregs_used) { case 8: _vtregs_used += 1; return (char*)"TEXCOORD8"; case 9: _vtregs_used += 1; return (char*)"TEXCOORD9"; case 10: _vtregs_used += 1; return (char*)"TEXCOORD10"; @@ -104,24 +112,6 @@ alloc_vreg() { case 14: _vtregs_used += 1; return (char*)"TEXCOORD14"; case 15: _vtregs_used += 1; return (char*)"TEXCOORD15"; } - switch (_vcregs_used) { - case 0: _vcregs_used += 1; return (char*)"COLOR0"; - case 1: _vcregs_used += 1; return (char*)"COLOR1"; - case 2: _vcregs_used += 1; return (char*)"COLOR2"; - case 3: _vcregs_used += 1; return (char*)"COLOR3"; - case 4: _vcregs_used += 1; return (char*)"COLOR4"; - case 5: _vcregs_used += 1; return (char*)"COLOR5"; - case 6: _vcregs_used += 1; return (char*)"COLOR6"; - case 7: _vcregs_used += 1; return (char*)"COLOR7"; - case 8: _vcregs_used += 1; return (char*)"COLOR8"; - case 9: _vcregs_used += 1; return (char*)"COLOR9"; - case 10: _vcregs_used += 1; return (char*)"COLOR10"; - case 11: _vcregs_used += 1; return (char*)"COLOR11"; - case 12: _vcregs_used += 1; return (char*)"COLOR12"; - case 13: _vcregs_used += 1; return (char*)"COLOR13"; - case 14: _vcregs_used += 1; return (char*)"COLOR14"; - case 15: _vcregs_used += 1; return (char*)"COLOR15"; - } return (char*)"UNKNOWN"; } @@ -141,6 +131,14 @@ alloc_freg() { case 5: _ftregs_used += 1; return (char*)"TEXCOORD5"; case 6: _ftregs_used += 1; return (char*)"TEXCOORD6"; case 7: _ftregs_used += 1; return (char*)"TEXCOORD7"; + } + switch (_fcregs_used) { + case 0: _fcregs_used += 1; return (char*)"COLOR0"; + case 1: _fcregs_used += 1; return (char*)"COLOR1"; + } + // These don't exist in arbvp1/arbfp1, though they're + // reportedly supported by other profiles. + switch (_ftregs_used) { case 8: _ftregs_used += 1; return (char*)"TEXCOORD8"; case 9: _ftregs_used += 1; return (char*)"TEXCOORD9"; case 10: _ftregs_used += 1; return (char*)"TEXCOORD10"; @@ -150,24 +148,6 @@ alloc_freg() { case 14: _ftregs_used += 1; return (char*)"TEXCOORD14"; case 15: _ftregs_used += 1; return (char*)"TEXCOORD15"; } - switch (_fcregs_used) { - case 0: _fcregs_used += 1; return (char*)"COLOR0"; - case 1: _fcregs_used += 1; return (char*)"COLOR1"; - case 2: _fcregs_used += 1; return (char*)"COLOR2"; - case 3: _fcregs_used += 1; return (char*)"COLOR3"; - case 4: _fcregs_used += 1; return (char*)"COLOR4"; - case 5: _fcregs_used += 1; return (char*)"COLOR5"; - case 6: _fcregs_used += 1; return (char*)"COLOR6"; - case 7: _fcregs_used += 1; return (char*)"COLOR7"; - case 8: _fcregs_used += 1; return (char*)"COLOR8"; - case 9: _fcregs_used += 1; return (char*)"COLOR9"; - case 10: _fcregs_used += 1; return (char*)"COLOR10"; - case 11: _fcregs_used += 1; return (char*)"COLOR11"; - case 12: _fcregs_used += 1; return (char*)"COLOR12"; - case 13: _fcregs_used += 1; return (char*)"COLOR13"; - case 14: _fcregs_used += 1; return (char*)"COLOR14"; - case 15: _fcregs_used += 1; return (char*)"COLOR15"; - } return (char*)"UNKNOWN"; } @@ -230,7 +210,7 @@ analyze_renderstate(const RenderState *rs) { _out_aux_normal = (outputs & AuxBitplaneAttrib::ABO_aux_normal) ? true:false; _out_aux_glow = (outputs & AuxBitplaneAttrib::ABO_aux_glow) ? true:false; _out_aux_any = (_out_aux_normal || _out_aux_glow); - + if (_out_aux_normal) { _need_eye_normal = true; } @@ -285,7 +265,7 @@ analyze_renderstate(const RenderState *rs) { // See if there is a normal map, height map, gloss map, or glow map. // Also check if anything has TexGen. - + const TexGenAttrib *tex_gen = DCAST(TexGenAttrib, rs->get_attrib_def(TexGenAttrib::get_class_slot())); for (int i=0; i<_num_textures; i++) { TextureStage *stage = texture->get_on_stage(i); @@ -336,7 +316,6 @@ analyze_renderstate(const RenderState *rs) { if (la->get_num_on_lights() > 0) { _lighting = true; - _need_eye_position = true; _need_eye_normal = true; } @@ -390,6 +369,13 @@ analyze_renderstate(const RenderState *rs) { } else if (_map_index_gloss >= 0) { _have_specular = true; } + + if (_plights.size() + _slights.size() > 0) { + _need_eye_position = true; + + } else if (_have_specular && _material->get_local()) { + _need_eye_position = true; + } } // Decide whether to separate ambient and diffuse calculations. @@ -521,34 +507,34 @@ CPT(RenderAttrib) ShaderGenerator:: create_shader_attrib(const string &txt) { PT(Shader) shader = Shader::make(txt); CPT(RenderAttrib) shattr = ShaderAttrib::make(); - shattr=DCAST(ShaderAttrib, shattr)->set_shader(shader); + shattr = DCAST(ShaderAttrib, shattr)->set_shader(shader); if (_lighting) { - for (int i=0; i<(int)_alights.size(); i++) { - shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("alight", i), _alights_np[i]); + for (int i=0; i < (int)_alights.size(); i++) { + shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("alight", i), _alights_np[i]); } - for (int i=0; i<(int)_dlights.size(); i++) { - shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("dlight", i), _dlights_np[i]); + for (int i=0; i < (int)_dlights.size(); i++) { + shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("dlight", i), _dlights_np[i]); if (_shadows && _dlights[i]->_shadow_caster) { PT(Texture) tex = update_shadow_buffer(_dlights_np[i]); if (tex == NULL) { pgraph_cat.error() << "Failed to create shadow buffer for DirectionalLight '" << _dlights[i]->get_name() << "'!\n"; } - shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("dlighttex", i), tex); + shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("dlighttex", i), tex); } else { _dlights[i]->clear_shadow_buffers(); } } - for (int i=0; i<(int)_plights.size(); i++) { - shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("plight", i), _plights_np[i]); + for (int i=0; i < (int)_plights.size(); i++) { + shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("plight", i), _plights_np[i]); } - for (int i=0; i<(int)_slights.size(); i++) { - shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("slight", i), _slights_np[i]); + for (int i=0; i < (int)_slights.size(); i++) { + shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("slight", i), _slights_np[i]); if (_shadows && _slights[i]->_shadow_caster) { PT(Texture) tex = update_shadow_buffer(_slights_np[i]); if (tex == NULL) { pgraph_cat.error() << "Failed to create shadow buffer for Spotlight '" << _slights[i]->get_name() << "'!\n"; } - shattr=DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("slighttex", i), tex); + shattr = DCAST(ShaderAttrib, shattr)->set_shader_input(InternalName::make("slighttex", i), tex); } else { _slights[i]->clear_shadow_buffers(); } @@ -574,7 +560,7 @@ update_shadow_buffer(NodePath light_np) { if (light == NULL || !light->_shadow_caster) { return NULL; } - + // See if we already have a buffer. If not, create one. PT(Texture) tex; if (light->_sbuffers.count(_gsg) == 0) { @@ -634,7 +620,6 @@ synthesize_shader(const RenderState *rs) { // These variables will hold the results of register allocation. - char *normal_vreg = 0; char *ntangent_vreg = 0; char *ntangent_freg = 0; char *nbinormal_vreg = 0; @@ -652,6 +637,7 @@ synthesize_shader(const RenderState *rs) { char *hpos_freg = 0; if (_vertex_colors) { + // Reserve COLOR0 _vcregs_used = 1; _fcregs_used = 1; } @@ -676,8 +662,8 @@ synthesize_shader(const RenderState *rs) { text << "\t out float4 l_texcoord" << i << " : " << texcoord_freg[i] << ",\n"; } if (_vertex_colors) { - text << "\t in float4 vtx_color : COLOR,\n"; - text << "\t out float4 l_color : COLOR,\n"; + text << "\t in float4 vtx_color : COLOR0,\n"; + text << "\t out float4 l_color : COLOR0,\n"; } if (_need_world_position || _need_world_normal) { text << "\t uniform float4x4 trans_model_to_world,\n"; @@ -701,8 +687,7 @@ synthesize_shader(const RenderState *rs) { text << "\t out float4 l_eye_normal : " << eye_normal_freg << ",\n"; } if (_map_index_height >= 0 || _need_world_normal || _need_eye_normal) { - normal_vreg = alloc_vreg(); - text << "\t in float4 vtx_normal : " << normal_vreg << ",\n"; + text << "\t in float4 vtx_normal : NORMAL,\n"; } if (_map_index_height >= 0) { htangent_vreg = alloc_vreg(); @@ -722,6 +707,7 @@ synthesize_shader(const RenderState *rs) { if (_map_index_normal != _map_index_height) { ntangent_vreg = alloc_vreg(); nbinormal_vreg = alloc_vreg(); + // NB. If we used TANGENT and BINORMAL, Cg would have them overlap with TEXCOORD6-7. text << "\t in float4 vtx_tangent" << _map_index_normal << " : " << ntangent_vreg << ",\n"; text << "\t in float4 vtx_binormal" << _map_index_normal << " : " << nbinormal_vreg << ",\n"; } @@ -731,7 +717,7 @@ synthesize_shader(const RenderState *rs) { text << "\t out float4 l_binormal : " << nbinormal_freg << ",\n"; } if (_shadows && _auto_shadow_on) { - for (int i=0; i<(int)_dlights.size(); i++) { + for (int i=0; i < (int)_dlights.size(); i++) { if (_dlights[i]->_shadow_caster) { dlightcoord_freg.push_back(alloc_freg()); text << "\t uniform float4x4 trans_model_to_clip_of_dlight" << i << ",\n"; @@ -740,7 +726,7 @@ synthesize_shader(const RenderState *rs) { dlightcoord_freg.push_back(NULL); } } - for (int i=0; i<(int)_slights.size(); i++) { + for (int i=0; i < (int)_slights.size(); i++) { if (_slights[i]->_shadow_caster) { slightcoord_freg.push_back(alloc_freg()); text << "\t uniform float4x4 trans_model_to_clip_of_slight" << i << ",\n"; @@ -793,12 +779,12 @@ synthesize_shader(const RenderState *rs) { } if (_shadows && _auto_shadow_on) { text << "\t float4x4 biasmat = {0.5f, 0.0f, 0.0f, 0.5f, 0.0f, 0.5f, 0.0f, 0.5f, 0.0f, 0.0f, 0.5f, 0.5f, 0.0f, 0.0f, 0.0f, 1.0f};\n"; - for (int i=0; i<(int)_dlights.size(); i++) { + for (int i=0; i < (int)_dlights.size(); i++) { if (_dlights[i]->_shadow_caster) { text << "\t l_dlightcoord" << i << " = mul(biasmat, mul(trans_model_to_clip_of_dlight" << i << ", vtx_position));\n"; } } - for (int i=0; i<(int)_slights.size(); i++) { + for (int i=0; i < (int)_slights.size(); i++) { if (_slights[i]->_shadow_caster) { text << "\t l_slightcoord" << i << " = mul(biasmat, mul(trans_model_to_clip_of_slight" << i << ", vtx_position));\n"; } @@ -827,7 +813,7 @@ synthesize_shader(const RenderState *rs) { if (_need_world_normal) { text << "\t in float4 l_world_normal : " << world_normal_freg << ",\n"; } - if (_need_eye_position) { + if (_need_eye_position) { text << "\t in float4 l_eye_position : " << eye_position_freg << ",\n"; } if (_need_eye_normal) { @@ -851,10 +837,10 @@ synthesize_shader(const RenderState *rs) { text << "\t in float3 l_binormal : " << nbinormal_freg << ",\n"; } if (_lighting) { - for (int i=0; i<(int)_alights.size(); i++) { + for (int i=0; i < (int)_alights.size(); i++) { text << "\t uniform float4 alight_alight" << i << ",\n"; } - for (int i=0; i<(int)_dlights.size(); i++) { + for (int i=0; i < (int)_dlights.size(); i++) { text << "\t uniform float4x4 dlight_dlight" << i << "_rel_view,\n"; if (_shadows && _dlights[i]->_shadow_caster && _auto_shadow_on) { if (_use_shadow_filter) { @@ -865,10 +851,10 @@ synthesize_shader(const RenderState *rs) { text << "\t in float4 l_dlightcoord" << i << " : " << dlightcoord_freg[i] << ",\n"; } } - for (int i=0; i<(int)_plights.size(); i++) { + for (int i=0; i < (int)_plights.size(); i++) { text << "\t uniform float4x4 plight_plight" << i << "_rel_view,\n"; } - for (int i=0; i<(int)_slights.size(); i++) { + for (int i=0; i < (int)_slights.size(); i++) { text << "\t uniform float4x4 slight_slight" << i << "_rel_view,\n"; text << "\t uniform float4 satten_slight" << i << ",\n"; if (_shadows && _slights[i]->_shadow_caster && _auto_shadow_on) { @@ -899,7 +885,7 @@ synthesize_shader(const RenderState *rs) { } text << "\t out float4 o_color : COLOR0,\n"; if (_vertex_colors) { - text << "\t in float4 l_color : COLOR,\n"; + text << "\t in float4 l_color : COLOR0,\n"; } else { text << "\t uniform float4 attr_color,\n"; } @@ -917,7 +903,7 @@ synthesize_shader(const RenderState *rs) { } text << "\t float4 result;\n"; if (_out_aux_any) { - text << "\t o_aux = float4(0,0,0,0);\n"; + text << "\t o_aux = float4(0, 0, 0, 0);\n"; } // Now generate any texture coordinates according to TexGenAttrib. If it has a TexMatrixAttrib, also transform them. for (int i=0; i<_num_textures; i++) { @@ -959,11 +945,11 @@ synthesize_shader(const RenderState *rs) { case Texture::TT_2d_texture_array: text << "xyz"; break; - case Texture::TT_2d_texture: - text << "xy"; + case Texture::TT_2d_texture: + text << "xy"; break; case Texture::TT_1d_texture: - text << "x"; + text << "x"; break; default: break; @@ -1058,7 +1044,7 @@ synthesize_shader(const RenderState *rs) { text << "\t float shininess = 50; // no shininess specified, using default\n"; } } - for (int i=0; i<(int)_alights.size(); i++) { + for (int i=0; i < (int)_alights.size(); i++) { text << "\t // Ambient Light " << i << "\n"; text << "\t lcolor = alight_alight" << i << ";\n"; if (_separate_ambient_diffuse && _have_ambient) { @@ -1067,7 +1053,7 @@ synthesize_shader(const RenderState *rs) { text << "\t tot_diffuse += lcolor;\n"; } } - for (int i=0; i<(int)_dlights.size(); i++) { + for (int i=0; i < (int)_dlights.size(); i++) { text << "\t // Directional Light " << i << "\n"; text << "\t lcolor = dlight_dlight" << i << "_rel_view[0];\n"; text << "\t lspec = dlight_dlight" << i << "_rel_view[1];\n"; @@ -1095,7 +1081,7 @@ synthesize_shader(const RenderState *rs) { text << "\t tot_specular += lspec;\n"; } } - for (int i=0; i<(int)_plights.size(); i++) { + for (int i=0; i < (int)_plights.size(); i++) { text << "\t // Point Light " << i << "\n"; text << "\t lcolor = plight_plight" << i << "_rel_view[0];\n"; text << "\t lspec = plight_plight" << i << "_rel_view[1];\n"; @@ -1113,14 +1099,14 @@ synthesize_shader(const RenderState *rs) { if (_material->get_local()) { text << "\t lhalf = normalize(lvec - normalize(l_eye_position));\n"; } else { - text << "\t lhalf = normalize(lvec - float4(0,1,0,0));\n"; + text << "\t lhalf = normalize(lvec - float4(0, 1, 0, 0));\n"; } text << "\t lspec *= lattenv;\n"; text << "\t lspec *= pow(saturate(dot(l_eye_normal.xyz, lhalf.xyz)), shininess);\n"; text << "\t tot_specular += lspec;\n"; } } - for (int i=0; i<(int)_slights.size(); i++) { + for (int i=0; i < (int)_slights.size(); i++) { text << "\t // Spot Light " << i << "\n"; text << "\t lcolor = slight_slight" << i << "_rel_view[0];\n"; text << "\t lspec = slight_slight" << i << "_rel_view[1];\n"; @@ -1431,7 +1417,7 @@ synthesize_shader(const RenderState *rs) { case Fog::M_linear: text << "\t result.rgb = lerp(attr_fogcolor.rgb, result.rgb, saturate((attr_fog.z - l_hpos.z) * attr_fog.w));\n"; break; - case Fog::M_exponential: + case Fog::M_exponential: // 1.442695f = 1 / log(2) text << "\t result.rgb = lerp(attr_fogcolor.rgb, result.rgb, saturate(exp2(attr_fog.x * l_hpos.z * -1.442695f)));\n"; break; case Fog::M_exponential_squared: @@ -1454,10 +1440,10 @@ synthesize_shader(const RenderState *rs) { // Insert the shader into the shader attrib. CPT(RenderAttrib) shattr = create_shader_attrib(text.str()); if (_subsume_alpha_test) { - shattr=DCAST(ShaderAttrib, shattr)->set_flag(ShaderAttrib::F_subsume_alpha_test, true); + shattr = DCAST(ShaderAttrib, shattr)->set_flag(ShaderAttrib::F_subsume_alpha_test, true); } if (_disable_alpha_write) { - shattr=DCAST(ShaderAttrib, shattr)->set_flag(ShaderAttrib::F_disable_alpha_write, true); + shattr = DCAST(ShaderAttrib, shattr)->set_flag(ShaderAttrib::F_disable_alpha_write, true); } clear_analysis(); reset_register_allocator();