a tiny optimisation to use fewer registers

This commit is contained in:
rdb 2014-01-11 15:06:22 +00:00
parent 9c27cea403
commit b781547956

View File

@ -95,6 +95,14 @@ alloc_vreg() {
case 5: _vtregs_used += 1; return (char*)"TEXCOORD5"; case 5: _vtregs_used += 1; return (char*)"TEXCOORD5";
case 6: _vtregs_used += 1; return (char*)"TEXCOORD6"; case 6: _vtregs_used += 1; return (char*)"TEXCOORD6";
case 7: _vtregs_used += 1; return (char*)"TEXCOORD7"; case 7: _vtregs_used += 1; return (char*)"TEXCOORD7";
}
switch (_vcregs_used) {
case 0: _vcregs_used += 1; return (char*)"COLOR0";
case 1: _vcregs_used += 1; return (char*)"COLOR1";
}
// These don't exist in arbvp1, though they're reportedly
// supported by other profiles.
switch (_vtregs_used) {
case 8: _vtregs_used += 1; return (char*)"TEXCOORD8"; case 8: _vtregs_used += 1; return (char*)"TEXCOORD8";
case 9: _vtregs_used += 1; return (char*)"TEXCOORD9"; case 9: _vtregs_used += 1; return (char*)"TEXCOORD9";
case 10: _vtregs_used += 1; return (char*)"TEXCOORD10"; case 10: _vtregs_used += 1; return (char*)"TEXCOORD10";
@ -104,24 +112,6 @@ alloc_vreg() {
case 14: _vtregs_used += 1; return (char*)"TEXCOORD14"; case 14: _vtregs_used += 1; return (char*)"TEXCOORD14";
case 15: _vtregs_used += 1; return (char*)"TEXCOORD15"; case 15: _vtregs_used += 1; return (char*)"TEXCOORD15";
} }
switch (_vcregs_used) {
case 0: _vcregs_used += 1; return (char*)"COLOR0";
case 1: _vcregs_used += 1; return (char*)"COLOR1";
case 2: _vcregs_used += 1; return (char*)"COLOR2";
case 3: _vcregs_used += 1; return (char*)"COLOR3";
case 4: _vcregs_used += 1; return (char*)"COLOR4";
case 5: _vcregs_used += 1; return (char*)"COLOR5";
case 6: _vcregs_used += 1; return (char*)"COLOR6";
case 7: _vcregs_used += 1; return (char*)"COLOR7";
case 8: _vcregs_used += 1; return (char*)"COLOR8";
case 9: _vcregs_used += 1; return (char*)"COLOR9";
case 10: _vcregs_used += 1; return (char*)"COLOR10";
case 11: _vcregs_used += 1; return (char*)"COLOR11";
case 12: _vcregs_used += 1; return (char*)"COLOR12";
case 13: _vcregs_used += 1; return (char*)"COLOR13";
case 14: _vcregs_used += 1; return (char*)"COLOR14";
case 15: _vcregs_used += 1; return (char*)"COLOR15";
}
return (char*)"UNKNOWN"; return (char*)"UNKNOWN";
} }
@ -141,6 +131,14 @@ alloc_freg() {
case 5: _ftregs_used += 1; return (char*)"TEXCOORD5"; case 5: _ftregs_used += 1; return (char*)"TEXCOORD5";
case 6: _ftregs_used += 1; return (char*)"TEXCOORD6"; case 6: _ftregs_used += 1; return (char*)"TEXCOORD6";
case 7: _ftregs_used += 1; return (char*)"TEXCOORD7"; case 7: _ftregs_used += 1; return (char*)"TEXCOORD7";
}
switch (_fcregs_used) {
case 0: _fcregs_used += 1; return (char*)"COLOR0";
case 1: _fcregs_used += 1; return (char*)"COLOR1";
}
// These don't exist in arbvp1/arbfp1, though they're
// reportedly supported by other profiles.
switch (_ftregs_used) {
case 8: _ftregs_used += 1; return (char*)"TEXCOORD8"; case 8: _ftregs_used += 1; return (char*)"TEXCOORD8";
case 9: _ftregs_used += 1; return (char*)"TEXCOORD9"; case 9: _ftregs_used += 1; return (char*)"TEXCOORD9";
case 10: _ftregs_used += 1; return (char*)"TEXCOORD10"; case 10: _ftregs_used += 1; return (char*)"TEXCOORD10";
@ -150,24 +148,6 @@ alloc_freg() {
case 14: _ftregs_used += 1; return (char*)"TEXCOORD14"; case 14: _ftregs_used += 1; return (char*)"TEXCOORD14";
case 15: _ftregs_used += 1; return (char*)"TEXCOORD15"; case 15: _ftregs_used += 1; return (char*)"TEXCOORD15";
} }
switch (_fcregs_used) {
case 0: _fcregs_used += 1; return (char*)"COLOR0";
case 1: _fcregs_used += 1; return (char*)"COLOR1";
case 2: _fcregs_used += 1; return (char*)"COLOR2";
case 3: _fcregs_used += 1; return (char*)"COLOR3";
case 4: _fcregs_used += 1; return (char*)"COLOR4";
case 5: _fcregs_used += 1; return (char*)"COLOR5";
case 6: _fcregs_used += 1; return (char*)"COLOR6";
case 7: _fcregs_used += 1; return (char*)"COLOR7";
case 8: _fcregs_used += 1; return (char*)"COLOR8";
case 9: _fcregs_used += 1; return (char*)"COLOR9";
case 10: _fcregs_used += 1; return (char*)"COLOR10";
case 11: _fcregs_used += 1; return (char*)"COLOR11";
case 12: _fcregs_used += 1; return (char*)"COLOR12";
case 13: _fcregs_used += 1; return (char*)"COLOR13";
case 14: _fcregs_used += 1; return (char*)"COLOR14";
case 15: _fcregs_used += 1; return (char*)"COLOR15";
}
return (char*)"UNKNOWN"; return (char*)"UNKNOWN";
} }
@ -336,7 +316,6 @@ analyze_renderstate(const RenderState *rs) {
if (la->get_num_on_lights() > 0) { if (la->get_num_on_lights() > 0) {
_lighting = true; _lighting = true;
_need_eye_position = true;
_need_eye_normal = true; _need_eye_normal = true;
} }
@ -390,6 +369,13 @@ analyze_renderstate(const RenderState *rs) {
} else if (_map_index_gloss >= 0) { } else if (_map_index_gloss >= 0) {
_have_specular = true; _have_specular = true;
} }
if (_plights.size() + _slights.size() > 0) {
_need_eye_position = true;
} else if (_have_specular && _material->get_local()) {
_need_eye_position = true;
}
} }
// Decide whether to separate ambient and diffuse calculations. // Decide whether to separate ambient and diffuse calculations.
@ -634,7 +620,6 @@ synthesize_shader(const RenderState *rs) {
// These variables will hold the results of register allocation. // These variables will hold the results of register allocation.
char *normal_vreg = 0;
char *ntangent_vreg = 0; char *ntangent_vreg = 0;
char *ntangent_freg = 0; char *ntangent_freg = 0;
char *nbinormal_vreg = 0; char *nbinormal_vreg = 0;
@ -652,6 +637,7 @@ synthesize_shader(const RenderState *rs) {
char *hpos_freg = 0; char *hpos_freg = 0;
if (_vertex_colors) { if (_vertex_colors) {
// Reserve COLOR0
_vcregs_used = 1; _vcregs_used = 1;
_fcregs_used = 1; _fcregs_used = 1;
} }
@ -676,8 +662,8 @@ synthesize_shader(const RenderState *rs) {
text << "\t out float4 l_texcoord" << i << " : " << texcoord_freg[i] << ",\n"; text << "\t out float4 l_texcoord" << i << " : " << texcoord_freg[i] << ",\n";
} }
if (_vertex_colors) { if (_vertex_colors) {
text << "\t in float4 vtx_color : COLOR,\n"; text << "\t in float4 vtx_color : COLOR0,\n";
text << "\t out float4 l_color : COLOR,\n"; text << "\t out float4 l_color : COLOR0,\n";
} }
if (_need_world_position || _need_world_normal) { if (_need_world_position || _need_world_normal) {
text << "\t uniform float4x4 trans_model_to_world,\n"; text << "\t uniform float4x4 trans_model_to_world,\n";
@ -701,8 +687,7 @@ synthesize_shader(const RenderState *rs) {
text << "\t out float4 l_eye_normal : " << eye_normal_freg << ",\n"; text << "\t out float4 l_eye_normal : " << eye_normal_freg << ",\n";
} }
if (_map_index_height >= 0 || _need_world_normal || _need_eye_normal) { if (_map_index_height >= 0 || _need_world_normal || _need_eye_normal) {
normal_vreg = alloc_vreg(); text << "\t in float4 vtx_normal : NORMAL,\n";
text << "\t in float4 vtx_normal : " << normal_vreg << ",\n";
} }
if (_map_index_height >= 0) { if (_map_index_height >= 0) {
htangent_vreg = alloc_vreg(); htangent_vreg = alloc_vreg();
@ -722,6 +707,7 @@ synthesize_shader(const RenderState *rs) {
if (_map_index_normal != _map_index_height) { if (_map_index_normal != _map_index_height) {
ntangent_vreg = alloc_vreg(); ntangent_vreg = alloc_vreg();
nbinormal_vreg = alloc_vreg(); nbinormal_vreg = alloc_vreg();
// NB. If we used TANGENT and BINORMAL, Cg would have them overlap with TEXCOORD6-7.
text << "\t in float4 vtx_tangent" << _map_index_normal << " : " << ntangent_vreg << ",\n"; text << "\t in float4 vtx_tangent" << _map_index_normal << " : " << ntangent_vreg << ",\n";
text << "\t in float4 vtx_binormal" << _map_index_normal << " : " << nbinormal_vreg << ",\n"; text << "\t in float4 vtx_binormal" << _map_index_normal << " : " << nbinormal_vreg << ",\n";
} }
@ -899,7 +885,7 @@ synthesize_shader(const RenderState *rs) {
} }
text << "\t out float4 o_color : COLOR0,\n"; text << "\t out float4 o_color : COLOR0,\n";
if (_vertex_colors) { if (_vertex_colors) {
text << "\t in float4 l_color : COLOR,\n"; text << "\t in float4 l_color : COLOR0,\n";
} else { } else {
text << "\t uniform float4 attr_color,\n"; text << "\t uniform float4 attr_color,\n";
} }
@ -1431,7 +1417,7 @@ synthesize_shader(const RenderState *rs) {
case Fog::M_linear: case Fog::M_linear:
text << "\t result.rgb = lerp(attr_fogcolor.rgb, result.rgb, saturate((attr_fog.z - l_hpos.z) * attr_fog.w));\n"; text << "\t result.rgb = lerp(attr_fogcolor.rgb, result.rgb, saturate((attr_fog.z - l_hpos.z) * attr_fog.w));\n";
break; break;
case Fog::M_exponential: case Fog::M_exponential: // 1.442695f = 1 / log(2)
text << "\t result.rgb = lerp(attr_fogcolor.rgb, result.rgb, saturate(exp2(attr_fog.x * l_hpos.z * -1.442695f)));\n"; text << "\t result.rgb = lerp(attr_fogcolor.rgb, result.rgb, saturate(exp2(attr_fog.x * l_hpos.z * -1.442695f)));\n";
break; break;
case Fog::M_exponential_squared: case Fog::M_exponential_squared: