diff --git a/panda/src/putil/Sources.pp b/panda/src/putil/Sources.pp
index cf0dade95c..88607ac97e 100644
--- a/panda/src/putil/Sources.pp
+++ b/panda/src/putil/Sources.pp
@@ -51,6 +51,7 @@
     modifierButtons.I modifierButtons.h mouseButton.h \
     mouseData.I mouseData.h nameUniquifier.I nameUniquifier.h \
     nodeCachedReferenceCount.h nodeCachedReferenceCount.I \
+    pbitops.I pbitops.h \
     portalMask.h \
     pta_double.h \
     pta_float.h pta_int.h \
@@ -98,6 +99,7 @@
     modifierButtons.cxx mouseButton.cxx mouseData.cxx \
     nameUniquifier.cxx \
     nodeCachedReferenceCount.cxx \
+    pbitops.cxx \
     pta_double.cxx pta_float.cxx \
     pta_int.cxx pta_ushort.cxx \
     simpleHashMap.cxx \
@@ -156,6 +158,7 @@
     nameUniquifier.I nameUniquifier.h \
     nodeCachedReferenceCount.h nodeCachedReferenceCount.I \
     portalMask.h \
+    pbitops.I pbitops.h \
     pta_double.h \
     pta_float.h pta_int.h pta_ushort.h \
     simpleHashMap.I simpleHashMap.h \
diff --git a/panda/src/putil/bitMask.I b/panda/src/putil/bitMask.I
index e552813a56..58359009b0 100644
--- a/panda/src/putil/bitMask.I
+++ b/panda/src/putil/bitMask.I
@@ -462,12 +462,7 @@ get_lowest_off_bit() const {
 template<class WType, int nbits>
 INLINE int BitMask<WType, nbits>::
 get_highest_on_bit() const {
-  if (_word == 0) {
-    return -1;
-  }
-
-  WordType w = ::flood_bits_down(_word);
-  return count_bits_in_word(w) - 1;
+  return ::get_highest_on_bit(_word);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -871,88 +866,6 @@ init_type() {
   register_type(_type_handle, str.str());
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: count_bits_in_word
-//  Description: Returns the number of 1 bits in the indicated word.
-////////////////////////////////////////////////////////////////////
-INLINE int
-count_bits_in_word(PN_uint32 x) {
-  return (int)num_bits_on[x & 0xffff] + (int)num_bits_on[(x >> 16) & 0xffff];
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: count_bits_in_word
-//  Description: Returns the number of 1 bits in the indicated word.
-////////////////////////////////////////////////////////////////////
-INLINE int
-count_bits_in_word(PN_uint64 x) {
-  return count_bits_in_word((PN_uint32)x) + count_bits_in_word((PN_uint32)(x >> 32));
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: flood_bits_down
-//  Description: Returns a value such that every bit at or below the
-//               highest bit in x is 1.
-////////////////////////////////////////////////////////////////////
-INLINE PN_uint32
-flood_bits_down(PN_uint32 x) {
-  x |= (x >> 1);
-  x |= (x >> 2);
-  x |= (x >> 4);
-  x |= (x >> 8);
-  x |= (x >> 16);
-  return x;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: flood_bits_down
-//  Description: Returns a value such that every bit at or below the
-//               highest bit in x is 1.
-////////////////////////////////////////////////////////////////////
-INLINE PN_uint64
-flood_bits_down(PN_uint64 x) {
-  x |= (x >> 1);
-  x |= (x >> 2);
-  x |= (x >> 4);
-  x |= (x >> 8);
-  x |= (x >> 16);
-  x |= (x >> 32);
-  return x;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: flood_bits_up
-//  Description: Returns a value such that every bit at or above the
-//               highest bit in x is 1.
-////////////////////////////////////////////////////////////////////
-INLINE PN_uint32
-flood_bits_up(PN_uint32 x) {
-  x |= (x << 1);
-  x |= (x << 2);
-  x |= (x << 4);
-  x |= (x << 8);
-  x |= (x << 16);
-  return x;
-}
-
-////////////////////////////////////////////////////////////////////
-//     Function: flood_bits_up
-//  Description: Returns a value such that every bit at or above the
-//               highest bit in x is 1.
-////////////////////////////////////////////////////////////////////
-INLINE PN_uint64
-flood_bits_up(PN_uint64 x) {
-  x |= (x << 1);
-  x |= (x << 2);
-  x |= (x << 4);
-  x |= (x << 8);
-  x |= (x << 16);
-  x |= (x << 32);
-  return x;
-}
-
-
-
 ////////////////////////////////////////////////////////////////////
 //     Function: BitMask::flood_up_in_place
 //       Access: Published
diff --git a/panda/src/putil/bitMask.cxx b/panda/src/putil/bitMask.cxx
index 5c47ba29fc..7396bb49ff 100644
--- a/panda/src/putil/bitMask.cxx
+++ b/panda/src/putil/bitMask.cxx
@@ -22,5 +22,3 @@
 #ifdef __GNUC__
 #pragma implementation
 #endif
-
-unsigned char num_bits_on[65536];
diff --git a/panda/src/putil/bitMask.h b/panda/src/putil/bitMask.h
index be1b3475ce..132a57819d 100644
--- a/panda/src/putil/bitMask.h
+++ b/panda/src/putil/bitMask.h
@@ -20,7 +20,7 @@
 #define BITMASK_H
 
 #include "pandabase.h"
-
+#include "pbitops.h"
 #include "numeric_types.h"
 #include "typedObject.h"
 #include "indent.h"
@@ -151,16 +151,6 @@ private:
   static TypeHandle _type_handle;
 };
 
-INLINE int count_bits_in_word(PN_uint32 x);
-INLINE int count_bits_in_word(PN_uint64 x);
-INLINE PN_uint32 flood_bits_down(PN_uint32 x);
-INLINE PN_uint64 flood_bits_down(PN_uint64 x);
-INLINE PN_uint32 flood_bits_up(PN_uint32 x);
-INLINE PN_uint64 flood_bits_up(PN_uint64 x);
-
-// This table precomputes the number of on bits in each 16-bit word.
-extern EXPCL_PANDA_PUTIL unsigned char num_bits_on[65536];
-
 #include "bitMask.I"
 
 template<class WType, int nbits>
diff --git a/panda/src/putil/pbitops.I b/panda/src/putil/pbitops.I
new file mode 100644
index 0000000000..9ad6e5ef8d
--- /dev/null
+++ b/panda/src/putil/pbitops.I
@@ -0,0 +1,146 @@
+// Filename: pbitops.I
+// Created by:  drose (10May08)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// panda3d-general@lists.sourceforge.net .
+//
+////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: count_bits_in_word
+//  Description: Returns the number of 1 bits in the indicated word.
+////////////////////////////////////////////////////////////////////
+INLINE int
+count_bits_in_word(PN_uint32 x) {
+  return (int)num_bits_on[x & 0xffff] + (int)num_bits_on[(x >> 16) & 0xffff];
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: count_bits_in_word
+//  Description: Returns the number of 1 bits in the indicated word.
+////////////////////////////////////////////////////////////////////
+INLINE int
+count_bits_in_word(PN_uint64 x) {
+  return count_bits_in_word((PN_uint32)x) + count_bits_in_word((PN_uint32)(x >> 32));
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: flood_bits_down
+//  Description: Returns a value such that every bit at or below the
+//               highest bit in x is 1.
+////////////////////////////////////////////////////////////////////
+INLINE PN_uint32
+flood_bits_down(PN_uint32 x) {
+  x |= (x >> 1);
+  x |= (x >> 2);
+  x |= (x >> 4);
+  x |= (x >> 8);
+  x |= (x >> 16);
+  return x;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: flood_bits_down
+//  Description: Returns a value such that every bit at or below the
+//               highest bit in x is 1.
+////////////////////////////////////////////////////////////////////
+INLINE PN_uint64
+flood_bits_down(PN_uint64 x) {
+  x |= (x >> 1);
+  x |= (x >> 2);
+  x |= (x >> 4);
+  x |= (x >> 8);
+  x |= (x >> 16);
+  x |= (x >> 32);
+  return x;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: flood_bits_up
+//  Description: Returns a value such that every bit at or above the
+//               lowest 1 bit in x is 1.
+////////////////////////////////////////////////////////////////////
+INLINE PN_uint32
+flood_bits_up(PN_uint32 x) {
+  x |= (x << 1);
+  x |= (x << 2);
+  x |= (x << 4);
+  x |= (x << 8);
+  x |= (x << 16);
+  return x;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: flood_bits_up
+//  Description: Returns a value such that every bit at or above the
+//               lowest 1 bit in x is 1.
+////////////////////////////////////////////////////////////////////
+INLINE PN_uint64
+flood_bits_up(PN_uint64 x) {
+  x |= (x << 1);
+  x |= (x << 2);
+  x |= (x << 4);
+  x |= (x << 8);
+  x |= (x << 16);
+  x |= (x << 32);
+  return x;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: get_highest_on_bit
+//  Description: Returns the index of the highest 1 bit in the word.
+//               Returns -1 if there are no 1 bits.
+////////////////////////////////////////////////////////////////////
+INLINE int
+get_highest_on_bit(PN_uint32 x) {
+  PN_uint32 w = flood_bits_down(x);
+  return count_bits_in_word(w) - 1;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: get_highest_on_bit
+//  Description: Returns the index of the highest 1 bit in the word.
+//               Returns -1 if there are no 1 bits.
+////////////////////////////////////////////////////////////////////
+INLINE int
+get_highest_on_bit(PN_uint64 x) {
+  PN_uint64 w = flood_bits_down(x);
+  return count_bits_in_word(w) - 1;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: get_next_higher_bit
+//  Description: Returns the exponent of the smallest power of 2
+//               greater than x; that is, the smallest number n such
+//               that (1 << n) is larger than x (0 when x is 0, and
+//               the full bit width when the top bit of x is set).
+////////////////////////////////////////////////////////////////////
+INLINE int
+get_next_higher_bit(PN_uint32 x) {
+  PN_uint32 w = flood_bits_down(x);
+  return count_bits_in_word(w);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: get_next_higher_bit
+//  Description: Returns the exponent of the smallest power of 2
+//               greater than x; that is, the smallest number n such
+//               that (1 << n) is larger than x (0 when x is 0, and
+//               the full bit width when the top bit of x is set).
+////////////////////////////////////////////////////////////////////
+INLINE int
+get_next_higher_bit(PN_uint64 x) {
+  PN_uint64 w = flood_bits_down(x);
+  return count_bits_in_word(w);
+}
diff --git a/panda/src/putil/pbitops.cxx b/panda/src/putil/pbitops.cxx
new file mode 100644
index 0000000000..8b83aa4ee8
--- /dev/null
+++ b/panda/src/putil/pbitops.cxx
@@ -0,0 +1,21 @@
+// Filename: pbitops.cxx
+// Created by:  drose (10May08)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// panda3d-general@lists.sourceforge.net .
+//
+////////////////////////////////////////////////////////////////////
+
+#include "pbitops.h"
+
+unsigned char num_bits_on[65536];
diff --git a/panda/src/putil/pbitops.h b/panda/src/putil/pbitops.h
new file mode 100644
index 0000000000..f79aa40f1e
--- /dev/null
+++ b/panda/src/putil/pbitops.h
@@ -0,0 +1,49 @@
+// Filename: pbitops.h
+// Created by:  drose (10May08)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) 2001 - 2004, Disney Enterprises, Inc.  All rights reserved
+//
+// All use of this software is subject to the terms of the Panda 3d
+// Software license.  You should have received a copy of this license
+// along with this source code; you will also find a current copy of
+// the license at http://etc.cmu.edu/panda3d/docs/license/ .
+//
+// To contact the maintainers of this program write to
+// panda3d-general@lists.sourceforge.net .
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef PBITOPS_H
+#define PBITOPS_H
+
+#include "pandabase.h"
+#include "numeric_types.h"
+
+////////////////////////////////////////////////////////////////////
+// This file defines a few low-level bit-operation routines, optimized
+// all to heck.
+////////////////////////////////////////////////////////////////////
+
+INLINE int count_bits_in_word(PN_uint32 x);
+INLINE int count_bits_in_word(PN_uint64 x);
+
+INLINE PN_uint32 flood_bits_down(PN_uint32 x);
+INLINE PN_uint64 flood_bits_down(PN_uint64 x);
+INLINE PN_uint32 flood_bits_up(PN_uint32 x);
+INLINE PN_uint64 flood_bits_up(PN_uint64 x);
+
+INLINE int get_highest_on_bit(PN_uint32 x);
+INLINE int get_highest_on_bit(PN_uint64 x);
+
+INLINE int get_next_higher_bit(PN_uint32 x);
+INLINE int get_next_higher_bit(PN_uint64 x);
+
+// This table precomputes the number of on bits in each 16-bit word.
+extern EXPCL_PANDA_PUTIL unsigned char num_bits_on[65536];
+
+#include "pbitops.I"
+
+#endif
diff --git a/panda/src/putil/putil_composite2.cxx b/panda/src/putil/putil_composite2.cxx
index 82deb04a46..8c506f3ef3 100644
--- a/panda/src/putil/putil_composite2.cxx
+++ b/panda/src/putil/putil_composite2.cxx
@@ -8,6 +8,7 @@
 #include "mouseData.cxx"
 #include "nameUniquifier.cxx"
 #include "nodeCachedReferenceCount.cxx"
+#include "pbitops.cxx"
 #include "pta_double.cxx"
 #include "pta_float.cxx"
 #include "pta_int.cxx"
diff --git a/panda/src/tinydisplay/config_tinydisplay.cxx b/panda/src/tinydisplay/config_tinydisplay.cxx
index ce4af6ab9f..675ddb1727 100644
--- a/panda/src/tinydisplay/config_tinydisplay.cxx
+++ b/panda/src/tinydisplay/config_tinydisplay.cxx
@@ -70,6 +70,17 @@ ConfigVariableInt td_texture_ram
           "frame, even if this means this limit remains exceeded.)  "
           "Set it to -1 for no limit."));
 
+ConfigVariableBool td_ignore_mipmaps
+  ("td-ignore-mipmaps", false,
+   PRC_DESC("Configure this true to disable use of mipmaps on the "
+            "tinydisplay software renderer."));
+
+ConfigVariableBool td_perspective_textures
+  ("td-perspective-textures", true,
+   PRC_DESC("Configure this false to disable use of perspective-correct "
+            "textures on the tinydisplay software renderer, for a small "
+            "performance gain."));
+
 ////////////////////////////////////////////////////////////////////
 //     Function: init_libtinydisplay
 //  Description: Initializes the library.  This must be called at
diff --git a/panda/src/tinydisplay/config_tinydisplay.h b/panda/src/tinydisplay/config_tinydisplay.h
index 0559360c64..b76ba558c4 100644
--- a/panda/src/tinydisplay/config_tinydisplay.h
+++ b/panda/src/tinydisplay/config_tinydisplay.h
@@ -35,5 +35,7 @@ extern ConfigVariableInt x_wheel_up_button;
 extern ConfigVariableInt x_wheel_down_button;
 
 extern ConfigVariableInt td_texture_ram;
+extern ConfigVariableBool td_ignore_mipmaps;
+extern ConfigVariableBool td_perspective_textures;
 
 #endif
diff --git a/panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx b/panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx
index d51f18d06b..f760a8ee54 100644
--- a/panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx
+++ b/panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx
@@ -598,6 +598,15 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
     }
   }
 
+  if (_c->texture_2d_enabled && _texture_replace) {
+    // We don't need the vertex color or lighting calculation after
+    // all, since the current texture will just hide all of that.
+    needs_color = false;
+    needs_normal = false;
+  }
+
+  bool lighting_enabled = (needs_normal && _c->lighting_enabled);
+
   for (i = 0; i < num_used_vertices; ++i) {
     GLVertex *v = &_vertices[i];
     const LVecBase4f &d = rvertex.get_data4f();
@@ -642,18 +651,18 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
 
     v->color = _c->current_color;
 
-    if (needs_normal) {
+    if (lighting_enabled) {
       const LVecBase3f &d = rnormal.get_data3f();
       _c->current_normal.X = d[0];
       _c->current_normal.Y = d[1];
       _c->current_normal.Z = d[2];
       _c->current_normal.W = 0.0f;
-    }
 
-    gl_vertex_transform(_c, v);
-
-    if (_c->lighting_enabled) {
+      gl_vertex_transform(_c, v);
       gl_shade_vertex(_c, v);
+
+    } else {
+      gl_vertex_transform(_c, v);
     }
 
     if (v->clip_code == 0) {
@@ -666,16 +675,16 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
   // Set up the appropriate function callback for filling triangles,
   // according to the current state.
 
-  int depth_write_state = 0;
+  int depth_write_state = 0;  // zon
   if (_target._depth_write->get_mode() != DepthWriteAttrib::M_on) {
-    depth_write_state = 1;
+    depth_write_state = 1;  // zoff
   }
 
-  int color_write_state = 0;
+  int color_write_state = 0;  // noblend
   switch (_target._transparency->get_mode()) {
   case TransparencyAttrib::M_alpha:
   case TransparencyAttrib::M_dual:
-    color_write_state = 1;
+    color_write_state = 1;    // blend
     break;
 
   default:
@@ -685,36 +694,36 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
   unsigned int color_channels =
     _target._color_write->get_channels() & _color_write_mask;
   if (color_channels == ColorWriteAttrib::C_off) {
-    color_write_state = 2;
+    color_write_state = 2;    // nocolor
   }
 
-  int alpha_test_state = 0;
+  int alpha_test_state = 0;   // anone
   switch (_target._alpha_test->get_mode()) {
   case AlphaTestAttrib::M_none:
   case AlphaTestAttrib::M_never:
   case AlphaTestAttrib::M_always:
   case AlphaTestAttrib::M_equal:
   case AlphaTestAttrib::M_not_equal:
-    alpha_test_state = 0;
+    alpha_test_state = 0;    // anone
     break;
 
   case AlphaTestAttrib::M_less:
   case AlphaTestAttrib::M_less_equal:
-    alpha_test_state = 1;
-    _c->zb->reference_alpha = (unsigned int)_target._alpha_test->get_reference_alpha() * 0xff00;
+    alpha_test_state = 1;    // aless; scale the reference alpha before truncating it
+    _c->zb->reference_alpha = (unsigned int)(_target._alpha_test->get_reference_alpha() * 0xff00);
     break;
 
   case AlphaTestAttrib::M_greater:
   case AlphaTestAttrib::M_greater_equal:
-    alpha_test_state = 2;
-    _c->zb->reference_alpha = (unsigned int)_target._alpha_test->get_reference_alpha() * 0xff00;
+    alpha_test_state = 2;    // amore; scale the reference alpha before truncating it
+    _c->zb->reference_alpha = (unsigned int)(_target._alpha_test->get_reference_alpha() * 0xff00);
     break;
   }
 
-  int depth_test_state = 1;
+  int depth_test_state = 1;    // zless
   _c->depth_test = 1;  // set this for ZB_line
   if (_target._depth_test->get_mode() == DepthTestAttrib::M_none) {
-    depth_test_state = 0;
+    depth_test_state = 0;      // no depth test
     _c->depth_test = 0;
   }
   
@@ -724,17 +733,17 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
     // as well use the flat shading model.
     shade_model = ShadeModelAttrib::M_flat;
   }
-  int shading_state = 2;  // smooth
+  int shade_model_state = 2;  // smooth
   _c->smooth_shade_model = true;
 
   if (shade_model == ShadeModelAttrib::M_flat) {
     _c->smooth_shade_model = false;
-    shading_state = 1;  // flat
+    shade_model_state = 1;  // flat
     if (_c->current_color.X == 1.0f &&
         _c->current_color.Y == 1.0f &&
         _c->current_color.Z == 1.0f &&
         _c->current_color.W == 1.0f) {
-      shading_state = 0;  // white
+      shade_model_state = 0;  // white
     }
   }
 
@@ -743,12 +752,18 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
   if (_c->texture_2d_enabled) {
     texfilter_state = _texfilter_state;
     texturing_state = 2;  // perspective-correct textures
-    if (_c->matrix_model_projection_no_w_transform) {
+    if (_c->matrix_model_projection_no_w_transform || !td_perspective_textures) {
       texturing_state = 1;  // non-perspective-correct textures
     }
+
+    if (_texture_replace) {
+      // If we're completely replacing the underlying color, then it
+      // doesn't matter what the color is.
+      shade_model_state = 0;
+    }
   }
 
-  _c->zb_fill_tri = fill_tri_funcs[depth_write_state][color_write_state][alpha_test_state][depth_test_state][texfilter_state][shading_state][texturing_state];
+  _c->zb_fill_tri = fill_tri_funcs[depth_write_state][color_write_state][alpha_test_state][depth_test_state][texfilter_state][shade_model_state][texturing_state];
   
   return true;
 }
@@ -1592,6 +1607,16 @@ do_issue_texture() {
     
   // Then, turn on the current texture mode.
   apply_texture(tc);
+
+  // Set a few state cache values.
+  _texfilter_state = 0;    // nearest
+  if (texture->uses_mipmaps() && !td_ignore_mipmaps) {
+    _texfilter_state = 1;  // mipmap
+  }
+
+  // M_replace means M_replace; anything else is treated the same as
+  // M_modulate.
+  _texture_replace = (stage->get_mode() == TextureStage::M_replace);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -1609,11 +1634,6 @@ apply_texture(TextureContext *tc) {
   _c->current_texture = gtc->_gltex;
   _c->texture_2d_enabled = true;
 
-  _texfilter_state = 0;
-  if (gtc->get_texture()->uses_mipmaps()) {
-    _texfilter_state = 1;
-  }
-
   GLTexture *gltex = gtc->_gltex;
 
   if (gtc->was_image_modified() || gltex->num_levels == 0) {
@@ -1808,13 +1828,15 @@ setup_gltex(GLTexture *gltex, int x_size, int y_size, int num_levels) {
 ////////////////////////////////////////////////////////////////////
 int TinyGraphicsStateGuardian::
 get_tex_shift(int orig_size) {
-  unsigned int filled = flood_bits_down((unsigned int)(orig_size - 1));
-  int size = filled + 1;
-  if (size != orig_size || size > _max_texture_dimension) {
+  if (orig_size <= 0 || (orig_size & (orig_size - 1)) != 0) {
+    // Not a positive power of 2 (0 would otherwise yield a shift of 32).
+    return -1;
+  }
+  if (orig_size > _max_texture_dimension) {
     return -1;
   }
 
-  return count_bits_in_word((unsigned int)size - 1);
+  return count_bits_in_word((unsigned int)orig_size - 1);
 }
 
 ////////////////////////////////////////////////////////////////////
diff --git a/panda/src/tinydisplay/tinyGraphicsStateGuardian.h b/panda/src/tinydisplay/tinyGraphicsStateGuardian.h
index 2178a8493b..8bca73f6b4 100644
--- a/panda/src/tinydisplay/tinyGraphicsStateGuardian.h
+++ b/panda/src/tinydisplay/tinyGraphicsStateGuardian.h
@@ -136,6 +136,7 @@ private:
   };
   int _color_material_flags;
   int _texfilter_state;
+  bool _texture_replace;
 
   SimpleLru _textures_lru;
 
diff --git a/panda/src/tinydisplay/zbuffer.h b/panda/src/tinydisplay/zbuffer.h
index 05f159d378..eeb3182922 100644
--- a/panda/src/tinydisplay/zbuffer.h
+++ b/panda/src/tinydisplay/zbuffer.h
@@ -6,7 +6,7 @@
  */
 
 #include "zfeatures.h"
-#include "bitMask.h"
+#include "pbitops.h"
 
 typedef unsigned short ZPOINT;
 #define ZB_Z_BITS 16
@@ -23,7 +23,7 @@ typedef unsigned short ZPOINT;
 /* This is the theoretical max number of bits we have available to
    shift down to achieve each next mipmap level, based on the size of
    a 32-bit int.  We need to preallocate mipmap arrays of this size. */
-#define MAX_MIPMAP_LEVELS (32 - ZB_POINT_ST_FRAC_BITS)
+#define MAX_MIPMAP_LEVELS (32 - ZB_POINT_ST_FRAC_BITS + 1)
 
 /* Returns the index within a texture level for the given (s, t) texel. */
 #define ZB_TEXEL(level, s, t)                                         \
@@ -36,8 +36,15 @@ typedef unsigned short ZPOINT;
 #define ZB_LOOKUP_TEXTURE_NEAREST_MIPMAP(texture_levels, s, t, level) \
   ZB_LOOKUP_TEXTURE_NEAREST((texture_levels) + (level), (s) >> (level), (t) >> (level))
 
+/* A special abs() function which doesn't require any branching
+   instructions.  Might not work on some exotic hardware. */
+
+/* Also doesn't appear to be any faster in practice.  Guess gcc is
+   already doing the right thing.  Is msvc? */
+//#define FAST_ABS(v) (((v) ^ ((v) >> (sizeof(v) * 8 - 1))) - ((v) >> (sizeof(v) * 8 - 1)))
+
 #define DO_CALC_MIPMAP_LEVEL \
-    mipmap_level = count_bits_in_word(flood_bits_down((unsigned int)max(abs(dsdx), abs(dtdx)) >> ZB_POINT_ST_FRAC_BITS))
+  mipmap_level = get_next_higher_bit(((unsigned int)abs(dsdx) + (unsigned int)abs(dtdx)) >> ZB_POINT_ST_FRAC_BITS)
 
 #if 0
 /* Experiment with bilinear filtering.  Looks great, but seems to run
diff --git a/panda/src/tinydisplay/ztriangle.h b/panda/src/tinydisplay/ztriangle.h
index 3ad69b4a38..cbe3c02755 100644
--- a/panda/src/tinydisplay/ztriangle.h
+++ b/panda/src/tinydisplay/ztriangle.h
@@ -69,6 +69,8 @@
     return;
   fz = 1.0f / fz;
 
+  EARLY_OUT_FZ();
+
   fdx1 *= fz;
   fdy1 *= fz;
   fdx2 *= fz;
@@ -378,6 +380,7 @@
 #undef INTERP_STZ
 
 #undef EARLY_OUT
+#undef EARLY_OUT_FZ
 #undef DRAW_INIT
 #undef DRAW_LINE  
 #undef PUT_PIXEL
diff --git a/panda/src/tinydisplay/ztriangle.py b/panda/src/tinydisplay/ztriangle.py
index 8506376c08..ae664a6d53 100755
--- a/panda/src/tinydisplay/ztriangle.py
+++ b/panda/src/tinydisplay/ztriangle.py
@@ -30,7 +30,7 @@ Options = [
 # The various combinations of these options are explicit within
 # ztriangle_two.h.
 ExtraOptions = [
-    # shading
+    # shade model
     [ 'white', 'flat', 'smooth' ],
 
     # texturing
diff --git a/panda/src/tinydisplay/ztriangle_two.h b/panda/src/tinydisplay/ztriangle_two.h
index 3abcf73244..5adf45d070 100644
--- a/panda/src/tinydisplay/ztriangle_two.h
+++ b/panda/src/tinydisplay/ztriangle_two.h
@@ -7,18 +7,22 @@ static void FNAME(white_untextured) (ZBuffer *zb,
   {						\
   }
 
+#define EARLY_OUT_FZ() 				\
+  {						\
+  }
+
 #define DRAW_INIT()                             \
   {                                             \
   }
  
-#define PUT_PIXEL(_a)                                   \
-  {                                                     \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                       \
-    if (ZCMP(pz[_a], zz)) {                             \
-      STORE_PIX(pp[_a], 0xffffffffUL, 0xffffUL, 0xffffUL, 0xffffUL, 0xffffUL);     \
-      STORE_Z(pz[_a], zz);                              \
-    }                                                   \
-    z+=dzdx;                                            \
+#define PUT_PIXEL(_a)                                                   \
+  {                                                                     \
+    zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
+    if (ZCMP(pz[_a], zz)) {                                             \
+      STORE_PIX(pp[_a], 0xffffffffUL, 0xffffUL, 0xffffUL, 0xffffUL, 0xffffUL); \
+      STORE_Z(pz[_a], zz);                                              \
+    }                                                                   \
+    z+=dzdx;                                                            \
   }
 
 #include "ztriangle.h"
@@ -36,6 +40,10 @@ static void FNAME(flat_untextured) (ZBuffer *zb,
   {						\
   }
 
+#define EARLY_OUT_FZ() 				\
+  {						\
+  }
+
 #define DRAW_INIT()                             \
   {                                             \
     if (!ACMP(zb, p2->a)) {                     \
@@ -80,10 +88,14 @@ static void FNAME(smooth_untextured) (ZBuffer *zb,
     c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);     \
     if (c0 == c1 && c0 == c2) {                         \
       /* It's really a flat-shaded triangle. */         \
-      FNAME(flat_untextured)(zb, p0, p1, p2);       \
+      FNAME(flat_untextured)(zb, p0, p1, p2);           \
       return;                                           \
     }                                                   \
   }
+
+#define EARLY_OUT_FZ() 				\
+  {						\
+  }
   
 #define DRAW_INIT() 				\
   {						\
@@ -120,16 +132,20 @@ static void FNAME(white_textured) (ZBuffer *zb,
   {						\
   }
 
+#define EARLY_OUT_FZ() 				\
+  {						\
+  }
+
 #define DRAW_INIT()				\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture;       \
   }
 
 #define PUT_PIXEL(_a)                                                   \
   {                                                                     \
     zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
     if (ZCMP(pz[_a], zz)) {                                             \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                           \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
       if (ACMP(zb, PIXEL_A(tmp))) {                                     \
         STORE_PIX(pp[_a], tmp, PIXEL_R(tmp), PIXEL_G(tmp), PIXEL_B(tmp), PIXEL_A(tmp)); \
         STORE_Z(pz[_a], zz);                                            \
@@ -156,37 +172,41 @@ static void FNAME(flat_textured) (ZBuffer *zb,
   {						\
   }
 
+#define EARLY_OUT_FZ() 				\
+  {						\
+  }
+
 #define DRAW_INIT()				\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture;       \
     or0 = p2->r;                                \
     og0 = p2->g;                                \
     ob0 = p2->b;                                \
     oa0 = p2->a;                                \
   }
 
-#define PUT_PIXEL(_a)                                           \
-  {                                                             \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                               \
-    if (ZCMP(pz[_a], zz)) {                                     \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                   \
-      int a = oa0 * PIXEL_A(tmp) >> 16;                         \
-      if (ACMP(zb, a)) {                                        \
-        STORE_PIX(pp[_a],                                       \
-                  RGBA_TO_PIXEL(or0 * PIXEL_R(tmp) >> 16,       \
-                                og0 * PIXEL_G(tmp) >> 16,       \
-                                ob0 * PIXEL_B(tmp) >> 16,       \
-                                a),                             \
-                  or0 * PIXEL_R(tmp) >> 16,                     \
-                  og0 * PIXEL_G(tmp) >> 16,                     \
-                  ob0 * PIXEL_B(tmp) >> 16,                     \
-                  a);                                           \
-        STORE_Z(pz[_a], zz);                                    \
-      }                                                         \
-    }                                                           \
-    z+=dzdx;                                                    \
-    s+=dsdx;                                                    \
-    t+=dtdx;                                                    \
+#define PUT_PIXEL(_a)                                                   \
+  {                                                                     \
+    zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
+    if (ZCMP(pz[_a], zz)) {                                             \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
+      int a = oa0 * PIXEL_A(tmp) >> 16;                                 \
+      if (ACMP(zb, a)) {                                                \
+        STORE_PIX(pp[_a],                                               \
+                  RGBA_TO_PIXEL(or0 * PIXEL_R(tmp) >> 16,               \
+                                og0 * PIXEL_G(tmp) >> 16,               \
+                                ob0 * PIXEL_B(tmp) >> 16,               \
+                                a),                                     \
+                  or0 * PIXEL_R(tmp) >> 16,                             \
+                  og0 * PIXEL_G(tmp) >> 16,                             \
+                  ob0 * PIXEL_B(tmp) >> 16,                             \
+                  a);                                                   \
+        STORE_Z(pz[_a], zz);                                            \
+      }                                                                 \
+    }                                                                   \
+    z+=dzdx;                                                            \
+    s+=dsdx;                                                            \
+    t+=dtdx;                                                            \
   }
 
 #include "ztriangle.h"
@@ -201,55 +221,59 @@ static void FNAME(smooth_textured) (ZBuffer *zb,
 #define INTERP_ST
 #define INTERP_RGB
 
-#define EARLY_OUT()                                             \
-  {                                                             \
-    int c0, c1, c2;                                             \
-    c0 = RGBA_TO_PIXEL(p0->r, p0->g, p0->b, p0->a);             \
-    c1 = RGBA_TO_PIXEL(p1->r, p1->g, p1->b, p1->a);             \
-    c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);             \
-    if (c0 == c1 && c0 == c2) {                                 \
-      /* It's really a flat-shaded triangle. */                 \
-      if (c0 == 0xffffffff) {                                   \
-        /* Actually, it's a white triangle. */                  \
+#define EARLY_OUT()                                     \
+  {                                                     \
+    int c0, c1, c2;                                     \
+    c0 = RGBA_TO_PIXEL(p0->r, p0->g, p0->b, p0->a);     \
+    c1 = RGBA_TO_PIXEL(p1->r, p1->g, p1->b, p1->a);     \
+    c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);     \
+    if (c0 == c1 && c0 == c2) {                         \
+      /* It's really a flat-shaded triangle. */         \
+      if (c0 == 0xffffffff) {                           \
+        /* Actually, it's a white triangle. */          \
         FNAME(white_textured)(zb, p0, p1, p2);          \
-        return;                                                 \
-      }                                                         \
-      FNAME(flat_textured)(zb, p0, p1, p2);        \
-      return;                                                   \
-    }                                                           \
+        return;                                         \
+      }                                                 \
+      FNAME(flat_textured)(zb, p0, p1, p2);             \
+      return;                                           \
+    }                                                   \
+  }
+
+#define EARLY_OUT_FZ() 				\
+  { /* empty block: no fz check */		\
   }
 
 #define DRAW_INIT()                             \
   {                                             \
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture;       \
   }
 
-#define PUT_PIXEL(_a)                                           \
-  {                                                             \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                               \
-    if (ZCMP(pz[_a], zz)) {                                     \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                   \
-      int a = oa1 * PIXEL_A(tmp) >> 16;                         \
-      if (ACMP(zb, a)) {                                        \
-        STORE_PIX(pp[_a],                                       \
-                  RGBA_TO_PIXEL(or1 * PIXEL_R(tmp) >> 16,       \
-                                og1 * PIXEL_G(tmp) >> 16,       \
-                                ob1 * PIXEL_B(tmp) >> 16,       \
-                                a),                             \
-                  or1 * PIXEL_R(tmp) >> 16,                     \
-                  og1 * PIXEL_G(tmp) >> 16,                     \
-                  ob1 * PIXEL_B(tmp) >> 16,                     \
-                  a);                                           \
-        STORE_Z(pz[_a], zz);                                    \
-      }                                                         \
-    }                                                           \
-    z+=dzdx;                                                    \
-    og1+=dgdx;                                                  \
-    or1+=drdx;                                                  \
-    ob1+=dbdx;                                                  \
-    oa1+=dadx;                                                  \
-    s+=dsdx;                                                    \
-    t+=dtdx;                                                    \
+#define PUT_PIXEL(_a)                                                   \
+  { /* Handle pixel _a, then step z, the colors, s and t. */            \
+    zz=z >> ZB_POINT_Z_FRAC_BITS; /* discard low-order z bits */        \
+    if (ZCMP(pz[_a], zz)) { /* ZCMP: compare with stored pz */          \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
+      int a = oa1 * PIXEL_A(tmp) >> 16; /* texel alpha scaled by oa1 */ \
+      if (ACMP(zb, a)) { /* ACMP decides whether to write */            \
+        STORE_PIX(pp[_a], /* packed pixel, then r,g,b,a */              \
+                  RGBA_TO_PIXEL(or1 * PIXEL_R(tmp) >> 16,               \
+                                og1 * PIXEL_G(tmp) >> 16,               \
+                                ob1 * PIXEL_B(tmp) >> 16,               \
+                                a),                                     \
+                  or1 * PIXEL_R(tmp) >> 16,                             \
+                  og1 * PIXEL_G(tmp) >> 16,                             \
+                  ob1 * PIXEL_B(tmp) >> 16,                             \
+                  a);                                                   \
+        STORE_Z(pz[_a], zz);                                            \
+      }                                                                 \
+    }                                                                   \
+    z+=dzdx; /* add the per-step deltas */                              \
+    og1+=dgdx;                                                          \
+    or1+=drdx;                                                          \
+    ob1+=dbdx;                                                          \
+    oa1+=dadx;                                                          \
+    s+=dsdx;                                                            \
+    t+=dtdx;                                                            \
   }
 
 #include "ztriangle.h"
@@ -275,9 +299,19 @@ static void FNAME(white_perspective) (ZBuffer *zb,
   {						\
   }
 
+#define EARLY_OUT_FZ()                                                  \
+  {                                                                     \
+    if (fz > 0.001 || fz < -.001) {                                     \
+      /* |fz| > 0.001: triangle is small enough to skip perspective     \
+         correction; use the non-perspective white_textured path. */    \
+      FNAME(white_textured)(zb, p0, p1, p2);                            \
+      return;                                                           \
+    }                                                                   \
+  }
+
 #define DRAW_INIT()				\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture;       \
     fdzdx=(float)dzdx;                          \
     fndzdx=NB_INTERP * fdzdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
@@ -289,7 +323,7 @@ static void FNAME(white_perspective) (ZBuffer *zb,
   {                                                                     \
     zz=z >> ZB_POINT_Z_FRAC_BITS;                                       \
     if (ZCMP(pz[_a], zz)) {                                             \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                           \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
       if (ACMP(zb, PIXEL_A(tmp))) {                                     \
         STORE_PIX(pp[_a], tmp, PIXEL_R(tmp), PIXEL_G(tmp), PIXEL_B(tmp), PIXEL_A(tmp)); \
         STORE_Z(pz[_a], zz);                                            \
@@ -302,7 +336,7 @@ static void FNAME(white_perspective) (ZBuffer *zb,
 
 #define DRAW_LINE()                                     \
   {                                                     \
-    register ZPOINT *pz;                        \
+    register ZPOINT *pz;                                \
     register PIXEL *pp;                                 \
     register unsigned int s,t,z,zz;                     \
     register int n,dsdx,dtdx;                           \
@@ -320,11 +354,11 @@ static void FNAME(white_perspective) (ZBuffer *zb,
         float ss,tt;                                    \
         ss=(sz * zinv);                                 \
         tt=(tz * zinv);                                 \
-        s=(unsigned int) ss;                                     \
-        t=(unsigned int) tt;                                     \
+        s=(unsigned int) ss;                            \
+        t=(unsigned int) tt;                            \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
-        CALC_MIPMAP_LEVEL; \
+        CALC_MIPMAP_LEVEL;                              \
         fz+=fndzdx;                                     \
         zinv=1.0f / fz;                                 \
       }                                                 \
@@ -346,11 +380,11 @@ static void FNAME(white_perspective) (ZBuffer *zb,
       float ss,tt;                                      \
       ss=(sz * zinv);                                   \
       tt=(tz * zinv);                                   \
-      s=(unsigned int) ss;                                       \
-      t=(unsigned int) tt;                                       \
+      s=(unsigned int) ss;                              \
+      t=(unsigned int) tt;                              \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
-      CALC_MIPMAP_LEVEL; \
+      CALC_MIPMAP_LEVEL;                                \
     }                                                   \
     while (n>=0) {                                      \
       PUT_PIXEL(0);                                     \
@@ -383,9 +417,19 @@ static void FNAME(flat_perspective) (ZBuffer *zb,
   {						\
   }
 
+#define EARLY_OUT_FZ()                                                  \
+  {                                                                     \
+    if (fz > 0.001 || fz < -.001) {                                     \
+      /* |fz| > 0.001: triangle is small enough to skip perspective     \
+         correction; use the non-perspective flat_textured path. */     \
+      FNAME(flat_textured)(zb, p0, p1, p2);                             \
+      return;                                                           \
+    }                                                                   \
+  }
+
 #define DRAW_INIT() 				\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture;       \
     fdzdx=(float)dzdx;                          \
     fndzdx=NB_INTERP * fdzdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
@@ -396,33 +440,33 @@ static void FNAME(flat_perspective) (ZBuffer *zb,
     oa0 = p2->a;                                \
   }
 
-#define PUT_PIXEL(_a)                                           \
-  {                                                             \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                               \
-    if (ZCMP(pz[_a], zz)) {                                     \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                   \
-      int a = oa0 * PIXEL_A(tmp) >> 16;                         \
-      if (ACMP(zb, a)) {                                        \
-        STORE_PIX(pp[_a],                                       \
-                  RGBA_TO_PIXEL(or0 * PIXEL_R(tmp) >> 16,       \
-                                og0 * PIXEL_G(tmp) >> 16,       \
-                                ob0 * PIXEL_B(tmp) >> 16,       \
-                                a),                             \
-                  or0 * PIXEL_R(tmp) >> 16,                     \
-                  og0 * PIXEL_G(tmp) >> 16,                     \
-                  ob0 * PIXEL_B(tmp) >> 16,                     \
-                  a);                                           \
-        STORE_Z(pz[_a], zz);                                    \
-      }                                                         \
-    }                                                           \
-    z+=dzdx;                                                    \
-    s+=dsdx;                                                    \
-    t+=dtdx;                                                    \
+#define PUT_PIXEL(_a)                                                   \
+  { /* Handle pixel _a, then advance z, s and t by one step. */         \
+    zz=z >> ZB_POINT_Z_FRAC_BITS; /* discard low-order z bits */        \
+    if (ZCMP(pz[_a], zz)) { /* ZCMP: compare with stored pz */          \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
+      int a = oa0 * PIXEL_A(tmp) >> 16; /* texel alpha scaled by oa0 */ \
+      if (ACMP(zb, a)) { /* ACMP decides whether to write */            \
+        STORE_PIX(pp[_a], /* packed pixel, then r,g,b,a */              \
+                  RGBA_TO_PIXEL(or0 * PIXEL_R(tmp) >> 16,               \
+                                og0 * PIXEL_G(tmp) >> 16,               \
+                                ob0 * PIXEL_B(tmp) >> 16,               \
+                                a),                                     \
+                  or0 * PIXEL_R(tmp) >> 16,                             \
+                  og0 * PIXEL_G(tmp) >> 16,                             \
+                  ob0 * PIXEL_B(tmp) >> 16,                             \
+                  a);                                                   \
+        STORE_Z(pz[_a], zz);                                            \
+      }                                                                 \
+    }                                                                   \
+    z+=dzdx; /* add the per-step deltas */                              \
+    s+=dsdx;                                                            \
+    t+=dtdx;                                                            \
   }
 
 #define DRAW_LINE()                                     \
   {                                                     \
-    register ZPOINT *pz;                        \
+    register ZPOINT *pz;                                \
     register PIXEL *pp;                                 \
     register unsigned int s,t,z,zz;                     \
     register int n,dsdx,dtdx;                           \
@@ -445,11 +489,11 @@ static void FNAME(flat_perspective) (ZBuffer *zb,
         float ss,tt;                                    \
         ss=(sz * zinv);                                 \
         tt=(tz * zinv);                                 \
-        s=(unsigned int) ss;                                     \
-        t=(unsigned int) tt;                                     \
+        s=(unsigned int) ss;                            \
+        t=(unsigned int) tt;                            \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
-        CALC_MIPMAP_LEVEL; \
+        CALC_MIPMAP_LEVEL;                              \
         fz+=fndzdx;                                     \
         zinv=1.0f / fz;                                 \
       }                                                 \
@@ -471,11 +515,11 @@ static void FNAME(flat_perspective) (ZBuffer *zb,
       float ss,tt;                                      \
       ss=(sz * zinv);                                   \
       tt=(tz * zinv);                                   \
-      s=(unsigned int) ss;                                       \
-      t=(unsigned int) tt;                                       \
+      s=(unsigned int) ss;                              \
+      t=(unsigned int) tt;                              \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
-      CALC_MIPMAP_LEVEL; \
+      CALC_MIPMAP_LEVEL;                                \
     }                                                   \
     while (n>=0) {                                      \
       PUT_PIXEL(0);                                     \
@@ -502,64 +546,74 @@ static void FNAME(smooth_perspective) (ZBuffer *zb,
 #define INTERP_STZ
 #define INTERP_RGB
 
-#define EARLY_OUT()                                                     \
-  {                                                                     \
-    int c0, c1, c2;                                                     \
-    c0 = RGBA_TO_PIXEL(p0->r, p0->g, p0->b, p0->a);                     \
-    c1 = RGBA_TO_PIXEL(p1->r, p1->g, p1->b, p1->a);                     \
-    c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);                     \
-    if (c0 == c1 && c0 == c2) {                                         \
-      /* It's really a flat-shaded triangle. */                         \
-      if (c0 == 0xffffffff) {                                           \
-        /* Actually, it's a white triangle. */                          \
+#define EARLY_OUT()                                     \
+  {                                                     \
+    int c0, c1, c2;                                     \
+    c0 = RGBA_TO_PIXEL(p0->r, p0->g, p0->b, p0->a);     \
+    c1 = RGBA_TO_PIXEL(p1->r, p1->g, p1->b, p1->a);     \
+    c2 = RGBA_TO_PIXEL(p2->r, p2->g, p2->b, p2->a);     \
+    if (c0 == c1 && c0 == c2) {                         \
+      /* It's really a flat-shaded triangle. */         \
+      if (c0 == 0xffffffff) {                           \
+        /* Actually, it's a white triangle. */          \
         FNAME(white_perspective)(zb, p0, p1, p2);       \
-        return;                                                         \
-      }                                                                 \
-      FNAME(flat_perspective)(zb, p0, p1, p2);     \
+        return;                                         \
+      }                                                 \
+      FNAME(flat_perspective)(zb, p0, p1, p2);          \
+      return;                                           \
+    }                                                   \
+  }
+
+#define EARLY_OUT_FZ()                                                  \
+  {                                                                     \
+    if (fz > 0.001 || fz < -.001) {                                     \
+      /* |fz| > 0.001: triangle is small enough to skip perspective     \
+         correction; use the non-perspective smooth_textured path. */   \
+      FNAME(smooth_textured)(zb, p0, p1, p2);                           \
       return;                                                           \
     }                                                                   \
   }
 
 #define DRAW_INIT() 				\
   {						\
-    texture_levels = zb->current_texture;             \
+    texture_levels = zb->current_texture;       \
     fdzdx=(float)dzdx;                          \
     fndzdx=NB_INTERP * fdzdx;                   \
     ndszdx=NB_INTERP * dszdx;                   \
     ndtzdx=NB_INTERP * dtzdx;                   \
   }
 
-#define PUT_PIXEL(_a)                                           \
-  {                                                             \
-    zz=z >> ZB_POINT_Z_FRAC_BITS;                               \
-    if (ZCMP(pz[_a], zz)) {                                     \
-      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);                   \
-      int a = oa1 * PIXEL_A(tmp) >> 16;                         \
-      if (ACMP(zb, a)) {                                        \
-        STORE_PIX(pp[_a],                                       \
-                  RGBA_TO_PIXEL(or1 * PIXEL_R(tmp) >> 16,       \
-                                og1 * PIXEL_G(tmp) >> 16,       \
-                                ob1 * PIXEL_B(tmp) >> 16,       \
-                                a),                             \
-                  or1 * PIXEL_R(tmp) >> 16,                     \
-                  og1 * PIXEL_G(tmp) >> 16,                     \
-                  ob1 * PIXEL_B(tmp) >> 16,                     \
-                  a);                                           \
-        STORE_Z(pz[_a], zz);                                    \
-      }                                                         \
-    }                                                           \
-    z+=dzdx;                                                    \
-    og1+=dgdx;                                                  \
-    or1+=drdx;                                                  \
-    ob1+=dbdx;                                                  \
-    oa1+=dadx;                                                  \
-    s+=dsdx;                                                    \
-    t+=dtdx;                                                    \
+#define PUT_PIXEL(_a)                                                   \
+  { /* Handle pixel _a, then step z, the colors, s and t. */            \
+    zz=z >> ZB_POINT_Z_FRAC_BITS; /* discard low-order z bits */        \
+    if (ZCMP(pz[_a], zz)) { /* ZCMP: compare with stored pz */          \
+      tmp = ZB_LOOKUP_TEXTURE(texture_levels, s, t, mipmap_level);      \
+      int a = oa1 * PIXEL_A(tmp) >> 16; /* texel alpha scaled by oa1 */ \
+      if (ACMP(zb, a)) { /* ACMP decides whether to write */            \
+        STORE_PIX(pp[_a], /* packed pixel, then r,g,b,a */              \
+                  RGBA_TO_PIXEL(or1 * PIXEL_R(tmp) >> 16,               \
+                                og1 * PIXEL_G(tmp) >> 16,               \
+                                ob1 * PIXEL_B(tmp) >> 16,               \
+                                a),                                     \
+                  or1 * PIXEL_R(tmp) >> 16,                             \
+                  og1 * PIXEL_G(tmp) >> 16,                             \
+                  ob1 * PIXEL_B(tmp) >> 16,                             \
+                  a);                                                   \
+        STORE_Z(pz[_a], zz);                                            \
+      }                                                                 \
+    }                                                                   \
+    z+=dzdx; /* add the per-step deltas */                              \
+    og1+=dgdx;                                                          \
+    or1+=drdx;                                                          \
+    ob1+=dbdx;                                                          \
+    oa1+=dadx;                                                          \
+    s+=dsdx;                                                            \
+    t+=dtdx;                                                            \
   }
 
 #define DRAW_LINE()                                     \
   {                                                     \
-    register ZPOINT *pz;                        \
+    register ZPOINT *pz;                                \
     register PIXEL *pp;                                 \
     register unsigned int s,t,z,zz;                     \
     register int n,dsdx,dtdx;                           \
@@ -582,11 +636,11 @@ static void FNAME(smooth_perspective) (ZBuffer *zb,
         float ss,tt;                                    \
         ss=(sz * zinv);                                 \
         tt=(tz * zinv);                                 \
-        s=(unsigned int) ss;                                     \
-        t=(unsigned int) tt;                                     \
+        s=(unsigned int) ss;                            \
+        t=(unsigned int) tt;                            \
         dsdx= (int)( (dszdx - ss*fdzdx)*zinv );         \
         dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );         \
-        CALC_MIPMAP_LEVEL; \
+        CALC_MIPMAP_LEVEL;                              \
         fz+=fndzdx;                                     \
         zinv=1.0f / fz;                                 \
       }                                                 \
@@ -608,11 +662,11 @@ static void FNAME(smooth_perspective) (ZBuffer *zb,
       float ss,tt;                                      \
       ss=(sz * zinv);                                   \
       tt=(tz * zinv);                                   \
-      s=(unsigned int) ss;                                       \
-      t=(unsigned int) tt;                                       \
+      s=(unsigned int) ss;                              \
+      t=(unsigned int) tt;                              \
       dsdx= (int)( (dszdx - ss*fdzdx)*zinv );           \
       dtdx= (int)( (dtzdx - tt*fdzdx)*zinv );           \
-      CALC_MIPMAP_LEVEL; \
+      CALC_MIPMAP_LEVEL;                                \
     }                                                   \
     while (n>=0) {                                      \
       PUT_PIXEL(0);                                     \