From 86a898b40a6f693a2b9c60aaa483e9f9a7a7d8c5 Mon Sep 17 00:00:00 2001
From: rdb <git@rdb.name>
Date: Thu, 31 Dec 2015 12:48:02 +0100
Subject: [PATCH] Tack on combining diacritics for static fonts if font doesn't
 specify modified letters

---
 dtool/src/dtoolutil/unicodeLatinMap.cxx  | 80 ++++++++++++++++++++++++
 dtool/src/dtoolutil/unicodeLatinMap.h    |  2 +
 panda/src/text/textAssembler.cxx         | 34 ++++++++--
 panda/src/text/textAssembler.h           |  2 +-
 pandatool/src/egg-mkfont/eggMakeFont.cxx | 42 ++++++++++++-
 5 files changed, 151 insertions(+), 9 deletions(-)

diff --git a/dtool/src/dtoolutil/unicodeLatinMap.cxx b/dtool/src/dtoolutil/unicodeLatinMap.cxx
index f68fd13f3c..081f6f9693 100644
--- a/dtool/src/dtoolutil/unicodeLatinMap.cxx
+++ b/dtool/src/dtoolutil/unicodeLatinMap.cxx
@@ -1306,6 +1306,74 @@ static const UnicodeLatinMap::Entry latin_map[] = {
 static const size_t latin_map_length = sizeof(latin_map) / sizeof(UnicodeLatinMap::Entry);
 #endif
 
+static const wchar_t combining_accent_map[] = {  // indexed by AccentType value
+  0x0000, // none (0 in any slot: no combining mark is recorded)
+  0x0301, // acute
+  0x0000, // acute_and_dot_above
+  0x0306, // breve
+  0x0000, // breve_and_acute
+  0x0000, // breve_and_dot_below
+  0x0000, // breve_and_grave
+  0x0000, // breve_and_hook_above
+  0x0000, // breve_and_tilde
+  0x032e, // breve_below
+  0x030c, // caron
+  0x0000, // caron_and_dot_above
+  0x0327, // cedilla
+  0x0000, // cedilla_and_acute
+  0x0000, // cedilla_and_breve
+  0x0302, // circumflex
+  0x0000, // circumflex_and_acute
+  0x0000, // circumflex_and_dot_below
+  0x0000, // circumflex_and_grave
+  0x0000, // circumflex_and_hook_above
+  0x0000, // circumflex_and_tilde
+  0x032d, // circumflex_below
+  0x0326, // comma_below
+  0x0000, // curl
+  0x0308, // diaeresis
+  0x0000, // diaeresis_and_acute
+  0x0000, // diaeresis_and_caron
+  0x0000, // diaeresis_and_grave
+  0x0000, // diaeresis_and_macron
+  0x0324, // diaeresis_below
+  0x0307, // dot_above
+  0x0000, // dot_above_and_macron
+  0x0323, // dot_below
+  0x0000, // dot_below_and_dot_above
+  0x0000, // dot_below_and_macron
+  0x030b, // double_acute
+  0x030f, // double_grave
+  0x0300, // grave
+  0x0328, // hook -- NOTE(review): same code point as ogonek; confirm intended
+  0x0309, // hook_above
+  0x031b, // horn
+  0x0000, // horn_and_acute
+  0x0000, // horn_and_dot_below
+  0x0000, // horn_and_grave
+  0x0000, // horn_and_hook_above
+  0x0000, // horn_and_tilde
+  0x0311, // inverted_breve
+  0x0000, // line_below -- NOTE(review): U+0331 (macron below) may apply
+  0x0304, // macron
+  0x0000, // macron_and_acute
+  0x0000, // macron_and_diaeresis
+  0x0000, // macron_and_grave
+  0x0328, // ogonek
+  0x0000, // ogonek_and_macron
+  0x030a, // ring_above
+  0x0000, // ring_above_and_acute
+  0x0325, // ring_below
+  0x0000, // stroke
+  0x0000, // stroke_and_acute
+  0x0000, // stroke_and_hook
+  0x0303, // tilde
+  0x0000, // tilde_and_acute
+  0x0000, // tilde_and_diaeresis
+  0x0000, // tilde_and_macron
+  0x0330, // tilde_below
+  0x0000, // topbar
+};
 
 ////////////////////////////////////////////////////////////////////
 //     Function: UnicodeLatinMap::look_up
@@ -1332,6 +1400,18 @@ look_up(wchar_t character) {
   }
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: UnicodeLatinMap::get_combining_accent
+//       Access: Public, Static
+//  Description: Returns the code point of the combining character for
+//               the given accent type; 0 if none or out of range.
+////////////////////////////////////////////////////////////////////
+wchar_t UnicodeLatinMap::
+get_combining_accent(AccentType accent) {
+  const size_t num = sizeof(combining_accent_map) / sizeof(wchar_t);
+  return ((size_t)accent < num) ? combining_accent_map[(size_t)accent] : 0;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: UnicodeLatinMap::init
 //       Access: Private, Static
diff --git a/dtool/src/dtoolutil/unicodeLatinMap.h b/dtool/src/dtoolutil/unicodeLatinMap.h
index a221cea15d..90883221d5 100644
--- a/dtool/src/dtoolutil/unicodeLatinMap.h
+++ b/dtool/src/dtoolutil/unicodeLatinMap.h
@@ -129,6 +129,8 @@ public:
 
   static const Entry *look_up(wchar_t character);
 
+  static wchar_t get_combining_accent(AccentType accent);
+
 private:
   static void init();
   static bool _initialized;
diff --git a/panda/src/text/textAssembler.cxx b/panda/src/text/textAssembler.cxx
index bb81f91d6d..028df39787 100644
--- a/panda/src/text/textAssembler.cxx
+++ b/panda/src/text/textAssembler.cxx
@@ -1823,7 +1823,19 @@ get_character_glyphs(int character, const TextProperties *properties,
   if (!got_glyph && map_entry != NULL && map_entry->_ascii_equiv != 0) {
     // If we couldn't find the Unicode glyph, try the ASCII
     // equivalent (without the accent marks).
-    got_glyph = font->get_glyph(map_entry->_ascii_equiv, glyph);
+    if (map_entry->_ascii_equiv == 'i') {
+      // Try the dotless i (U+0131) first, regardless of the accent type involved.
+      got_glyph = font->get_glyph(0x0131, glyph) ||
+                  font->get_glyph('i', glyph);
+
+    } else if (map_entry->_ascii_equiv == 'j') {
+      // Same for the dotless j (U+0237), falling back to the plain 'j'.
+      got_glyph = font->get_glyph(0x0237, glyph) ||
+                  font->get_glyph('j', glyph);
+
+    } else {
+      got_glyph = font->get_glyph(map_entry->_ascii_equiv, glyph);
+    }
     
     if (!got_glyph && map_entry->_toupper_character != character) {
       // If we still couldn't find it, try the uppercase
@@ -1845,7 +1857,7 @@ get_character_glyphs(int character, const TextProperties *properties,
         got_second_glyph = 
           font->get_glyph(map_entry->_ascii_additional, second_glyph);
       }
-      
+
       if ((additional_flags & UnicodeLatinMap::AF_ligature) != 0 &&
           got_second_glyph) {
         // If we have two letters that are supposed to be in a
@@ -1875,6 +1887,16 @@ tack_on_accent(UnicodeLatinMap::AccentType accent_type,
                const LPoint3 &centroid,
                const TextProperties *properties, 
                TextAssembler::GlyphPlacement &placement) const {
+  // Look for a combining accent mark character.  Marks in the range
+  // U+0323..U+0333 attach beneath the base letter, so place those below.
+  wchar_t combine_char = UnicodeLatinMap::get_combining_accent(accent_type);
+  bool is_below = (combine_char >= 0x0323 && combine_char <= 0x0333);
+  if (combine_char != 0 &&
+      tack_on_accent(combine_char, is_below ? CP_below : CP_above, CT_none,
+                     min_vert, max_vert, centroid, properties, placement)) {
+    return;
+  }
+
   switch (accent_type) {
   case UnicodeLatinMap::AT_grave:
     // We use the slash as the grave and acute accents.  ASCII does
@@ -1978,8 +2000,10 @@ tack_on_accent(UnicodeLatinMap::AccentType accent_type,
     break;
 
   case UnicodeLatinMap::AT_cedilla:
-    tack_on_accent('c', CP_bottom, CT_tiny_mirror_x, min_vert, max_vert, centroid,
-                   properties, placement);
+   tack_on_accent(0xb8, CP_below, CT_none, min_vert, max_vert, centroid,
+                   properties, placement) ||  // U+00B8, the cedilla glyph
+      tack_on_accent('c', CP_bottom, CT_tiny_mirror_x, min_vert, max_vert, centroid,
+                     properties, placement);  // only tried if the first fails
     //tack_on_accent(',', CP_bottom, CT_none, min_vert, max_vert, centroid,
     //               properties, placement);
     break;
@@ -2014,7 +2038,7 @@ tack_on_accent(UnicodeLatinMap::AccentType accent_type,
 //               font.
 ////////////////////////////////////////////////////////////////////
 bool TextAssembler::
-tack_on_accent(char accent_mark, TextAssembler::CheesyPosition position,
+tack_on_accent(wchar_t accent_mark, TextAssembler::CheesyPosition position,
                TextAssembler::CheesyTransform transform,
                const LPoint3 &min_vert, const LPoint3 &max_vert,
                const LPoint3 &centroid,
diff --git a/panda/src/text/textAssembler.h b/panda/src/text/textAssembler.h
index 6c6ec4f92d..d86f7b4d51 100644
--- a/panda/src/text/textAssembler.h
+++ b/panda/src/text/textAssembler.h
@@ -315,7 +315,7 @@ private:
                  const LPoint3 &centroid,
                  const TextProperties *properties, GlyphPlacement &placement) const;
   bool 
-  tack_on_accent(char accent_mark, CheesyPosition position,
+  tack_on_accent(wchar_t accent_mark, CheesyPosition position,
                  CheesyTransform transform,
                  const LPoint3 &min_vert, const LPoint3 &max_vert,
                  const LPoint3 &centroid,
diff --git a/pandatool/src/egg-mkfont/eggMakeFont.cxx b/pandatool/src/egg-mkfont/eggMakeFont.cxx
index 68403c5e62..5c35157b46 100644
--- a/pandatool/src/egg-mkfont/eggMakeFont.cxx
+++ b/pandatool/src/egg-mkfont/eggMakeFont.cxx
@@ -95,7 +95,8 @@ EggMakeFont() : EggWriter(true, false) {
      "by commas and hyphens to indicate ranges, e.g. '32-126,0xfa0-0xfff'.  "
      "It also may specify ranges of ASCII characters by enclosing them "
      "within square brackets, e.g. '[A-Za-z0-9]'.  If this is not specified, "
-     "the default is the set of ASCII characters.",
+     "the default set has all ASCII characters and an assorted set of "
+     "latin-1 characters, diacritics and punctuation marks.",
      &EggMakeFont::dispatch_range, NULL, &_range);
 
   add_option
@@ -327,9 +328,44 @@ run() {
   }
 
   if (_range.is_empty()) {
-    // If there's no specified range, the default is the entire ASCII
-    // set.
+    // With no range given, default to printable ASCII plus the extras below.
     _range.add_range(0x20, 0x7e);
+
+    _range.add_singleton(0xa1); // Inverted exclamation mark
+    _range.add_singleton(0xa9); // Copyright sign
+    _range.add_singleton(0xab); // Left double angle quote
+    //_range.add_singleton(0xae); // Registered sign
+    _range.add_singleton(0xb0); // Degree symbol
+    _range.add_singleton(0xb5); // Mu/micro
+    _range.add_singleton(0xb8); // Cedilla
+    _range.add_singleton(0xbb); // Right double angle quote
+    _range.add_singleton(0xbf); // Inverted question mark
+
+    _range.add_singleton(0xc6); // AE ligature
+    _range.add_singleton(0xc7); // C cedilla
+    //_range.add_singleton(0xd0); // Upper-case Eth
+    //_range.add_singleton(0xd8); // Upper-case O with line
+    //_range.add_singleton(0xde); // Upper-case Thorn
+    _range.add_singleton(0xdf); // German Eszett (sharp s)
+    _range.add_singleton(0xe6); // ae ligature
+    _range.add_singleton(0xe7); // c cedilla
+    _range.add_singleton(0xf0); // Lower-case Eth
+    _range.add_singleton(0xf8); // Lower-case O with stroke
+    _range.add_singleton(0xfe); // Lower-case Thorn
+
+    //_range.add_singleton(0x03c0); // pi
+
+    // Dotless i (U+0131) and j (U+0237), for combining purposes.
+    _range.add_singleton(0x0131);
+    _range.add_singleton(0x0237);
+
+    // Curly quotation marks (U+2018..U+201F).  These don't take up much space.
+    _range.add_range(0x2018, 0x201f);
+
+    _range.add_singleton(0x2026); // Horizontal ellipsis
+
+    // Add combining diacritics U+0300..U+030F (U+0311 and beyond are left out).
+    _range.add_range(0x0300, 0x030f);
   }
   if (_output_glyph_pattern.empty()) {
     // Create a default texture filename pattern.