diff --git a/vlib/builtin/rune.v b/vlib/builtin/rune.v index 0d482b47fe..886f52eb8a 100644 --- a/vlib/builtin/rune.v +++ b/vlib/builtin/rune.v @@ -106,7 +106,13 @@ pub fn (c rune) to_lower() rune { // `to_title` convert to title mode. pub fn (c rune) to_title() rune { - return c.to_upper() + if c < 0x80 { + if c >= `a` && c <= `z` { + return c - 32 + } + return c + } + return c.map_to(.to_title) } // `map_to` rune map mode: .to_upper/.to_lower/.to_title @@ -119,19 +125,27 @@ fn (c rune) map_to(mode MapMode) rune { middle := (start + end) / 2 cur_map := unsafe { &rune_maps[middle * rune_maps_columns_in_row] } if c >= u32(unsafe { *cur_map }) && c <= u32(unsafe { *(cur_map + 1) }) { - offset := if mode == .to_upper { + offset := if mode in [.to_upper, .to_title] { unsafe { *(cur_map + 2) } } else { unsafe { *(cur_map + 3) } } if offset == rune_maps_ul { is_odd := (c - unsafe { *cur_map }) % 2 == 1 - if mode == .to_upper && is_odd { + if mode in [.to_upper, .to_title] && is_odd { return c - 1 } else if mode == .to_lower && !is_odd { return c + 1 } return c + } else if offset == rune_maps_utl { + cnt := (c - unsafe { *cur_map }) % 3 + if mode == .to_upper { + return c - cnt + } else if mode == .to_lower { + return c + 2 - cnt + } + return c + 1 - cnt } return c + offset } diff --git a/vlib/builtin/rune_map.v b/vlib/builtin/rune_map.v index 5c41c5e7cc..4f0a297c1b 100644 --- a/vlib/builtin/rune_map.v +++ b/vlib/builtin/rune_map.v @@ -3,6 +3,7 @@ module builtin enum MapMode { to_upper to_lower + to_title } // vfmt off @@ -16,7 +17,10 @@ struct RuneMap { } */ const rune_maps_columns_in_row = 4 +// upper, lower, upper, lower, ... sequence const rune_maps_ul = -3 // NOTE: this should *NOT* be used anywhere in rune_maps, as a normal offset. +// upper, title, lower, upper, title, lower, ... sequence +const rune_maps_utl = -2 // NOTE: this should *NOT* be used anywhere in rune_maps, as a normal offset. // The rune_maps table below, has rows, each containing 4 integers, equivalent to the RuneMap struct from above. // It is represented that way, instead of the more natural array of structs, to save on the .c encoding used for the initialisation. // The overhead for representing it as an array of structs was ~28KB in .c, while with the flat array of ints, it is ~7.5KB. @@ -72,21 +76,11 @@ const rune_maps = [ 0x1B8, 0x1B9, -3, -3, 0x1BC, 0x1BD, -3, -3, 0x1BF, 0x1BF, 56, 0, - 0x1C4, 0x1C4, 0, 2, - 0x1C5, 0x1C5, -1, 1, - 0x1C6, 0x1C6, -2, 0, - 0x1C7, 0x1C7, 0, 2, - 0x1C8, 0x1C8, -1, 1, - 0x1C9, 0x1C9, -2, 0, - 0x1CA, 0x1CA, 0, 2, - 0x1CB, 0x1CB, -1, 1, - 0x1CC, 0x1CC, -2, 0, + 0x1C4, 0x1CC, -2, -2, 0x1CD, 0x1DC, -3, -3, 0x1DD, 0x1DD, -79, 0, 0x1DE, 0x1EF, -3, -3, - 0x1F1, 0x1F1, 0, 2, - 0x1F2, 0x1F2, -1, 1, - 0x1F3, 0x1F3, -2, 0, + 0x1F1, 0x1F3, -2, -2, 0x1F4, 0x1F5, -3, -3, 0x1F6, 0x1F6, 0, -97, 0x1F7, 0x1F7, 0, -56, diff --git a/vlib/builtin/rune_test.v b/vlib/builtin/rune_test.v index 2a6737d9e4..9ec566c198 100644 --- a/vlib/builtin/rune_test.v +++ b/vlib/builtin/rune_test.v @@ -50,6 +50,9 @@ fn test_to_upper() { assert `Ā`.to_upper() == `Ā` assert `Я`.to_upper() == `Я` assert `я`.to_upper() == `Я` + assert `Dž`.to_upper() == `DŽ` + assert `dž`.to_upper() == `DŽ` + assert `DŽ`.to_upper() == `DŽ` } fn test_to_lower() { @@ -61,6 +64,9 @@ fn test_to_lower() { assert `ā`.to_lower() == `ā` assert `я`.to_lower() == `я` assert `Я`.to_lower() == `я` + assert `Dž`.to_lower() == `dž` + assert `DŽ`.to_lower() == `dž` + assert `dž`.to_lower() == `dž` } fn test_to_title() { @@ -72,4 +78,7 @@ fn test_to_title() { assert `Ā`.to_title() == `Ā` assert `я`.to_title() == `Я` assert `Я`.to_title() == `Я` + assert `DŽ`.to_title() == `Dž` + assert `dž`.to_title() == `Dž` + assert `Dž`.to_title() == `Dž` }