builtin: fix rune.to_title(), add tests (#22808)

This commit is contained in:
yuyi 2024-11-08 23:09:20 +08:00 committed by GitHub
parent bf83715b6c
commit 59c8f6b07b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 15 deletions

View File

@ -106,7 +106,13 @@ pub fn (c rune) to_lower() rune {
// `to_title` convert to title mode. // `to_title` convert to title mode.
pub fn (c rune) to_title() rune { pub fn (c rune) to_title() rune {
return c.to_upper() if c < 0x80 {
if c >= `a` && c <= `z` {
return c - 32
}
return c
}
return c.map_to(.to_title)
} }
// `map_to` rune map mode: .to_upper/.to_lower/.to_title // `map_to` rune map mode: .to_upper/.to_lower/.to_title
@ -119,19 +125,27 @@ fn (c rune) map_to(mode MapMode) rune {
middle := (start + end) / 2 middle := (start + end) / 2
cur_map := unsafe { &rune_maps[middle * rune_maps_columns_in_row] } cur_map := unsafe { &rune_maps[middle * rune_maps_columns_in_row] }
if c >= u32(unsafe { *cur_map }) && c <= u32(unsafe { *(cur_map + 1) }) { if c >= u32(unsafe { *cur_map }) && c <= u32(unsafe { *(cur_map + 1) }) {
offset := if mode == .to_upper { offset := if mode in [.to_upper, .to_title] {
unsafe { *(cur_map + 2) } unsafe { *(cur_map + 2) }
} else { } else {
unsafe { *(cur_map + 3) } unsafe { *(cur_map + 3) }
} }
if offset == rune_maps_ul { if offset == rune_maps_ul {
is_odd := (c - unsafe { *cur_map }) % 2 == 1 is_odd := (c - unsafe { *cur_map }) % 2 == 1
if mode == .to_upper && is_odd { if mode in [.to_upper, .to_title] && is_odd {
return c - 1 return c - 1
} else if mode == .to_lower && !is_odd { } else if mode == .to_lower && !is_odd {
return c + 1 return c + 1
} }
return c return c
} else if offset == rune_maps_utl {
cnt := (c - unsafe { *cur_map }) % 3
if mode == .to_upper {
return c - cnt
} else if mode == .to_lower {
return c + 2 - cnt
}
return c + 1 - cnt
} }
return c + offset return c + offset
} }

View File

@ -3,6 +3,7 @@ module builtin
enum MapMode { enum MapMode {
to_upper to_upper
to_lower to_lower
to_title
} }
// vfmt off // vfmt off
@ -16,7 +17,10 @@ struct RuneMap {
} }
*/ */
const rune_maps_columns_in_row = 4 const rune_maps_columns_in_row = 4
// upper, lower, upper, lower, ... sequence
const rune_maps_ul = -3 // NOTE: this should *NOT* be used anywhere in rune_maps, as a normal offset. const rune_maps_ul = -3 // NOTE: this should *NOT* be used anywhere in rune_maps, as a normal offset.
// upper, title, lower, upper, title, lower, ... sequence
const rune_maps_utl = -2 // NOTE: this should *NOT* be used anywhere in rune_maps, as a normal offset.
// The rune_maps table below, has rows, each containing 4 integers, equivalent to the RuneMap struct from above. // The rune_maps table below, has rows, each containing 4 integers, equivalent to the RuneMap struct from above.
// It is represented that way, instead of the more natural array of structs, to save on the .c encoding used for the initialisation. // It is represented that way, instead of the more natural array of structs, to save on the .c encoding used for the initialisation.
// The overhead for representing it as an array of structs was ~28KB in .c, while with the flat array of ints, it is ~7.5KB. // The overhead for representing it as an array of structs was ~28KB in .c, while with the flat array of ints, it is ~7.5KB.
@ -72,21 +76,11 @@ const rune_maps = [
0x1B8, 0x1B9, -3, -3, 0x1B8, 0x1B9, -3, -3,
0x1BC, 0x1BD, -3, -3, 0x1BC, 0x1BD, -3, -3,
0x1BF, 0x1BF, 56, 0, 0x1BF, 0x1BF, 56, 0,
0x1C4, 0x1C4, 0, 2, 0x1C4, 0x1CC, -2, -2,
0x1C5, 0x1C5, -1, 1,
0x1C6, 0x1C6, -2, 0,
0x1C7, 0x1C7, 0, 2,
0x1C8, 0x1C8, -1, 1,
0x1C9, 0x1C9, -2, 0,
0x1CA, 0x1CA, 0, 2,
0x1CB, 0x1CB, -1, 1,
0x1CC, 0x1CC, -2, 0,
0x1CD, 0x1DC, -3, -3, 0x1CD, 0x1DC, -3, -3,
0x1DD, 0x1DD, -79, 0, 0x1DD, 0x1DD, -79, 0,
0x1DE, 0x1EF, -3, -3, 0x1DE, 0x1EF, -3, -3,
0x1F1, 0x1F1, 0, 2, 0x1F1, 0x1F3, -2, -2,
0x1F2, 0x1F2, -1, 1,
0x1F3, 0x1F3, -2, 0,
0x1F4, 0x1F5, -3, -3, 0x1F4, 0x1F5, -3, -3,
0x1F6, 0x1F6, 0, -97, 0x1F6, 0x1F6, 0, -97,
0x1F7, 0x1F7, 0, -56, 0x1F7, 0x1F7, 0, -56,

View File

@ -50,6 +50,9 @@ fn test_to_upper() {
assert `Ā`.to_upper() == `Ā` assert `Ā`.to_upper() == `Ā`
assert `Я`.to_upper() == `Я` assert `Я`.to_upper() == `Я`
assert `я`.to_upper() == `Я` assert `я`.to_upper() == `Я`
assert `Dž`.to_upper() == `DŽ`
assert `dž`.to_upper() == `DŽ`
assert `DŽ`.to_upper() == `DŽ`
} }
fn test_to_lower() { fn test_to_lower() {
@ -61,6 +64,9 @@ fn test_to_lower() {
assert `ā`.to_lower() == `ā` assert `ā`.to_lower() == `ā`
assert `я`.to_lower() == `я` assert `я`.to_lower() == `я`
assert `Я`.to_lower() == `я` assert `Я`.to_lower() == `я`
assert `Dž`.to_lower() == `dž`
assert `DŽ`.to_lower() == `dž`
assert `dž`.to_lower() == `dž`
} }
fn test_to_title() { fn test_to_title() {
@ -72,4 +78,7 @@ fn test_to_title() {
assert `Ā`.to_title() == `Ā` assert `Ā`.to_title() == `Ā`
assert `я`.to_title() == `Я` assert `я`.to_title() == `Я`
assert `Я`.to_title() == `Я` assert `Я`.to_title() == `Я`
assert `DŽ`.to_title() == `Dž`
assert `dž`.to_title() == `Dž`
assert `Dž`.to_title() == `Dž`
} }