Alpha conversion added

This commit is contained in:
Balazs Perlaki-Horvath 2024-02-25 21:43:24 +01:00
parent 910fccce5e
commit 99c7eefa07
4 changed files with 274 additions and 1 deletions

View File

@ -0,0 +1,25 @@
//
// Languages.swift
// Kiwix
import Foundation
/// Normalises a set of language codes against a set of accepted codes,
/// translating two-letter (alpha-2) codes to their three-letter (alpha-3)
/// form where needed.
enum LanguagesConverter {

    /// Returns the subset of `codes` that is usable given `validCodes`.
    ///
    /// Codes already contained in `validCodes` are kept as they are. Every
    /// other code is run through `convertToAlpha3(from:)`; the converted value
    /// is kept only when it is itself contained in `validCodes`.
    ///
    /// - Parameters:
    ///   - codes: The language codes to validate.
    ///   - validCodes: The codes that are allowed in the result.
    /// - Returns: The accepted codes; every element is a member of `validCodes`.
    static func convert(codes: Set<String>, validCodes: Set<String>) -> Set<String> {
        // Start with everything that is valid without any conversion.
        var accepted = codes.intersection(validCodes)
        // Give the remaining codes a chance by converting alpha-2 to alpha-3.
        for candidate in codes.subtracting(validCodes) {
            guard let alpha3 = convertToAlpha3(from: candidate),
                  validCodes.contains(alpha3) else { continue }
            accepted.insert(alpha3)
        }
        return accepted
    }

    /// Converts a language code to its alpha-3 representation.
    ///
    /// - Parameter alpha2: The language code to convert.
    /// - Returns: The alpha-3 code, or `nil` when no conversion is available.
    static func convertToAlpha3(from alpha2: String) -> String? {
        guard #available(iOS 16, macOS 13, *) else {
            // Older systems lack `Locale.LanguageCode`, so use the bundled table.
            return AlphaCodesLookUpTable.alpha2ToAlpha3[alpha2]
        }
        return Locale.LanguageCode(alpha2).identifier(.alpha3)
    }
}

View File

@ -0,0 +1,181 @@
//
// AlphaCodesLookUpTable.swift
// Kiwix
import Foundation
/// Static fallback table mapping two-letter (alpha-2) language codes to their
/// three-letter (alpha-3) equivalents.
///
/// It is only needed on iOS 15 and macOS 12, and should be removed once the
/// minimum deployment target reaches iOS 16 & macOS 13.
enum AlphaCodesLookUpTable {
    /// Lower-case alpha-2 code → lower-case alpha-3 code.
    ///
    /// The values use the terminological three-letter form (e.g. `"deu"`,
    /// `"fra"`, `"zho"`). Lookups are exact-match and therefore case-sensitive;
    /// unknown keys yield `nil`.
    static let alpha2ToAlpha3: [String: String] = [
        "aa": "aar",
        "ab": "abk",
        "ae": "ave",
        "af": "afr",
        "ak": "aka",
        "am": "amh",
        "an": "arg",
        "ar": "ara",
        "as": "asm",
        "av": "ava",
        "ay": "aym",
        "az": "aze",
        "ba": "bak",
        "be": "bel",
        "bg": "bul",
        "bi": "bis",
        "bm": "bam",
        "bn": "ben",
        "bo": "bod",
        "br": "bre",
        "bs": "bos",
        "ca": "cat",
        "ce": "che",
        "ch": "cha",
        "co": "cos",
        "cr": "cre",
        "cs": "ces",
        "cu": "chu",
        "cv": "chv",
        "cy": "cym",
        "da": "dan",
        "de": "deu",
        "dv": "div",
        "dz": "dzo",
        "ee": "ewe",
        "el": "ell",
        "en": "eng",
        "eo": "epo",
        "es": "spa",
        "et": "est",
        "eu": "eus",
        "fa": "fas",
        "ff": "ful",
        "fi": "fin",
        "fj": "fij",
        "fo": "fao",
        "fr": "fra",
        "fy": "fry",
        "ga": "gle",
        "gd": "gla",
        "gl": "glg",
        "gn": "grn",
        "gu": "guj",
        "gv": "glv",
        "ha": "hau",
        "he": "heb",
        "hi": "hin",
        "ho": "hmo",
        "hr": "hrv",
        "ht": "hat",
        "hu": "hun",
        "hy": "hye",
        "hz": "her",
        "ia": "ina",
        "id": "ind",
        "ie": "ile",
        "ig": "ibo",
        "ii": "iii",
        "ik": "ipk",
        "io": "ido",
        "is": "isl",
        "it": "ita",
        "iu": "iku",
        "ja": "jpn",
        "jv": "jav",
        "ka": "kat",
        "kg": "kon",
        "ki": "kik",
        "kj": "kua",
        "kk": "kaz",
        "kl": "kal",
        "km": "khm",
        "kn": "kan",
        "ko": "kor",
        "kr": "kau",
        "ks": "kas",
        "ku": "kur",
        "kv": "kom",
        "kw": "cor",
        "ky": "kir",
        "la": "lat",
        "lb": "ltz",
        "lg": "lug",
        "li": "lim",
        "ln": "lin",
        "lo": "lao",
        "lt": "lit",
        "lu": "lub",
        "lv": "lav",
        "mg": "mlg",
        "mh": "mah",
        "mi": "mri",
        "mk": "mkd",
        "ml": "mal",
        "mn": "mon",
        "mr": "mar",
        "ms": "msa",
        "mt": "mlt",
        "my": "mya",
        "na": "nau",
        "nb": "nob",
        "nd": "nde",
        "ne": "nep",
        "ng": "ndo",
        "nl": "nld",
        "nn": "nno",
        "no": "nor",
        "nr": "nbl",
        "nv": "nav",
        "ny": "nya",
        "oc": "oci",
        "oj": "oji",
        "om": "orm",
        "or": "ori",
        "os": "oss",
        "pa": "pan",
        "pi": "pli",
        "pl": "pol",
        "ps": "pus",
        "pt": "por",
        "qu": "que",
        "rm": "roh",
        "rn": "run",
        "ro": "ron",
        "ru": "rus",
        "rw": "kin",
        "sa": "san",
        "sc": "srd",
        "sd": "snd",
        "se": "sme",
        "sg": "sag",
        "si": "sin",
        "sk": "slk",
        "sl": "slv",
        "sm": "smo",
        "sn": "sna",
        "so": "som",
        "sq": "sqi",
        "sr": "srp",
        "ss": "ssw",
        "st": "sot",
        "su": "sun",
        "sv": "swe",
        "sw": "swa",
        "ta": "tam",
        "te": "tel",
        "tg": "tgk",
        "th": "tha",
        "ti": "tir",
        "tk": "tuk",
        "tl": "tgl",
        "tn": "tsn",
        "to": "ton",
        "tr": "tur",
        "ts": "tso",
        "tt": "tat",
        "tw": "twi",
        "ty": "tah",
        "ug": "uig",
        "uk": "ukr",
        "ur": "urd",
        "uz": "uzb",
        "ve": "ven",
        "vi": "vie",
        "vo": "vol",
        "wa": "wln",
        "wo": "wol",
        "xh": "xho",
        "yi": "yid",
        "yo": "yor",
        "za": "zha",
        "zh": "zho",
        "zu": "zul",
    ]
}

View File

@ -0,0 +1,67 @@
//
// LanguagesConverterTests.swift
// UnitTests
import XCTest
@testable import Kiwix
/// Exercises `LanguagesConverter`: filtering against the valid set and
/// alpha-2 to alpha-3 conversion.
final class LanguagesConverterTests: XCTestCase {

    /// An empty input always produces an empty result, whatever the valid set is.
    func testShouldHaveEmptyResult() {
        let noCodes = Set<String>()

        let againstEmptyValidSet = LanguagesConverter.convert(codes: noCodes, validCodes: noCodes)
        XCTAssertEqual(againstEmptyValidSet, noCodes)

        let againstKnownValidSet = LanguagesConverter.convert(codes: noCodes, validCodes: ["eng", "fra"])
        XCTAssertEqual(againstKnownValidSet, noCodes)
    }

    /// Codes that are neither valid nor convertible are dropped.
    func testShouldFilterOutInvalidOnes() {
        let accepted: Set<String> = ["eng", "fra"]

        let onlyInvalid = LanguagesConverter.convert(
            codes: ["invalid 1", "invalid 2"],
            validCodes: accepted
        )
        XCTAssertEqual(onlyInvalid, Set<String>())

        let mixed = LanguagesConverter.convert(
            codes: ["invalid 1", "eng", "invalid 2"],
            validCodes: accepted
        )
        XCTAssertEqual(mixed, ["eng"])
    }

    /// Spot-checks the direct alpha-2 to alpha-3 conversion.
    func testConversion() {
        let expectations = [
            ("ab", "abk"),
            ("ha", "hau"),
            ("en", "eng"),
            ("zh", "zho"),
            ("zu", "zul")
        ]
        for (alpha2, alpha3) in expectations {
            XCTAssertEqual(LanguagesConverter.convertToAlpha3(from: alpha2), alpha3)
        }
    }

    /// An alpha-2 code is converted when its alpha-3 form is valid.
    func testShouldConvertValidOnes() {
        let result = LanguagesConverter.convert(
            codes: ["en"],
            validCodes: ["fra", "ita", "eng"]
        )
        XCTAssertEqual(result, ["eng"])
    }

    /// Codes that are already valid alpha-3 values pass through untouched.
    func testShouldLeaveInAlpha3Ones() {
        let result = LanguagesConverter.convert(
            codes: ["fr", "eng", "invalid"],
            validCodes: ["fra", "ita", "eng"]
        )
        XCTAssertEqual(result, ["eng", "fra"])
    }

    /// Converted codes are still restricted to the valid set.
    func testShouldIntersectToOnlyValidCodes() {
        let result = LanguagesConverter.convert(
            codes: ["fr", "eng", "it"],
            validCodes: ["spa", "fin", "ita"]
        )
        XCTAssertEqual(result, ["ita"])
    }
}

View File

@ -7,7 +7,7 @@
//
import XCTest
import Kiwix
@testable import Kiwix
final class OPDSParserTests: XCTestCase {
/// Test OPDSParser.parse throws error when OPDS data is invalid.