Merge pull request #672 from kiwix/667-switch-to-ISO-639-3-everywhere

Switch to ISO-639-3 language codes everywhere
This commit is contained in:
Kelson 2024-02-28 10:26:02 +01:00 committed by GitHub
commit 467ffaa4f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 338 additions and 30 deletions

View File

@ -62,7 +62,7 @@ struct Language: Identifiable, Comparable {
let count: Int
init?(code: String, count: Int) {
let langCode = NSLocale.canonicalLocaleIdentifier(from: code)
let langCode = Locale.canonicalIdentifier(from: code)
guard let name = Locale.current.localizedString(forLanguageCode: langCode) else { return nil }
self.code = code
self.name = name
@ -179,7 +179,7 @@ class ZimFile: NSManagedObject, Identifiable {
var languageCodesListed: String {
return languageCode.split(separator: ",").compactMap { code -> String? in
let langCode = NSLocale.canonicalLocaleIdentifier(from: String(code))
let langCode = Locale.canonicalIdentifier(from: String(code))
return Locale.current.localizedString(forLanguageCode: langCode)
}.joined(separator: ",")
}

View File

@ -67,7 +67,7 @@
NSArray* components = [string componentsSeparatedByString: @","];
NSMutableArray* langCodes = [NSMutableArray array];
[components enumerateObjectsUsingBlock:^(id _Nonnull codeString, NSUInteger idx, BOOL * _Nonnull stop) {
[langCodes addObject:[NSLocale canonicalLanguageIdentifierFromString: codeString]];
[langCodes addObject: codeString];
}];
return [langCodes componentsJoinedByString: @","];
}

View File

@ -0,0 +1,25 @@
//
// Languages.swift
// Kiwix
import Foundation
/// Helpers for reconciling stored language codes with the set of codes that is currently valid.
enum LanguagesConverter {
    /// Filters `codes` down to the ones usable against `validCodes`.
    ///
    /// A code that is already contained in `validCodes` is kept unchanged.
    /// Any other code is passed through `convertToAlpha3(from:)`; the converted
    /// value is kept only when it is contained in `validCodes`, otherwise the
    /// code is dropped.
    ///
    /// - Parameters:
    ///   - codes: The language codes to validate.
    ///   - validCodes: The language codes that are accepted.
    /// - Returns: The accepted codes; empty when nothing could be kept.
    static func convert(codes: Set<String>, validCodes: Set<String>) -> Set<String> {
        var accepted = Set<String>()
        for code in codes {
            if validCodes.contains(code) {
                accepted.insert(code)
            } else if let alpha3 = convertToAlpha3(from: code), validCodes.contains(alpha3) {
                // not valid as given, but its converted form is
                accepted.insert(alpha3)
            }
        }
        return accepted
    }

    /// Tries to turn a language code into its 3 letter form.
    ///
    /// - Parameter alpha2: The language code to convert.
    /// - Returns: The 3 letter code, or `nil` when no conversion is available.
    static func convertToAlpha3(from alpha2: String) -> String? {
        guard #available(iOS 16, macOS 13, *) else {
            // Fallback on earlier versions: use the bundled lookup table
            return AlphaCodesLookUpTable.alpha2ToAlpha3[alpha2]
        }
        return Locale.LanguageCode(alpha2).identifier(.alpha3)
    }
}

View File

@ -6,7 +6,13 @@
// Copyright © 2023 Chris Li. All rights reserved.
//
public extension OPDSParser {
/// Abstraction over a source of zim file entries.
/// Both `OPDSParser` and `DeletingParser` conform to it, so that
/// `LibraryViewModel.process(parser:)` can accept either of them.
protocol Parser {
/// The IDs of the zim files this parser can provide metadata for.
var zimFileIDs: Set<UUID> { get }
/// Parses the given data.
/// - Parameter data: The raw data to parse.
/// - Throws: An error when parsing fails (the error type depends on the conforming type).
func parse(data: Data) throws
/// Looks up the metadata of a single zim file.
/// - Parameter id: The ID of the zim file.
/// - Returns: The metadata, or `nil` when the parser has none for this ID.
func getMetaData(id: UUID) -> ZimFileMetaData?
}
extension OPDSParser: Parser {
var zimFileIDs: Set<UUID> {
__getZimFileIDs() as? Set<UUID> ?? Set<UUID>()
}
@ -21,3 +27,18 @@ public extension OPDSParser {
return __getZimFileMetaData(id)
}
}
/// A `Parser` that never reports any zim file, used to delete zim entries.
///
/// It relies on the assumption that processing a parser inserts the new entries
/// and deletes the ones that are not on the parser's list. An empty list will
/// therefore delete everything, using the same method.
/// @see: LibraryViewModel.process(parser: Parser)
struct DeletingParser: Parser {
    /// Always empty, so no entry is ever on the list.
    let zimFileIDs = Set<UUID>()

    /// Intentionally a no-op: the data is ignored and nothing is thrown.
    func parse(data: Data) throws {}

    /// Always `nil`, as this parser holds no metadata.
    func getMetaData(id: UUID) -> ZimFileMetaData? {
        return nil
    }
}

View File

@ -0,0 +1,181 @@
//
// AlphaCodesLookUpTable.swift
// Kiwix
import Foundation
/// Static lookup table from 2 letter language codes to their 3 letter counterparts.
///
/// It is only needed for iOS 15 and macOS 12, and should be removed
/// once the min target gets to iOS 16 & macOS 13.
enum AlphaCodesLookUpTable {
    /// Keys are the 2 letter codes, values the matching 3 letter codes.
    /// Entries are grouped by the first letter of the key.
    static let alpha2ToAlpha3: [String: String] = [
        // a
        "ab": "abk", "af": "afr", "am": "amh", "an": "arg", "ar": "ara",
        "as": "asm", "av": "ava", "ay": "aym", "az": "aze",
        // b
        "ba": "bak", "be": "bel", "bg": "bul", "bi": "bis", "bm": "bam",
        "bn": "ben", "bo": "bod", "br": "bre", "bs": "bos",
        // c
        "ca": "cat", "ce": "che", "ch": "cha", "co": "cos", "cr": "cre",
        "cs": "ces", "cu": "chu", "cv": "chv", "cy": "cym",
        // d
        "da": "dan", "de": "deu", "dv": "div", "dz": "dzo",
        // e
        "ee": "ewe", "el": "ell", "en": "eng", "eo": "epo", "es": "spa",
        "et": "est", "eu": "eus",
        // f
        "fa": "fas", "ff": "ful", "fi": "fin", "fj": "fij", "fo": "fao",
        "fr": "fra", "fy": "fry",
        // g
        "ga": "gle", "gd": "gla", "gl": "glg", "gn": "grn", "gu": "guj",
        "gv": "glv",
        // h
        "ha": "hau", "he": "heb", "hi": "hin", "hr": "hrv", "ht": "hat",
        "hu": "hun", "hy": "hye",
        // i
        "ia": "ina", "id": "ind", "ie": "ile", "ig": "ibo", "ik": "ipk",
        "io": "ido", "is": "isl", "it": "ita", "iu": "iku",
        // j
        "ja": "jpn", "jv": "jav",
        // k
        "ka": "kat", "kg": "kon", "ki": "kik", "kk": "kaz", "kl": "kal",
        "km": "khm", "kn": "kan", "ko": "kor", "ks": "kas", "ku": "kur",
        "kv": "kom", "kw": "cor", "ky": "kir",
        // l
        "la": "lat", "lb": "ltz", "lg": "lug", "li": "lim", "ln": "lin",
        "lo": "lao", "lt": "lit", "lv": "lav",
        // m
        "mg": "mlg", "mi": "mri", "mk": "mkd", "ml": "mal", "mn": "mon",
        "mr": "mar", "ms": "msa", "mt": "mlt", "my": "mya",
        // n
        "na": "nau", "nb": "nob", "ne": "nep", "nl": "nld", "nn": "nno",
        "no": "nor", "nv": "nav", "ny": "nya",
        // o
        "oc": "oci", "oj": "oji", "om": "orm", "or": "ori", "os": "oss",
        // p
        "pa": "pan", "pi": "pli", "pl": "pol", "ps": "pus", "pt": "por",
        // q
        "qu": "que",
        // r
        "rm": "roh", "rn": "run", "ro": "ron", "ru": "rus", "rw": "kin",
        // s
        "sa": "san", "sc": "srd", "sd": "snd", "se": "sme", "sg": "sag",
        "si": "sin", "sk": "slk", "sl": "slv", "sm": "smo", "sn": "sna",
        "so": "som", "sq": "sqi", "sr": "srp", "ss": "ssw", "st": "sot",
        "su": "sun", "sv": "swe", "sw": "swa",
        // t
        "ta": "tam", "te": "tel", "tg": "tgk", "th": "tha", "ti": "tir",
        "tk": "tuk", "tn": "tsn", "to": "ton", "tr": "tur", "ts": "tso",
        "tt": "tat", "tw": "twi", "ty": "tah",
        // u
        "ug": "uig", "uk": "ukr", "ur": "urd", "uz": "uzb",
        // v
        "ve": "ven", "vi": "vie", "vo": "vol",
        // w
        "wa": "wln", "wo": "wol",
        // x
        "xh": "xho",
        // y
        "yi": "yid", "yo": "yor",
        // z
        "za": "zha", "zh": "zho", "zu": "zul"
    ]
}

View File

@ -32,6 +32,7 @@ extension Defaults.Keys {
"libraryLanguageSortingMode", default: LibraryLanguageSortingMode.byCounts
)
static let libraryAutoRefresh = Key<Bool>("libraryAutoRefresh", default: true)
static let libraryUsingOldISOLangCodes = Key<Bool>("libraryUsingOldISOLangCodes", default: true)
static let libraryLastRefresh = Key<Date?>("libraryLastRefresh")
static let libraryLastRefreshTime = Key<Date?>("libraryLastRefreshTime")

View File

@ -0,0 +1,69 @@
//
// LanguagesConverterTests.swift
// UnitTests
import XCTest
@testable import Kiwix
/// Unit tests for `LanguagesConverter`: validation against the accepted codes
/// and conversion of 2 letter codes to their 3 letter form.
final class LanguagesConverterTests: XCTestCase {

    /// No input codes means no output codes, whatever the accepted list is.
    func testShouldHaveEmptyResult() {
        let noCodes = Set<String>()

        let againstNothing = LanguagesConverter.convert(codes: noCodes, validCodes: noCodes)
        XCTAssertEqual(againstNothing, noCodes)

        let againstSome = LanguagesConverter.convert(codes: noCodes, validCodes: Set(["eng", "fra"]))
        XCTAssertEqual(againstSome, noCodes)
    }

    /// Codes that are neither accepted nor convertible are dropped.
    func testShouldFilterOutInvalidOnes() {
        let accepted: Set<String> = Set(["eng", "fra"])

        let onlyInvalid = LanguagesConverter.convert(
            codes: Set(["invalid 1", "invalid 2"]),
            validCodes: accepted
        )
        XCTAssertEqual(onlyInvalid, Set<String>())

        let partlyInvalid = LanguagesConverter.convert(
            codes: Set(["invalid 1", "eng", "invalid 2"]),
            validCodes: accepted
        )
        XCTAssertEqual(partlyInvalid, Set(["eng"]))
    }

    /// Spot checks of the 2 letter to 3 letter conversion.
    func testConversion() {
        let expectations: [(alpha2: String, alpha3: String)] = [
            ("ab", "abk"),
            ("ha", "hau"),
            ("en", "eng"),
            ("zh", "zho"),
            ("zu", "zul")
        ]
        for expectation in expectations {
            XCTAssertEqual(
                LanguagesConverter.convertToAlpha3(from: expectation.alpha2),
                expectation.alpha3
            )
        }
    }

    /// A 2 letter code is replaced by its accepted 3 letter form.
    func testShouldConvertValidOnes() {
        let result = LanguagesConverter.convert(
            codes: Set(["en"]),
            validCodes: Set(["fra", "ita", "eng"])
        )
        XCTAssertEqual(result, Set(["eng"]))
    }

    /// Codes that are already in 3 letter form are kept next to converted ones.
    func testShouldLeaveInAlpha3Ones() {
        let result = LanguagesConverter.convert(
            codes: Set(["fr", "eng", "invalid"]),
            validCodes: Set(["fra", "ita", "eng"])
        )
        XCTAssertEqual(result, Set(["eng", "fra"]))
    }

    /// Converted or not, a code only survives when it is on the accepted list.
    func testShouldIntersectToOnlyValidCodes() {
        let result = LanguagesConverter.convert(
            codes: Set(["fr", "eng", "it"]),
            validCodes: Set(["spa", "fin", "ita"])
        )
        XCTAssertEqual(result, Set(["ita"]))
    }
}

View File

@ -193,8 +193,8 @@ final class LibraryRefreshViewModelTest: XCTestCase {
XCTAssertEqual(zimFile.hasVideos, false)
XCTAssertEqual(zimFile.includedInSearch, true)
XCTAssertEqual(zimFile.isMissing, false)
// !important make sure the language code is put into the DB as a 2 letter string
XCTAssertEqual(zimFile.languageCode, "en")
// !important make sure the language code is put into the DB as a 3 letter string
XCTAssertEqual(zimFile.languageCode, "eng")
XCTAssertEqual(zimFile.mediaCount, 566835)
XCTAssertEqual(zimFile.name, "Best of Wikipedia")
XCTAssertEqual(zimFile.persistentID, "wikipedia_en_top")

View File

@ -7,7 +7,7 @@
//
import XCTest
import Kiwix
@testable import Kiwix
final class OPDSParserTests: XCTestCase {
/// Test OPDSParser.parse throws error when OPDS data is invalid.
@ -70,8 +70,8 @@ final class OPDSParserTests: XCTestCase {
XCTAssertEqual(metadata.groupIdentifier, "wikipedia_en_top")
XCTAssertEqual(metadata.title, "Best of Wikipedia")
XCTAssertEqual(metadata.fileDescription, "A selection of the best 50,000 Wikipedia articles")
// !important make sure the language code is put into the DB as a 2 letter string
XCTAssertEqual(metadata.languageCodes, "en")
// !important make sure the language code is put into the DB as a 3 letter string
XCTAssertEqual(metadata.languageCodes, "eng")
XCTAssertEqual(metadata.category, "wikipedia")
XCTAssertEqual(metadata.creationDate, try! Date("2023-01-07T00:00:00Z", strategy: .iso8601))
XCTAssertEqual(metadata.size, 6515656704)

View File

@ -50,6 +50,12 @@ public class LibraryViewModel: ObservableObject {
// refresh library
guard let data = try await fetchData() else { return }
let parser = try await parse(data: data)
// delete all old ISO Lang Code entries if needed, by passing in an empty parser
if Defaults[.libraryUsingOldISOLangCodes] {
try await process(parser: DeletingParser())
Defaults[.libraryUsingOldISOLangCodes] = false
}
// process the feed
try await process(parser: parser)
// update library last refresh timestamp
@ -90,34 +96,39 @@ public class LibraryViewModel: ObservableObject {
}
/// The fetched content is filtered by the languages set in settings.
/// Try to set it to the device language, making sure we have content to display.
/// Falls back to English, where most of the content is.
/// This is only affecting the "fresh-install" defaults.
/// The user can always set the preferred content languages in settings.
/// We need to make sure, whatever was set by the user is
/// still on the list of languages we now have from the feed
private func setDefaultContentFilterLanguage() async {
let languages = await Languages.fetch()
let validCodes = Set<String>(languages.map { $0.code })
// preserve only valid selections by:
// converting earlier user selections, and filtering out invalid ones
Defaults[.libraryLanguageCodes] = LanguagesConverter.convert(codes: Defaults[.libraryLanguageCodes],
validCodes: validCodes)
guard Defaults[.libraryLanguageCodes].isEmpty else {
return // it was already set earlier (either by default or the user)
return // what was earlier set by the user or picked by default is valid
}
let fallbackToEnglish = "en"
// Nothing was set earlier, or validation filtered it out to empty
// Try to set it to the device language,
// at the same time make sure, we have content to display, meaning:
// the device language is on the list of languages from the feed
// If all that fails: fallback to English, where most of the content is
let fallbackToEnglish = "eng"
let deviceLang: String?
// In both cases we store the 2 letter version in DB, that is our current
// standard, even though the feed values are 3 letter, those are also converted to 2 letter values
if #available(iOS 16, macOS 13, *) {
deviceLang = Locale.current.language.languageCode?.identifier(.alpha2)
deviceLang = Locale.current.language.languageCode?.identifier(.alpha3)
} else {
deviceLang = Locale.current.languageCode
}
// if the device language code cannot be determined, we fall back to English
let defaultLangCode: String = deviceLang ?? fallbackToEnglish
let languages = await Languages.fetch()
// make sure the language we default to is on the list of Languages coming from the feed
if languages.contains(where: { (lang: Language) in
lang.code == defaultLangCode
}) {
Defaults[.libraryLanguageCodes] = [defaultLangCode]
} else {
// convert it to a set, so we can use the same validation function
let deviceLangSet = Set<String>([deviceLang].compactMap { $0 })
let validDefaults = LanguagesConverter.convert(codes: deviceLangSet, validCodes: validCodes)
if validDefaults.isEmpty { // meaning the device language isn't valid (or nil)
Defaults[.libraryLanguageCodes] = [fallbackToEnglish]
} else {
Defaults[.libraryLanguageCodes] = validDefaults
}
}
@ -156,7 +167,7 @@ public class LibraryViewModel: ObservableObject {
}
}
private func process(parser: OPDSParser) async throws {
private func process(parser: Parser) async throws {
try await withCheckedThrowingContinuation { continuation in
context.perform {
do {
@ -181,7 +192,7 @@ public class LibraryViewModel: ObservableObject {
self.insertionCount = result.result as? Int ?? 0
}
// delete old zim files
// delete old zim entries not included in the feed
let fetchRequest: NSFetchRequest<NSFetchRequestResult> = ZimFile.fetchRequest()
fetchRequest.predicate = NSCompoundPredicate(andPredicateWithSubpredicates: [
NSPredicate(format: "fileURLBookmark == nil"),