Merge pull request #865 from kiwix/853-snippets-using-libzim-only

Remove Fuzi and libxml2; search snippets now use libzim only
This commit is contained in:
Kelson 2024-07-15 09:01:58 +02:00 committed by GitHub
commit 876e400ab7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 16 additions and 170 deletions

View File

@ -63,7 +63,7 @@ enum Brand {
// for custom apps, where we do not show this in settings, it should be disabled by default
return .disabled
}
return .firstSentence
return .matches
}
}

View File

@ -177,5 +177,3 @@
"enum.navigation_item.downloads" = "Pellgargadurioù";
"enum.navigation_item.settings" = "Arventennoù";
"enum.search_result_snippet_mode.disabled" = "Diweredekaet";
"enum.search_result_snippet_mode.paragraph" = "Rannbennad kentañ";
"enum.search_result_snippet_mode.sentence" = "Frazenn gentañ";

View File

@ -207,6 +207,4 @@
"enum.navigation_item.downloads" = "Deebu";
"enum.navigation_item.settings" = "Bintaɣindisi";
"enum.search_result_snippet_mode.disabled" = "Zaɣisibu";
"enum.search_result_snippet_mode.paragraph" = "Tuuli kuliga";
"enum.search_result_snippet_mode.sentence" = "Tuuli yɛtɔɣili";
"enum.search_result_snippet_mode.matches" = "Manchisi";

View File

@ -236,6 +236,4 @@
"enum.navigation_item.downloads" = "Downloads";
"enum.navigation_item.settings" = "Einstellungen";
"enum.search_result_snippet_mode.disabled" = "Deaktiviert";
"enum.search_result_snippet_mode.paragraph" = "Erster Absatz";
"enum.search_result_snippet_mode.sentence" = "Erster Satz";
"enum.search_result_snippet_mode.matches" = "Treffer";

View File

@ -276,6 +276,4 @@
"enum.navigation_item.downloads" = "Downloads";
"enum.navigation_item.settings" = "Settings";
"enum.search_result_snippet_mode.disabled" = "Disabled";
"enum.search_result_snippet_mode.paragraph" = "First Paragraph";
"enum.search_result_snippet_mode.sentence" = "First Sentence";
"enum.search_result_snippet_mode.matches" = "Matches";

View File

@ -238,6 +238,4 @@
"enum.navigation_item.downloads" = "Téléchargements";
"enum.navigation_item.settings" = "Paramètres";
"enum.search_result_snippet_mode.disabled" = "Désactivé";
"enum.search_result_snippet_mode.paragraph" = "Premier paragraphe";
"enum.search_result_snippet_mode.sentence" = "Première phrase";
"enum.search_result_snippet_mode.matches" = "Correspondances";

View File

@ -227,6 +227,4 @@
"enum.navigation_item.downloads" = "Saukewa";
"enum.navigation_item.settings" = "Saituttuka";
"enum.search_result_snippet_mode.disabled" = "An kashe";
"enum.search_result_snippet_mode.paragraph" = "Sakin Farko";
"enum.search_result_snippet_mode.sentence" = "Hukuncin Farko";
"enum.search_result_snippet_mode.matches" = "Matches";

View File

@ -228,6 +228,4 @@
"enum.navigation_item.downloads" = "הורדות";
"enum.navigation_item.settings" = "הגדרות";
"enum.search_result_snippet_mode.disabled" = "כבוי";
"enum.search_result_snippet_mode.paragraph" = "פסקה ראשונה";
"enum.search_result_snippet_mode.sentence" = "משפט ראשון";
"enum.search_result_snippet_mode.matches" = "התאמות";

View File

@ -227,6 +227,4 @@
"enum.navigation_item.downloads" = "Discargamentos";
"enum.navigation_item.settings" = "Parametros";
"enum.search_result_snippet_mode.disabled" = "Disactivate";
"enum.search_result_snippet_mode.paragraph" = "Prime paragrapho";
"enum.search_result_snippet_mode.sentence" = "Prime phrase";
"enum.search_result_snippet_mode.matches" = "Correspondentias";

View File

@ -237,6 +237,4 @@
"enum.navigation_item.downloads" = "Nbudata";
"enum.navigation_item.settings" = "Ntọala";
"enum.search_result_snippet_mode.disabled" = "Agbanyụrụ";
"enum.search_result_snippet_mode.paragraph" = "Nkeji mbụ";
"enum.search_result_snippet_mode.sentence" = "Ahịrịokwu mbụ";
"enum.search_result_snippet_mode.matches" = "Ndị ndaba";

View File

@ -64,6 +64,4 @@
"enum.navigation_item.downloads" = "Downloadu";
"enum.navigation_item.settings" = "Eñoche";
"enum.search_result_snippet_mode.disabled" = "Nyo ló";
"enum.search_result_snippet_mode.paragraph" = "Óla ubia nei";
"enum.search_result_snippet_mode.sentence" = "Óla ejio dudu";
"enum.search_result_snippet_mode.matches" = "A chana";

View File

@ -168,6 +168,4 @@
"enum.navigation_item.new" = "Nei";
"enum.navigation_item.settings" = "Astellungen";
"enum.search_result_snippet_mode.disabled" = "Desaktivéiert";
"enum.search_result_snippet_mode.paragraph" = "Éischten Abschnitt";
"enum.search_result_snippet_mode.sentence" = "Éischte Saz";
"enum.search_result_snippet_mode.matches" = "Treffer";

View File

@ -228,6 +228,4 @@
"enum.navigation_item.downloads" = "Преземања";
"enum.navigation_item.settings" = "Нагодувања";
"enum.search_result_snippet_mode.disabled" = "Оневозможено";
"enum.search_result_snippet_mode.paragraph" = "Прв пасус";
"enum.search_result_snippet_mode.sentence" = "Прва реченица";
"enum.search_result_snippet_mode.matches" = "Совпаѓања";

View File

@ -167,5 +167,3 @@
"enum.navigation_item.downloads" = "Pobierz";
"enum.navigation_item.settings" = "Ustawienia";
"enum.search_result_snippet_mode.disabled" = "Wyłączony";
"enum.search_result_snippet_mode.paragraph" = "Pierwszy paragraf";
"enum.search_result_snippet_mode.sentence" = "Pierwsze zdanie";

View File

@ -224,7 +224,5 @@
"enum.navigation_item.new" = "Side navigation menu item title";
"enum.navigation_item.downloads" = "Side navigation menu item title";
"enum.navigation_item.settings" = "Side navigation menu item title";
"enum.search_result_snippet_mode.disabled" = "Picker option title in settings. Search snippet: Disabled | First Paragraph | First Sentence | Matches";
"enum.search_result_snippet_mode.paragraph" = "Picker option title in settings. Search snippet: Disabled | First Paragraph | First Sentence | Matches";
"enum.search_result_snippet_mode.sentence" = "Picker option title in settings. Search snippet: Disabled | First Paragraph | First Sentence | Matches";
"enum.search_result_snippet_mode.matches" = "Picker option title in settings. Search snippet: Disabled | First Paragraph | First Sentence | Matches";
"enum.search_result_snippet_mode.disabled" = "Picker option title in settings. Search snippet: Disabled | Matches";
"enum.search_result_snippet_mode.matches" = "Picker option title in settings. Search snippet: Disabled | Matches";

View File

@ -233,6 +233,4 @@
"enum.navigation_item.downloads" = "Скачать";
"enum.navigation_item.settings" = "Настройки";
"enum.search_result_snippet_mode.disabled" = "Выключен";
"enum.search_result_snippet_mode.paragraph" = "Формат абзаца";
"enum.search_result_snippet_mode.sentence" = "Первое предложение";
"enum.search_result_snippet_mode.matches" = "Совпадения";

View File

@ -131,5 +131,3 @@
"enum.navigation_item.downloads" = "ڈاؤن لوڈ";
"enum.navigation_item.settings" = "ترتیباں";
"enum.search_result_snippet_mode.disabled" = "غیر فعال تھی ڳیا";
"enum.search_result_snippet_mode.paragraph" = "پہلا پیرا";
"enum.search_result_snippet_mode.sentence" = "پہلا جملہ";

View File

@ -225,6 +225,4 @@
"enum.navigation_item.downloads" = "Prenosi";
"enum.navigation_item.settings" = "Nastavitve";
"enum.search_result_snippet_mode.disabled" = "Onemogočeno";
"enum.search_result_snippet_mode.paragraph" = "Prvi odstavek";
"enum.search_result_snippet_mode.sentence" = "Prvi stavek";
"enum.search_result_snippet_mode.matches" = "Ujemanja";

View File

@ -231,6 +231,4 @@
"enum.navigation_item.downloads" = "Nedladdningar";
"enum.navigation_item.settings" = "Inställningar";
"enum.search_result_snippet_mode.disabled" = "Inaktiverad";
"enum.search_result_snippet_mode.paragraph" = "Första stycket";
"enum.search_result_snippet_mode.sentence" = "Första meningen";
"enum.search_result_snippet_mode.matches" = "Matchningar";

View File

@ -233,6 +233,4 @@
"enum.navigation_item.downloads" = "İndirilenler";
"enum.navigation_item.settings" = "Ayarlar";
"enum.search_result_snippet_mode.disabled" = "Devre dışı";
"enum.search_result_snippet_mode.paragraph" = "İlk Paragraf";
"enum.search_result_snippet_mode.sentence" = "İlk Cümle";
"enum.search_result_snippet_mode.matches" = "Eşleşmeler";

View File

@ -230,6 +230,4 @@
"enum.navigation_item.downloads" = "下载";
"enum.navigation_item.settings" = "设置";
"enum.search_result_snippet_mode.disabled" = "已停用";
"enum.search_result_snippet_mode.paragraph" = "第一段";
"enum.search_result_snippet_mode.sentence" = "第一句";
"enum.search_result_snippet_mode.matches" = "相符";

View File

@ -229,6 +229,4 @@
"enum.navigation_item.downloads" = "下載";
"enum.navigation_item.settings" = "設定";
"enum.search_result_snippet_mode.disabled" = "已停用";
"enum.search_result_snippet_mode.paragraph" = "第一段";
"enum.search_result_snippet_mode.sentence" = "第一句";
"enum.search_result_snippet_mode.matches" = "相符";

View File

@ -271,8 +271,13 @@ enum NavigationItem: Hashable, Identifiable {
}
}
/// Note: The cases were reduced from:
/// `case disabled, firstParagraph, firstSentence, matches`
/// The remaining cases keep their enum values, so that existing users are migrated as intended.
/// DO NOT change the order of the cases, as that might cause migration problems from version 3.4.0.
/// See: https://github.com/kiwix/kiwix-apple/issues/853
enum SearchResultSnippetMode: String, CaseIterable, Identifiable, Defaults.Serializable {
case disabled, firstParagraph, firstSentence, matches
case disabled, matches
var id: String { rawValue }
@ -280,10 +285,6 @@ enum SearchResultSnippetMode: String, CaseIterable, Identifiable, Defaults.Seria
switch self {
case .disabled:
return "enum.search_result_snippet_mode.disabled".localized
case .firstParagraph:
return "enum.search_result_snippet_mode.paragraph".localized
case .firstSentence:
return "enum.search_result_snippet_mode.sentence".localized
case .matches:
return "enum.search_result_snippet_mode.matches".localized
}

View File

@ -1,108 +0,0 @@
// This file is part of Kiwix for iOS & macOS.
//
// Kiwix is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// any later version.
//
// Kiwix is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Kiwix; If not, see https://www.gnu.org/licenses/.
import CoreLocation
import NaturalLanguage
#if os(macOS)
import AppKit
#elseif os(iOS)
import UIKit
#endif
import Fuzi
/// Thin wrapper around a Fuzi `HTMLDocument` that extracts the pieces of an
/// HTML page used for display: its title, the first paragraph, the first
/// sentence of that paragraph, and the path of the first image.
class HTMLParser {
    /// The parsed document all queries run against.
    private let document: HTMLDocument

    /// Font applied to the text of `<b>` elements in generated snippets.
    #if os(macOS)
    static private let boldFont = NSFont.systemFont(ofSize: 12.0, weight: .medium)
    #elseif os(iOS)
    static private let boldFont = UIFont.systemFont(ofSize: 12.0, weight: .medium)
    #endif

    /// Creates a parser for an already parsed document.
    init(document: HTMLDocument) {
        self.document = document
    }

    /// Creates a parser from an HTML string.
    /// - Throws: Whatever `HTMLDocument(string:)` throws when parsing fails.
    convenience init(html: String) throws {
        self.init(document: try HTMLDocument(string: html))
    }

    /// Creates a parser from raw HTML data.
    /// - Throws: Whatever `HTMLDocument(data:)` throws when parsing fails.
    convenience init(data: Data) throws {
        self.init(document: try HTMLDocument(data: data))
    }

    /// Creates a parser from the content that `ZimFileService` returns for `url`.
    /// - Throws: An `NSError` in `NSURLErrorDomain` when no content is available
    ///   for `url`, or the parsing error from `init(data:)`.
    convenience init(url: URL) throws {
        guard let content = ZimFileService.shared.getURLContent(url: url) else {
            // A bare `NSError()` has no domain or code; throw a well-formed
            // error that also records which URL could not be loaded.
            throw NSError(
                domain: NSURLErrorDomain,
                code: NSURLErrorResourceUnavailable,
                userInfo: [NSURLErrorFailingURLErrorKey: url]
            )
        }
        try self.init(data: content.data)
    }

    /// The document title, if the document has one.
    var title: String? { document.title }

    /// Builds an attributed string from the first `<p>` element of the document.
    ///
    /// Text and element children are concatenated with newlines removed.
    /// Elements whose `class` attribute contains `mw-ref` are skipped
    /// (presumably inline reference markers), and the text of `<b>` elements
    /// gets `boldFont`.
    /// - Returns: The snippet, or `nil` when there is no `<p>` element or the
    ///   resulting text is empty.
    func getFirstParagraph() -> NSAttributedString? {
        guard let firstParagraph = document.firstChild(xpath: "//p") else { return nil }
        let snippet = NSMutableAttributedString()
        for child in firstParagraph.childNodes(ofTypes: [.Text, .Element]) {
            guard let element = child as? Fuzi.XMLElement else {
                // Not an element: append its text as-is, without attributes.
                let text = child.stringValue.replacingOccurrences(of: "\n", with: "")
                snippet.append(NSAttributedString(string: text))
                continue
            }
            if element.attributes["class"]?.contains("mw-ref") == true {
                continue
            }
            let attributedString = NSAttributedString(
                string: element.stringValue.replacingOccurrences(of: "\n", with: ""),
                attributes: element.tag == "b" ? [.font: HTMLParser.boldFont] : nil
            )
            snippet.append(attributedString)
        }
        return snippet.length > 0 ? snippet : nil
    }

    /// Returns the first sentence of the first paragraph, keeping its attributes.
    /// - Parameter languageCode: Optional language code handed to the sentence
    ///   tokenizer; when `nil` no language is set on the tokenizer.
    /// - Returns: The first sentence, or `nil` when there is no first paragraph
    ///   or the tokenizer reports no sentence.
    func getFirstSentence(languageCode: String?) -> NSAttributedString? {
        guard let firstParagraph = self.getFirstParagraph() else { return nil }
        let text = firstParagraph.string
        var firstSentence: NSAttributedString?
        let tokenizer = NLTokenizer(unit: .sentence)
        if let languageCode = languageCode {
            tokenizer.setLanguage(NLLanguage(languageCode))
        }
        tokenizer.string = text
        tokenizer.enumerateTokens(in: text.startIndex..<text.endIndex) { range, _ in
            firstSentence = firstParagraph.attributedSubstring(from: NSRange(range, in: firstParagraph.string))
            // Stop the enumeration after the first token.
            return false
        }
        return firstSentence
    }

    /// Returns the `src` attribute of the first `<img>` element, if any.
    func getFirstImagePath() -> String? {
        guard let firstImage = document.firstChild(xpath: "//img") else { return nil }
        return firstImage.attributes["src"]
    }

    // NOTE(review): disabled code kept for reference. It calls `select`/`attr`,
    // which are not used anywhere else in this class — presumably written
    // against a different HTML parsing API; confirm before re-enabling.
    //    func getGeoCoordinate() -> CLLocationCoordinate2D? {
    //        do {
    //            let elements = try document.select("head > meta[name='geo.position']")
    //            let content = try elements.first()?.attr("content")
    //            guard let parts = content?.split(separator: ";"), parts.count == 2,
    //                  let lat = Double(parts[0]), let lon = Double(parts[1]) else { return nil }
    //            return CLLocationCoordinate2D(latitude: lat, longitude: lon)
    //        } catch { return nil }
    //    }

    /// Wraps `bodyFragment` in a minimal HTML document (inside a single `<p>`)
    /// and returns the attributed first paragraph of that document.
    /// - Parameter bodyFragment: HTML inserted verbatim into the paragraph.
    /// - Returns: The attributed snippet, or `nil` when parsing fails or the
    ///   paragraph yields no text.
    class func parseBodyFragment(_ bodyFragment: String) -> NSAttributedString? {
        let html = "<!DOCTYPE html><html><head></head><body><p>\(bodyFragment)</p></body></html>"
        return (try? HTMLParser(html: html))?.getFirstParagraph()
    }
}

View File

@ -34,15 +34,14 @@ extension SearchOperation {
guard !self.isCancelled else { return }
switch snippetMode {
case .firstParagraph:
guard let parser = try? HTMLParser(url: result.url) else { return }
result.snippet = parser.getFirstParagraph()
case .firstSentence:
guard let parser = try? HTMLParser(url: result.url) else { return }
result.snippet = parser.getFirstSentence(languageCode: nil)
case .matches:
guard let html = result.htmlSnippet else { return }
result.snippet = HTMLParser.parseBodyFragment(html)
guard let html = result.htmlSnippet,
let data = html.data(using: .utf8) else { return }
result.snippet = try? NSAttributedString(
data: data,
options: [.documentType: NSAttributedString.DocumentType.html],
documentAttributes: nil
)
case .disabled:
break
}

View File

@ -45,9 +45,6 @@ settings:
VALIDATE_PRODUCT: YES
packages:
Fuzi:
url: https://github.com/cezheng/Fuzi
majorVersion: 3.0.0
Defaults:
url: https://github.com/sindresorhus/Defaults
majorVersion: 6.0.0
@ -75,7 +72,6 @@ targetTemplates:
- sdk: WebKit.framework
- sdk: NotificationCenter.framework
- sdk: QuickLook.framework
- package: Fuzi
- package: Defaults
- package: OrderedCollections
sources: