diff --git a/components/search/SearchItemBlog.vue b/components/search/SearchItemBlog.vue new file mode 100644 index 000000000..6c12711d5 --- /dev/null +++ b/components/search/SearchItemBlog.vue @@ -0,0 +1,45 @@ + + + diff --git a/components/search/SearchItemPronoun.vue b/components/search/SearchItemPronoun.vue new file mode 100644 index 000000000..53e95ae38 --- /dev/null +++ b/components/search/SearchItemPronoun.vue @@ -0,0 +1,17 @@ + + + diff --git a/pages/search.vue b/pages/search.vue index b3806690e..3dad62b96 100644 --- a/pages/search.vue +++ b/pages/search.vue @@ -31,40 +31,9 @@ const searchInput = useTemplateRef('searchInput');
diff --git a/server/api/search.get.ts b/server/api/search.get.ts index d5ccca0cd..54a18b1e7 100644 --- a/server/api/search.get.ts +++ b/server/api/search.get.ts @@ -3,25 +3,23 @@ import fs from 'node:fs/promises'; import { JSDOM } from 'jsdom'; import marked from 'marked'; import MiniSearch from 'minisearch'; -import type { MatchInfo } from 'minisearch'; +import type { MatchInfo, SearchResult } from 'minisearch'; +import type { Config } from '~/locale/config.ts'; import { getPosts, type PostMetadata } from '~/server/blog.ts'; import { loadSuml, loadSumlFromBase } from '~/server/loader.ts'; import { rootDir } from '~/server/paths.ts'; +import { parsePronouns } from '~/src/buildPronoun.ts'; +import { clearLinkedText } from '~/src/helpers.ts'; import parseMarkdown from '~/src/parseMarkdown.ts'; import { Translator } from '~/src/translator.ts'; +import { loadTsv } from '~/src/tsv.ts'; const translations = loadSuml('translations'); const baseTranslations = loadSumlFromBase('locale/_base/translations'); const translator = new Translator(translations, baseTranslations, global.config); -interface SearchDocumentPost extends PostMetadata { - id: number; - url: string; - content: string; -} - const getTermsByField = (matches: MatchInfo): Record => { const termsByField: Record = {}; for (const [term, fields] of Object.entries(matches)) { @@ -57,47 +55,167 @@ const highlightMatches = (field: string, terms: string[] | undefined, fragment: return field.replaceAll(termsRegex, `$1`); }; -export default defineEventHandler(async (event) => { - const index = new MiniSearch({ - fields: ['url', 'title', 'content'], - }); +abstract class SearchIndex { + documents: D[]; + index: MiniSearch; - const posts: SearchDocumentPost[] = []; - for (const post of (await getPosts())) { - const content = await fs.readFile(`${rootDir}/data/blog/${post.slug}.md`, 'utf-8'); - // exclude title, date and author from searchable content - const trimmed = content.replace(/^(.+\n+){2}/, ''); - const markdown = marked(trimmed); - const parsed = await parseMarkdown(markdown, translator); - const text = JSDOM.fragment(parsed.content ?? '').textContent; - if (text !== null && config.links.enabled && config.links.blog) { - posts.push({ - id: posts.length, - url: `/${encodeURIComponent(config.links.blogRoute)}/${post.slug}`, - title: post.title, - date: post.date, - authors: post.authors, - hero: post.hero, - content: text, - }); - } + protected constructor(fields: (keyof D)[]) { + this.documents = []; + this.index = new MiniSearch({ + fields: fields as string[], + storeFields: ['type'], + }); } - index.addAll(posts); + + async init(config: Config) { + this.documents = await this.getDocuments(config); + this.index.addAll(this.documents); + } + + abstract getDocuments(config: Config): Promise; + + abstract transform(result: SearchResult): R; +} + +interface SearchDocumentPronoun { + id: number; + type: SearchIndexPronoun['TYPE']; + url: string; + short: string; + small: string | undefined; + content: string; +} + +export type SearchResultPronoun = SearchDocumentPronoun; + +class SearchIndexPronoun extends SearchIndex { + TYPE = 'pronoun' as const; + + constructor() { + super(['url', 'short', 'small', 'content']); + } + + async getDocuments(config: Config): Promise { + if (!config.pronouns.enabled) { + return []; + } + + const pronouns = parsePronouns(config, loadTsv(`${rootDir}/data/pronouns/pronouns.tsv`)); + return Object.values(pronouns).map((pronoun, id): SearchDocumentPronoun => { + const description = Array.isArray(pronoun.description) + ? pronoun.description.join() + : pronoun.description; + const history = clearLinkedText(pronoun.history.replaceAll('@', ' '), false); + const morphemes = Object.values(pronoun.morphemes) + .filter((value) => value !== null) + .flatMap((value) => value.split('&')) + .join(', '); + + return { + id, + type: this.TYPE, + url: `/${encodeURIComponent(pronoun.canonicalName)}`, + short: pronoun.name(), + small: pronoun.smallForm ? pronoun.getMorpheme(pronoun.smallForm) ?? undefined : undefined, + content: `${description}: ${history} ${morphemes}`, + }; + }); + } + + override transform(result: SearchResult): SearchResultPronoun { + const document = this.documents[result.id]; + const termsByField = getTermsByField(result.match); + return { + id: document.id, + type: document.type, + url: document.url, + short: highlightMatches(document.short, termsByField.short), + small: document.small ? highlightMatches(document.small, termsByField.small) : undefined, + content: highlightMatches(document.content, termsByField.content, true), + }; + } +} + +interface SearchDocumentBlog extends PostMetadata { + id: number; + type: SearchIndexBlog['TYPE']; + url: string; + content: string; +} + +export type SearchResultBlog = SearchDocumentBlog; + +class SearchIndexBlog extends SearchIndex { + TYPE = 'blog' as const; + + constructor() { + super(['url', 'title', 'content']); + } + + async getDocuments(config: Config): Promise { + if (!config.links.enabled || !config.links.blog) { + return []; + } + + const documents: SearchDocumentBlog[] = []; + for (const post of (await getPosts())) { + const content = await fs.readFile(`${rootDir}/data/blog/${post.slug}.md`, 'utf-8'); + // exclude title, date and author from searchable content + const trimmed = content.replace(/^(.+\n+){2}/, ''); + const markdown = marked(trimmed); + const parsed = await parseMarkdown(markdown, translator); + const text = JSDOM.fragment(parsed.content ?? '').textContent; + if (text !== null && config.links.enabled && config.links.blog) { + documents.push({ + id: documents.length, + type: this.TYPE, + url: `/${encodeURIComponent(config.links.blogRoute)}/${post.slug}`, + title: post.title, + date: post.date, + authors: post.authors, + hero: post.hero, + content: text, + }); + } + } + return documents; + } + + override transform(result: SearchResult): SearchResultBlog { + const document = this.documents[result.id]; + const termsByField = getTermsByField(result.match); + return { + id: document.id, + type: document.type, + url: document.url, + title: highlightMatches(document.title, termsByField.title), + date: document.date, + authors: document.authors, + hero: document.hero, + content: highlightMatches(document.content, termsByField.content, true), + }; + } +} + +export default defineEventHandler(async (event) => { + const indices = Object.fromEntries( + [ + new SearchIndexPronoun(), + new SearchIndexBlog(), + ].map((index) => [index.TYPE, index]), + ); + await Promise.all(Object.values(indices).map((index) => index.init(config))); const query = getQuery(event); const text = query.text as string; - const results = index.search(text, { prefix: true, fuzzy: 1 }); - return results.map((result) => { - const post = posts[result.id]; - const termsByField = getTermsByField(result.match); - return { - url: post.url, - title: highlightMatches(post.title, termsByField.title), - date: post.date, - authors: post.authors, - hero: post.hero, - fragment: highlightMatches(post.content, termsByField.content, true), - termsByField, - }; - }); + return Object.values(indices) + .flatMap((index) => { + return index.index.search(text, { prefix: true, fuzzy: 1 }); + }) + .toSorted((resultA, resultB) => { + return resultB.score - resultA.score; + }) + .map((result) => { + return indices[result.type as keyof typeof indices].transform(result); + }); });