import fs from 'node:fs/promises';

import { JSDOM } from 'jsdom';
import marked from 'marked';
import MiniSearch from 'minisearch';
import type { MatchInfo, SearchResult, AsPlainObject, Options } from 'minisearch';
import type { RuntimeConfig } from 'nuxt/schema';

import { shortForVariant } from '#shared/buildPronoun.ts';
import { Day } from '#shared/calendar/helpers.ts';
import forbidden from '#shared/forbidden.ts';
import { clearLinkedText, buildImageUrl } from '#shared/helpers.ts';
import { resolveFirstDeclension } from '#shared/nouns.ts';
import type { Numerus } from '#shared/nouns.ts';
import parseMarkdown from '#shared/parseMarkdown.ts';
import { normaliseQuery, validateQuery } from '#shared/search.ts';
import type { SearchDocument } from '#shared/search.ts';
import type { Config } from '~~/locale/config.ts';
import localeDescriptions from '~~/locale/locales.ts';
import { getPosts } from '~~/server/blog.ts';
import {
    getLocale,
    loadCalendar,
    loadConfig,
    loadNounsData,
    loadPronounLibrary,
    loadTranslator,
} from '~~/server/data.ts';
import { getInclusiveEntries } from '~~/server/inclusive.ts';
import { buildNoun, getNounEntries } from '~~/server/nouns.ts';
import { rootDir } from '~~/server/paths.ts';
import { getSourcesEntries } from '~~/server/sources.ts';
import { getUrlForLocale } from '~~/server/src/domain.ts';
import { getTermsEntries } from '~~/server/terms.ts';

// NOTE(review): the reviewed copy of this file appears to have lost every span of text that sat
// between a `<` and the following `>` (generic type arguments such as the ones after `Partial`,
// `Promise`, `Record`, `Map`, `MiniSearch`, `Options` and `translator.get`, any markup inside the
// two highlighting template literals, and parts of two regex-bearing statements in
// `highlightMatches` and in the `source` kind). Those spans are preserved exactly as received and
// are NOT reconstructed here — restore them from version control before merging.

/**
 * One category of searchable content.
 *
 * - `kind` tags every document and result of this category.
 * - `options` are extra MiniSearch options merged in when the index is built or reloaded.
 * - `getDocuments` produces the documents to index for a locale configuration.
 * - `transformDocument` is an optional hook run on a result document after highlighting.
 */
interface SearchKind {
    kind: SearchDocument['kind'];
    options?: Partial>;
    getDocuments(config: Config, runtimeConfig: RuntimeConfig): Promise;
    transformDocument?(transformed: SearchDocument, termsByField: Record): void;
}

/** A search kind together with its loaded documents and a ready-to-query MiniSearch index. */
interface LoadedSearchKind {
    kind: SearchKind;
    documents: SearchDocument[];
    index: MiniSearch;
}

/** Index options shared by all kinds: which fields are searchable and which are stored on results. */
const DEFAULT_OPTIONS: Options = {
    fields: ['title', 'titleSmall', 'content', 'contentHidden'],
    storeFields: ['kind'],
};

/**
 * Loads the documents of one kind and builds a MiniSearch index over them.
 *
 * The result is cached per `locale:kind` key. `DEFAULT_OPTIONS` is spread last, so its `fields`
 * and `storeFields` win over same-named keys in `kind.options`.
 * `maxAge` is presumably in seconds (one day) — confirm against the cache helper's documentation.
 */
const getSearchDocumentsAndIndex = defineCachedFunction(async (
    kind: SearchKind,
    config: Config,
    runtimeConfig: RuntimeConfig,
): Promise<{ documents: SearchDocument[]; index: MiniSearch | AsPlainObject }> => {
    const documents = await kind.getDocuments(config, runtimeConfig);
    const index = new MiniSearch({ ...kind.options, ...DEFAULT_OPTIONS });
    index.addAll(documents);
    return { documents, index };
}, {
    name: 'search',
    getKey: (kind, config) => `${config.locale}:${kind.kind}`,
    maxAge: 24 * 60 * 60,
});

/**
 * Returns documents and a usable index for one kind.
 *
 * The cached value may come back as a plain object rather than a `MiniSearch` instance (see the
 * declared return type of the cached function); in that case the index is rebuilt with
 * `MiniSearch.loadJS` using the same options it was created with.
 */
const loadSearchDocumentsAndIndex = async (
    kind: SearchKind,
    config: Config,
    runtimeConfig: RuntimeConfig,
): Promise => {
    const { documents, index } = await getSearchDocumentsAndIndex(kind, config, runtimeConfig);
    if (!(index instanceof MiniSearch)) {
        return { kind, documents, index: MiniSearch.loadJS(index, { ...kind.options, ...DEFAULT_OPTIONS }) };
    }
    return { kind, documents, index };
};

/** Loads all kinds in parallel and returns them keyed by their `kind` tag. */
const loadIndices = async (
    kinds: SearchKind[],
    config: Config,
    runtimeConfig: RuntimeConfig,
): Promise> => {
    const promises = kinds.map((kind) => loadSearchDocumentsAndIndex(kind, config, runtimeConfig));
    const indices = await Promise.all(promises);
    return new Map(indices.map((loadedKind) => [loadedKind.kind.kind, loadedKind]));
};

/**
 * Runs the query against every index (prefix matching, fuzziness 1) and merges the results,
 * sorted by descending score. Scores originate from separate indices and are compared as-is.
 */
const searchIndices = (indices: Map, query: string): SearchResult[] => {
    return [...indices.values()]
        .flatMap(({ index }) => {
            return index.search(query, { prefix: true, fuzzy: 1 });
        })
        .toSorted((resultA, resultB) => {
            return resultB.score - resultA.score;
        });
};

/**
 * Inverts MiniSearch match info (term → fields) into field → terms.
 * Single-character terms are skipped.
 */
const getTermsByField = (matches: MatchInfo): Record => {
    const termsByField: Record = {};
    for (const [term, fields] of Object.entries(matches)) {
        if (term.length === 1) {
            continue;
        }
        for (const field of fields) {
            if (!Object.hasOwn(termsByField, field)) {
                termsByField[field] = [];
            }
            termsByField[field].push(term);
        }
    }
    return termsByField;
};

/** Maximum number of words kept when a field is shortened to a fragment. */
const FRAGMENT_MAX_WORDCOUNT = 24;

/**
 * Highlights matched terms in a field and, when `fragment` is set, shortens the field to at most
 * `FRAGMENT_MAX_WORDCOUNT` words starting two words before the first match, marking cut-off ends
 * with `[…]`. Without any terms the (possibly shortened) field is returned unchanged.
 *
 * NOTE(review): the statement building `termsRegex` and the code defining `words` / `firstMatch`
 * are truncated in the reviewed copy (text is missing after "(?"); the remainder is kept verbatim.
 * The replacement template below presumably also wrapped `$1` in markup — confirm.
 */
const highlightMatches = (field: string, terms: string[] | undefined, fragment: boolean = false): string => {
    const termsRegex = terms && terms.length > 0 ? new RegExp(`(? 
word.match(termsRegex)) : 0;
        const start = Math.max(Math.min(firstMatch - 2, words.length - FRAGMENT_MAX_WORDCOUNT), 0);
        const end = Math.min(start + FRAGMENT_MAX_WORDCOUNT, words.length);
        field = `${start > 0 ? '[…] ' : ''}${words.slice(start, end).join(' ')}${end < words.length ? ' […]' : ''}`;
    }
    if (termsRegex === undefined) {
        return field;
    }
    return field.replaceAll(termsRegex, `$1`);
};

/**
 * Turns a raw search result into the document sent to the client.
 *
 * `result.id` is used as an array index into the kind's documents, which relies on every
 * `getDocuments` implementation assigning `id` equal to the document's position in its array.
 * The document is cloned, title and content are highlighted (content as a fragment),
 * `contentHidden` is removed, and the kind's optional `transformDocument` hook is applied.
 */
const transformResult = (indices: Map, result: SearchResult): SearchDocument => {
    const { documents, kind } = indices.get(result.kind)!;
    const document = documents[result.id];
    const termsByField = getTermsByField(result.match);
    const transformed = structuredClone(document);
    transformed.title = highlightMatches(document.title, termsByField.title);
    transformed.content = highlightMatches(document.content, termsByField.content, true);
    delete transformed.contentHidden;
    if (kind.transformDocument) {
        kind.transformDocument(transformed, termsByField);
    }
    return transformed;
};

/** All searchable content categories. Each entry builds its own documents for the given locale config. */
const kinds: SearchKind[] = [
    {
        // other language versions of the site (the current locale is excluded)
        kind: 'locale',
        async getDocuments(config) {
            return localeDescriptions
                .filter((localeDescription) => localeDescription.code !== config.locale)
                .map((localeDescription, id): SearchDocument => {
                    const url = getUrlForLocale(localeDescription.code, useRuntimeConfig().public.domainBase);
                    return {
                        id,
                        kind: this.kind,
                        url,
                        title: localeDescription.fullName,
                        content: url,
                        contentHidden: localeDescription.nameEnglish,
                    };
                });
        },
    },
    {
        // static pages; title matches are boosted over content matches
        kind: 'page',
        options: {
            searchOptions: {
                boost: { title: 2 },
            },
        },
        async getDocuments(config) {
            // remember to modify ~/components/Header.vue too
            const translator = await loadTranslator(config.locale);

            const documents: SearchDocument[] = [];
            // appends a page document; its id is its position in `documents`
            const addDocument = ({ url, title, content }: { url: string; title: string; content?: string }) => {
                documents.push({
                    id: documents.length,
                    kind: this.kind,
                    url,
                    title,
                    content: content ?? '',
                });
            };

            addDocument({
                url: '/',
                title: translator.translate('home.link'),
                content: [
                    translator.translate('home.intro'),
                    translator.translate('home.why'),
                    ...translator.get('home.about').map((text) => clearLinkedText(text, false)),
                ].join(' '),
            });
            if (config.pronouns.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.pronouns.route)}`,
                    // NOTE(review): 'pronouns.prononus' looks like a misspelt translation key — verify
                    // that it exists in the locale files.
                    title: translator.translate('pronouns.prononus'),
                    content: '',
                });
            }
            if (config.nouns.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.nouns.route)}`,
                    title: translator.translate('nouns.headerLonger'),
                    content: [
                        translator.translate('nouns.description'),
                        ...translator.get('nouns.intro').map((text) => clearLinkedText(text, false)),
                    ].join(' '),
                });
            }
            if (config.sources.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.sources.route)}`,
                    title: translator.translate('sources.headerLonger'),
                    content: translator.translate('sources.subheader'),
                });
            }
            if (config.faq.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.faq.route)}`,
                    title: translator.translate('faq.header'),
                    content: translator.translate('faq.headerLong'),
                });
            }
            if (config.links.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.links.route)}`,
                    title: translator.translate('links.header'),
                    content: translator.translate('links.headerLong'),
                });
                if (config.links.academicRoute) {
                    addDocument({
                        url: `/${encodeURIComponent(config.links.academicRoute)}`,
                        title: translator.translate('links.academic.header'),
                        content: translator.get('links.academic.intro')
                            .map((text) => clearLinkedText(text, false))
                            .join(' '),
                    });
                }
                if (config.links.translinguisticsRoute) {
                    addDocument({
                        url: `/${encodeURIComponent(config.links.translinguisticsRoute)}`,
                        title: translator.translate('links.translinguistics.headerLong'),
                        content: translator.get('links.translinguistics.intro')
                            .map((text) => clearLinkedText(text, false))
                            .join(' '),
                    });
                }
                if (config.links.mediaRoute) {
                    addDocument({
                        url: `/${encodeURIComponent(config.links.mediaRoute)}`,
                        title: translator.translate('links.media.header'),
                        content: '',
                    });
                }
                if (config.links.zine?.enabled) {
                    addDocument({
                        url: `/${encodeURIComponent(config.links.zine.route)}`,
                        title: translator.translate('links.zine.headerLong'),
                        content: translator.get('links.zine.info')
                            .map((text) => clearLinkedText(text, false))
                            .join(' '),
                    });
                }
                if (config.links.blog) {
                    addDocument({
                        url: `/${encodeURIComponent(config.links.blogRoute)}`,
                        title: translator.translate('links.blog'),
                        content: '',
                    });
                }
            }
            if (config.english.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.english.route)}`,
                    title: translator.translate('links.english.header'),
                    content: [
                        translator.translate('english.headerLonger'),
                        translator.translate('english.description'),
                        ...translator.get('english.intro')
                            .map((text) => clearLinkedText(text, false)),
                    ].join(' '),
                });
            }
            if (config.terminology.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.terminology.route)}`,
                    title: translator.translate('terminology.headerLong'),
                    content: translator.get('terminology.info')
                        .map((text) => clearLinkedText(text, false))
                        .join(' '),
                });
            }
            if (config.calendar?.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.calendar.route)}`,
                    title: translator.translate('calendar.headerLong'),
                    content: '',
                });
            }
            if (config.census.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.census.route)}`,
                    title: translator.translate('census.headerLong'),
                    content: '',
                });
            }
            if (config.inclusive.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.inclusive.route)}`,
                    title: translator.translate('inclusive.headerLong'),
                    content: translator.get('inclusive.info')
                        .map((text) => clearLinkedText(text, false))
                        .join(' '),
                });
            }
            if (config.names.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.names.route)}`,
                    title: translator.translate('names.headerLong'),
                    // NOTE(review): the names page indexes `inclusive.*` texts — looks like a
                    // copy-paste from the block above; confirm which `names.*` keys were intended.
                    content: [
                        translator.translate('inclusive.description'),
                        ...translator.get('inclusive.info')
                            .map((text) => clearLinkedText(text, false)),
                    ].join(' '),
                });
            }
            if (config.people.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.people.route)}`,
                    title: translator.translate('people.headerLonger'),
                    content: [
                        translator.translate('people.description'),
                        ...translator.get('people.info')
                            .map((text) => clearLinkedText(text, false)),
                    ].join(' '),
                });
            }
            if (config.contact.enabled && config.contact.team.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.contact.team.route)}`,
                    title: translator.translate('contact.team.name'),
                    content: [
                        translator.translate('contact.team.description'),
                        translator.translate('contact.contribute.header'),
                        translator.translate('home.mission.header'),
                        translator.translate('home.mission.summary'),
                        translator.translate('home.mission.freedom'),
                        translator.translate('home.mission.respect'),
                        translator.translate('home.mission.inclusivity'),
                        `${translator.translate('contact.contribute.intro')}:`,
                        ...['entries', 'translations', 'version', 'technical'].map((area) => {
                            const header = translator.translate(`contact.contribute.${area}.header`);
                            const description = translator.translate(`contact.contribute.${area}.description`);
                            return `${header}: ${clearLinkedText(description, false)}`;
                        }),
                    ].join(' '),
                });
            }
            if (config.workshops?.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.workshops.route)}`,
                    title: translator.translate('workshops.headerLong'),
                    content: translator.get('workshops.content')
                        .map((text) => clearLinkedText(text, false))
                        .join(' '),
                });
            }
            if (config.contact.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.contact.route)}`,
                    title: translator.translate('contact.header'),
                    content: [
                        translator.has('contact.faq') ? translator.translate('contact.faq') : '',
                        translator.has('contact.technical') ? translator.translate('contact.technical') : '',
                        translator.translate('contact.language'),
                        translator.translate('localise.long'),
                    ].join(' '),
                });
            }
            if (config.user.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.user.termsRoute)}`,
                    title: translator.translate('terms.header'),
                    content: [
                        translator.has('terms.translationDisclaimer')
                            ? translator.translate('terms.translationDisclaimer')
                            : '',
                        translator.translate('terms.content.consent'),
                        translator.translate('terms.content.accounts.header'),
                        translator.translate('terms.content.accounts.age'),
                        translator.translate('terms.content.accounts.authentication'),
                        translator.translate('terms.content.accounts.termination'),
                        translator.translate('terms.content.accounts.inactivity'),
                        translator.translate('terms.content.content.header'),
                        translator.translate('terms.content.content.ownership'),
                        translator.translate('terms.content.content.liability'),
                        translator.translate('terms.content.content.violations'),
                        // the callback must return the translation; a braced body without `return`
                        // produced only `undefined` entries, i.e. a string of bare separators
                        forbidden
                            .map((violation) => {
                                return translator.translate(`terms.content.content.violationsExamples.${violation}`);
                            })
                            .join(translator.translate('terms.content.content.violationsSeparator')),
                        translator.translate('terms.content.content.violationsEnd'),
                        translator.translate('terms.content.content.violationsStrict'),
                        translator.translate('terms.content.content.responsibility'),
                        translator.translate('terms.content.closing.header'),
                        translator.translate('terms.content.closing.jurisdiction'),
                        translator.translate('terms.content.closing.changes'),
                    ].join(' '),
                });
                addDocument({
                    url: `/${encodeURIComponent(config.user.privacyRoute)}`,
                    title: translator.translate('privacy.header'),
                    content: [
                        translator.has('terms.translationDisclaimer')
                            ? translator.translate('terms.translationDisclaimer')
                            : '',
                        translator.translate('privacy.content.intro'),
                        translator.translate('privacy.content.effort'),
                        translator.translate('privacy.content.data'),
                        translator.translate('privacy.content.editRemoval'),
                        translator.translate('privacy.content.contact'),
                        translator.translate('privacy.content.cookies'),
                        translator.translate('privacy.content.plausible'),
                        translator.translate('privacy.content.turnstile'),
                        translator.translate('privacy.content.sentry'),
                        config.ads?.enabled ? translator.translate('privacy.content.publift') : '',
                        config.ads?.enabled ? translator.translate('privacy.content.gtm') : '',
                        translator.translate('privacy.content.logsBackups'),
                        translator.translate('privacy.content.gdpr'),
                    ].join(' '),
                });
            }
            if (config.api?.enabled) {
                addDocument({
                    url: `/${encodeURIComponent(config.api.route)}`,
                    title: translator.translate('api.header'),
                });
            }
            {
                // the license page is rendered from LICENSE.md in the repository root
                const content = await fs.readFile(`${rootDir}/LICENSE.md`, 'utf-8');
                const title = content.match(/^# (.*)\n/)?.[1];
                // exclude title, date and author from searchable content
                const trimmed = content.replace(/^(.+\n+){2}/, '');
                const markdown = marked(trimmed);
                const parsed = await parseMarkdown(markdown, translator);
                const text = JSDOM.fragment(parsed.content ?? '').textContent?.replaceAll(/\s+/g, ' ');
                addDocument({
                    url: '/license',
                    title: title ?? '',
                    content: text ?? '',
                });
            }
            addDocument({
                url: 'https://shop.pronouns.page',
                title: translator.translate('contact.groups.shop'),
                content: '',
            });
            return documents;
        },
    },
    {
        // pronouns from the library plus generator-related special entries
        kind: 'pronoun',
        async getDocuments(config) {
            if (!config.pronouns.enabled) {
                return [];
            }

            const [translator, pronounLibrary] =
                await Promise.all([loadTranslator(config.locale), loadPronounLibrary(config.locale)]);

            const documents = Object.values(pronounLibrary.pronouns).map((pronoun, id): SearchDocument => {
                const description = Array.isArray(pronoun.description)
                    ? pronoun.description.join()
                    : pronoun.description;
                const history = clearLinkedText(pronoun.history.replaceAll('@', ' '), false);
                const morphemes = Object.values(pronoun.morphemes)
                    .filter((value) => value !== null)
                    .flatMap((value) => value.split('&'))
                    .join(', ');
                return {
                    id,
                    kind: this.kind,
                    url: `/${encodeURIComponent(pronoun.canonicalName)}`,
                    title: pronoun.name(),
                    titleSmall: pronoun.smallForm ? pronoun.morphemes[pronoun.smallForm] ?? undefined : undefined,
                    content: `${description}: ${history} ${morphemes}`,
                };
            });

            if (config.pronouns.generator.enabled) {
                documents.push({
                    id: documents.length,
                    kind: this.kind,
                    url: `/${encodeURIComponent(config.pronouns.route)}#generator`,
                    title: translator.translate('home.generator.header'),
                    content: translator.translate('home.generator.description'),
                });
                documents.push({
                    id: documents.length,
                    kind: this.kind,
                    url: `/${encodeURIComponent(config.pronouns.route)}#multiple`,
                    title: config.pronouns.multiple.name,
                    content: clearLinkedText(config.pronouns.multiple.description, false),
                });
                if (config.pronouns.null !== false) {
                    for (const variant of config.pronouns.null.routes) {
                        documents.push({
                            id: documents.length,
                            kind: this.kind,
                            url: `/${encodeURIComponent(variant)}`,
                            title: shortForVariant('null', variant, translator),
                            content: clearLinkedText(translator.translate('pronouns.null.description'), false),
                        });
                    }
                }
                if (config.pronouns.emoji !== false) {
                    documents.push({
                        id: documents.length,
                        kind: this.kind,
                        url: `/${encodeURIComponent(config.pronouns.route)}#emoji`,
                        title: config.pronouns.emoji.description,
                        content: clearLinkedText(config.pronouns.emoji.history, false),
                    });
                }
                if (config.pronouns.mirror) {
                    documents.push({
                        id: documents.length,
                        kind: this.kind,
                        url: `/${encodeURIComponent(config.pronouns.mirror.route)}`,
                        title: clearLinkedText(config.pronouns.mirror.name, false),
                        content: clearLinkedText(config.pronouns.mirror.description, false),
                    });
                }
                if (config.pronouns.any) {
                    documents.push({
                        id: documents.length,
                        kind: this.kind,
                        url: `/${encodeURIComponent(config.pronouns.any)}`,
                        title: translator.translate('pronouns.any.short'),
                        content: clearLinkedText(translator.translate('pronouns.any.description'), false),
                    });
                    for (const [variant, merged] of Object.entries(pronounLibrary.byKey())) {
                        documents.push({
                            id: documents.length,
                            kind: this.kind,
                            url: `/${encodeURIComponent(config.pronouns.any)}:${encodeURIComponent(variant)}`,
                            title: merged.short(translator),
                            content: clearLinkedText(translator.translate('pronouns.any.description'), false),
                        });
                    }
                }
                if (config.pronouns.ask) {
                    for (const variant of config.pronouns.ask.routes) {
                        documents.push({
                            id: documents.length,
                            kind: this.kind,
                            url: `/${encodeURIComponent(variant)}`,
                            title: shortForVariant('ask', variant, translator),
                            content: clearLinkedText(translator.translate('pronouns.ask.description'), false),
                        });
                    }
                }
            }
            return documents;
        },
        transformDocument(transformed: SearchDocument, termsByField: Record) {
            // NOTE(review): this template presumably wrapped the title in markup that is missing
            // from the reviewed copy; kept verbatim.
            transformed.title = `${transformed.title}`;
            if (transformed.titleSmall) {
                // fold the highlighted small form into the title and drop the separate field
                const titleSmall = highlightMatches(transformed.titleSmall, termsByField.titleSmall);
                transformed.title += `/${titleSmall}`;
                delete transformed.titleSmall;
            }
        },
    },
    {
        // dictionary of nouns
        kind: 'noun',
        async getDocuments(config) {
            if (!config.nouns.enabled) {
                return [];
            }

            const nounsData = await loadNounsData(config.locale);

            const base = encodeURIComponent(config.nouns.route);

            const db = useDatabase();
            const nouns = (await getNounEntries(db, () => false, config.locale))
                .map((nounRaw) => buildNoun(nounRaw, config, nounsData));
            return nouns.map((noun, id): SearchDocument => {
                const firstWords = noun.firstWords;
                return {
                    id,
                    kind: this.kind,
                    // NOTE(review): the filter value is interpolated unencoded (here and in the
                    // source / term / inclusive kinds) — confirm the client copes with `&`, `#`, etc.
                    url: `/${base}?filter=${firstWords[0]}`,
                    title: firstWords.join(' – '),
                    content: Object.values(noun.words)
                        .map((wordsByNumerus) => {
                            return Object.entries(wordsByNumerus)
                                .flatMap(([numerus, words]) => {
                                    return words.map((word) => {
                                        return resolveFirstDeclension(word, numerus as Numerus, nounsData);
                                    });
                                })
                                .join(', ');
                        })
                        .join(' – '),
                };
            });
        },
    },
    {
        // corpus of sources
        kind: 'source',
        async getDocuments(config, runtimeConfig) {
            if (!config.sources.enabled) {
                return [];
            }

            const base = encodeURIComponent(config.sources.route);

            const db = useDatabase();
            const sources = await getSourcesEntries(db, () => false, config.locale, undefined);
            return sources.map((source, id): SearchDocument => {
                // title format: "<author> – <title> (<extra>), <year>"; author and extra are optional
                let title = '';
                if (source.author) {
                    title += `${source.author.replace('^', '')} – `;
                }
                title += source.title;
                if (source.extra) {
                    title += ` (${source.extra})`;
                }
                title += `, ${source.year}`;

                let content = '';
                let contentHidden = '';
                if (source.comment) {
                    content += `${source.comment} `;
                }
                // NOTE(review): the statement below is truncated in the reviewed copy (text is
                // missing after "/(?", including whatever populated `contentHidden` and defined
                // `images` / `image`); kept verbatim.
                const fragments = source.fragments
                    .replaceAll('[[', '')
                    .replaceAll(']]', '')
                    .replaceAll('|', ' ')
                    .replaceAll(/(? 0 ? buildImageUrl(runtimeConfig.public.cloudfront, images[0], 'thumb') : undefined;
                return {
                    id,
                    kind: this.kind,
                    url: `/${base}?filter=${source.title}`,
                    title,
                    image: image ? { src: image } : undefined,
                    content,
                    contentHidden,
                };
            });
        },
    },
    {
        // external links configured for the locale
        kind: 'link',
        async getDocuments(config) {
            if (!config.links.enabled) {
                return [];
            }

            // NOTE(review): unlike other internal URLs this fallback has no leading slash and is
            // not URI-encoded — confirm whether a relative URL is intended.
            const base = config.links.route;

            return config.links.links.map((link, id) => {
                return {
                    id,
                    kind: this.kind,
                    url: link.url ?? base,
                    title: link.headline ?? '',
                    content: `${link.extra ?? ''} ${link.quote ?? ''} ${link.response ?? ''}`,
                };
            });
        },
    },
    {
        // frequently asked questions, one document per question
        kind: 'faq',
        async getDocuments(config) {
            if (!config.faq.enabled) {
                return [];
            }

            const translator = await loadTranslator(config.locale);

            const faqs = translator.get>('faq.questions');
            return Object.entries(faqs).map(([key, { question, answer }], id) => {
                return {
                    id,
                    kind: this.kind,
                    url: `/${config.faq.route}#${key}`,
                    title: question,
                    content: answer.map((text) => clearLinkedText(text, false)).join(' '),
                };
            });
        },
    },
    {
        // blog posts, indexed by their rendered text
        kind: 'blog',
        async getDocuments(config) {
            if (!config.links.enabled || !config.links.blog) {
                return [];
            }

            const translator = await loadTranslator(config.locale);

            const documents: SearchDocument[] = [];
            for (const post of (await getPosts(config))) {
                // exclude title, date and author from searchable content
                const trimmed = post.content.replace(/^(.+\n+){2}/, '');
                const markdown = marked(trimmed);
                const parsed = await parseMarkdown(markdown, translator);
                const text = JSDOM.fragment(parsed.content ?? '').textContent;
                // the config checks repeat the guard at the top of the function
                if (text !== null && config.links.enabled && config.links.blog) {
                    documents.push({
                        id: documents.length,
                        kind: this.kind,
                        url: `/${encodeURIComponent(config.links.blogRoute)}/${post.slug}`,
                        title: post.title,
                        date: post.date,
                        authors: post.authors,
                        image: post.hero,
                        content: text,
                    });
                }
            }
            return documents;
        },
    },
    {
        // terminology dictionary
        kind: 'term',
        async getDocuments(config, runtimeConfig) {
            if (!config.terminology.enabled) {
                return [];
            }

            const base = encodeURIComponent(config.terminology.route);

            const db = useDatabase();
            const terms = await getTermsEntries(db, () => false, config.locale);
            return terms.map((term, id): SearchDocument => {
                const title = term.term.replaceAll('|', ', ');
                let content = '';
                if (term.original) {
                    content += `${clearLinkedText(term.original.replaceAll('|', ';'), false)}`;
                }
                content += ` ${clearLinkedText(term.definition, false)}`;

                // prefer the first flag; otherwise fall back to the first uploaded image
                let image = undefined;
                const flags = JSON.parse(term.flags);
                if (flags.length > 0) {
                    image = `/flags/${flags[0]}.png`;
                } else if (term.images) {
                    image = buildImageUrl(runtimeConfig.public.cloudfront, term.images.split(',')[0], 'flag');
                }
                return {
                    id,
                    kind: this.kind,
                    url: `/${base}?filter=${term.key}`,
                    title,
                    image: image ? { src: image } : undefined,
                    content,
                };
            });
        },
    },
    {
        // inclusive language dictionary ("instead of … say …")
        kind: 'inclusive',
        async getDocuments(config) {
            if (!config.inclusive.enabled) {
                return [];
            }

            const base = encodeURIComponent(config.inclusive.route);

            const translator = await loadTranslator(config.locale);

            const db = useDatabase();
            const inclusiveEntries = await getInclusiveEntries(db, () => false, config.locale);
            return inclusiveEntries.map((inclusiveEntry, id): SearchDocument => {
                const insteadOf = inclusiveEntry.insteadOf.split('|');
                const say = inclusiveEntry.say?.split('|') ?? [];

                let content = `${translator.translate('inclusive.insteadOf')}: ${insteadOf.join(', ')}` +
                    ` – ${translator.translate('inclusive.say')}: ${say.join(', ')}`;
                if (inclusiveEntry.clarification) {
                    content += `; ${inclusiveEntry.clarification}`;
                }
                content += `; ${inclusiveEntry.because}`;
                return {
                    id,
                    kind: this.kind,
                    url: `/${base}?filter=${insteadOf[0]}`,
                    title: `${insteadOf[0]} – ${say[0]}`,
                    content,
                };
            });
        },
    },
    {
        // calendar events of the current year that have at least one day
        kind: 'calendar',
        async getDocuments(config) {
            if (!config.calendar?.enabled) {
                return [];
            }

            const base = config.calendar?.route;

            const [translator, calendar] =
                await Promise.all([loadTranslator(config.locale), loadCalendar(config.locale)]);

            const year = Day.today().year;
            return (calendar.getYear(year)?.events ?? [])
                .map((event) => ({ event, firstEventDay: event.getDays(year)[0] }))
                .filter(({ firstEventDay }) => firstEventDay !== undefined)
                .map(({ event, firstEventDay }, id): SearchDocument => {
                    // use the translated event name when a translation exists
                    let eventName = event.name.split('$')[0];
                    const translationKey = `calendar.events.${eventName}`;
                    if (translator.has(translationKey)) {
                        eventName = translator.translate(translationKey);
                    }
                    eventName = clearLinkedText(eventName, false);

                    const date = translator.translate(
                        `calendar.dates.${firstEventDay.month}`,
                        { day: event.getRange(year) },
                    );

                    let image = undefined;
                    let icon = undefined;
                    if (event.display.type === 'flag') {
                        image = { src: `/flags/${event.display.name}.png`, class: event.display.class };
                    } else if (event.display.type === 'icon') {
                        icon = event.display.name;
                    }

                    return {
                        id,
                        kind: this.kind,
                        // NOTE(review): no leading slash here, unlike the other kinds — confirm
                        // whether a relative URL is intended.
                        url: `${encodeURIComponent(base)}/${firstEventDay}`,
                        title: eventName,
                        image,
                        icon,
                        content: '',
                        date,
                    };
                });
        },
    },
];

/** Maximum number of results returned per request. */
const SEARCH_LIMIT = 20;

/**
 * Search endpoint: validates the `query` parameter, searches all indices of the request's locale
 * and returns up to `SEARCH_LIMIT` highlighted documents, best match first.
 *
 * Responds with 400 Bad Request when `query` is missing, repeated, or rejected by `validateQuery`.
 */
export default defineEventHandler(async (event) => {
    const rawQuery = getQuery(event).query;
    // a missing or repeated parameter is not a string; reject it before normalising
    if (typeof rawQuery !== 'string') {
        throw createError({
            status: 400,
            statusMessage: 'Bad Request',
        });
    }

    const normalisedQuery = normaliseQuery(rawQuery);
    const queryValidation = validateQuery(normalisedQuery);
    if (queryValidation !== undefined) {
        throw createError({
            status: 400,
            statusMessage: 'Bad Request',
        });
    }

    const config = await loadConfig(getLocale(event));
    const indices = await loadIndices(kinds, config, useRuntimeConfig(event));
    return searchIndices(indices, normalisedQuery)
        .slice(0, SEARCH_LIMIT)
        .map((result) => transformResult(indices, result));
});