import fs from 'node:fs/promises';

import { JSDOM } from 'jsdom';
import marked from 'marked';
import MiniSearch from 'minisearch';
import type { MatchInfo, SearchResult, AsPlainObject, Options } from 'minisearch';

import type { Config } from '~/locale/config.ts';
import { getPosts } from '~/server/blog.ts';
import { getInclusiveEntries } from '~/server/express/inclusive.ts';
import { getNounEntries } from '~/server/express/nouns.ts';
import { getSourcesEntries } from '~/server/express/sources.ts';
import { getTermsEntries } from '~/server/express/terms.ts';
import { loadSuml, loadSumlFromBase } from '~/server/loader.ts';
import { rootDir } from '~/server/paths.ts';
import { parsePronounGroups, parsePronouns, shortForVariant } from '~/src/buildPronoun.ts';
import { buildCalendar } from '~/src/calendar/calendar.ts';
import { Day } from '~/src/calendar/helpers.ts';
import { PronounLibrary } from '~/src/classes.ts';
import forbidden from '~/src/forbidden.ts';
import { clearLinkedText, buildImageUrl } from '~/src/helpers.ts';
import { genders, gendersWithNumerus } from '~/src/nouns.ts';
import parseMarkdown from '~/src/parseMarkdown.ts';
import { normaliseQuery, type SearchDocument, validateQuery } from '~/src/search.ts';
import { Translator } from '~/src/translator.ts';
import { loadTsv } from '~/src/tsv.ts';

// NOTE(review): this file reached review with every `<…>` span removed by an extraction step.
// Generic type arguments below were restored from usage; two code spans (marked individually)
// had to be reconstructed. Please diff against the upstream file before merging.

const translations = loadSuml('translations');
const baseTranslations = loadSumlFromBase('locale/_base/translations');
const translator = new Translator(translations, baseTranslations, global.config);

/**
 * Describes one category of searchable content.
 */
interface SearchKind {
    /** Discriminator that is copied onto every document of this category. */
    kind: SearchDocument['kind'];
    /** Extra MiniSearch options; merged with (and overridden by) `DEFAULT_OPTIONS`. */
    options?: Partial<Options<SearchDocument>>;
    /** Collects all documents of this category; document ids are the array positions. */
    getDocuments(config: Config): Promise<SearchDocument[]>;
    /** Optional post-processing of a result after the generic title/content handling. */
    transformDocument?(transformed: SearchDocument, termsByField: Record<string, string[]>): void;
}

/** A search kind together with its documents and a ready-to-query index. */
interface LoadedSearchKind {
    kind: SearchKind;
    documents: SearchDocument[];
    index: MiniSearch<SearchDocument>;
}

const DEFAULT_OPTIONS: Options<SearchDocument> = {
    fields: ['title', 'titleSmall', 'content', 'contentHidden'],
    storeFields: ['kind'],
};

/** Maximum number of words kept when a content field is shortened to a fragment. */
const FRAGMENT_MAX_WORDCOUNT = 24;

/** Maximum number of results returned by the endpoint. */
const SEARCH_LIMIT = 20;

/** Builds the site-internal URL of a configured route: a leading slash plus the encoded route. */
const routeUrl = (route: string): string => `/${encodeURIComponent(route)}`;

/** Passes a text through `clearLinkedText(text, false)`, as done for every indexed text here. */
const toPlainText = (text: string): string => clearLinkedText(text, false);

/** Reads a list of translated paragraphs and passes each one through `toPlainText`. */
const getPlainTexts = (key: string): string[] => translator.get<string[]>(key).map(toPlainText);

/** Escapes characters with a special meaning in regular expressions. */
const escapeRegExp = (text: string): string => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

/**
 * Collects the documents of a kind and builds a MiniSearch index over them.
 *
 * The result is wrapped in `defineCachedFunction` under the name `search`, keyed by the kind,
 * with a `maxAge` of `24 * 60 * 60` (one day if the unit is seconds; confirm against Nitro).
 * The declared return type allows the index to come back as a plain object instead of a
 * `MiniSearch` instance, which `loadSearchDocumentsAndIndex` accounts for.
 */
const getSearchDocumentsAndIndex = defineCachedFunction(async (
    kind: SearchKind,
    config: Config,
): Promise<{ documents: SearchDocument[]; index: MiniSearch<SearchDocument> | AsPlainObject }> => {
    const documents = await kind.getDocuments(config);
    const index = new MiniSearch<SearchDocument>({ ...kind.options, ...DEFAULT_OPTIONS });
    index.addAll(documents);
    return { documents, index };
}, {
    name: 'search',
    getKey: (kind) => kind.kind,
    maxAge: 24 * 60 * 60,
});

/**
 * Loads documents and index of a kind and makes sure the index is a `MiniSearch` instance,
 * rebuilding it via `MiniSearch.loadJS` when a plain object was returned.
 */
const loadSearchDocumentsAndIndex = async (kind: SearchKind, config: Config): Promise<LoadedSearchKind> => {
    const { documents, index } = await getSearchDocumentsAndIndex(kind, config);
    if (index instanceof MiniSearch) {
        return { kind, documents, index };
    }
    return { kind, documents, index: MiniSearch.loadJS(index, { ...kind.options, ...DEFAULT_OPTIONS }) };
};

/** Loads all kinds concurrently and returns them keyed by their `kind` discriminator. */
const loadIndices = async (
    kinds: SearchKind[],
    config: Config,
): Promise<Map<SearchDocument['kind'], LoadedSearchKind>> => {
    const loadedKinds = await Promise.all(kinds.map((kind) => loadSearchDocumentsAndIndex(kind, config)));
    return new Map(loadedKinds.map((loadedKind) => [loadedKind.kind.kind, loadedKind]));
};

/** Queries every index (prefix matching, fuzziness 1) and merges the results by descending score. */
const searchIndices = (
    indices: Map<SearchDocument['kind'], LoadedSearchKind>,
    query: string,
): SearchResult[] => {
    return [...indices.values()]
        .flatMap(({ index }) => index.search(query, { prefix: true, fuzzy: 1 }))
        .toSorted((resultA, resultB) => resultB.score - resultA.score);
};

/**
 * Inverts the match info of a result (term → fields) into field → terms.
 * Terms consisting of a single character are skipped.
 */
const getTermsByField = (matches: MatchInfo): Record<string, string[]> => {
    const termsByField: Record<string, string[]> = {};
    for (const [term, fields] of Object.entries(matches)) {
        if (term.length === 1) {
            continue;
        }
        for (const field of fields) {
            if (!Object.hasOwn(termsByField, field)) {
                termsByField[field] = [];
            }
            termsByField[field].push(term);
        }
    }
    return termsByField;
};

/**
 * Optionally shortens a field to a fragment around the first matching word and then applies
 * the match replacement.
 *
 * @param field text of the document field
 * @param terms matched terms for this field; `undefined` or empty means nothing matched
 * @param fragment when `true`, at most `FRAGMENT_MAX_WORDCOUNT` words are kept, starting two
 * words before the first match, with `[…]` marking omitted text
 * @returns the processed text
 */
const highlightMatches = (field: string, terms: string[] | undefined, fragment: boolean = false): string => {
    // NOTE(review): everything between "new RegExp(`(?" and "word.match(termsRegex)" was lost in
    // the copy under review. The pattern and flags below are a reconstruction: a lookbehind so that
    // matches start at a word start, one capture group (needed by `$1`), `g` (required by
    // `replaceAll`), `u` (required by `\p{L}`), `i` assumed. Escaping the terms is defensive so that
    // a term such as `c++` cannot produce an invalid pattern. Confirm against the upstream file.
    const termsRegex = terms && terms.length > 0
        ? new RegExp(`(?<!\\p{L})(${terms.map(escapeRegExp).join('|')})`, 'giu')
        : undefined;

    let text = field;
    if (fragment) {
        const words = text.split(' ');
        const firstMatch = termsRegex ? words.findIndex((word) => word.match(termsRegex)) : 0;
        // start two words before the match, but never so late that the fragment gets shorter than needed
        const start = Math.max(Math.min(firstMatch - 2, words.length - FRAGMENT_MAX_WORDCOUNT), 0);
        const end = Math.min(start + FRAGMENT_MAX_WORDCOUNT, words.length);
        text = `${start > 0 ? '[…] ' : ''}${words.slice(start, end).join(' ')}${end < words.length ? ' […]' : ''}`;
    }

    if (termsRegex === undefined) {
        return text;
    }
    // NOTE(review): as visible in the reviewed copy the replacement is only `$1`, which leaves the
    // text unchanged; wrapping markup around the match was presumably stripped — restore from upstream.
    return text.replaceAll(termsRegex, `$1`);
};

/**
 * Turns a raw MiniSearch result into the document sent to the client: a copy of the stored
 * document with processed title and content, without `contentHidden`, and with the
 * kind-specific `transformDocument` hook applied when present.
 */
const transformResult = (
    indices: Map<SearchDocument['kind'], LoadedSearchKind>,
    result: SearchResult,
): SearchDocument => {
    // results only originate from the indices in this map, hence the assertion
    const { documents, kind } = indices.get(result.kind)!;
    const document = documents[result.id];
    const termsByField = getTermsByField(result.match);

    const transformed = structuredClone(document);
    transformed.title = highlightMatches(document.title, termsByField.title);
    transformed.content = highlightMatches(document.content, termsByField.content, true);
    delete transformed.contentHidden;
    if (kind.transformDocument) {
        kind.transformDocument(transformed, termsByField);
    }
    return transformed;
};

const kinds: SearchKind[] = [
    {
        kind: 'page',
        options: {
            searchOptions: {
                boost: { title: 2 },
            },
        },
        /** Static pages of the site; which ones exist depends on the enabled config sections. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            // remember to modify ~/components/Header.vue too
            const documents: SearchDocument[] = [];

            const addDocument = ({ url, title, content }: { url: string; title: string; content?: string }) => {
                documents.push({
                    id: documents.length,
                    kind: this.kind,
                    url,
                    title,
                    content: content ?? '',
                });
            };

            addDocument({
                url: '/',
                title: translator.translate('home.link'),
                content: [
                    translator.translate('home.intro'),
                    translator.translate('home.why'),
                    ...getPlainTexts('home.about'),
                ].join(' '),
            });

            if (config.pronouns.enabled) {
                addDocument({
                    url: routeUrl(config.pronouns.route),
                    // NOTE(review): key spelled `prononus` in the reviewed copy — kept as is; confirm
                    // it matches the key in the translation files
                    title: translator.translate('pronouns.prononus'),
                    content: '',
                });
            }

            if (config.nouns.enabled) {
                addDocument({
                    url: routeUrl(config.nouns.route),
                    title: translator.translate('nouns.headerLonger'),
                    content: [
                        translator.translate('nouns.description'),
                        ...getPlainTexts('nouns.intro'),
                    ].join(' '),
                });
            }

            if (config.sources.enabled) {
                addDocument({
                    url: routeUrl(config.sources.route),
                    title: translator.translate('sources.headerLonger'),
                    content: translator.translate('sources.subheader'),
                });
            }

            if (config.faq.enabled) {
                addDocument({
                    url: routeUrl(config.faq.route),
                    title: translator.translate('faq.header'),
                    content: translator.translate('faq.headerLong'),
                });
            }

            if (config.links.enabled) {
                addDocument({
                    url: routeUrl(config.links.route),
                    title: translator.translate('links.header'),
                    content: translator.translate('links.headerLong'),
                });
                if (config.links.academicRoute) {
                    addDocument({
                        url: routeUrl(config.links.academicRoute),
                        title: translator.translate('links.academic.header'),
                        content: getPlainTexts('links.academic.intro').join(' '),
                    });
                }
                if (config.links.translinguisticsRoute) {
                    addDocument({
                        url: routeUrl(config.links.translinguisticsRoute),
                        title: translator.translate('links.translinguistics.headerLong'),
                        content: getPlainTexts('links.translinguistics.intro').join(' '),
                    });
                }
                if (config.links.mediaRoute) {
                    addDocument({
                        url: routeUrl(config.links.mediaRoute),
                        title: translator.translate('links.media.header'),
                        content: '',
                    });
                }
                if (config.links.zine?.enabled) {
                    addDocument({
                        url: routeUrl(config.links.zine.route),
                        title: translator.translate('links.zine.headerLong'),
                        content: getPlainTexts('links.zine.info').join(' '),
                    });
                }
                if (config.links.blog) {
                    addDocument({
                        url: routeUrl(config.links.blogRoute),
                        title: translator.translate('links.blog'),
                        content: '',
                    });
                }
            }

            if (config.english.enabled) {
                addDocument({
                    url: routeUrl(config.english.route),
                    title: translator.translate('links.english.header'),
                    content: [
                        translator.translate('english.headerLonger'),
                        translator.translate('english.description'),
                        ...getPlainTexts('english.intro'),
                    ].join(' '),
                });
            }

            if (config.terminology.enabled) {
                addDocument({
                    url: routeUrl(config.terminology.route),
                    title: translator.translate('terminology.headerLong'),
                    content: getPlainTexts('terminology.info').join(' '),
                });
            }

            if (config.calendar?.enabled) {
                addDocument({
                    url: routeUrl(config.calendar.route),
                    title: translator.translate('calendar.headerLong'),
                    content: '',
                });
            }

            if (config.census.enabled) {
                addDocument({
                    url: routeUrl(config.census.route),
                    title: translator.translate('census.headerLong'),
                    content: '',
                });
            }

            if (config.inclusive.enabled) {
                addDocument({
                    url: routeUrl(config.inclusive.route),
                    title: translator.translate('inclusive.headerLong'),
                    content: getPlainTexts('inclusive.info').join(' '),
                });
            }

            if (config.names.enabled) {
                addDocument({
                    url: routeUrl(config.names.route),
                    title: translator.translate('names.headerLong'),
                    // NOTE(review): the names page indexes the `inclusive.*` texts — looks like a
                    // copy-paste leftover, kept unchanged because the right keys cannot be verified here
                    content: [
                        translator.translate('inclusive.description'),
                        ...getPlainTexts('inclusive.info'),
                    ].join(' '),
                });
            }

            if (config.people.enabled) {
                addDocument({
                    url: routeUrl(config.people.route),
                    title: translator.translate('people.headerLonger'),
                    content: [
                        translator.translate('people.description'),
                        ...getPlainTexts('people.info'),
                    ].join(' '),
                });
            }

            if (config.contact.enabled && config.contact.team.enabled) {
                addDocument({
                    url: routeUrl(config.contact.team.route),
                    title: translator.translate('contact.team.name'),
                    content: [
                        translator.translate('contact.team.description'),
                        translator.translate('contact.contribute.header'),
                        translator.translate('home.mission.header'),
                        translator.translate('home.mission.summary'),
                        translator.translate('home.mission.freedom'),
                        translator.translate('home.mission.respect'),
                        translator.translate('home.mission.inclusivity'),
                        `${translator.translate('contact.contribute.intro')}:`,
                        ...['entries', 'translations', 'version', 'technical'].map((area) => {
                            const header = translator.translate(`contact.contribute.${area}.header`);
                            const description = translator.translate(`contact.contribute.${area}.description`);
                            return `${header}: ${toPlainText(description)}`;
                        }),
                    ].join(' '),
                });
            }

            if (config.workshops?.enabled) {
                addDocument({
                    url: routeUrl(config.workshops.route),
                    title: translator.translate('workshops.headerLong'),
                    content: getPlainTexts('workshops.content').join(' '),
                });
            }

            if (config.contact.enabled) {
                addDocument({
                    url: routeUrl(config.contact.route),
                    title: translator.translate('contact.header'),
                    content: [
                        translator.translate('contact.faq'),
                        translator.translate('contact.technical'),
                        translator.translate('contact.hate'),
                        translator.translate('contact.language'),
                    ].join(' '),
                });
            }

            if (config.user.enabled) {
                const translationDisclaimer = translator.has('terms.translationDisclaimer')
                    ? translator.translate('terms.translationDisclaimer')
                    : '';

                // the callback has to return the translation; a block body without `return` yields
                // `undefined` for every entry, so only the separators would end up in the index
                const violationExamples = forbidden
                    .map((violation) => translator.translate(`terms.content.content.violationsExamples.${violation}`))
                    .join(translator.translate('terms.content.content.violationsSeparator'));

                addDocument({
                    url: routeUrl(config.user.termsRoute),
                    title: translator.translate('terms.header'),
                    content: [
                        translationDisclaimer,
                        translator.translate('terms.content.consent'),
                        translator.translate('terms.content.accounts.header'),
                        translator.translate('terms.content.accounts.age'),
                        translator.translate('terms.content.accounts.authentication'),
                        translator.translate('terms.content.accounts.termination'),
                        translator.translate('terms.content.accounts.inactivity'),
                        translator.translate('terms.content.content.header'),
                        translator.translate('terms.content.content.ownership'),
                        translator.translate('terms.content.content.liability'),
                        translator.translate('terms.content.content.violations'),
                        violationExamples,
                        translator.translate('terms.content.content.violationsEnd'),
                        translator.translate('terms.content.content.violationsStrict'),
                        translator.translate('terms.content.content.responsibility'),
                        translator.translate('terms.content.closing.header'),
                        translator.translate('terms.content.closing.jurisdiction'),
                        translator.translate('terms.content.closing.changes'),
                    ].join(' '),
                });

                addDocument({
                    url: routeUrl(config.user.privacyRoute),
                    title: translator.translate('privacy.header'),
                    content: [
                        translationDisclaimer,
                        translator.translate('privacy.content.intro'),
                        translator.translate('privacy.content.effort'),
                        translator.translate('privacy.content.data'),
                        translator.translate('privacy.content.editRemoval'),
                        translator.translate('privacy.content.contact'),
                        translator.translate('privacy.content.cookies'),
                        translator.translate('privacy.content.plausible'),
                        translator.translate('privacy.content.turnstile'),
                        translator.translate('privacy.content.sentry'),
                        config.ads?.enabled ? translator.translate('privacy.content.publift') : '',
                        config.ads?.enabled ? translator.translate('privacy.content.gtm') : '',
                        translator.translate('privacy.content.logsBackups'),
                        translator.translate('privacy.content.gdpr'),
                    ].join(' '),
                });
            }

            if (config.api) {
                addDocument({
                    url: '/api',
                    title: translator.translate('api.header'),
                });
            }

            {
                const content = await fs.readFile(`${rootDir}/LICENSE.md`, 'utf-8');
                const title = content.match(/^# (.*)\n/)?.[1];
                // exclude title, date and author from searchable content
                const trimmed = content.replace(/^(.+\n+){2}/, '');
                const markdown = marked(trimmed);
                const parsed = await parseMarkdown(markdown, translator);
                const text = JSDOM.fragment(parsed.content ?? '').textContent?.replaceAll(/\s+/g, ' ');
                addDocument({
                    url: '/license',
                    title: title ?? '',
                    content: text ?? '',
                });
            }

            addDocument({
                url: 'https://shop.pronouns.page',
                title: translator.translate('contact.groups.shop'),
                content: '',
            });

            return documents;
        },
    },
    {
        kind: 'pronoun',
        /** Pronouns from the TSV data plus the generator and the special pronoun pages. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.pronouns.enabled) {
                return [];
            }

            const pronouns = parsePronouns(config, loadTsv(`${rootDir}/data/pronouns/pronouns.tsv`));
            const pronounGroups = parsePronounGroups(loadTsv(`${rootDir}/data/pronouns/pronounGroups.tsv`));
            const pronounLibrary = new PronounLibrary(config, pronounGroups, pronouns);

            const documents = Object.values(pronouns).map((pronoun, id): SearchDocument => {
                const description = Array.isArray(pronoun.description)
                    ? pronoun.description.join()
                    : pronoun.description;
                const history = toPlainText(pronoun.history.replaceAll('@', ' '));
                const morphemes = Object.values(pronoun.morphemes)
                    .filter((value) => value !== null)
                    .flatMap((value) => value.split('&'))
                    .join(', ');
                return {
                    id,
                    kind: this.kind,
                    url: routeUrl(pronoun.canonicalName),
                    title: pronoun.name(),
                    titleSmall: pronoun.smallForm ? pronoun.getMorpheme(pronoun.smallForm) ?? undefined : undefined,
                    content: `${description}: ${history} ${morphemes}`,
                };
            });

            // NOTE(review): in the reviewed copy all special entries below sit inside this condition,
            // i.e. they are only indexed when the generator is enabled — kept as is; confirm intent
            if (config.pronouns.generator.enabled) {
                documents.push({
                    id: documents.length,
                    kind: this.kind,
                    url: `${routeUrl(config.pronouns.route)}#generator`,
                    title: translator.translate('home.generator.header'),
                    content: translator.translate('home.generator.description'),
                });

                documents.push({
                    id: documents.length,
                    kind: this.kind,
                    url: `${routeUrl(config.pronouns.route)}#multiple`,
                    title: config.pronouns.multiple.name,
                    content: toPlainText(config.pronouns.multiple.description),
                });

                if (config.pronouns.null !== false) {
                    for (const variant of config.pronouns.null.routes) {
                        documents.push({
                            id: documents.length,
                            kind: this.kind,
                            url: routeUrl(variant),
                            title: shortForVariant('null', variant, translator),
                            content: toPlainText(translator.translate('pronouns.null.description')),
                        });
                    }
                }

                if (config.pronouns.emoji !== false) {
                    documents.push({
                        id: documents.length,
                        kind: this.kind,
                        url: `${routeUrl(config.pronouns.route)}#emoji`,
                        title: config.pronouns.emoji.description,
                        content: toPlainText(config.pronouns.emoji.history),
                    });
                }

                if (config.pronouns.mirror) {
                    documents.push({
                        id: documents.length,
                        kind: this.kind,
                        url: routeUrl(config.pronouns.mirror.route),
                        title: toPlainText(config.pronouns.mirror.name),
                        content: toPlainText(config.pronouns.mirror.description),
                    });
                }

                if (config.pronouns.any) {
                    documents.push({
                        id: documents.length,
                        kind: this.kind,
                        url: routeUrl(config.pronouns.any),
                        title: translator.translate('pronouns.any.short'),
                        content: toPlainText(translator.translate('pronouns.any.description')),
                    });
                    for (const [variant, merged] of Object.entries(pronounLibrary.byKey())) {
                        documents.push({
                            id: documents.length,
                            kind: this.kind,
                            url: `${routeUrl(config.pronouns.any)}:${encodeURIComponent(variant)}`,
                            title: merged.short(translator),
                            content: toPlainText(translator.translate('pronouns.any.description')),
                        });
                    }
                }

                if (config.pronouns.ask) {
                    for (const variant of config.pronouns.ask.routes) {
                        documents.push({
                            id: documents.length,
                            kind: this.kind,
                            url: routeUrl(variant),
                            title: shortForVariant('ask', variant, translator),
                            content: toPlainText(translator.translate('pronouns.ask.description')),
                        });
                    }
                }
            }

            return documents;
        },
        /** Appends the processed small form to the title and removes the separate field. */
        transformDocument(transformed: SearchDocument, termsByField: Record<string, string[]>) {
            // NOTE(review): as visible in the reviewed copy this assignment is a no-op; markup around
            // the title (and around the small form below) was presumably stripped — restore from upstream
            transformed.title = `${transformed.title}`;
            if (transformed.titleSmall) {
                const titleSmall = highlightMatches(transformed.titleSmall, termsByField.titleSmall);
                transformed.title += `/${titleSmall}`;
                delete transformed.titleSmall;
            }
        },
    },
    {
        kind: 'noun',
        /** Dictionary entries; title is the first word per gender, content lists all forms. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.nouns.enabled) {
                return [];
            }

            const base = encodeURIComponent(config.nouns.route);
            const db = useDatabase();
            const nouns = await getNounEntries(db, () => false);
            return nouns.map((noun, id): SearchDocument => {
                const firstWords = genders
                    .filter((gender) => noun[gender])
                    .map((gender) => noun[gender].split('|')[0]);
                return {
                    id,
                    kind: this.kind,
                    // NOTE(review): the filter value is not URL-encoded here and in the other
                    // `?filter=` URLs of this file — kept as is
                    url: `/${base}?filter=${firstWords[0]}`,
                    title: firstWords.join(' – '),
                    content: gendersWithNumerus
                        .filter((genderWithNumerus) => noun[genderWithNumerus])
                        .map((genderWithNumerus) => noun[genderWithNumerus].replaceAll('|', ', '))
                        .join(' – '),
                };
            });
        },
    },
    {
        kind: 'source',
        /** Source entries; title is assembled from author, title, extra and year. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.sources.enabled) {
                return [];
            }

            const runtimeConfig = useRuntimeConfig();
            const base = encodeURIComponent(config.sources.route);
            const db = useDatabase();
            const sources = await getSourcesEntries(db, () => false, undefined);
            return sources.map((source, id): SearchDocument => {
                let title = '';
                if (source.author) {
                    title += `${source.author.replace('^', '')} – `;
                }
                title += source.title;
                if (source.extra) {
                    title += ` (${source.extra})`;
                }
                title += `, ${source.year}`;

                let content = '';
                let contentHidden = '';
                if (source.comment) {
                    content += `${source.comment} `;
                }

                // NOTE(review): everything from the fourth `replaceAll` up to `images.length >` was lost
                // in the copy under review. The handling of `@`, the use of `fragments` as hidden
                // content and the origin of `images` are a reconstruction (assumes `@` separates
                // fragments with `\@` as escape, and `source.images` is a comma-separated string) —
                // confirm against the upstream file.
                const fragments = source.fragments
                    .replaceAll('[[', '')
                    .replaceAll(']]', '')
                    .replaceAll('|', ' ')
                    .replaceAll(/(?<!\\)@/g, ' ')
                    .replaceAll('\\@', '@');
                contentHidden += fragments;

                const images = source.images ? source.images.split(',') : [];
                const image = images.length > 0
                    ? buildImageUrl(runtimeConfig.public.cloudfront, images[0], 'thumb')
                    : undefined;

                return {
                    id,
                    kind: this.kind,
                    url: `/${base}?filter=${source.title}`,
                    title,
                    image: image ? { src: image } : undefined,
                    content,
                    contentHidden,
                };
            });
        },
    },
    {
        kind: 'link',
        /** Configured links; entries without their own URL point to the links page. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.links.enabled) {
                return [];
            }

            // absolute and encoded like every other internal URL (was the bare route before)
            const base = routeUrl(config.links.route);
            return config.links.links.map((link, id) => {
                return {
                    id,
                    kind: this.kind,
                    url: link.url ?? base,
                    title: link.headline ?? '',
                    content: `${link.extra ?? ''} ${link.quote ?? ''} ${link.response ?? ''}`,
                };
            });
        },
    },
    {
        kind: 'faq',
        /** One document per FAQ question, linking to its anchor on the FAQ page. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.faq.enabled) {
                return [];
            }

            // route is encoded like in every other kind (was inserted raw before)
            const base = routeUrl(config.faq.route);
            const faqs = translator.get<Record<string, { question: string; answer: string[] }>>('faq.questions');
            return Object.entries(faqs).map(([key, { question, answer }], id) => {
                return {
                    id,
                    kind: this.kind,
                    url: `${base}#${key}`,
                    title: question,
                    content: answer.map(toPlainText).join(' '),
                };
            });
        },
    },
    {
        kind: 'blog',
        /** Blog posts; the text is extracted from the rendered markdown of each post. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.links.enabled || !config.links.blog) {
                return [];
            }

            const documents: SearchDocument[] = [];
            for (const post of (await getPosts())) {
                const content = await fs.readFile(`${rootDir}/data/blog/${post.slug}.md`, 'utf-8');
                // exclude title, date and author from searchable content
                const trimmed = content.replace(/^(.+\n+){2}/, '');
                const markdown = marked(trimmed);
                const parsed = await parseMarkdown(markdown, translator);
                const text = JSDOM.fragment(parsed.content ?? '').textContent;
                // the config checks are repeated so that `blogRoute` is accessible to the type checker
                if (text !== null && config.links.enabled && config.links.blog) {
                    documents.push({
                        id: documents.length,
                        kind: this.kind,
                        url: `${routeUrl(config.links.blogRoute)}/${post.slug}`,
                        title: post.title,
                        date: post.date,
                        authors: post.authors,
                        image: post.hero,
                        content: text,
                    });
                }
            }
            return documents;
        },
    },
    {
        kind: 'term',
        /** Terminology entries; the image is the first flag if any, otherwise the first image. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.terminology.enabled) {
                return [];
            }

            const runtimeConfig = useRuntimeConfig();
            const base = encodeURIComponent(config.terminology.route);
            const db = useDatabase();
            const terms = await getTermsEntries(db, () => false);
            return terms.map((term, id): SearchDocument => {
                const title = term.term.replaceAll('|', ', ');

                let content = '';
                if (term.original) {
                    content += `${toPlainText(term.original.replaceAll('|', ';'))}`;
                }
                content += ` ${toPlainText(term.definition)}`;

                let image = undefined;
                const flags = JSON.parse(term.flags);
                if (flags.length > 0) {
                    image = `/flags/${flags[0]}.png`;
                } else if (term.images) {
                    image = buildImageUrl(runtimeConfig.public.cloudfront, term.images.split(',')[0], 'flag');
                }

                return {
                    id,
                    kind: this.kind,
                    url: `/${base}?filter=${term.key}`,
                    title,
                    image: image ? { src: image } : undefined,
                    content,
                };
            });
        },
    },
    {
        kind: 'inclusive',
        /** Inclusive-language entries ("instead of … say …"). */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.inclusive.enabled) {
                return [];
            }

            const base = encodeURIComponent(config.inclusive.route);
            const db = useDatabase();
            const inclusiveEntries = await getInclusiveEntries(db, () => false);
            return inclusiveEntries.map((inclusiveEntry, id): SearchDocument => {
                const insteadOf = inclusiveEntry.insteadOf.split('|');
                const say = inclusiveEntry.say?.split('|') ?? [];

                let content = `${translator.translate('inclusive.insteadOf')}: ${insteadOf.join(', ')}` +
                    ` – ${translator.translate('inclusive.say')}: ${say.join(', ')}`;
                if (inclusiveEntry.clarification) {
                    content += `; ${inclusiveEntry.clarification}`;
                }
                content += `; ${inclusiveEntry.because}`;

                return {
                    id,
                    kind: this.kind,
                    url: `/${base}?filter=${insteadOf[0]}`,
                    // without a suggestion the title used to end in the literal text "undefined"
                    title: say[0] ? `${insteadOf[0]} – ${say[0]}` : `${insteadOf[0]}`,
                    content,
                };
            });
        },
    },
    {
        kind: 'calendar',
        /** Events of the current year that have at least one day, linked to their first day. */
        async getDocuments(config: Config): Promise<SearchDocument[]> {
            if (!config.calendar?.enabled) {
                return [];
            }

            const base = config.calendar?.route;
            const runtimeConfig = useRuntimeConfig();
            const calendar = buildCalendar(runtimeConfig.public.baseUrl);
            const year = Day.today().year;
            return (calendar.getYear(year)?.events ?? [])
                .map((event) => ({ event, firstEventDay: event.getDays(year)[0] }))
                .filter(({ firstEventDay }) => firstEventDay !== undefined)
                .map(({ event, firstEventDay }, id): SearchDocument => {
                    let eventName = event.name.split('$')[0];
                    const translationKey = `calendar.events.${eventName}`;
                    if (translator.has(translationKey)) {
                        eventName = translator.translate(translationKey);
                    }
                    eventName = toPlainText(eventName);

                    const date = translator.translate(
                        `calendar.dates.${firstEventDay.month}`,
                        { day: event.getRange(year) },
                    );

                    let image = undefined;
                    let icon = undefined;
                    if (event.display.type === 'flag') {
                        image = { src: `/flags/${event.display.name}.png`, class: event.display.class };
                    } else if (event.display.type === 'icon') {
                        icon = event.display.name;
                    }

                    return {
                        id,
                        kind: this.kind,
                        // leading slash added: without it the URL was relative, unlike all other kinds
                        url: `${routeUrl(base)}/${firstEventDay}`,
                        title: eventName,
                        image,
                        icon,
                        content: '',
                        date,
                    };
                });
        },
    },
];

/**
 * Search endpoint: validates the `query` parameter, searches all indices and returns at most
 * `SEARCH_LIMIT` transformed documents, best score first.
 *
 * @throws a 400 "Bad Request" error when the query is not a single string or fails validation
 */
export default defineEventHandler(async (event) => {
    const { query } = getQuery(event);
    // a missing or repeated parameter is not a string; reject it like any other invalid query
    if (typeof query !== 'string') {
        throw createError({
            status: 400,
            statusMessage: 'Bad Request',
        });
    }

    const normalisedQuery = normaliseQuery(query);
    const queryValidation = validateQuery(normalisedQuery);
    if (queryValidation !== undefined) {
        throw createError({
            status: 400,
            statusMessage: 'Bad Request',
        });
    }

    const indices = await loadIndices(kinds, global.config);
    return searchIndices(indices, normalisedQuery)
        .slice(0, SEARCH_LIMIT)
        .map((result) => transformResult(indices, result));
});