(search)(pronouns) add search for canonical pronouns

This commit is contained in:
Valentyne Stigloher 2024-12-16 22:34:29 +01:00
parent 9d15cb1672
commit 7b27ed0ba7
4 changed files with 228 additions and 79 deletions

View File

@ -0,0 +1,45 @@
<script setup lang="ts">
import type { SearchResultBlog } from '~/server/api/search.get.ts';
// Renders one blog post entry of the search result list.
// `result` is a single blog search result, typed by the search API handler.
defineProps<{
result: SearchResultBlog;
}>();
</script>
<template>
<nuxt-link :to="result.url" class="text-dark">
<!-- heading: blog icon followed by the post title -->
<div class="h3">
<Icon v="pen-nib" />
<Spelling :text="result.title" />
</div>
<div class="d-flex">
<!-- the image column is always rendered; the image itself only when the post has a hero -->
<div class="col-2">
<img
v-if="result.hero"
:src="result.hero.src"
:class="['w-100 pe-2', result.hero.class]"
:alt="result.hero.alt"
loading="lazy"
>
</div>
<div class="col">
<!-- the search API passes `content` through its highlighter before returning it -->
<Spelling :text="result.content" />
<!-- post metadata: publication date first, then one badge per author -->
<ul class="list-inline mb-0 small">
<li class="list-inline-item small">
<Icon v="calendar" />
{{ result.date }}
</li>
<li v-for="author in result.authors" :key="author" class="list-inline-item">
<!-- authors starting with '@' additionally get the collective logo (presumably team member handles; verify) -->
<span v-if="author.startsWith('@')" class="badge bg-light text-dark border">
<Icon v="collective-logo.svg" class="invertible" />
{{ author }}
</span>
<span v-else class="badge bg-light text-dark border">
{{ author }}
</span>
</li>
</ul>
</div>
</div>
</nuxt-link>
</template>

View File

@ -0,0 +1,17 @@
<script setup lang="ts">
import type { SearchResultPronoun } from '~/server/api/search.get.ts';
// Renders one pronoun entry of the search result list.
// `result` is a single pronoun search result, typed by the search API handler.
defineProps<{
result: SearchResultPronoun;
}>();
</script>
<template>
<nuxt-link :to="result.url" class="text-dark">
<!-- heading: icon, the short name in bold and, only when present, the small form after a slash -->
<div class="h3">
<Icon v="tags" class="h3" />
<strong><Spelling :text="result.short" /></strong><small v-if="result.small">/<Spelling :text="result.small" /></small>
</div>
<!-- the search API passes `content` through its highlighter before returning it -->
<Spelling :text="result.content" />
</nuxt-link>
</template>

View File

@ -31,40 +31,9 @@ const searchInput = useTemplateRef('searchInput');
</section>
<section>
<ul class="list-group">
<li v-for="result of results.data.value" :key="result.url" class="list-group-item">
<nuxt-link :to="result.url" class="d-flex text-dark">
<div class="col-auto pt-1 pe-3">
<Icon v="pen-nib" class="h3" />
</div>
<div class="col-2">
<img
v-if="result.hero"
:src="result.hero.src"
:class="['w-100', result.hero.class]"
:alt="result.hero.alt"
loading="lazy"
>
</div>
<div class="col ps-2">
<Spelling class="d-block h4" :text="result.title" />
<Spelling :text="result.fragment" />
<ul class="list-inline mb-0 small">
<li class="list-inline-item small">
<Icon v="calendar" />
{{ result.date }}
</li>
<li v-for="author in result.authors" :key="author" class="list-inline-item">
<span v-if="author.startsWith('@')" class="badge bg-light text-dark border">
<Icon v="collective-logo.svg" class="invertible" />
{{ author }}
</span>
<span v-else class="badge bg-light text-dark border">
{{ author }}
</span>
</li>
</ul>
</div>
</nuxt-link>
<li v-for="result of results.data.value" :key="`${result.type}-${result.id}`" class="list-group-item">
<SearchItemPronoun v-if="result.type === 'pronoun'" :result="result" />
<SearchItemBlog v-else-if="result.type === 'blog'" :result="result" />
</li>
</ul>
</section>

View File

@ -3,25 +3,23 @@ import fs from 'node:fs/promises';
import { JSDOM } from 'jsdom';
import marked from 'marked';
import MiniSearch from 'minisearch';
import type { MatchInfo } from 'minisearch';
import type { MatchInfo, SearchResult } from 'minisearch';
import type { Config } from '~/locale/config.ts';
import { getPosts, type PostMetadata } from '~/server/blog.ts';
import { loadSuml, loadSumlFromBase } from '~/server/loader.ts';
import { rootDir } from '~/server/paths.ts';
import { parsePronouns } from '~/src/buildPronoun.ts';
import { clearLinkedText } from '~/src/helpers.ts';
import parseMarkdown from '~/src/parseMarkdown.ts';
import { Translator } from '~/src/translator.ts';
import { loadTsv } from '~/src/tsv.ts';
const translations = loadSuml('translations');
const baseTranslations = loadSumlFromBase('locale/_base/translations');
const translator = new Translator(translations, baseTranslations, global.config);
interface SearchDocumentPost extends PostMetadata {
id: number;
url: string;
content: string;
}
const getTermsByField = (matches: MatchInfo): Record<string, string[]> => {
const termsByField: Record<string, string[]> = {};
for (const [term, fields] of Object.entries(matches)) {
@ -57,12 +55,109 @@ const highlightMatches = (field: string, terms: string[] | undefined, fragment:
return field.replaceAll(termsRegex, `<mark>$1</mark>`);
};
export default defineEventHandler(async (event) => {
const index = new MiniSearch<SearchDocumentPost>({
fields: ['url', 'title', 'content'],
});
/**
 * Base class for one searchable category of content.
 *
 * `D` is the document type stored in the index, `R` the type handed back to the client.
 * Subclasses provide the documents and the conversion of a raw match into `R`.
 */
abstract class SearchIndex<D, R> {
// all documents of this category, filled by init()
documents: D[];
// MiniSearch instance holding the documents of this category
index: MiniSearch<D>;
const posts: SearchDocumentPost[] = [];
/**
 * @param fields document properties that MiniSearch should index for searching
 */
protected constructor(fields: (keyof D)[]) {
this.documents = [];
this.index = new MiniSearch({
fields: fields as string[],
// keep `type` on every search result so a result can be attributed to its category
storeFields: ['type'],
});
}
/**
 * Loads the documents of this category and adds all of them to the index.
 *
 * @param config locale configuration passed on to getDocuments()
 */
async init(config: Config) {
this.documents = await this.getDocuments(config);
this.index.addAll(this.documents);
}
/** Produces all documents of this category for the given configuration. */
abstract getDocuments(config: Config): Promise<D[]>;
/** Converts a raw MiniSearch result into the shape returned to the client. */
abstract transform(result: SearchResult): R;
}
/** A pronoun as it is stored in the pronoun search index. */
interface SearchDocumentPronoun {
// position of the document within the pronoun index's document list
id: number;
// discriminator, taken from the TYPE field of the pronoun index class
type: SearchIndexPronoun['TYPE'];
// site-relative link built from the URI-encoded canonical name of the pronoun
url: string;
// value of the pronoun's name()
short: string;
// morpheme selected by the pronoun's small form; undefined when there is none
small: string | undefined;
// description, history and morphemes combined into one searchable text
content: string;
}
/**
 * A pronoun search result has the same shape as the stored document;
 * in results, `short`, `small` and `content` are passed through the match highlighter.
 */
export type SearchResultPronoun = SearchDocumentPronoun;
/**
 * Search index over the pronouns listed in `data/pronouns/pronouns.tsv`.
 *
 * Every pronoun becomes one document whose id equals its position in the document list,
 * which is what `transform` relies on to find the document belonging to a match.
 */
class SearchIndexPronoun extends SearchIndex<SearchDocumentPronoun, SearchResultPronoun> {
    TYPE = 'pronoun' as const;

    constructor() {
        super(['url', 'short', 'small', 'content']);
    }

    /**
     * Builds one search document per parsed pronoun.
     *
     * @param config locale configuration; when pronouns are disabled no documents are produced
     * @returns the documents in the order the parsed pronouns are enumerated
     */
    async getDocuments(config: Config): Promise<SearchDocumentPronoun[]> {
        if (!config.pronouns.enabled) {
            return [];
        }

        const parsed = parsePronouns(config, loadTsv(`${rootDir}/data/pronouns/pronouns.tsv`));
        const documents: SearchDocumentPronoun[] = [];
        for (const pronoun of Object.values(parsed)) {
            // a description given as a list is joined with the default separator of Array#join
            const rawDescription = pronoun.description;
            const description = Array.isArray(rawDescription)
                ? rawDescription.join()
                : rawDescription;

            // '@' is replaced by a space before the linked text markup is cleared
            const historySource = pronoun.history.replaceAll('@', ' ');
            const history = clearLinkedText(historySource, false);

            // a morpheme may hold several variants separated by '&'; unset morphemes are left out
            const variants: string[] = [];
            for (const morpheme of Object.values(pronoun.morphemes)) {
                if (morpheme !== null) {
                    variants.push(...morpheme.split('&'));
                }
            }
            const morphemes = variants.join(', ');

            const url = `/${encodeURIComponent(pronoun.canonicalName)}`;
            const short = pronoun.name();
            let small: string | undefined = undefined;
            if (pronoun.smallForm) {
                small = pronoun.getMorpheme(pronoun.smallForm) ?? undefined;
            }

            documents.push({
                // the id doubles as the index into the document list
                id: documents.length,
                type: this.TYPE,
                url,
                short,
                small,
                content: `${description}: ${history} ${morphemes}`,
            });
        }
        return documents;
    }

    /**
     * Turns a raw match into a client-facing result with the matched terms highlighted.
     *
     * @param result raw MiniSearch result whose id points into the document list
     * @returns a copy of the matched document with `short`, `small` and `content` highlighted
     */
    override transform(result: SearchResult): SearchResultPronoun {
        const { id, type, url, short, small, content } = this.documents[result.id];
        const matchedTerms = getTermsByField(result.match);
        return {
            id,
            type,
            url,
            short: highlightMatches(short, matchedTerms.short),
            small: small ? highlightMatches(small, matchedTerms.small) : undefined,
            content: highlightMatches(content, matchedTerms.content, true),
        };
    }
}
/**
 * A blog post as it is stored in the blog search index.
 * The remaining fields used for display (title, date, authors, hero) are presumably
 * inherited from PostMetadata — confirm against `~/server/blog.ts`.
 */
interface SearchDocumentBlog extends PostMetadata {
// position of the document within the blog index's document list
id: number;
// discriminator, taken from the TYPE field of the blog index class
type: SearchIndexBlog['TYPE'];
// site-relative link made of the URI-encoded blog route and the post slug
url: string;
// searchable text of the post (presumably the plain text of the rendered markdown; verify)
content: string;
}
/**
 * A blog search result has the same shape as the stored document;
 * in results, `title` and `content` are passed through the match highlighter.
 */
export type SearchResultBlog = SearchDocumentBlog;
class SearchIndexBlog extends SearchIndex<SearchDocumentBlog, SearchResultBlog> {
TYPE = 'blog' as const;
constructor() {
super(['url', 'title', 'content']);
}
async getDocuments(config: Config): Promise<SearchDocumentBlog[]> {
if (!config.links.enabled || !config.links.blog) {
return [];
}
const documents: SearchDocumentBlog[] = [];
for (const post of (await getPosts())) {
const content = await fs.readFile(`${rootDir}/data/blog/${post.slug}.md`, 'utf-8');
// exclude title, date and author from searchable content
@ -71,8 +166,9 @@ export default defineEventHandler(async (event) => {
const parsed = await parseMarkdown(markdown, translator);
const text = JSDOM.fragment(parsed.content ?? '').textContent;
if (text !== null && config.links.enabled && config.links.blog) {
posts.push({
id: posts.length,
documents.push({
id: documents.length,
type: this.TYPE,
url: `/${encodeURIComponent(config.links.blogRoute)}/${post.slug}`,
title: post.title,
date: post.date,
@ -82,22 +178,44 @@ export default defineEventHandler(async (event) => {
});
}
}
index.addAll(posts);
return documents;
}
override transform(result: SearchResult): SearchResultBlog {
const document = this.documents[result.id];
const termsByField = getTermsByField(result.match);
return {
id: document.id,
type: document.type,
url: document.url,
title: highlightMatches(document.title, termsByField.title),
date: document.date,
authors: document.authors,
hero: document.hero,
content: highlightMatches(document.content, termsByField.content, true),
};
}
}
export default defineEventHandler(async (event) => {
const indices = Object.fromEntries(
[
new SearchIndexPronoun(),
new SearchIndexBlog(),
].map((index) => [index.TYPE, index]),
);
await Promise.all(Object.values(indices).map((index) => index.init(config)));
const query = getQuery(event);
const text = query.text as string;
const results = index.search(text, { prefix: true, fuzzy: 1 });
return results.map((result) => {
const post = posts[result.id];
const termsByField = getTermsByField(result.match);
return {
url: post.url,
title: highlightMatches(post.title, termsByField.title),
date: post.date,
authors: post.authors,
hero: post.hero,
fragment: highlightMatches(post.content, termsByField.content, true),
termsByField,
};
return Object.values(indices)
.flatMap((index) => {
return index.index.search(text, { prefix: true, fuzzy: 1 });
})
.toSorted((resultA, resultB) => {
return resultB.score - resultA.score;
})
.map((result) => {
return indices[result.type as keyof typeof indices].transform(result);
});
});