171 lines
6.2 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { capitalise, escapePronunciationString } from '#shared/helpers.ts';
import type { MorphemeValues } from '#shared/language/morphemes.ts';
import type { NounConvention, NounDeclension, Numerus } from '#shared/nouns.ts';
import type { Config } from '~~/locale/config.ts';
/**
 * One segment of a parsed example: either literal text (a plain string) or a
 * placeholder object describing a morpheme or a declined noun.
 */
export type ExamplePart = string | ExamplePartMorpheme | ExamplePartNoun;
/** Placeholder that is filled in with the spelling of a named morpheme. */
interface ExamplePartMorpheme {
type: 'morpheme';
// Name under which the spelling / pronunciation is looked up in the morpheme values.
morpheme: string;
// When truthy, the resolved spelling is passed through `capitalise`.
capitalise?: boolean;
}
/** Placeholder that is filled in with a noun built from a stem, a template suffix and a declension ending. */
interface ExamplePartNoun {
type: 'noun';
// Stems keyed by stem name; the key 'default' is used when the noun template names no stem.
stems: Record<string, string>;
// Key into the noun convention's `templates`.
nounClass: string;
// Key into the selected declension table for the chosen numerus.
caseAbbreviation: string;
// Treated as 'singular' when omitted.
numerus?: Numerus;
}
/** Values used to resolve the placeholders of an `Example`. */
export interface ExampleValues {
// Source of morpheme spellings and pronunciations.
morphemeValues: MorphemeValues;
// NOTE(review): not read by the Example class in this file; presumably consumed by callers — confirm.
plural?: boolean;
// Both of the following must be set for noun placeholders to resolve.
nounConvention?: NounConvention;
nounDeclensions?: Record<string, NounDeclension>;
}
// Matches every `{...}` placeholder; group 1 is the text between the braces (which cannot contain `}`).
const placeholderRegex = /{([^}]+)}/g;
// Placeholder content for a morpheme: an optional leading `'` (capitalise flag)
// followed by a name made of lowercase letters, digits and underscores.
const morphemeRegex = /^(?<capitalise>'?)(?<morpheme>[a-z0-9_]+)$/;
// Placeholder content for a noun: `:noun <stems> <nounClass> <caseAbbreviation> <singular|plural>`,
// separated by single spaces.
const nounRegex = /^:noun (?<stems>\S+) (?<nounClass>\S+) (?<caseAbbreviation>\w+) (?<numerus>singular|plural)$/;
/**
 * An example sentence split into literal text and placeholders (morphemes and
 * declined nouns) that are resolved against a set of `ExampleValues`.
 */
export class Example {
    parts: ExamplePart[];

    constructor(parts: ExamplePart[]) {
        this.parts = parts;
    }

    /**
     * Parses a template string into an `Example`.
     *
     * Recognised placeholders:
     * - `{name}` / `{'name}` – a morpheme; the leading `'` requests capitalisation.
     * - `{:noun stems nounClass case numerus}` – a noun; `stems` is a `|`-separated
     *   list of `key:stem` pairs, where a bare `stem` is stored under `default`.
     *
     * A `{...}` span matching neither form is kept as part of the literal text.
     *
     * @param example the template string
     * @returns the parsed example
     */
    static parse(example: string): Example {
        const parts: ExamplePart[] = [];
        let lastPosition = 0;

        // Emits the literal text between the previous placeholder and this one,
        // then advances the cursor past the current placeholder.
        const processStaticText = (match: RegExpExecArray) => {
            const textBefore = example.substring(lastPosition, match.index);
            if (textBefore) {
                parts.push(textBefore);
            }
            lastPosition = match.index + match[0].length;
        };

        for (const match of example.matchAll(placeholderRegex)) {
            const morphemeMatch = match[1].match(morphemeRegex);
            if (morphemeMatch && morphemeMatch.groups) {
                processStaticText(match);
                parts.push({
                    type: 'morpheme',
                    capitalise: !!morphemeMatch.groups.capitalise,
                    morpheme: morphemeMatch.groups.morpheme,
                });
                continue;
            }

            const nounMatch = match[1].match(nounRegex);
            if (nounMatch && nounMatch.groups) {
                processStaticText(match);
                parts.push({
                    type: 'noun',
                    stems: Object.fromEntries(nounMatch.groups.stems.split('|').map((stem) => {
                        const chunks = stem.split(':');
                        return chunks.length === 1 ? ['default', chunks[0]] : chunks;
                    })),
                    nounClass: nounMatch.groups.nounClass,
                    caseAbbreviation: nounMatch.groups.caseAbbreviation,
                    numerus: nounMatch.groups.numerus as Numerus | undefined,
                });
            }
            // Unrecognised placeholders fall through: the cursor is not moved,
            // so their text ends up in the next literal segment.
        }

        const textAfter = example.substring(lastPosition);
        if (textAfter.length) {
            parts.push(textAfter);
        }
        return new Example(parts);
    }

    /**
     * @param morpheme the morpheme name to look for
     * @returns whether any morpheme placeholder of this example uses that name
     */
    hasMorpheme(morpheme: string): boolean {
        return this.parts.some((part) => {
            return typeof part !== 'string' && part.type === 'morpheme' && part.morpheme === morpheme;
        });
    }

    /**
     * @returns `true` when every part of the example resolves to a spelling
     */
    areRequiredExampleValuesPresent(exampleValues: ExampleValues): boolean {
        return this.parts.every((part, index) => this.getSpelling(index, exampleValues) !== undefined);
    }

    /**
     * Resolves the spelling of a single part.
     *
     * @param index position of the part in `parts`
     * @param exampleValues values used to resolve placeholders
     * @returns the literal text, the (optionally capitalised) morpheme spelling,
     *   or the assembled noun; `undefined` when anything needed is missing
     */
    getSpelling(index: number, exampleValues: ExampleValues): string | undefined {
        const part = this.parts[index];
        if (typeof part === 'string') {
            return part;
        }
        switch (part.type) {
            case 'morpheme': {
                const spelling = exampleValues.morphemeValues.getSpelling(part.morpheme);
                return spelling !== undefined && part.capitalise ? capitalise(spelling) : spelling;
            }
            case 'noun': {
                const { nounConvention, nounDeclensions } = exampleValues;
                if (nounConvention === undefined || nounDeclensions === undefined) {
                    return undefined;
                }
                const template = nounConvention.templates[part.nounClass];
                if (template === undefined) {
                    return undefined;
                }
                const stem = part.stems[template.stem ?? 'default'];
                if (stem === undefined) {
                    // Without the stem the template asks for, the noun cannot be spelled.
                    return undefined;
                }
                const numerus = part.numerus ?? 'singular';
                // The declension table itself may be absent; guard before indexing by numerus.
                const declension = nounDeclensions[template.declension]?.[numerus];
                if (declension === undefined) {
                    return undefined;
                }
                const declensionSuffix = declension[part.caseAbbreviation];
                if (declensionSuffix === undefined) {
                    // Avoid interpolating the literal text "undefined" for an unknown case.
                    return undefined;
                }
                return `${stem}${template.suffix}${declensionSuffix}`;
            }
        }
    }

    /**
     * @returns the spellings of all parts concatenated; unresolved parts
     *   contribute an empty string
     */
    toSpellingString(exampleValues: ExampleValues): string {
        return this.parts.map((part, index) => this.getSpelling(index, exampleValues)).join('');
    }

    /**
     * Builds the pronunciation string of the example.
     *
     * Literal text is passed through `escapePronunciationString`, nouns use
     * their spelling, and morphemes use their pronunciation (see below).
     *
     * @returns the pronunciation string, or `undefined` when any morpheme's
     *   pronunciation is explicitly `false`
     */
    toPronunciationString(
        config: Config,
        exampleValues: ExampleValues,
    ): string | undefined {
        // Characters that are emitted as-is instead of being wrapped in slashes.
        // NOTE(review): the '' entry can never equal a character; presumably a
        // punctuation character was lost from this literal — verify.
        const unwrappedCharacters = [' ', ',', '.', ':', ';', '', '-'];

        const buildPronunciation = (morpheme: string): string | false | undefined => {
            const pronunciation = exampleValues.morphemeValues.getPronunciation(morpheme);
            if (pronunciation === false) {
                return false;
            }
            if (pronunciation !== undefined) {
                // A leading '=' means "use verbatim"; otherwise wrap in slashes.
                return pronunciation.startsWith('=') ? pronunciation.substring(1) : `/${pronunciation}/`;
            }

            const spelling = exampleValues.morphemeValues.getSpelling(morpheme);
            if (config.pronunciation?.ipa && spelling) {
                // Iterate by code point so characters outside the BMP are not
                // split into lone surrogates.
                return Array.from(spelling)
                    .map((c) => unwrappedCharacters.includes(c) ? c : `/${c}/`)
                    .join('');
            }
            return spelling;
        };

        const pronunciationParts = this.parts.map((part, index) => {
            if (typeof part === 'string') {
                return escapePronunciationString(part);
            }
            switch (part.type) {
                case 'morpheme':
                    return buildPronunciation(part.morpheme);
                case 'noun':
                    return this.getSpelling(index, exampleValues);
            }
        });

        if (pronunciationParts.some((part) => part === false)) {
            return undefined;
        }
        return pronunciationParts.join('');
    }
}