// PronounsPage/shared/nouns.ts

import { fromUnionEntries } from '#shared/helpers.ts';
import type { GrammarTableDefinition } from '#shared/language/grammarTables.ts';
import type { MorphemeValue } from '#shared/language/morphemes.ts';
import type { Config } from '~~/locale/config.ts';

/** gender keys used in this module to group noun words and to index the per-gender lookup tables */
export const genders = ['masc', 'fem', 'neutr', 'nb'] as const;
/** union of the string literals listed in `genders` */
export type Gender = typeof genders[number];

/**
 * every gender key followed by the same keys with a `Pl` suffix
 * (presumably the plural counterpart of each gender – confirm against the callers)
 */
export const gendersWithNumerus = ['masc', 'fem', 'neutr', 'nb', 'mascPl', 'femPl', 'neutrPl', 'nbPl'] as const;

/**
 * lists the genders which are enabled for a locale
 *
 * all genders are always available except `'nb'`, which is only included
 * when `config.nouns.nonbinary` is truthy
 */
export const availableGenders = (config: Config): Gender[] => {
    const available: Gender[] = [];
    for (const gender of genders) {
        const hiddenByConfig = gender === 'nb' && !config.nouns.nonbinary;
        if (!hiddenByConfig) {
            available.push(gender);
        }
    }
    return available;
};

/** icon name for each gender (presumably names of the icon font used by the frontend – confirm) */
export const iconNamesByGender: Record<Gender, string> = {
    masc: 'mars',
    fem: 'venus',
    neutr: 'neuter',
    nb: 'transgender-alt',
};
/**
 * private-use-area code point for each gender
 * (presumably the glyphs of the icons named in `iconNamesByGender` inside the icon font – confirm)
 */
export const iconUnicodesByGender: Record<Gender, string> = {
    masc: '\uf222',
    fem: '\uf221',
    neutr: '\uf22c',
    nb: '\uf225',
};
/** spelled-out English identifier for each abbreviated gender key */
export const longIdentifierByGender: Record<Gender, string> = {
    masc: 'masculine',
    fem: 'feminine',
    neutr: 'neuter',
    nb: 'nonbinary',
};

/** grammatical numbers known to this module; functions iterating over `numeri` visit singular before plural */
export const numeri = ['singular', 'plural'] as const;
/** union of the string literals listed in `numeri` */
export type Numerus = typeof numeri[number];

/**
 * lists the numeri which are enabled for a locale
 *
 * returns all of `numeri` when `config.nouns.plurals` is truthy, otherwise only `'singular'`
 */
export const availableNumeri = (config: Config): readonly Numerus[] => {
    return config.nouns.plurals ? numeri : ['singular'];
};

/** symbol standing for each numerus */
export const symbolsByNumeri: Record<Numerus, string> = {
    singular: '⋅',
    plural: '⁖',
};

/** key of a grammatical case, i.e. a key of `NounsData.cases` */
export type NounCaseKey = string;

/** a single noun form together with the metadata needed to display and decline it */
export interface NounWord {
    /** written form of the word; articles are put in front of it and endings behind it when it is declined */
    spelling: string;
    /**
     * whether this word is regular
     * - `undefined` means an irregular word
     * - `'standalone'` means a regular word which should be displayed standalone
     * - `'collapsible'` means a regular word which can be collapsed with other collapsible words
     */
    regularity?: 'standalone' | 'collapsible';
    /** key of the convention this word belongs to, looked up in `NounsData.conventions` */
    convention?: NounConventionKey;
    /** either the key of a declension in `NounsData.declensions` or an inline declension */
    declension?: NounDeclensionKey | NounDeclension;
}

/**
 * noun words grouped by gender and numerus where an entry may still be a bare string instead of a `NounWord`
 * (presumably the not yet normalised input form – confirm against the callers)
 */
export type NounWordsRaw = Partial<Record<Gender, Partial<Record<Numerus, (NounWord | string)[]>>>>;

/** noun words grouped by gender and numerus; genders and numeri without words may be absent */
export type NounWords = Partial<Record<Gender, Partial<Record<Numerus, NounWord[]>>>>;

/** key of a convention group, i.e. a key of `NounsData.groups` */
type NounConventionGroupKey = string;
/** a named set of conventions */
export interface NounConventionGroup {
    /** name of the group */
    name: string;
    /** keys of the conventions which belong to this group */
    conventions: NounConventionKey[];
    /** optional description of the group */
    description?: string;
}

/** key of a convention, i.e. a key of `NounsData.conventions` */
type NounConventionKey = string;
/** a convention describing how regular words of one gender are formed for each noun class */
export interface NounConvention {
    /** name of the convention */
    name: string;
    /** NOTE(review): presumably whether the convention follows the normative language – confirm */
    normative: boolean;
    /** gender under which words generated from this convention are filed */
    gender: Gender;
    /**
     * whether regular words of this and other conventions of the same gender should be collapsed
     * in the dictionary containing all words
     */
    collapsible?: boolean;
    /** optional warning text for this convention */
    warning?: string;
    /** optional description of this convention */
    description?: string[];
    /** morphemes by name; they fill the `{morpheme}` placeholders of the article templates */
    morphemes: Record<string, MorphemeValue | string>;
    /** word template for each noun class this convention supports */
    templates: Record<NounClassKey, NounConventionTemplate>;
}

/** recipe for building the word of one noun class within a convention */
interface NounConventionTemplate {
    /** key of the stem to take from the class instance; `'default'` is used when omitted */
    stem?: string;
    /** appended to the stem to form the spelling of the word */
    suffix: string;
    /** key of the declension assigned to the generated word */
    declension: NounDeclensionKey;
}

/** key of a stem, i.e. a key of `NounsData.stems` */
export type NounStemKey = string;
/** descriptive metadata of a stem */
interface NounStem {
    /** name of the stem */
    name: string;
    /** example value for the stem */
    example: string;
}

/** key of a noun class, i.e. a key of `NounsData.classes` and of `NounConvention.templates` */
export type NounClassKey = string;
/** a class of nouns which conventions provide templates for */
export interface NounClass {
    /** example stem for each stem key */
    exampleStems: Record<NounStemKey, string>;
}

/**
 * article templates per case for each numerus
 *
 * a template may contain a `{morpheme}` placeholder which `resolveArticles`
 * replaces with the matching morpheme of a convention
 */
export interface NounClassExample {
    singular: Record<NounCaseKey, string>;
    plural: Record<NounCaseKey, string>;
}

/** key of a declension, i.e. a key of `NounsData.declensions` */
type NounDeclensionKey = string;
/**
 * endings of a noun per numerus and case
 *
 * a missing numerus means that words generated with this declension get no form in that numerus
 */
export interface NounDeclension {
    /** optional name of the declension */
    name?: string;
    /** possible endings for each case in singular */
    singular?: Record<NounCaseKey, string[]>;
    /** possible endings for each case in plural */
    plural?: Record<NounCaseKey, string[]>;
}

/**
 * looks up the endings per case which apply to a word in the given numerus
 *
 * - a word without declension has no endings
 * - a declension key is resolved through `nounsData.declensions`
 * - an inline declension is used directly
 *
 * @returns the endings by case, or `undefined` if the word has no declension,
 * the key is unknown or the declension does not cover the numerus
 */
export const resolveDeclensionByCase = (
    word: NounWord,
    numerus: Numerus,
    nounsData: NounsData,
): Record<NounCaseKey, string[]> | undefined => {
    const { declension } = word;
    switch (typeof declension) {
        case 'undefined':
            return undefined;
        case 'string':
            return nounsData.declensions?.[declension]?.[numerus];
        default:
            return declension[numerus];
    }
};

/**
 * resolves the article for every case of the given numerus
 *
 * the article templates are taken from `nounsData.classExample`;
 * every `{morpheme}` placeholder of a template is replaced with the matching morpheme of the convention.
 * morphemes which the convention does not define resolve to an empty string,
 * and a template which ends up containing only whitespace collapses to `''`.
 *
 * @param nounConvention convention whose morphemes fill the placeholders
 * @param numerus numerus whose templates are used
 * @param nounsData noun data of the locale providing `classExample`
 * @returns the resolved article by case; empty if there are no templates for the numerus
 */
export const resolveArticles = (
    nounConvention: NounConvention,
    numerus: Numerus,
    nounsData: NounsData,
): Record<NounCaseKey, string> => {
    const templates = nounsData.classExample?.[numerus] ?? {};
    return Object.fromEntries(Object.entries(templates)
        .map(([caseAbbreviation, template]) => {
            // the global flag makes sure that every placeholder of a template gets resolved, not just the first
            const resolvedArticle = template.replace(/\{([^}]+)}/g, (_match, morpheme: string) => {
                const value = nounConvention.morphemes?.[morpheme];
                if (value === undefined) {
                    return '';
                }
                return typeof value === 'string' ? value : value.spelling;
            });
            // do not keep the separating whitespace of a template whose morphemes are all missing
            return [caseAbbreviation, resolvedArticle.trim().length === 0 ? '' : resolvedArticle];
        }));
};

/**
 * builds the display form of a word in the first case of the locale
 *
 * the first case is the first key of `nounsData.cases`; the result is the article of that case,
 * the spelling of the word and the first ending which the declension lists for that case.
 * a missing article or ending contributes nothing.
 *
 * @param word word to display
 * @param nounsData noun data of the locale providing `cases`
 * @param articles resolved article by case
 * @param declensionByCase endings by case; `undefined` if the word has no declension
 * @returns the bare spelling if the locale has no cases, otherwise the composed form
 */
export const getFirstDeclension = (
    word: NounWord,
    nounsData: NounsData,
    articles: Record<NounCaseKey, string>,
    declensionByCase: Record<NounCaseKey, string[]> | undefined,
): string => {
    if (nounsData.cases === undefined) {
        return word.spelling;
    }
    const firstCase = Object.keys(nounsData.cases)[0];
    if (firstCase === undefined) {
        // an empty list of cases is treated like no cases at all
        return word.spelling;
    }
    // a declension does not need to cover the first case, so the lookup of its endings is optional as well
    const ending = declensionByCase?.[firstCase]?.[0] ?? '';
    return `${articles[firstCase] ?? ''}${word.spelling}${ending}`;
};

/**
 * builds the display form of a word in the first case of the locale,
 * resolving its articles and declension from the noun data
 *
 * articles are only resolved if the word references a convention which exists in `nounsData.conventions`;
 * a word without convention or with an unknown convention key is displayed without article.
 *
 * @param word word to display
 * @param numerus numerus in which the word is displayed
 * @param nounsData noun data of the locale
 * @returns the composed form as built by `getFirstDeclension`
 */
export const resolveFirstDeclension = (word: NounWord, numerus: Numerus, nounsData: NounsData): string => {
    // the key may be stale, so the looked-up convention has to be checked before it is used
    const nounConvention = word.convention !== undefined
        ? nounsData.conventions?.[word.convention]
        : undefined;
    const articles: Record<NounCaseKey, string> = nounConvention !== undefined
        ? resolveArticles(nounConvention, numerus, nounsData)
        : {};
    const declensionByCase = resolveDeclensionByCase(word, numerus, nounsData);
    return getFirstDeclension(word, nounsData, articles, declensionByCase);
};

/** maps each ending of the first case to the key of a declension listing it, separately for each numerus */
export interface NounDeclensionsByFirstCase {
    singular: Record<string, NounDeclensionKey>;
    plural: Record<string, NounDeclensionKey>;
}

/**
 * builds a lookup from the endings of the first case to the declension which lists them
 *
 * the lookup stays empty unless declension and its detection are both enabled in the config
 * and both cases and declensions are given. the first case is the first key of `cases`.
 * for each numerus the entries are ordered by the length of the ending, shortest first;
 * if several declensions list the same ending, the declension defined last wins.
 *
 * @param config config of the locale, read for `nouns.declension.enabled` and `nouns.declension.detect`
 * @param cases cases of the locale
 * @param declensions declensions of the locale
 */
export const buildNounDeclensionsByFirstCase = (
    config: Config,
    cases: Record<NounCaseKey, string> | undefined,
    declensions: Record<NounDeclensionKey, NounDeclension> | undefined,
): NounDeclensionsByFirstCase => {
    const declensionConfig = config.nouns.declension;
    const detectionActive = declensionConfig?.enabled && declensionConfig.detect;
    if (!detectionActive || cases === undefined || declensions === undefined) {
        return { singular: {}, plural: {} };
    }

    const [firstCase] = Object.keys(cases);
    const entriesByNumerus = numeri.map((numerus) => {
        const endingEntries = Object.entries(declensions).flatMap(([declensionKey, declension]) => {
            const endings = declension[numerus]?.[firstCase];
            return endings === undefined
                ? []
                : endings.map((ending) => [ending, declensionKey] as const);
        });
        const shortestFirst = endingEntries.toSorted(([endingA], [endingB]) => endingA.length - endingB.length);
        return [numerus, Object.fromEntries(shortestFirst)] as const;
    });
    return fromUnionEntries(entriesByNumerus);
};

/** noun-related data of a locale; every section is optional */
export interface NounsData {
    /** grammatical cases by key; the order matters as the first key is treated as the first case */
    cases?: Record<NounCaseKey, string>;
    /** NOTE(review): presumably the names of the morphemes which conventions may define – confirm */
    morphemes?: string[];
    /** NOTE(review): not used in this module; presumably example data for the locale – confirm */
    examples?: string[];
    /** definitions of grammar tables */
    grammarTables?: GrammarTableDefinition[];
    /** metadata of the stems by key */
    stems?: Record<NounStemKey, NounStem>;
    /** noun classes by key */
    classes?: Record<NounClassKey, NounClass>;
    /** article templates per numerus and case, used to resolve the articles of a convention */
    classExample?: NounClassExample;
    /** declensions by key; referenced from words and from templates of conventions */
    declensions?: Record<NounDeclensionKey, NounDeclension>;
    /** groups of conventions by key */
    groups?: Record<NounConventionGroupKey, NounConventionGroup>;
    /** conventions by key; referenced from words and from groups */
    conventions?: Record<NounConventionKey, NounConvention>;
}

/** a concrete noun of a noun class, from which the regular words of all matching conventions can be generated */
export interface NounClassInstance {
    /** key of the noun class whose templates are applied */
    classKey: NounClassKey;
    /** concrete stem for each stem key; templates without explicit stem use the key `'default'` */
    stems: Record<NounStemKey, string>;
}

/**
 * generates the regular words of a noun class instance
 *
 * for every convention which has a template for the class of the instance, the word is built from
 * the stem of the instance and the suffix of the template. it is filed under the gender of the convention
 * for each numerus which the declension of the template defines; the same word object is used for every numerus.
 * conventions without a template for the class or whose stem is missing or empty in the instance are skipped.
 *
 * expects `nounsData.declensions` to contain every declension which a used template references.
 *
 * @param nounClassInstance instance providing the class key and the stems
 * @param nounsData noun data of the locale providing conventions and declensions
 * @returns the generated words grouped by gender and numerus
 */
export const resolveWordsFromClassInstance = (
    nounClassInstance: NounClassInstance,
    nounsData: NounsData,
): NounWords => {
    const generatedWords: NounWords = {};
    Object.entries(nounsData.conventions ?? {}).forEach(([conventionKey, convention]) => {
        const { classKey } = nounClassInstance;
        if (!Object.hasOwn(convention.templates, classKey)) {
            return;
        }
        const { stem: stemKey, suffix, declension: declensionKey } = convention.templates[classKey];
        const stem = nounClassInstance.stems[stemKey ?? 'default'];
        if (!stem) {
            return;
        }

        const generatedWord: NounWord = {
            spelling: stem + suffix,
            regularity: convention.collapsible ? 'collapsible' : 'standalone',
            convention: conventionKey,
            declension: declensionKey,
        };

        // only file the word under the numeri which its declension actually covers
        const declension = nounsData.declensions![declensionKey];
        for (const numerus of numeri) {
            if (declension[numerus]) {
                insertIntoNounWords(generatedWords, generatedWord, convention.gender, numerus);
            }
        }
    });
    return generatedWords;
};

/**
 * appends a word to the list of the given gender and numerus, creating missing levels on the way
 *
 * `words` is modified in place. levels which are absent or explicitly `undefined`
 * (both are allowed by the `Partial` types of `NounWords`) are initialised before the word is pushed.
 *
 * @param words collection to insert into
 * @param word word to append
 * @param gender gender under which the word is filed
 * @param numerus numerus under which the word is filed
 */
const insertIntoNounWords = (words: NounWords, word: NounWord, gender: Gender, numerus: Numerus) => {
    const wordsOfGender = words[gender] ??= {};
    const wordsOfNumerus = wordsOfGender[numerus] ??= [];
    wordsOfNumerus.push(word);
};

/**
 * combines existing words with the regular words generated from a noun class instance
 *
 * the given `nounWords` are not modified; the result is a deep copy of them
 * to which the generated words are appended per gender and numerus.
 *
 * @param nounWords existing words
 * @param nounClassInstance instance from which the regular words are generated
 * @param nounsData noun data of the locale
 * @returns the merged words
 */
export const addWordsFromClassInstance = (
    nounWords: NounWords,
    nounClassInstance: NounClassInstance,
    nounsData: NounsData,
): NounWords => {
    const merged = structuredClone(nounWords);
    const generated = resolveWordsFromClassInstance(nounClassInstance, nounsData);
    Object.entries(generated).forEach(([gender, generatedByNumerus]) => {
        Object.entries(generatedByNumerus).forEach(([numerus, generatedWords]) => {
            generatedWords.forEach((generatedWord) => {
                insertIntoNounWords(merged, generatedWord, gender as Gender, numerus as Numerus);
            });
        });
    });
    return merged;
};

/**
 * extracts the irregular words, i.e. the words whose `regularity` is `undefined`
 *
 * the result is a new collection with the same grouping by gender and numerus;
 * groups without any irregular word are omitted. the word objects themselves are not copied.
 *
 * @param words words to filter
 * @returns the irregular words grouped by gender and numerus
 */
export const filterIrregularWords = (words: NounWords) => {
    const irregularWords: NounWords = {};
    Object.entries(words).forEach(([gender, wordsByNumerus]) => {
        Object.entries(wordsByNumerus).forEach(([numerus, candidates]) => {
            candidates
                .filter((candidate) => candidate.regularity === undefined)
                .forEach((irregularWord) => {
                    insertIntoNounWords(irregularWords, irregularWord, gender as Gender, numerus as Numerus);
                });
        });
    });
    return irregularWords;
};