PronounsPage/shared/buildPronoun.ts

405 lines
15 KiB
TypeScript

import { isEnabled } from '../locale/config.ts';
import type { Config, ConfigWithEnabled } from '../locale/config.ts';
import type { PronounData, PronounGroupData } from '../locale/data.ts';
import { Pronoun, PronounGroup } from './classes.ts';
import type { PronounLibrary, PronounUsage } from './classes.ts';
import Compressor from './compressor.ts';
import { buildDict, buildList, isEmoji, splitSlashes, unescapeControlSymbols } from './helpers.ts';
import type { Translator } from './translator.ts';
/**
 * Canonical form of a pronoun key or alias used for lookups: the key in lower case.
 *
 * @param key key or alias as written by the user or in the data files
 * @returns the lower-cased key
 */
export const normaliseKey = (key: string): string => key.toLowerCase();
/**
 * Builds a lookup table from every normalised name of a pronoun (its dictionary key
 * followed by each of its aliases) to the pronoun itself.
 * When two pronouns share a normalised name, the one processed later wins.
 *
 * @param pronouns pronouns indexed by their base key
 * @returns pronouns indexed by normalised base key and normalised aliases
 */
export const getPronounsByNormalisedAlias = (pronouns: Record<string, Pronoun>): Record<string, Pronoun> => {
    const lookup: Record<string, Pronoun> = {};
    Object.entries(pronouns).forEach(([base, pronoun]) => {
        // the base key is registered first, then the aliases in their declared order
        [base, ...pronoun.aliases].forEach((name) => {
            lookup[normaliseKey(name)] = pronoun;
        });
    });
    return lookup;
};
/**
 * Finds a pronoun by key or alias, ignoring case.
 *
 * @param pronouns pronouns indexed by their base key
 * @param key key or alias to look up
 * @returns the matching pronoun, or `undefined` when nothing is registered under that name
 */
export const getPronoun = (pronouns: Record<string, Pronoun>, key: string): Pronoun | undefined => {
    const byAlias = getPronounsByNormalisedAlias(pronouns);
    const lookupKey = normaliseKey(key);
    return byAlias[lookupKey];
};
// `#/<regex>/<flags>#` – placeholder that is only used when the key matches `<regex>` (flags are optional)
const conditionalKeyPlaceHolder = /#\/([^/]+)\/(\w+)?#/;
// `#` – placeholder that is always replaced with the key
const unconditionalKeyPlaceholder = /#/g;
/**
 * Expands a morpheme template for the given key.
 *
 * The template is a `|`-separated list of variants which are tried from left to right:
 * - a variant containing a conditional placeholder is used only if the key matches the
 *   placeholder's regular expression; that placeholder is then replaced with the key,
 * - any other variant is used immediately, with every `#` replaced with the key.
 *
 * @param key text inserted into the template (may come from user input)
 * @param template `|`-separated template variants
 * @returns the expanded variant, or the unchanged template if no variant applies
 */
const buildMorphemeFromTemplate = (key: string, template: string): string => {
    // A replacer function inserts the key verbatim. Passing the key as a replacement string
    // would make `replace` expand special patterns such as `$&`, `$1` or `$$` inside the key.
    const insertKey = (): string => key;
    for (const variant of template.split('|')) {
        const conditionalMatch = variant.match(conditionalKeyPlaceHolder);
        if (!conditionalMatch) {
            return variant.replace(unconditionalKeyPlaceholder, insertKey);
        }
        if (key.match(new RegExp(conditionalMatch[1], conditionalMatch[2]))) {
            return variant.replace(conditionalKeyPlaceHolder, insertKey);
        }
    }
    return template;
};
// Declarative description of a generated pronoun; consumed by buildPronounFromTemplate.
interface PronounTemplate {
// description handed to the Pronoun constructor unchanged
description: string;
// treated as false when omitted
normative?: boolean;
// morpheme name -> template string that is expanded with the pronoun key;
// configured morphemes that are missing here end up as null
morphemes: Record<string, string>;
// treated as false when omitted
plural?: boolean;
// treated as false when omitted
pluralHonorific?: boolean;
// treated as an empty list when omitted
aliases?: string[];
// optional history text; buildPronounFromTemplate appends `@__generator__` to it
history?: string;
// treated as false when omitted
nullPronoun?: boolean;
}
/**
 * Instantiates a pronoun from a template by expanding every morpheme template with `key`.
 *
 * @param config locale config with pronouns enabled; its morpheme list decides which morphemes are built
 * @param key canonical name of the new pronoun, also inserted into the morpheme templates
 * @param template description, flags and morpheme templates of the pronoun
 * @returns the generated pronoun
 */
const buildPronounFromTemplate = (
    config: ConfigWithEnabled<'pronouns'>,
    key: string,
    template: PronounTemplate,
): Pronoun => {
    // history always ends with the generator marker; a leading `@` is dropped when there is no history text
    const history = `${template.history || ''}@__generator__`.replace(/^@/, '');
    const isPlural = template.plural || false;
    const isPluralHonorific = template.pluralHonorific || false;

    return new Pronoun(
        config,
        key,
        template.description,
        template.normative || false,
        buildDict(function* (morphemeTemplates) {
            for (const name of config.pronouns.morphemes) {
                if (Object.hasOwn(morphemeTemplates, name)) {
                    yield [name, buildMorphemeFromTemplate(key, morphemeTemplates[name])];
                } else {
                    // morphemes without a template are not supported by this pronoun
                    yield [name, null];
                }
            }
        }, template.morphemes),
        [isPlural],
        [isPluralHonorific],
        template.aliases || [],
        history,
        false,
        null,
        null,
        null,
        false,
        template.nullPronoun || false,
    );
};
// Maximum length of a null pronoun key; buildPronoun checks it against the path without its leading `:`.
export const NULL_PRONOUNS_MAXLENGTH = 32;
/**
 * Tells whether a path chunk is the flag modifier `:<name>` for the given translation key.
 * The name is accepted both as `translator.translate(key)` and as `translator.get(key, false, true)`,
 * so that existing links keep working if the key gets translated.
 *
 * @param chunk path chunk to test
 * @param key translation key holding the modifier name
 * @param translator translator used to resolve the modifier name
 * @returns `true` if the chunk equals one of the two modifier spellings
 */
const isModifier = (chunk: string, key: string, translator: Translator): boolean => {
    if (chunk === `:${translator.translate(key)}`) {
        return true;
    }
    // fall back to the second lookup only when the first spelling did not match
    return chunk === `:${translator.get(key, false, true)}`;
};
/**
 * Reads the value of a `:<name>=<value>` modifier chunk for the given translation key.
 * The name is accepted both as `translator.translate(key)` and as `translator.get(key, false, true)`,
 * so that existing links keep working if the key gets translated; the first spelling has priority.
 *
 * @param chunk path chunk to inspect
 * @param key translation key holding the modifier name
 * @param translator translator used to resolve the modifier name
 * @returns the text after the `=`, or `null` if the chunk is not this modifier
 */
const extractModifierValue = (chunk: string, key: string, translator: Translator): string | null => {
    const firstPrefix = `:${translator.translate(key)}=`;
    const secondPrefix = `:${translator.get(key, false, true)}=`;
    const matchedPrefix = [firstPrefix, secondPrefix].find((prefix) => chunk.startsWith(prefix));
    return matchedPrefix === undefined ? null : chunk.slice(matchedPrefix.length);
};
/**
 * Builds a custom pronoun from a slash-separated path of morphemes and `:modifier` chunks.
 *
 * Chunks starting with `:` are modifiers (plural flag, plural-honorific flag or description);
 * unrecognised modifiers are ignored. Every other chunk is a morpheme value, where `~` means
 * "morpheme not supported" (null) and a single space means an empty string.
 *
 * @param config locale config with pronouns enabled
 * @param path raw path to parse
 * @param translator translator used to resolve the modifier names
 * @returns the pronoun, or `null` if the generator is disabled, the description is too long
 *          or the number of morpheme chunks does not match the configured slash morphemes
 */
const buildPronounFromSlashes = (
    config: ConfigWithEnabled<'pronouns'>,
    path: string,
    translator: Translator,
): Pronoun | null => {
    if (!config.pronouns.generator.enabled) {
        return null;
    }

    let isPlural = false;
    let isPluralHonorific = false;
    let description = '';
    const morphemeValues: (string | null)[] = [];

    for (const chunk of splitSlashes(path)) {
        if (!chunk.startsWith(':')) {
            // plain chunk: a morpheme value
            if (chunk === '~') {
                morphemeValues.push(null);
            } else if (chunk === ' ') {
                morphemeValues.push('');
            } else {
                morphemeValues.push(unescapeControlSymbols(chunk));
            }
            continue;
        }

        if (config.pronouns.plurals && isModifier(chunk, 'pronouns.slashes.plural', translator)) {
            isPlural = true;
            continue;
        }
        if (config.pronouns.plurals && config.pronouns.honorifics &&
            isModifier(chunk, 'pronouns.slashes.pluralHonorific', translator)) {
            isPluralHonorific = true;
            continue;
        }

        const descriptionValue = extractModifierValue(chunk, 'pronouns.slashes.description', translator);
        if (descriptionValue) {
            description = unescapeControlSymbols(descriptionValue)!;
        }
    }

    if (description.length > Pronoun.DESCRIPTION_MAXLENGTH) {
        return null;
    }

    // `slashes: true` means that all configured morphemes are expected in the path
    const slashesSetting = config.pronouns.generator.slashes;
    const slashMorphemes = slashesSetting === true ? config.pronouns.morphemes : slashesSetting;
    if (!slashMorphemes || morphemeValues.length !== slashMorphemes.length) {
        return null;
    }

    return new Pronoun(
        config,
        `${morphemeValues[0]}/${morphemeValues[1]}`,
        description,
        false,
        buildDict(function* () {
            for (const morpheme of config.pronouns.morphemes) {
                const position = slashMorphemes.indexOf(morpheme);
                // morphemes that are not part of the slash format stay unsupported
                yield [morpheme, position === -1 ? null : morphemeValues[position]];
            }
        }),
        [isPlural],
        [isPluralHonorific],
        [],
        '__generator__',
        false,
    );
};
/**
 * Resolves a URL path to a pronoun.
 *
 * The path is tried, in this order, as:
 * 1. a known pronoun key/alias (several keys joined with `&` are merged), optionally followed
 *    by comma-separated values that are uncompressed against the base pronoun,
 * 2. an emoji pronoun (if the emoji template is configured),
 * 3. a null pronoun written as `:<key>` (if the null template is configured),
 * 4. a slash-separated custom pronoun (if the slash generator is not disabled).
 *
 * @param pronouns known pronouns indexed by their base key
 * @param path path to resolve; a configured sentence prefix is stripped first
 * @param config locale config
 * @param translator translator used for generated descriptions and modifier names
 * @returns the resolved pronoun, or `null` if the path is empty, pronouns are disabled
 *          or nothing matches
 */
export const buildPronoun = (
    pronouns: Record<string, Pronoun>,
    path: string | null,
    config: Config,
    translator: Translator,
): Pronoun | null => {
    if (!path || !isEnabled(config, 'pronouns')) {
        return null;
    }

    // drop the first matching sentence prefix from the path
    for (const prefix of config.pronouns.sentence ? config.pronouns.sentence.prefixes : []) {
        if (`/${path}`.startsWith(`${prefix}/`)) {
            path = path.substring(prefix.length);
            break;
        }
    }

    const pronounsByNormalisedAlias = getPronounsByNormalisedAlias(pronouns);

    let pronounStr: (string | null)[] = path.split(',');

    // the first comma-separated chunk names the base pronoun; `&` merges several known pronouns,
    // unknown keys after the first known one are skipped
    let base: Pronoun | null | undefined = null;
    for (const key of pronounStr[0]!.split('&')) {
        const normalisedKey = normaliseKey(key);
        if (!base) {
            base = pronounsByNormalisedAlias[normalisedKey] ?? null;
        } else if (pronounsByNormalisedAlias[normalisedKey]) {
            base = base.merge(pronounsByNormalisedAlias[normalisedKey]);
        }
    }
    let baseArray = base ? base.toArray() : null;

    // i know, it's ugly… didn't think about BC much and now it's a huge mess…
    // number of values the path stands for: a `!N` chunk counts as N values, any other chunk as one
    const pronounStrLen = pronounStr.map((x) => x!.startsWith('!') ? parseInt(x!.substring(1)) : 1).reduce((c, a) => c + a, 0);
    if (config.locale === 'de') {
        // only migrate the four original morphemes as the generator has not supported more morphemes
        const oldMorphemeVersions = [
            ['pronoun_n', 'pronoun_d', 'pronoun_a', 'possessive_determiner_m_n'],
            ['pronoun_n', 'pronoun_d', 'pronoun_a', 5, 'possessive_determiner_m_n', 15]
                .flatMap((morphemeOrIgnoredCount) => {
                    if (typeof morphemeOrIgnoredCount === 'string') {
                        return [morphemeOrIgnoredCount];
                    }
                    // a number stands for that many ignored positions
                    return new Array(morphemeOrIgnoredCount).fill(null);
                }),
        ];
        for (const oldMorphemeVersion of oldMorphemeVersions) {
            if (pronounStrLen === oldMorphemeVersion.length + 2) {
                // base values laid out like the old version, followed by the last two entries of the base array
                const baseArrayWithDowngradedMorphemes = oldMorphemeVersion.map((morpheme) => {
                    if (morpheme === null || !base) {
                        return null;
                    }
                    return base.morphemes[morpheme];
                }).concat(baseArray ? baseArray.slice(baseArray.length - 2) : ['0', '']);
                const uncompressed = Compressor.uncompress(pronounStr, baseArrayWithDowngradedMorphemes, config.locale);
                // map the old layout onto the current morpheme list; morphemes unknown to the old layout become null
                pronounStr = config.pronouns.morphemes.map((morpheme) => {
                    const index = oldMorphemeVersion.indexOf(morpheme);
                    if (index >= 0) {
                        return uncompressed[index];
                    }
                    return null;
                }).concat(uncompressed.slice(uncompressed.length - 2));
                break;
            }
        }
    } else if (config.locale === 'pl' && baseArray && pronounStrLen < 31) {
        // shorter (older) paths: shrink the base array step by step before uncompressing
        baseArray.splice(baseArray.length - 10, 1);
        if (pronounStrLen < 30) {
            baseArray = [
                ...baseArray.slice(0, 4),
                baseArray[5],
                baseArray[8],
                ...baseArray.slice(11),
            ];
        }
        // The most specific threshold has to be tested first: in the previous order
        // (`< 24`, then `< 23`, then `< 22`) the last two branches could never run.
        if (pronounStrLen < 22) {
            baseArray.splice(8, 1);
            baseArray.splice(8, 1);
            baseArray.splice(2, 1);
        } else if (pronounStrLen < 23) {
            baseArray.splice(8, 1);
            baseArray.splice(2, 1);
        } else if (pronounStrLen < 24) {
            baseArray.splice(2, 1);
        }
    }

    // a single chunk is just the base pronoun; otherwise the values are uncompressed against the base
    let pronoun = pronounStr.length === 1
        ? base
        : Pronoun.from(Compressor.uncompress(pronounStr, baseArray, config.locale), config);

    if (!pronoun && config.pronouns.emoji !== false && isEmoji(path)) {
        pronoun = buildPronounFromTemplate(config, path, config.pronouns.emoji);
    }

    // `:<key>` – null pronoun; the key (without the colon) may be at most NULL_PRONOUNS_MAXLENGTH long
    if (!pronoun && config.pronouns.null && config.pronouns.null.morphemes && path.startsWith(':') &&
        path.length <= NULL_PRONOUNS_MAXLENGTH + 1) {
        pronoun = buildPronounFromTemplate(config, path.substring(1), {
            description: config.pronouns.null.routes.map((variant) => {
                return `{/${variant}=${headerForVariant('null', variant, translator)}}`;
            }).join(' / '),
            history: translator.translate('pronouns.null.description'),
            morphemes: config.pronouns.null.morphemes,
            nullPronoun: true,
        });
    }

    if (!pronoun && config.pronouns.generator.slashes !== false) {
        return buildPronounFromSlashes(config, path, translator);
    }

    return pronoun;
};
/**
 * Resolves a path to a pronoun usage: one of the special routes (null, mirror, any, ask)
 * or an actual pronoun built by `buildPronoun`.
 *
 * Special routes are compared against the normalised path and produce only a short label;
 * an actual pronoun is built from the path as given and is returned together with its name options.
 *
 * @param pronounLibrary library providing the known pronouns
 * @param path path to resolve
 * @param config locale config
 * @param translator translator used for the short labels
 * @returns the usage, or `null` if the path matches nothing
 */
export const buildPronounUsage = (
    pronounLibrary: PronounLibrary,
    path: string,
    config: Config,
    translator: Translator,
): PronounUsage | null => {
    const normalisedPath = normaliseKey(path);

    const nullConfig = config.pronouns.null;
    if (nullConfig && nullConfig.routes?.includes(normalisedPath)) {
        return { short: { options: [shortForVariant('null', normalisedPath, translator)] } };
    }

    const mirrorConfig = config.pronouns.mirror;
    if (mirrorConfig && mirrorConfig.route === normalisedPath) {
        const mirrorShortKey = 'pronouns.mirror.short';
        // without a dedicated translation the route itself is used as the label
        const short = !translator.has(mirrorShortKey)
            ? normalisedPath
            : translator.translate(mirrorShortKey);
        return { short: { options: [short] } };
    }

    const anyRoute = config.pronouns.any;
    if (anyRoute) {
        if (anyRoute === normalisedPath) {
            return { short: { options: [translator.translate('pronouns.any.short')] } };
        }
        // `<any>:<key>` – "any" restricted to an entry of the library
        const anyPrefix = `${anyRoute}:`;
        const merged = normalisedPath.startsWith(anyPrefix)
            ? pronounLibrary.byKey()[normalisedPath.substring(anyPrefix.length)]
            : undefined;
        if (merged) {
            return { short: { options: [merged.short(translator)] } };
        }
    }

    const askConfig = config.pronouns.ask;
    if (askConfig && askConfig.routes.includes(normalisedPath)) {
        return { short: { options: [shortForVariant('ask', normalisedPath, translator)] } };
    }

    const pronoun = buildPronoun(pronounLibrary.pronouns, path, config, translator);
    if (!pronoun) {
        return null;
    }
    return { short: { options: pronoun.nameOptions(), glue: ` ${translator.translate('pronouns.or')} ` }, pronoun };
};
/**
 * Header text for a variant of the "null" or "ask" usage.
 *
 * @param usage usage the variant belongs to
 * @param variant route name of the variant
 * @param translator translator to query
 * @returns the translation of `pronouns.<usage>.header.<variant>` if it exists, otherwise the variant itself
 */
export const headerForVariant = (usage: 'null' | 'ask', variant: string, translator: Translator) => {
    const translationKey = ['pronouns', usage, 'header', variant].join('.');
    if (!translator.has(translationKey)) {
        return variant;
    }
    return translator.translate(translationKey);
};
/**
 * Short label for a variant of the "null" or "ask" usage.
 *
 * @param usage usage the variant belongs to
 * @param variant route name of the variant
 * @param translator translator to query
 * @returns the translation of `pronouns.<usage>.short.<variant>` if it exists, otherwise the variant itself
 */
export const shortForVariant = (usage: 'null' | 'ask', variant: string, translator: Translator) => {
    const translationKey = ['pronouns', usage, 'short', variant].join('.');
    if (!translator.has(translationKey)) {
        return variant;
    }
    return translator.translate(translationKey);
};
/**
 * Lists every "any pronouns" route: the bare route followed by `<route>:<key>`
 * for each key returned by the library's `byKey()`.
 *
 * @param config locale config
 * @param pronounLibrary library whose `byKey()` keys are appended
 * @returns the routes, or an empty list if the "any" route is not configured
 */
export const buildAnyPronounsList = (config: Config, pronounLibrary: PronounLibrary): string[] => {
    const anyRoute = config.pronouns.any;
    if (!anyRoute) {
        return [];
    }
    const routes: string[] = [anyRoute];
    for (const key of Object.keys(pronounLibrary.byKey())) {
        routes.push(`${anyRoute}:${key}`);
    }
    return routes;
};
/**
 * Converts raw pronoun rows into `Pronoun` objects indexed by their canonical key.
 *
 * The `key` column lists the canonical key followed by aliases, separated by `,` or `،`.
 *
 * @param config locale config; nothing is parsed unless pronouns are enabled
 * @param pronounsRaw raw pronoun rows
 * @returns pronouns indexed by canonical key (empty if pronouns are disabled)
 */
export const parsePronouns = (
    config: Config,
    pronounsRaw: PronounData<string>[],
): Record<string, Pronoun> => {
    if (!isEnabled(config, 'pronouns')) {
        return {};
    }

    return buildDict(function* () {
        for (const row of pronounsRaw) {
            const [canonicalKey, ...otherAliases] = row.key.split(/[،,]/);
            const pronoun = new Pronoun(
                config,
                canonicalKey,
                row.description,
                row.normative,
                buildDict(function* () {
                    for (const morpheme of config.pronouns.morphemes) {
                        const cell = row[morpheme];
                        // empty cells are parsed as null in dynamic parse mode,
                        // but most of the time an empty string is intended;
                        // to really describe that a pronoun does not support a morpheme,
                        // tilde is used to describe null as in yaml.
                        const value = cell === null
                            ? ''
                            : cell === '~' ? null : cell;
                        yield [morpheme, value];
                    }
                }),
                [row.plural],
                [row.pluralHonorific],
                otherAliases,
                row.history ?? '',
                row.pronounceable,
                row.thirdForm,
                row.smallForm,
                row.sourcesInfo,
                row.hidden ?? false,
            );
            yield [canonicalKey, pronoun];
        }
    });
};
/**
 * Converts raw pronoun group rows into `PronounGroup` objects.
 *
 * The `pronouns` column is a list of names separated by `,` or `،`;
 * a missing or empty column yields an empty list.
 *
 * @param pronounGroupsRaw raw pronoun group rows
 * @returns the groups in input order
 */
export const parsePronounGroups = (pronounGroupsRaw: PronounGroupData[]): PronounGroup[] => {
    return buildList(function* () {
        for (const group of pronounGroupsRaw) {
            const members = group.pronouns ? group.pronouns.split(/[،,]/) : [];
            yield new PronounGroup(
                group.name,
                members,
                group.description,
                group.key || null,
                group.hidden ?? false,
            );
        }
    });
};