import { isEnabled } from '../locale/config.ts';
import type { Config, ConfigWithEnabled } from '../locale/config.ts';
import type { PronounData, PronounGroupData } from '../locale/data.ts';

import { Pronoun, PronounGroup } from './classes.ts';
import type { PronounLibrary, PronounUsage } from './classes.ts';
import Compressor from './compressor.ts';
import { buildDict, buildList, isEmoji, splitSlashes, unescapeControlSymbols } from './helpers.ts';
import type { Translator } from './translator.ts';

/**
 * Normalises a pronoun key or alias for case-insensitive lookups.
 *
 * @param key raw key, e.g. taken from a URL path
 * @returns the lower-cased key
 */
export const normaliseKey = (key: string): string => {
    return key.toLowerCase();
};

/**
 * Builds a lookup table from every normalised canonical key and every
 * normalised alias to its pronoun.
 *
 * When two entries normalise to the same key, the one iterated last wins.
 *
 * @param pronouns pronouns indexed by their canonical key
 * @returns pronouns indexed by normalised key and normalised alias
 */
export const getPronounsByNormalisedAlias = (pronouns: Record<string, Pronoun>): Record<string, Pronoun> => {
    const pronounsByNormalisedAlias: Record<string, Pronoun> = {};
    for (const [base, pronoun] of Object.entries(pronouns)) {
        pronounsByNormalisedAlias[normaliseKey(base)] = pronoun;
        for (const alias of pronoun.aliases) {
            pronounsByNormalisedAlias[normaliseKey(alias)] = pronoun;
        }
    }
    return pronounsByNormalisedAlias;
};

/**
 * Looks up a single pronoun by canonical key or alias, ignoring case.
 *
 * The alias table is rebuilt on every call; callers doing many lookups should
 * call {@link getPronounsByNormalisedAlias} once and index into the result.
 *
 * @param pronouns pronouns indexed by their canonical key
 * @param key key or alias to look up
 * @returns the matching pronoun, or `undefined` when there is none
 */
export const getPronoun = (pronouns: Record<string, Pronoun>, key: string): Pronoun | undefined => {
    return getPronounsByNormalisedAlias(pronouns)[normaliseKey(key)];
};

// `#/<pattern>/<flags>#` – placeholder that only applies if the key matches the pattern
const conditionalKeyPlaceHolder = /#\/([^/]+)\/(\w+)?#/;
// `#` – placeholder that always applies
const unconditionalKeyPlaceholder = /#/g;

/**
 * Instantiates a morpheme template for a given key.
 *
 * The template is a `|`-separated list of variants which are tried in order:
 * - a variant containing `#/<pattern>/<flags>#` is used only if `key` matches
 *   that regular expression; its first conditional placeholder is replaced by `key`;
 * - the first variant without a conditional placeholder is used unconditionally,
 *   with every `#` replaced by `key`.
 *
 * @param key value to substitute for the placeholders
 * @param template `|`-separated variants
 * @returns the instantiated morpheme, or the unmodified template if no variant applied
 */
const buildMorphemeFromTemplate = (key: string, template: string): string => {
    const variants = template.split('|');
    for (const variant of variants) {
        const conditionalMatch = variant.match(conditionalKeyPlaceHolder);
        if (conditionalMatch) {
            if (key.match(new RegExp(conditionalMatch[1], conditionalMatch[2]))) {
                // replacer function: `key` may contain `$&`, `$1`, … which must be inserted literally
                return variant.replace(conditionalKeyPlaceHolder, () => key);
            }
        } else {
            // replacer function: see above
            return variant.replace(unconditionalKeyPlaceholder, () => key);
        }
    }
    return template;
};

/**
 * Description of a pronoun whose morphemes are generated from a key
 * (see {@link buildMorphemeFromTemplate}) rather than listed explicitly.
 */
interface PronounTemplate {
    description: string;
    normative?: boolean;
    /** morpheme name → morpheme template */
    morphemes: Record<string, string>;
    plural?: boolean;
    pluralHonorific?: boolean;
    aliases?: string[];
    history?: string;
    nullPronoun?: boolean;
}

/**
 * Creates a pronoun from a template by instantiating every morpheme template with `key`.
 *
 * Morphemes configured for the locale but absent from the template are set to `null`.
 *
 * @param config locale config with pronouns enabled
 * @param key value substituted into the morpheme templates; also used as the pronoun's key
 * @param template template to instantiate
 * @returns the generated pronoun
 */
const buildPronounFromTemplate = (
    config: ConfigWithEnabled<'pronouns'>,
    key: string,
    template: PronounTemplate,
): Pronoun => {
    return new Pronoun(
        config,
        key,
        template.description,
        template.normative || false,
        buildDict(function* (morphemes) {
            for (const m of config.pronouns.morphemes) {
                yield [m, Object.hasOwn(morphemes, m) ? buildMorphemeFromTemplate(key, morphemes[m]) : null];
            }
        }, template.morphemes),
        [template.plural || false],
        [template.pluralHonorific || false],
        template.aliases || [],
        // append the generator marker; drop the leading `@` when there is no history text
        `${template.history || ''}@__generator__`.replace(/^@/, ''),
        // the following positional arguments occupy the same slots as
        // pronounceable, thirdForm, smallForm, sourcesInfo and hidden in parsePronouns
        false,
        null,
        null,
        null,
        false,
        template.nullPronoun || false,
    );
};

/** Maximum length of the key of a null pronoun (the path without the leading `:`). */
export const NULL_PRONOUNS_MAXLENGTH = 32;

/**
 * Checks whether a path chunk is the flag modifier `:<translated key>`.
 *
 * @param chunk path chunk to test
 * @param key translation key naming the modifier
 * @param translator translator providing the modifier names
 * @returns whether the chunk is the given modifier
 */
const isModifier = (chunk: string, key: string, translator: Translator): boolean => {
    // use both locale and base translations to ensure backwards compatibility if key gets translated
    return chunk === `:${translator.translate(key)}` || chunk === `:${translator.get(key, false, true)}`;
};

/**
 * Extracts the value of a `:<translated key>=<value>` modifier chunk.
 *
 * @param chunk path chunk to parse
 * @param key translation key naming the modifier
 * @param translator translator providing the modifier names
 * @returns the text after `=`, or `null` if the chunk is not this modifier
 */
const extractModifierValue = (chunk: string, key: string, translator: Translator): string | null => {
    // use both locale and base translations to ensure backwards compatibility if key gets translated
    const prefixes = [`:${translator.translate(key)}=`, `:${translator.get(key, false, true)}=`];
    for (const prefix of prefixes) {
        if (chunk.startsWith(prefix)) {
            return chunk.substring(prefix.length);
        }
    }
    return null;
};

/**
 * Builds a custom pronoun from a slash-separated path.
 *
 * Chunks starting with `:` are modifiers (plural, plural honorific, description);
 * all other chunks are morphemes, where `~` stands for an unsupported morpheme (`null`)
 * and a single space stands for an empty morpheme.
 *
 * @param config locale config with pronouns enabled
 * @param path path as split by `splitSlashes`
 * @param translator translator providing the modifier names
 * @returns the pronoun, or `null` if the generator is disabled, the description is
 *     too long, or the number of morphemes does not match the configured slash morphemes
 */
const buildPronounFromSlashes = (
    config: ConfigWithEnabled<'pronouns'>,
    path: string,
    translator: Translator,
): Pronoun | null => {
    if (!config.pronouns.generator.enabled) {
        return null;
    }

    const chunks = splitSlashes(path);
    let plural = false;
    let pluralHonorific = false;
    let description = '';
    const morphemeChunks: (string | null)[] = [];
    for (const chunk of chunks) {
        if (chunk.startsWith(':')) {
            if (config.pronouns.plurals && isModifier(chunk, 'pronouns.slashes.plural', translator)) {
                plural = true;
            } else if (config.pronouns.plurals && config.pronouns.honorifics &&
                isModifier(chunk, 'pronouns.slashes.pluralHonorific', translator)) {
                pluralHonorific = true;
            } else {
                // unknown modifiers and empty descriptions are silently ignored
                const descriptionModifierValue =
                    extractModifierValue(chunk, 'pronouns.slashes.description', translator);
                if (descriptionModifierValue) {
                    description = unescapeControlSymbols(descriptionModifierValue)!;
                }
            }
        } else {
            if (chunk === '~') {
                morphemeChunks.push(null);
            } else if (chunk === ' ') {
                morphemeChunks.push('');
            } else {
                morphemeChunks.push(unescapeControlSymbols(chunk));
            }
        }
    }
    if (description.length > Pronoun.DESCRIPTION_MAXLENGTH) {
        return null;
    }

    // `slashes: true` means that every configured morpheme is given in the path
    const slashMorphemes = config.pronouns.generator.slashes === true
        ? config.pronouns.morphemes
        : config.pronouns.generator.slashes;
    if (slashMorphemes && morphemeChunks.length === slashMorphemes.length) {
        return new Pronoun(
            config,
            `${morphemeChunks[0]}/${morphemeChunks[1]}`,
            description,
            false,
            buildDict(function* () {
                for (const m of config.pronouns.morphemes) {
                    const index = slashMorphemes.indexOf(m);
                    yield [m, index === -1 ? null : morphemeChunks[index]];
                }
            }),
            [plural],
            [pluralHonorific],
            [],
            '__generator__',
            false,
        );
    }
    return null;
};

/**
 * Resolves a path to a pronoun.
 *
 * The following strategies are tried in order:
 * 1. known pronouns: one key/alias or several joined with `&` (merged), optionally
 *    followed by comma-separated overrides that are uncompressed against the base;
 * 2. emoji pronouns (if `config.pronouns.emoji` is set and the path is an emoji);
 * 3. null pronouns (path starting with `:`, if configured);
 * 4. slash-separated custom pronouns (if the generator supports slashes).
 *
 * @param pronouns known pronouns indexed by their canonical key
 * @param path path to resolve; a configured sentence prefix is stripped first
 * @param config locale config
 * @param translator translator for the locale
 * @returns the pronoun, or `null` if the path is empty, pronouns are disabled,
 *     or no strategy matched
 */
export const buildPronoun = (
    pronouns: Record<string, Pronoun>,
    path: string | null,
    config: Config,
    translator: Translator,
): Pronoun | null => {
    if (!path || !isEnabled(config, 'pronouns')) {
        return null;
    }

    for (const prefix of config.pronouns.sentence ? config.pronouns.sentence.prefixes : []) {
        if (`/${path}`.startsWith(`${prefix}/`)) {
            path = path.substring(prefix.length);
            break;
        }
    }

    const pronounsByNormalisedAlias = getPronounsByNormalisedAlias(pronouns);

    let pronounStr: (string | null)[] = path.split(',');

    // the first comma-separated element names the base pronoun(s); unknown keys are skipped
    let base: Pronoun | null | undefined = null;
    for (const key of pronounStr[0]!.split('&')) {
        const normalisedKey = normaliseKey(key);
        if (!base) {
            base = pronounsByNormalisedAlias[normalisedKey] ?? null;
        } else if (pronounsByNormalisedAlias[normalisedKey]) {
            base = base.merge(pronounsByNormalisedAlias[normalisedKey]);
        }
    }
    let baseArray = base ? base.toArray() : null;
    // i know, it's ugly… didn't think about BC much and now it's a huge mess…
    // an element `!<n>` counts as n entries, every other element counts as one
    const pronounStrLen = pronounStr
        .map((x) => x!.startsWith('!') ? parseInt(x!.substring(1)) : 1)
        .reduce((c, a) => c + a, 0);
    if (config.locale === 'de') {
        // only migrate the four original morphemes as the generator has not supported more morphemes
        const oldMorphemeVersions = [
            ['pronoun_n', 'pronoun_d', 'pronoun_a', 'possessive_determiner_m_n'],
            // numbers stand for that many ignored (null) positions
            ['pronoun_n', 'pronoun_d', 'pronoun_a', 5, 'possessive_determiner_m_n', 15]
                .flatMap((morphemeOrIgnoredCount) => {
                    if (typeof morphemeOrIgnoredCount === 'string') {
                        return [morphemeOrIgnoredCount];
                    }
                    return new Array(morphemeOrIgnoredCount).fill(null);
                }),
        ];
        for (const oldMorphemeVersion of oldMorphemeVersions) {
            // two further entries follow the morphemes (the last two entries of the base array)
            if (pronounStrLen === oldMorphemeVersion.length + 2) {
                const baseArrayWithDowngradedMorphemes = oldMorphemeVersion.map((morpheme) => {
                    if (morpheme === null || !base) {
                        return null;
                    }
                    return base.morphemes[morpheme];
                }).concat(baseArray ? baseArray.slice(baseArray.length - 2) : ['0', '']);
                const uncompressed = Compressor.uncompress(pronounStr, baseArrayWithDowngradedMorphemes, config.locale);
                // re-map the old layout onto the currently configured morphemes
                pronounStr = config.pronouns.morphemes.map((morpheme) => {
                    const index = oldMorphemeVersion.indexOf(morpheme);
                    if (index >= 0) {
                        return uncompressed[index];
                    }
                    return null;
                }).concat(uncompressed.slice(uncompressed.length - 2));
                break;
            }
        }
    } else if (config.locale === 'pl' && baseArray && pronounStrLen < 31) {
        // shrink the base array to match shorter (presumably older) path layouts
        baseArray.splice(baseArray.length - 10, 1);
        if (pronounStrLen < 30) {
            baseArray = [
                ...baseArray.slice(0, 4),
                baseArray[5],
                baseArray[8],
                ...baseArray.slice(11),
            ];
        }
        // NOTE(review): the two `else if` branches below are unreachable, as every length
        // below 23 or 22 is already handled by `< 24`. The conditions were possibly meant
        // to be checked in ascending order – confirm against the legacy layouts before changing.
        if (pronounStrLen < 24) {
            baseArray.splice(2, 1);
        } else if (pronounStrLen < 23) {
            baseArray.splice(8, 1);
            baseArray.splice(2, 1);
        } else if (pronounStrLen < 22) {
            baseArray.splice(8, 1);
            baseArray.splice(8, 1);
            baseArray.splice(2, 1);
        }
    }

    let pronoun = pronounStr.length === 1
        ? base
        : Pronoun.from(Compressor.uncompress(pronounStr, baseArray, config.locale), config);

    if (!pronoun && config.pronouns.emoji !== false && isEmoji(path)) {
        pronoun = buildPronounFromTemplate(config, path, config.pronouns.emoji);
    }

    // `+ 1` accounts for the leading `:`
    if (!pronoun && config.pronouns.null && config.pronouns.null.morphemes && path.startsWith(':') &&
        path.length <= NULL_PRONOUNS_MAXLENGTH + 1) {
        pronoun = buildPronounFromTemplate(config, path.substring(1), {
            description: config.pronouns.null.routes.map((variant) => {
                return `{/${variant}=${headerForVariant('null', variant, translator)}}`;
            }).join(' / '),
            history: translator.translate('pronouns.null.description'),
            morphemes: config.pronouns.null.morphemes,
            nullPronoun: true,
        });
    }

    if (!pronoun && config.pronouns.generator.slashes !== false) {
        return buildPronounFromSlashes(config, path, translator);
    }

    return pronoun;
};

/**
 * Resolves a path to a pronoun usage, i.e. a short label and, where applicable, a pronoun.
 *
 * The special routes (null, mirror, any, ask) are checked against the normalised path
 * and yield a label only; every other path is resolved with {@link buildPronoun}.
 *
 * @param pronounLibrary library of the known pronouns
 * @param path path to resolve
 * @param config locale config
 * @param translator translator for the locale
 * @returns the usage, or `null` if the path could not be resolved
 */
export const buildPronounUsage = (
    pronounLibrary: PronounLibrary,
    path: string,
    config: Config,
    translator: Translator,
): PronounUsage | null => {
    const normalisedPath = normaliseKey(path);

    if (config.pronouns.null && config.pronouns.null.routes?.includes(normalisedPath)) {
        return { short: { options: [shortForVariant('null', normalisedPath, translator)] } };
    }
    if (config.pronouns.mirror && config.pronouns.mirror.route === normalisedPath) {
        const specificTranslationKey = 'pronouns.mirror.short';
        const short = translator.has(specificTranslationKey)
            ? translator.translate(specificTranslationKey)
            : normalisedPath;
        return { short: { options: [short] } };
    }
    if (config.pronouns.any) {
        if (config.pronouns.any === normalisedPath) {
            return { short: { options: [translator.translate('pronouns.any.short')] } };
        }
        // `<any>:<key>` – looked up by key in the library
        const prefix = `${config.pronouns.any}:`;
        if (normalisedPath.startsWith(prefix)) {
            const merged = pronounLibrary.byKey()[normalisedPath.substring(prefix.length)];
            if (merged) {
                return { short: { options: [merged.short(translator)] } };
            }
        }
    }
    if (config.pronouns.ask && config.pronouns.ask.routes.includes(normalisedPath)) {
        return { short: { options: [shortForVariant('ask', normalisedPath, translator)] } };
    }

    const pronoun = buildPronoun(pronounLibrary.pronouns, path, config, translator);
    if (pronoun) {
        return { short: { options: pronoun.nameOptions(), glue: ` ${translator.translate('pronouns.or')} ` }, pronoun };
    }
    return null;
};

/**
 * Gets the header for a route variant of a special usage.
 *
 * @param usage special usage the variant belongs to
 * @param variant route of the variant
 * @param translator translator for the locale
 * @returns the translation of `pronouns.<usage>.header.<variant>`, or the variant itself
 *     if there is no such translation
 */
export const headerForVariant = (usage: 'null' | 'ask', variant: string, translator: Translator) => {
    const specificTranslationKey = `pronouns.${usage}.header.${variant}`;
    return translator.has(specificTranslationKey) ? translator.translate(specificTranslationKey) : variant;
};

/**
 * Gets the short label for a route variant of a special usage.
 *
 * @param usage special usage the variant belongs to
 * @param variant route of the variant
 * @param translator translator for the locale
 * @returns the translation of `pronouns.<usage>.short.<variant>`, or the variant itself
 *     if there is no such translation
 */
export const shortForVariant = (usage: 'null' | 'ask', variant: string, translator: Translator) => {
    const specificTranslationKey = `pronouns.${usage}.short.${variant}`;
    return translator.has(specificTranslationKey) ? translator.translate(specificTranslationKey) : variant;
};

/**
 * Lists all routes for “any pronouns”: the plain route followed by `<route>:<key>`
 * for every key in the library.
 *
 * @param config locale config
 * @param pronounLibrary library of the known pronouns
 * @returns the routes, or an empty list if “any pronouns” is not configured
 */
export const buildAnyPronounsList = (config: Config, pronounLibrary: PronounLibrary): string[] => {
    if (!config.pronouns.any) {
        return [];
    }
    return [
        config.pronouns.any,
        ...Object.keys(pronounLibrary.byKey()).map((key) => `${config.pronouns.any}:${key}`),
    ];
};

/**
 * Converts raw pronoun rows into pronouns.
 *
 * The `key` column of a row is a comma-separated list (the Arabic comma `،` is accepted
 * too): the first entry is the canonical key, the remaining entries are aliases.
 *
 * @param config locale config
 * @param pronounsRaw raw rows
 * @returns pronouns indexed by their canonical key; empty if pronouns are disabled
 */
export const parsePronouns = (
    config: Config,
    pronounsRaw: PronounData[],
): Record<string, Pronoun> => {
    if (!isEnabled(config, 'pronouns')) {
        return {};
    }

    return buildDict(function* () {
        for (const t of pronounsRaw) {
            const aliases = t.key.replace(/،/g, ',').split(',');

            yield [
                aliases[0],
                new Pronoun(
                    config,
                    aliases[0],
                    t.description,
                    t.normative,
                    buildDict(function* () {
                        for (const morpheme of config.pronouns.morphemes) {
                            let value;
                            if (t[morpheme] === null) {
                                // empty cells are parsed as null in dynamic parse mode,
                                // but most of the time an empty string is intended
                                value = '';
                            } else if (t[morpheme] === '~') {
                                // to really describe that a pronoun does not support a morpheme,
                                // tilde is used to describe null as in yaml.
                                value = null;
                            } else {
                                value = t[morpheme];
                            }
                            yield [morpheme, value];
                        }
                    }),
                    [t.plural],
                    [t.pluralHonorific],
                    aliases.slice(1),
                    t.history ?? '',
                    t.pronounceable,
                    t.thirdForm,
                    t.smallForm,
                    t.sourcesInfo,
                    t.hidden ?? false,
                ),
            ];
        }
    });
};

/**
 * Converts raw pronoun group rows into pronoun groups.
 *
 * The `pronouns` column of a row is a comma-separated list of pronoun keys
 * (the Arabic comma `،` is accepted too); a missing value yields an empty list.
 *
 * @param pronounGroupsRaw raw rows
 * @returns the pronoun groups in the order of the rows
 */
export const parsePronounGroups = (pronounGroupsRaw: PronounGroupData[]): PronounGroup[] => {
    return buildList(function* () {
        for (const g of pronounGroupsRaw) {
            yield new PronounGroup(
                g.name,
                g.pronouns ? g.pronouns.replace(/،/g, ',').split(',') : [],
                g.description,
                g.key || null,
                g.hidden ?? false,
            );
        }
    });
};