mirror of
https://gitlab.com/PronounsPage/PronounsPage.git
synced 2025-09-24 05:05:20 -04:00
(refactor) decouple pronunciation API from pronouns to allow more flexible use
This commit is contained in:
parent
fa5bb609ec
commit
f82344eb0c
@ -7,7 +7,7 @@
|
||||
<small v-if="link">
|
||||
(<nuxt-link :to="'/' + pronoun.canonicalName"><Spelling escape :text="pronoun.canonicalName"/></nuxt-link>)
|
||||
</small>
|
||||
<template v-if="config.pronunciation.enabled && pronunciation && pronoun.pronounceable && example.pronounce(pronoun)">
|
||||
<template v-if="config.pronunciation.enabled && pronunciation && pronoun.pronounceable && example.toPronunciationString(pronoun)">
|
||||
<a v-for="(pLink, name) in pronunciationLinks"
|
||||
class="mr-2"
|
||||
dir="ltr"
|
||||
@ -20,8 +20,6 @@
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import { pronouns } from '../src/data';
|
||||
|
||||
export default {
|
||||
props: {
|
||||
example: { required: true },
|
||||
@ -37,32 +35,12 @@
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
pronounBase() {
|
||||
const name = this.pronoun.name();
|
||||
for (let key in pronouns) {
|
||||
if (!pronouns.hasOwnProperty(key)) { continue; }
|
||||
if (key === name) {
|
||||
return key;
|
||||
}
|
||||
for (let alias of pronouns[key].aliases) {
|
||||
if (alias === name) {
|
||||
return key;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
},
|
||||
pronounToString() {
|
||||
return this.pronounBase && pronouns[this.pronounBase].equals(this.pronoun) ? this.pronounBase : this.pronoun.toString();
|
||||
},
|
||||
pronunciationLinks() {
|
||||
const justOne = Object.keys(this.config.pronunciation.voices).length === 1;
|
||||
|
||||
const links = {};
|
||||
for (let country in this.config.pronunciation.voices) {
|
||||
if (!this.config.pronunciation.voices.hasOwnProperty(country)) { continue; }
|
||||
links[justOne ? '' : country] = `/api/pronounce/${country}/${this.pronounToString}?example=${encodeURIComponent(this.example.toString())}`;
|
||||
links[justOne ? '' : country] = `/api/pronounce/${country}/${encodeURIComponent(this.example.toPronunciationString(this.pronoun))}`;
|
||||
}
|
||||
return links;
|
||||
}
|
||||
|
@ -1,9 +1,6 @@
|
||||
import { Router } from 'express';
|
||||
import { loadTsv } from '../loader.js';
|
||||
import { buildPronoun, parsePronouns } from '../../src/buildPronoun.js';
|
||||
import { Example } from '../../src/classes.js';
|
||||
import sha1 from 'sha1';
|
||||
import { handleErrorAsync } from '../../src/helpers.js';
|
||||
import { convertPronunciationStringToSsml, handleErrorAsync } from '../../src/helpers.js';
|
||||
|
||||
import awsConfig from '../aws.js';
|
||||
import Polly from 'aws-sdk/clients/polly.js';
|
||||
@ -11,28 +8,10 @@ import S3 from 'aws-sdk/clients/s3.js';
|
||||
|
||||
const router = Router();
|
||||
|
||||
router.get('/pronounce/:voice/:pronoun*', handleErrorAsync(async (req, res) => {
|
||||
const pronounString = req.params.pronoun + req.params[0];
|
||||
const pronoun = buildPronoun(
|
||||
parsePronouns(loadTsv('pronouns/pronouns')),
|
||||
pronounString,
|
||||
);
|
||||
router.get('/pronounce/:voice/*', handleErrorAsync(async (req, res) => {
|
||||
const text = req.params[0];
|
||||
|
||||
if (!pronoun || !req.query.example) {
|
||||
return res.status(404).json({error: 'Not found'});
|
||||
}
|
||||
|
||||
let [singular, plural, isHonorific] = req.query.example.split('|');
|
||||
const example = new Example(
|
||||
Example.parse(singular),
|
||||
Example.parse(plural || singular),
|
||||
!!parseInt(isHonorific || '0'),
|
||||
)
|
||||
|
||||
const text = example.pronounce(pronoun);
|
||||
|
||||
// quick length check to avoid abuse. remove SSML tags but keep both tag value and attributes
|
||||
if (!text || text.replace(/<[^ ]+/g, '').replace('>', '').length > 256) {
|
||||
if (!text || text.length > 256) {
|
||||
return res.status(404).json({error: 'Not found'});
|
||||
}
|
||||
|
||||
@ -44,7 +23,8 @@ router.get('/pronounce/:voice/:pronoun*', handleErrorAsync(async (req, res) => {
|
||||
const s3 = new S3(awsConfig);
|
||||
const polly = new Polly(awsConfig);
|
||||
|
||||
const key = `pronunciation/${global.config.locale}-${req.params.voice}/${pronounString}/${sha1(text)}.mp3`;
|
||||
const ssml = convertPronunciationStringToSsml(text);
|
||||
const key = `pronunciation/${global.config.locale}-${req.params.voice}/${sha1(ssml)}.mp3`;
|
||||
|
||||
try {
|
||||
const s3getResponse = await s3.getObject({Key: key}).promise();
|
||||
@ -52,7 +32,7 @@ router.get('/pronounce/:voice/:pronoun*', handleErrorAsync(async (req, res) => {
|
||||
} catch {
|
||||
const pollyResponse = await polly.synthesizeSpeech({
|
||||
TextType: 'ssml',
|
||||
Text: text,
|
||||
Text: ssml,
|
||||
OutputFormat: 'mp3',
|
||||
LanguageCode: voice.language,
|
||||
VoiceId: voice.voice,
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { buildDict, buildList, capitalise } from './helpers.js';
|
||||
import { buildDict, buildList, capitalise, escapePronunciationString } from './helpers.js';
|
||||
import MORPHEMES from '../data/pronouns/morphemes.js';
|
||||
|
||||
const config = process.env.CONFIG || global.config;
|
||||
@ -55,7 +55,7 @@ export class Example {
|
||||
}).join(''));
|
||||
}
|
||||
|
||||
pronounce(pronoun) {
|
||||
toPronunciationString(pronoun) {
|
||||
let interchangable = false;
|
||||
|
||||
const buildPronunciation = m => {
|
||||
@ -68,29 +68,29 @@ export class Example {
|
||||
return pronunciation
|
||||
? (pronunciation.startsWith('=')
|
||||
? pronunciation.substring(1)
|
||||
: `<phoneme alphabet="ipa" ph="${pronunciation}">${morpheme}</phoneme>`
|
||||
: `/${pronunciation}/`
|
||||
)
|
||||
: ( config.pronunciation.ipa && morpheme
|
||||
? morpheme.split('').map(
|
||||
c => [' ', ',', '.', ':', ';', '–', '-'].includes(c)
|
||||
? c
|
||||
: `<phoneme alphabet="ipa" ph="${c}">${c}</phoneme>`
|
||||
: `/${c}/`
|
||||
).join('')
|
||||
: morpheme
|
||||
);
|
||||
}
|
||||
|
||||
const ssml = '<speak>' + this.parts(pronoun).map(part => {
|
||||
const pronunciationString = this.parts(pronoun).map(part => {
|
||||
return part.variable
|
||||
? buildPronunciation(part.str)
|
||||
: part.str;
|
||||
}).join('') + '</speak>';
|
||||
: escapePronunciationString(part.str);
|
||||
}).join('');
|
||||
|
||||
if (interchangable) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return ssml;
|
||||
return pronunciationString;
|
||||
}
|
||||
|
||||
toString() {
|
||||
|
@ -250,6 +250,49 @@ const escapeChars = {
|
||||
|
||||
/**
 * Replaces every `&`, `<`, `>` and `"` in `text` with the value that the
 * `escapeChars` table holds for that character. A character whose table
 * entry is missing (or falsy) is left unchanged.
 *
 * @param {string} text - the raw text to escape
 * @returns {string} the text with the mapped characters substituted
 */
export const escapeHtml = (text) => {
    const substitute = (character) => escapeChars[character] || character;

    return text.replace(/[&<>"]/g, substitute);
};
|
||||
|
||||
/**
 * Escapes plain text for embedding in a pronunciation string, where `/`
 * delimits phonemes and `\` is the escape character.
 *
 * @param {string} text - literal text that must not be read as phonemes
 * @returns {string} the text with every `\` and `/` prefixed by a backslash
 */
export const escapePronunciationString = (text) => {
    // Backslashes go first; otherwise the backslashes added in front of the
    // slashes would themselves be doubled.
    const backslashesEscaped = text.replaceAll('\\', '\\\\');
    const slashesEscaped = backslashesEscaped.replaceAll('/', '\\/');

    return slashesEscaped;
};
|
||||
|
||||
/**
 * Converts a pronunciation string into an SSML document.
 *
 * Syntax of a pronunciation string:
 *  - text between a pair of unescaped slashes (`/…/`) becomes the `ph`
 *    attribute of an empty `<phoneme alphabet="ipa">` element;
 *  - a backslash makes the following character literal (so `\/` is a plain
 *    slash and `\\` a plain backslash), inside and outside of phonemes;
 *  - everything else is copied to the output unchanged.
 *
 * The input is passed through `escapeHtml` before it is parsed, so markup
 * contained in the input is not emitted as SSML tags.
 *
 * Edge cases:
 *  - an opening slash without a closing one is not a phoneme: the slash and
 *    the text after it are emitted literally;
 *  - a lone backslash at the very end of the input is dropped.
 *
 * @param {string} pronunciationString - text using the syntax described above
 * @returns {string} the converted text wrapped in `<speak>…</speak>`
 */
export const convertPronunciationStringToSsml = (pronunciationString) => {
    const escapedString = escapeHtml(pronunciationString);

    let ssml = '';
    // true while the previous character was an unescaped backslash
    let isEscaped = false;
    // null outside of a /…/ section, otherwise the phonemes collected so far
    let currentPhonemes = null;

    // Route a literal character either to the output or to the open phoneme section.
    const appendLiteral = (character) => {
        if (currentPhonemes === null) {
            ssml += character;
        } else {
            currentPhonemes += character;
        }
    };

    for (const character of escapedString) {
        if (isEscaped) {
            appendLiteral(character);
            isEscaped = false;
        } else if (character === '\\') {
            isEscaped = true;
        } else if (character !== '/') {
            appendLiteral(character);
        } else if (currentPhonemes === null) {
            // opening slash
            currentPhonemes = '';
        } else {
            // closing slash
            ssml += `<phoneme alphabet="ipa" ph="${currentPhonemes}"></phoneme>`;
            currentPhonemes = null;
        }
    }

    if (currentPhonemes !== null) {
        // the section was never closed, so restore it as literal text
        ssml += `/${currentPhonemes}`;
    }

    return `<speak>${ssml}</speak>`;
};
|
||||
|
||||
export class ImmutableArray extends Array {
|
||||
sorted(a, b) {
|
||||
return new ImmutableArray(...[...this].sort(a, b));
|
||||
|
57
test/helpers.test.js
Normal file
57
test/helpers.test.js
Normal file
@ -0,0 +1,57 @@
|
||||
import { describe, expect, test } from '@jest/globals';
|
||||
|
||||
import { convertPronunciationStringToSsml, escapePronunciationString } from '../src/helpers.js';
|
||||
|
||||
// Plain text and the pronunciation string escapePronunciationString should
// turn it into; String.raw keeps the backslashes readable.
const escapingCases = [
    {
        description: 'slashes get escaped',
        text: 'w/o n/A',
        pronunciationString: String.raw`w\/o n\/A`,
    },
    {
        description: 'backslashes get escaped',
        text: String.raw`\n is the symbol for a newline, \t for a tab`,
        pronunciationString: String.raw`\\n is the symbol for a newline, \\t for a tab`,
    },
];

describe('when escaping pronunciation', () => {
    test.each(escapingCases)('$description', (testCase) => {
        const escaped = escapePronunciationString(testCase.text);
        expect(escaped).toBe(testCase.pronunciationString);
    });
});
|
||||
|
||||
// Table-driven checks for convertPronunciationStringToSsml: each case pairs a
// pronunciation string with the SSML document expected for it.
describe('when converting pronunciation', () => {
    test.each([
        {
            description: 'simple text is passed as-is',
            pronunciationString: 'text',
            ssml: '<speak>text</speak>',
        },
        {
            description: 'slashes describe IPA phonemes',
            pronunciationString: '/ðeɪ/',
            ssml: '<speak><phoneme alphabet="ipa" ph="ðeɪ"></phoneme></speak>',
        },
        {
            description: 'simple text and slashes can be combined',
            pronunciationString: '/ðeɪ/ are',
            ssml: '<speak><phoneme alphabet="ipa" ph="ðeɪ"></phoneme> are</speak>',
        },
        {
            // the remaining unescaped slash has no partner, so it stays literal
            description: 'slashes can be escaped at front',
            pronunciationString: String.raw`w\/o, n/A`,
            ssml: '<speak>w/o, n/A</speak>',
        },
        {
            // the first slash opens a section that is never closed, so it stays literal
            description: 'slashes can be escaped at back',
            pronunciationString: String.raw`w/o, n\/A`,
            ssml: '<speak>w/o, n/A</speak>',
        },
        {
            // The converter runs escapeHtml first, so the expected output carries
            // entities rather than a live <break> tag.
            // NOTE(review): assumes escapeChars maps < > " to &lt; &gt; &quot; — confirm.
            description: 'provided HTML is escaped',
            pronunciationString: '<break time="1s"/>',
            ssml: '<speak>&lt;break time=&quot;1s&quot;/&gt;</speak>',
        },
    ])('$description', ({ pronunciationString, ssml }) => {
        expect(convertPronunciationStringToSsml(pronunciationString)).toBe(ssml);
    });
});
|
Loading…
x
Reference in New Issue
Block a user