PronounsPage/server/express/pronounce.ts
2025-01-19 20:23:00 +01:00

115 lines
4.0 KiB
TypeScript

import { Polly } from '@aws-sdk/client-polly';
import { S3, NoSuchKey } from '@aws-sdk/client-s3';
import type { NodeJsClient } from '@smithy/types';
import { Router } from 'express';
import { Base64 } from 'js-base64';
import sha1 from 'sha1';
import type { PronunciationVoiceConfig, AwsPollyPronunciationVoiceConfig, NarakeetPronunciationVoiceConfig } from '../../locale/config.ts';
import { convertPronunciationStringToSsml, convertPronunciationStringToNarakeetFormat, handleErrorAsync } from '../../src/helpers.ts';
import { awsConfig, awsParams } from '../aws.ts';
/** Express router on which this module registers its pronunciation route. */
const router = Router();
/** Identifiers of the supported text-to-speech backends; these are the keys of `providers`. */
type ProviderKey = 'aws_polly' | 'narakeet';
/**
 * Contract implemented by each text-to-speech backend.
 *
 * NOTE(review): the members use method syntax. The implementations narrow `voice` to a
 * provider-specific config type, which presumably relies on TypeScript's bivariant checking of
 * method parameters — confirm before converting these to property signatures.
 */
interface Provider {
/** Converts a decoded pronunciation string into the text format that is sent to the backend. */
tokenised(text: string): string;
/** Synthesises `textTokenised` with the given voice; resolves to `[audio bytes, content type]`. */
generate(textTokenised: string, voice: PronunciationVoiceConfig): Promise<[Uint8Array, string]>;
}
/**
 * Text-to-speech backends, keyed by provider identifier.
 *
 * Each backend exposes `tokenised`, which converts a pronunciation string into the text format
 * sent to the service, and `generate`, which synthesises that text and resolves to
 * `[audio bytes, content type]`.
 */
const providers: Record<ProviderKey, Provider> = {
    aws_polly: {
        /** Converts the pronunciation string to SSML (sent to Polly with `TextType: 'ssml'`). */
        tokenised(text: string): string {
            return convertPronunciationStringToSsml(text);
        },
        /**
         * Synthesises SSML as MP3 through AWS Polly.
         *
         * @param textTokenised SSML produced by `tokenised`.
         * @param voice Polly voice settings (`language`, `voice`, optional `engine`).
         * @returns The audio bytes and the content type reported by Polly.
         * @throws Error when Polly responds without an audio stream.
         */
        async generate(textTokenised: string, voice: AwsPollyPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            const polly = new Polly(awsConfig) as NodeJsClient<Polly>;
            const pollyResponse = await polly.synthesizeSpeech({
                TextType: 'ssml',
                Text: textTokenised,
                OutputFormat: 'mp3',
                LanguageCode: voice.language,
                VoiceId: voice.voice,
                Engine: voice.engine || 'standard',
            });
            // Fail with a descriptive error instead of a TypeError on a missing stream.
            if (!pollyResponse.AudioStream) {
                throw new Error('AWS Polly returned no audio stream');
            }
            return [
                await pollyResponse.AudioStream.transformToByteArray(),
                // MP3 output was requested, so fall back to the MP3 media type if none is reported.
                pollyResponse.ContentType ?? 'audio/mpeg',
            ];
        },
    },
    narakeet: {
        /** Converts the pronunciation string to the plain-text format posted to Narakeet. */
        tokenised(text: string): string {
            return convertPronunciationStringToNarakeetFormat(text);
        },
        /**
         * Synthesises text as M4A through the Narakeet HTTP API.
         *
         * @param textTokenised Text produced by `tokenised`, sent as the request body.
         * @param voice Narakeet voice settings; `voice.voice` is passed as the `voice` query parameter.
         * @returns The audio bytes and the content type reported by the API.
         * @throws Error when `NARAKEET_API_KEY` is not set or the API answers with a non-2xx status.
         */
        async generate(textTokenised: string, voice: NarakeetPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            // Without a key the request could only fail remotely; fail early with a clear message.
            const apiKey = process.env.NARAKEET_API_KEY;
            if (!apiKey) {
                throw new Error('NARAKEET_API_KEY is not configured');
            }
            // Encode the voice name so that reserved characters cannot alter the query string.
            const url = `https://api.narakeet.com/text-to-speech/m4a?voice=${encodeURIComponent(voice.voice)}`;
            const response = await fetch(url, {
                method: 'POST',
                headers: {
                    'accept': 'application/octet-stream',
                    'x-api-key': apiKey,
                    'content-type': 'text/plain',
                },
                body: textTokenised,
            });
            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }
            return [
                new Uint8Array(await response.arrayBuffer()),
                // Fall back to the generic binary type (the one requested via `accept`).
                response.headers.get('content-type') ?? 'application/octet-stream',
            ];
        },
    },
};
/**
 * GET /pronounce/:voice/:pronunciation
 *
 * Serves audio for a pronunciation string. `:pronunciation` is the Base64-encoded text and
 * `:voice` names an entry of `global.config.pronunciation.voices`.
 *
 * The audio is cached in S3 under a key derived from the locale, the voice name and the SHA-1 of
 * the tokenised text. On a cache hit the stored object is streamed to the client; on a miss
 * (`NoSuchKey`) the audio is generated by the voice's provider, stored in S3 and then returned.
 *
 * Responds with 404 when the decoded text is empty or longer than 256 characters, or when the
 * voice is not configured. Any other failure is thrown to `handleErrorAsync`.
 */
router.get('/pronounce/:voice/:pronunciation', handleErrorAsync(async (req, res) => {
    const text = Base64.decode(req.params.pronunciation);
    if (!text || text.length > 256) {
        return res.status(404).json({ error: 'Not found' });
    }

    // The voice name comes from the URL: accept it only if it is an own key of the configured
    // voices, so that inherited members such as `constructor` are never mistaken for a voice.
    const voices = global.config.pronunciation?.voices;
    const voice: PronunciationVoiceConfig | undefined =
        voices != null && Object.prototype.hasOwnProperty.call(voices, req.params.voice)
            ? voices[req.params.voice]
            : undefined;
    if (!voice) {
        return res.status(404).json({ error: 'Not found' });
    }

    // Voices without an explicit provider use AWS Polly. Reject unknown providers with a clear
    // error rather than failing later with a TypeError on `undefined`.
    const providerKey = voice.provider || 'aws_polly';
    const provider = Object.prototype.hasOwnProperty.call(providers, providerKey)
        ? providers[providerKey as ProviderKey]
        : undefined;
    if (!provider) {
        throw new Error(`Unknown pronunciation provider: ${providerKey}`);
    }

    const s3 = new S3(awsConfig) as NodeJsClient<S3>;
    const tokenised = provider.tokenised(text);
    const key = `pronunciation/${global.config.locale}-${req.params.voice}/${sha1(tokenised)}.mp3`;

    try {
        const s3Response = await s3.getObject({ Key: key, ...awsParams });
        const body = s3Response.Body;
        if (!body) {
            throw new Error(`S3 returned no body for ${key}`);
        }
        // Only forward a content type that S3 actually reported.
        if (s3Response.ContentType) {
            res.set('content-type', s3Response.ContentType);
        }
        // `pipe` neither forwards source errors nor closes the destination: without this
        // listener a failing S3 stream is an unhandled 'error' event and the response hangs.
        body.on('error', (streamError) => {
            res.destroy(streamError);
        });
        return body.pipe(res);
    } catch (error) {
        // Only a missing cache entry triggers generation; everything else is a real failure.
        if (!(error instanceof NoSuchKey)) {
            throw error;
        }

        const [buffer, contentType] = await provider.generate(tokenised, voice);

        // Store the audio before responding so later requests are served from the cache.
        await s3.putObject({
            Key: key,
            Body: buffer,
            ContentType: contentType,
            ...awsParams,
        });

        res.set('content-type', contentType);
        res.write(buffer);
        return res.end();
    }
}));

export default router;