mirror of
https://gitlab.com/PronounsPage/PronounsPage.git
synced 2025-09-27 06:52:35 -04:00
115 lines
4.0 KiB
TypeScript
115 lines
4.0 KiB
TypeScript
import { Polly } from '@aws-sdk/client-polly';
|
|
import { S3, NoSuchKey } from '@aws-sdk/client-s3';
|
|
import type { NodeJsClient } from '@smithy/types';
|
|
import { Router } from 'express';
|
|
import { Base64 } from 'js-base64';
|
|
import sha1 from 'sha1';
|
|
|
|
import type { PronunciationVoiceConfig, AwsPollyPronunciationVoiceConfig, NarakeetPronunciationVoiceConfig } from '../../locale/config.ts';
|
|
import { convertPronunciationStringToSsml, convertPronunciationStringToNarakeetFormat, handleErrorAsync } from '../../src/helpers.ts';
|
|
import { awsConfig, awsParams } from '../aws.ts';
|
|
|
|
// Express router that the pronunciation endpoint in this module is registered on.
const router = Router();
|
|
|
|
/** Identifiers of the supported text-to-speech backends; these are the keys of the `providers` registry. */
type ProviderKey = 'aws_polly' | 'narakeet';
|
|
/**
 * Contract every text-to-speech backend in the `providers` registry fulfils.
 *
 * The members are declared as methods (not function-typed properties) on purpose:
 * implementations narrow the `voice` parameter to their own voice configuration type.
 */
interface Provider {
    /**
     * Turns a raw pronunciation string into the markup the backend expects.
     *
     * @param text Pronunciation string to convert.
     * @returns The backend-specific representation of `text`.
     */
    tokenised(text: string): string;

    /**
     * Synthesises speech for already tokenised text.
     *
     * @param textTokenised Output of {@link Provider.tokenised}.
     * @param voice Voice configuration to synthesise with.
     * @returns A tuple of the audio bytes and the content type to serve them with.
     */
    generate(textTokenised: string, voice: PronunciationVoiceConfig): Promise<[Uint8Array, string]>;
}
|
|
|
|
/**
 * Registry of text-to-speech backends, keyed by provider identifier.
 *
 * Each entry converts a pronunciation string into the markup its backend expects
 * (`tokenised`) and synthesises audio from that markup (`generate`), resolving to
 * the audio bytes together with the content type they should be served with.
 */
const providers: Record<ProviderKey, Provider> = {
    aws_polly: {
        /** Converts the pronunciation string with `convertPronunciationStringToSsml`. */
        tokenised(text: string): string {
            return convertPronunciationStringToSsml(text);
        },

        /**
         * Synthesises MP3 audio through Amazon Polly.
         *
         * @param textTokenised SSML text, as produced by `tokenised`.
         * @param voice Polly voice settings; `engine` falls back to `standard` when not set.
         * @returns The audio bytes and their content type.
         * @throws Error when the Polly response carries no audio stream.
         */
        async generate(textTokenised: string, voice: AwsPollyPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            const polly = new Polly(awsConfig) as NodeJsClient<Polly>;

            const { AudioStream: audioStream, ContentType: contentType } = await polly.synthesizeSpeech({
                TextType: 'ssml',
                Text: textTokenised,
                OutputFormat: 'mp3',
                LanguageCode: voice.language,
                VoiceId: voice.voice,
                Engine: voice.engine || 'standard',
            });

            // Both fields are optional in the SDK output type; fail loudly rather than
            // dereferencing `undefined`.
            if (!audioStream) {
                throw new Error('Polly response did not contain an audio stream');
            }

            return [
                await audioStream.transformToByteArray(),
                // MP3 output was requested above, so this is the matching fallback.
                contentType ?? 'audio/mpeg',
            ];
        },
    },
    narakeet: {
        /** Converts the pronunciation string with `convertPronunciationStringToNarakeetFormat`. */
        tokenised(text: string): string {
            return convertPronunciationStringToNarakeetFormat(text);
        },

        /**
         * Synthesises audio through the Narakeet HTTP API (m4a endpoint).
         *
         * @param textTokenised Text in Narakeet format, as produced by `tokenised`.
         * @param voice Narakeet voice settings; only `voice.voice` is used.
         * @returns The audio bytes and the content type reported by the API.
         * @throws Error when `NARAKEET_API_KEY` is not set or the API answers with a non-2xx status.
         */
        async generate(textTokenised: string, voice: NarakeetPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            // Without this check a missing key would be sent as the literal string "undefined".
            const apiKey = process.env.NARAKEET_API_KEY;
            if (!apiKey) {
                throw new Error('NARAKEET_API_KEY is not configured');
            }

            const url = `https://api.narakeet.com/text-to-speech/m4a?voice=${voice.voice}`;

            const response = await fetch(url, {
                method: 'POST',
                headers: {
                    'accept': 'application/octet-stream',
                    'x-api-key': apiKey,
                    'content-type': 'text/plain',
                },
                body: textTokenised,
            });

            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }

            return [
                new Uint8Array(await response.arrayBuffer()),
                // `headers.get` returns null when the header is absent; fall back to the
                // generic binary type that was requested via `accept`.
                response.headers.get('content-type') ?? 'application/octet-stream',
            ];
        },
    },
};
|
|
|
|
/**
 * GET /pronounce/:voice/:pronunciation
 *
 * Serves synthesised speech for a Base64-encoded pronunciation string.
 *
 * - `:pronunciation` is Base64-decoded; an empty result or more than 256 characters yields 404.
 * - `:voice` must name a voice configured under `global.config.pronunciation.voices`, otherwise 404.
 * - Audio is looked up in S3 under a key derived from locale, voice and the SHA-1 of the
 *   tokenised text. On a miss (`NoSuchKey`) it is generated by the voice's provider,
 *   stored under that key and then sent to the client.
 *
 * Any other failure is rethrown and left to `handleErrorAsync`.
 */
router.get('/pronounce/:voice/:pronunciation', handleErrorAsync(async (req, res) => {
    const text = Base64.decode(req.params.pronunciation);

    if (!text || text.length > 256) {
        return res.status(404).json({ error: 'Not found' });
    }

    // The voice name comes straight from the URL, so only own properties of the config
    // object are accepted; otherwise names such as `constructor` would resolve to
    // inherited members and be treated as a configured voice.
    const voices = global.config.pronunciation?.voices;
    const voiceName = req.params.voice;
    const voice: PronunciationVoiceConfig | undefined =
        voices && Object.prototype.hasOwnProperty.call(voices, voiceName)
            ? voices[voiceName]
            : undefined;
    if (!voice) {
        return res.status(404).json({ error: 'Not found' });
    }

    // Voices without an explicit provider use AWS Polly. Validate the key before the
    // lookup so a misconfigured provider fails with a clear message instead of a
    // TypeError on `undefined`.
    const providerKey = voice.provider || 'aws_polly';
    if (!Object.prototype.hasOwnProperty.call(providers, providerKey)) {
        throw new Error(`Unknown pronunciation provider: ${providerKey}`);
    }
    const provider = providers[providerKey as ProviderKey];

    const s3 = new S3(awsConfig) as NodeJsClient<S3>;

    const tokenised = provider.tokenised(text);
    // The key always ends in `.mp3`, whatever the provider returns; the content type
    // that is served comes from the stored object's metadata, not from the key.
    const key = `pronunciation/${global.config.locale}-${req.params.voice}/${sha1(tokenised)}.mp3`;

    try {
        const s3Response = await s3.getObject({ Key: key, ...awsParams });

        if (!s3Response.Body) {
            // Not a NoSuchKey, so the catch below rethrows it.
            throw new Error(`S3 object ${key} has no body`);
        }
        if (s3Response.ContentType) {
            res.set('content-type', s3Response.ContentType);
        }

        return s3Response.Body.pipe(res);
    } catch (error) {
        // Only a missing object means "not cached yet"; everything else is a real failure.
        if (!(error instanceof NoSuchKey)) {
            throw error;
        }

        const [buffer, contentType] = await provider.generate(tokenised, voice);

        await s3.putObject({
            Key: key,
            Body: buffer,
            ContentType: contentType,
            ...awsParams,
        });

        res.set('content-type', contentType);
        res.write(buffer);

        return res.end();
    }
}));
|
|
|
|
// Default export: the router carrying the `/pronounce/:voice/:pronunciation` route.
export default router;
|