import { Polly } from '@aws-sdk/client-polly';
import { S3, NoSuchKey } from '@aws-sdk/client-s3';
import type { NodeJsClient } from '@smithy/types';
import { Router } from 'express';
import { Base64 } from 'js-base64';
import sha1 from 'sha1';

import type {
    PronunciationVoiceConfig,
    AwsPollyPronunciationVoiceConfig,
    NarakeetPronunciationVoiceConfig,
} from '../../locale/config.ts';
import {
    convertPronunciationStringToSsml,
    convertPronunciationStringToNarakeetFormat,
    handleErrorAsync,
} from '../../src/helpers.ts';
import { awsConfig, awsParams } from '../aws.ts';

const router = Router();

/** Identifiers of the text-to-speech backends implemented in this module. */
type ProviderKey = 'aws_polly' | 'narakeet';

/** A text-to-speech backend able to turn a pronunciation string into audio. */
interface Provider {
    /**
     * Converts the raw pronunciation string into the markup the backend expects.
     * The result is also what gets hashed into the cache key.
     */
    tokenised(text: string): string;

    /**
     * Synthesises audio for already-tokenised text.
     *
     * @returns a tuple of the audio bytes and the content type reported by the backend.
     */
    generate(textTokenised: string, voice: PronunciationVoiceConfig): Promise<[Uint8Array, string]>;
}

const providers: Record<ProviderKey, Provider> = {
    aws_polly: {
        tokenised(text: string): string {
            return convertPronunciationStringToSsml(text);
        },

        /** Requests MP3 audio from Polly, treating the input as SSML. */
        async generate(textTokenised: string, voice: AwsPollyPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            const polly = new Polly(awsConfig) as NodeJsClient<Polly>;
            const pollyResponse = await polly.synthesizeSpeech({
                TextType: 'ssml',
                Text: textTokenised,
                OutputFormat: 'mp3',
                LanguageCode: voice.language,
                VoiceId: voice.voice,
                // Voices that do not configure an engine use the 'standard' one.
                Engine: voice.engine || 'standard',
            });

            return [
                await pollyResponse.AudioStream!.transformToByteArray(),
                pollyResponse.ContentType!,
            ];
        },
    },
    narakeet: {
        tokenised(text: string): string {
            return convertPronunciationStringToNarakeetFormat(text);
        },

        /**
         * POSTs the text to Narakeet's m4a text-to-speech endpoint, authenticating
         * with the key from the NARAKEET_API_KEY environment variable.
         *
         * @throws Error when the API answers with a non-2xx status.
         */
        async generate(textTokenised: string, voice: NarakeetPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            const url = `https://api.narakeet.com/text-to-speech/m4a?voice=${voice.voice}`;
            const response = await fetch(url, {
                method: 'POST',
                headers: {
                    'accept': 'application/octet-stream',
                    'x-api-key': process.env.NARAKEET_API_KEY!,
                    'content-type': 'text/plain',
                },
                body: textTokenised,
            });

            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }

            return [
                new Uint8Array(await response.arrayBuffer()),
                response.headers.get('content-type')!,
            ];
        },
    },
};

/** Narrows an arbitrary string to a key that `providers` actually defines. */
function isProviderKey(key: string): key is ProviderKey {
    // Own-property check so inherited names such as 'toString' are rejected.
    return Object.prototype.hasOwnProperty.call(providers, key);
}

/**
 * GET /pronounce/:voice/:pronunciation
 *
 * Serves audio for a Base64-encoded pronunciation string spoken by a configured voice.
 * Audio is looked up in S3 first; when the object does not exist it is generated
 * by the voice's provider, stored in S3 and then sent to the client.
 *
 * Responds with 404 when the decoded text is empty or longer than 256 characters,
 * or when the requested voice is not present in the configuration.
 */
router.get('/pronounce/:voice/:pronunciation', handleErrorAsync(async (req, res) => {
    const text = Base64.decode(req.params.pronunciation);

    if (!text || text.length > 256) {
        return res.status(404).json({ error: 'Not found' });
    }

    const voice: PronunciationVoiceConfig | undefined = global.config.pronunciation?.voices?.[req.params.voice];
    if (!voice) {
        return res.status(404).json({ error: 'Not found' });
    }

    // Voices without an explicit provider fall back to AWS Polly.
    const providerKey: string = voice.provider || 'aws_polly';
    if (!isProviderKey(providerKey)) {
        // A misconfigured voice is a server-side problem; fail with a clear message
        // instead of a TypeError from calling a method on undefined.
        throw new Error(`Unknown pronunciation provider: ${providerKey}`);
    }
    const provider = providers[providerKey];

    const s3 = new S3(awsConfig) as NodeJsClient<S3>;

    const tokenised = provider.tokenised(text);
    // NOTE: the key always ends in '.mp3', whichever provider produced the audio;
    // the content type stored with the object is what gets served.
    const key = `pronunciation/${global.config.locale}-${req.params.voice}/${sha1(tokenised)}.mp3`;

    try {
        const s3Response = await s3.getObject({ Key: key, ...awsParams });
        res.set('content-type', s3Response.ContentType);
        return s3Response.Body!.pipe(res);
    } catch (error) {
        // Only a missing object triggers generation; anything else is a real failure.
        if (!(error instanceof NoSuchKey)) {
            throw error;
        }

        const [buffer, contentType] = await provider.generate(tokenised, voice);

        await s3.putObject({
            Key: key,
            Body: buffer,
            ContentType: contentType,
            ...awsParams,
        });

        res.set('content-type', contentType);
        res.write(buffer);
        return res.end();
    }
}));

export default router;