PronounsPage/server/express/pronounce.ts
Valentyne Stigloher 10180aa6a3 (refactor) use #shared alias instead of ~~/shared
the #shared alias used by Nuxt cannot be easily disabled, so to prevent breakage with jiti we make use of it
2025-08-17 18:56:02 +02:00

128 lines
4.2 KiB
TypeScript

import { Polly } from '@aws-sdk/client-polly';
import { NoSuchKey } from '@aws-sdk/client-s3';
import type { S3 } from '@aws-sdk/client-s3';
import type { NodeJsClient } from '@smithy/types';
import { Router } from 'express';
import { getH3Event } from 'h3-express';
import { Base64 } from 'js-base64';
import sha1 from 'sha1';
import { s3, awsConfig, s3BucketParams } from '../cloudServices.ts';
import {
convertPronunciationStringToSsml,
convertPronunciationStringToNarakeetFormat,
handleErrorAsync,
} from '#shared/helpers.ts';
import type {
PronunciationVoiceConfig,
AwsPollyPronunciationVoiceConfig,
NarakeetPronunciationVoiceConfig,
} from '~~/locale/config.ts';
import { getLocale, loadConfig } from '~~/server/data.ts';
// Express router that carries the pronunciation audio route registered further down in this file.
const router = Router();
// Identifiers of the supported text-to-speech backends; these are the keys of the `providers` table.
type ProviderKey = 'aws_polly' | 'narakeet';
/**
 * Contract implemented by every text-to-speech backend.
 *
 * NOTE(review): the members are method signatures, and the implementations in `providers`
 * declare `voice` with provider-specific config types. That presumably relies on method
 * parameter bivariance - confirm before converting these to property-style function types.
 */
interface Provider {
/** Converts a pronunciation string into the provider-specific input text. */
tokenised(text: string): string;
/**
 * Synthesises audio for text that has already been passed through `tokenised`.
 * Resolves to a `[audio bytes, content type]` tuple.
 */
generate(textTokenised: string, voice: PronunciationVoiceConfig): Promise<[Uint8Array, string]>;
}
/**
 * Text-to-speech backends, keyed by provider identifier.
 *
 * Each backend exposes `tokenised` (pronunciation string -> provider input text) and
 * `generate` (provider input text -> `[audio bytes, content type]`).
 */
const providers: Record<ProviderKey, Provider> = {
    aws_polly: {
        /** Delegates to `convertPronunciationStringToSsml` to build the text sent to Polly. */
        tokenised(text: string): string {
            return convertPronunciationStringToSsml(text);
        },
        /**
         * Synthesises the given SSML as MP3 through AWS Polly.
         *
         * @param textTokenised SSML text as produced by `tokenised`.
         * @param voice Polly voice settings (`language`, `voice`, optional `engine`).
         * @returns The audio bytes and the content type reported by Polly.
         * @throws Error when Polly answers without an audio stream.
         */
        async generate(textTokenised: string, voice: AwsPollyPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            const polly = new Polly(awsConfig) as NodeJsClient<Polly>;
            const pollyResponse = await polly.synthesizeSpeech({
                TextType: 'ssml',
                Text: textTokenised,
                OutputFormat: 'mp3',
                LanguageCode: voice.language,
                VoiceId: voice.voice,
                Engine: voice.engine || 'standard',
            });
            // Fail with a descriptive error instead of a TypeError on an undefined stream.
            if (!pollyResponse.AudioStream) {
                throw new Error('AWS Polly returned no audio stream');
            }
            return [
                await pollyResponse.AudioStream.transformToByteArray(),
                // MP3 output was requested above, so fall back to its MIME type if Polly omits one.
                pollyResponse.ContentType ?? 'audio/mpeg',
            ];
        },
    },
    narakeet: {
        /** Delegates to `convertPronunciationStringToNarakeetFormat` to build the text sent to Narakeet. */
        tokenised(text: string): string {
            return convertPronunciationStringToNarakeetFormat(text);
        },
        /**
         * Synthesises the given text as M4A through the Narakeet HTTP API.
         *
         * @param textTokenised Text as produced by `tokenised`.
         * @param voice Narakeet voice settings (`voice`).
         * @returns The audio bytes and the content type reported by Narakeet.
         * @throws Error when `NARAKEET_API_KEY` is not set or the API answers with a non-2xx status.
         */
        async generate(textTokenised: string, voice: NarakeetPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            // Without this check a missing key would be sent as the literal header value "undefined".
            const apiKey = process.env.NARAKEET_API_KEY;
            if (!apiKey) {
                throw new Error('NARAKEET_API_KEY is not configured');
            }
            // Encode the voice name so it cannot alter the query string.
            const url = `https://api.narakeet.com/text-to-speech/m4a?voice=${encodeURIComponent(voice.voice)}`;
            const response = await fetch(url, {
                method: 'POST',
                headers: {
                    'accept': 'application/octet-stream',
                    'x-api-key': apiKey,
                    'content-type': 'text/plain',
                },
                body: textTokenised,
            });
            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }
            return [
                new Uint8Array(await response.arrayBuffer()),
                // Generic binary type if the response carries no content-type header.
                response.headers.get('content-type') ?? 'application/octet-stream',
            ];
        },
    },
};
/**
 * GET /pronounce/:voice/:pronunciation
 *
 * `:pronunciation` is the Base64-encoded pronunciation string and `:voice` names a voice from
 * the `pronunciation.voices` section of the request locale's config.
 *
 * Responds with 404 when the decoded text is empty or longer than 256 characters, or when the
 * voice is not configured. Otherwise the audio is streamed from S3 if an object for the
 * tokenised text already exists; if S3 reports `NoSuchKey`, the audio is generated by the
 * voice's provider, stored in S3 and then sent. Any other S3 error is rethrown.
 */
router.get('/pronounce/:voice/:pronunciation', handleErrorAsync(async (req, res) => {
    const { voice: voiceName, pronunciation: encodedText } = req.params;
    const respondNotFound = () => res.status(404).json({ error: 'Not found' });

    const text = Base64.decode(encodedText);
    if (!text || text.length > 256) {
        return respondNotFound();
    }

    const config = await loadConfig(getLocale(getH3Event(req)));
    const voice: PronunciationVoiceConfig | undefined = config.pronunciation?.voices?.[voiceName];
    if (!voice) {
        return respondNotFound();
    }

    // Voices without an explicit provider are handled by AWS Polly.
    const providerKey = (voice.provider || 'aws_polly') as ProviderKey;
    const provider = providers[providerKey];
    const tokenised = provider.tokenised(text);

    // Objects are addressed by locale, voice name and the SHA-1 of the tokenised text.
    const key = `pronunciation/${config.locale}-${voiceName}/${sha1(tokenised)}.mp3`;

    try {
        const cached = await (s3 as NodeJsClient<S3>).getObject({ Key: key, ...s3BucketParams });
        res.set('content-type', cached.ContentType);
        return cached.Body!.pipe(res);
    } catch (error) {
        if (error instanceof NoSuchKey) {
            // Nothing stored yet: synthesise, store the result, then answer with it.
            const [audio, audioType] = await provider.generate(tokenised, voice);
            await s3.putObject({
                Key: key,
                Body: audio,
                ContentType: audioType,
                ...s3BucketParams,
            });
            res.set('content-type', audioType);
            res.write(audio);
            return res.end();
        }
        throw error;
    }
}));

export default router;