// PronounsPage/server/express/pronounce.ts

import { Polly } from '@aws-sdk/client-polly';
import { NoSuchKey } from '@aws-sdk/client-s3';
import type { S3 } from '@aws-sdk/client-s3';
import type { NodeJsClient } from '@smithy/types';
import { Router } from 'express';
import { getH3Event } from 'h3-express';
import { Base64 } from 'js-base64';
import sha1 from 'sha1';

import { s3, awsConfig, s3BucketParams } from '../cloudServices.ts';

import {
    convertPronunciationStringToSsml,
    convertPronunciationStringToNarakeetFormat,
    handleErrorAsync,
} from '#shared/helpers.ts';
import type {
    PronunciationVoiceConfig,
    AwsPollyPronunciationVoiceConfig,
    NarakeetPronunciationVoiceConfig,
} from '~~/locale/config.ts';
import { getLocale, loadConfig } from '~~/server/data.ts';

// Express router carrying the pronunciation (text-to-speech) endpoint; it is this module's default export.
const router = Router();

/** Identifiers of the text-to-speech backends that have an entry in `providers`. */
type ProviderKey = 'aws_polly' | 'narakeet';
/**
 * Contract every text-to-speech backend implements.
 *
 * NOTE(review): the members are declared with method syntax, and the implementations
 * narrow `voice` to a provider-specific config type. That presumably relies on
 * method-parameter bivariance — confirm before converting these to property/arrow signatures.
 */
interface Provider {
    /**
     * Converts the decoded pronunciation string into the markup this backend expects.
     * The result is what gets passed to `generate` and what the cache key is hashed from.
     */
    tokenised(text: string): string;
    /**
     * Synthesises audio for already-tokenised text using the given voice configuration.
     *
     * @param textTokenised output of `tokenised` for the same provider
     * @param voice voice configuration taken from the locale config
     * @returns a tuple of the audio bytes and the content type to serve/store them with
     */
    generate(textTokenised: string, voice: PronunciationVoiceConfig): Promise<[Uint8Array, string]>;
}

/**
 * Registry of text-to-speech backends, keyed by the `provider` value of a voice configuration.
 *
 * Each entry converts a pronunciation string into the backend's own markup (`tokenised`)
 * and turns that markup into audio bytes plus a content type (`generate`).
 */
const providers: Record<ProviderKey, Provider> = {
    aws_polly: {
        /** Converts the pronunciation string to SSML, which is what Polly is asked to read (`TextType: 'ssml'`). */
        tokenised(text: string): string {
            return convertPronunciationStringToSsml(text);
        },
        /**
         * Synthesises MP3 audio through AWS Polly.
         *
         * @param textTokenised SSML produced by `tokenised`
         * @param voice Polly voice configuration (language, voice id, optional engine)
         * @returns the audio bytes and the content type reported by Polly
         * @throws Error when Polly's response carries no audio stream
         */
        async generate(textTokenised: string, voice: AwsPollyPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            const polly = new Polly(awsConfig) as NodeJsClient<Polly>;

            const pollyResponse = await polly.synthesizeSpeech({
                TextType: 'ssml',
                Text: textTokenised,
                OutputFormat: 'mp3',
                LanguageCode: voice.language,
                VoiceId: voice.voice,
                // `||` (not `??`) on purpose: an empty engine value in the config also falls back to 'standard'.
                Engine: voice.engine || 'standard',
            });

            // Fail with a descriptive message instead of a TypeError on an undefined stream.
            if (!pollyResponse.AudioStream) {
                throw new Error('AWS Polly returned no audio stream');
            }

            return [
                await pollyResponse.AudioStream.transformToByteArray(),
                // MP3 output is requested above, so fall back to the MP3 media type if Polly omits the field.
                pollyResponse.ContentType ?? 'audio/mpeg',
            ];
        },
    },
    narakeet: {
        /** Converts the pronunciation string to the plain-text format sent to Narakeet. */
        tokenised(text: string): string {
            return convertPronunciationStringToNarakeetFormat(text);
        },
        /**
         * Synthesises M4A audio through the Narakeet text-to-speech HTTP API.
         *
         * @param textTokenised text produced by `tokenised`
         * @param voice Narakeet voice configuration; `voice.voice` is sent as the `voice` query parameter
         * @returns the audio bytes and the content type reported by Narakeet
         * @throws Error when `NARAKEET_API_KEY` is not set or the API answers with a non-2xx status
         */
        async generate(textTokenised: string, voice: NarakeetPronunciationVoiceConfig): Promise<[Uint8Array, string]> {
            // Without this check a missing key would be sent as the literal header value "undefined".
            const apiKey = process.env.NARAKEET_API_KEY;
            if (!apiKey) {
                throw new Error('NARAKEET_API_KEY is not configured');
            }

            // Encode the voice name so reserved characters cannot break or extend the query string.
            const url = `https://api.narakeet.com/text-to-speech/m4a?voice=${encodeURIComponent(voice.voice)}`;

            const response = await fetch(url, {
                method: 'POST',
                headers: {
                    'accept': 'application/octet-stream',
                    'x-api-key': apiKey,
                    'content-type': 'text/plain',
                },
                body: textTokenised,
            });

            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }

            return [
                new Uint8Array(await response.arrayBuffer()),
                // Fall back to the media type requested in `accept` if the response has no content-type header.
                response.headers.get('content-type') ?? 'application/octet-stream',
            ];
        },
    },
};

/**
 * GET /pronounce/:voice/:pronunciation
 *
 * Serves synthesised audio for a Base64-encoded pronunciation string.
 *
 * - `:pronunciation` is Base64-decoded; an empty result or one longer than 256 characters yields 404.
 * - `:voice` must name a voice defined in the current locale's `pronunciation.voices` config, else 404.
 * - The audio is looked up in S3 under a key derived from the locale, the voice name and the SHA-1 of
 *   the tokenised text. On a cache miss it is generated by the voice's provider, stored in S3 and
 *   then sent to the client.
 *
 * Errors other than an S3 `NoSuchKey` propagate to `handleErrorAsync`.
 */
router.get('/pronounce/:voice/:pronunciation', handleErrorAsync(async (req, res) => {
    const text = Base64.decode(req.params.pronunciation);

    if (!text || text.length > 256) {
        return res.status(404).json({ error: 'Not found' });
    }

    const locale = getLocale(getH3Event(req));
    const config = await loadConfig(locale);

    // The voice name comes straight from the URL. Only accept own properties of the voices map, so
    // that inherited keys such as `constructor` or `__proto__` are treated as unknown voices (404)
    // instead of resolving to a truthy prototype member and reaching S3 / the speech provider.
    const voiceName = req.params.voice;
    const voices = config.pronunciation?.voices;
    const voice: PronunciationVoiceConfig | undefined =
        voices && Object.prototype.hasOwnProperty.call(voices, voiceName)
            ? voices[voiceName]
            : undefined;
    if (!voice) {
        return res.status(404).json({ error: 'Not found' });
    }

    // Voices without an explicit provider use AWS Polly.
    const provider = providers[(voice.provider || 'aws_polly') as ProviderKey];
    const tokenised = provider.tokenised(text);
    // NOTE(review): the key always ends in `.mp3`, whichever provider produced the audio. The content
    // type actually served is the one stored with the S3 object, so the suffix is only part of the name.
    const key = `pronunciation/${config.locale}-${voiceName}/${sha1(tokenised)}.mp3`;

    try {
        // Cache hit: stream the stored object to the client with its stored content type.
        const cached = await (s3 as NodeJsClient<S3>).getObject({ Key: key, ...s3BucketParams });
        res.set('content-type', cached.ContentType);
        return cached.Body!.pipe(res);
    } catch (error) {
        // Only a missing object counts as a cache miss; every other failure is a real error.
        if (!(error instanceof NoSuchKey)) {
            throw error;
        }
    }

    // Cache miss: synthesise the audio, store it for subsequent requests, then send it.
    const [buffer, contentType] = await provider.generate(tokenised, voice);

    await s3.putObject({
        Key: key,
        Body: buffer,
        ContentType: contentType,
        ...s3BucketParams,
    });

    res.set('content-type', contentType);
    res.write(buffer);
    return res.end();
}));

export default router;