import { serve } from "https://deno.land/std/http/server.ts";
import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";

/** URL queried by {@link fetchVoiceList} to obtain the JSON voice list. */
const VOICES_URL =
  "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";

/** Voice used when a short model alias is not recognised. */
const DEFAULT_VOICE_NAME = "en-US-BrianMultilingualNeural";

/**
 * Short model aliases accepted by {@link synthesizeSpeech} and the full voice
 * name each one resolves to. A Map is used (rather than a plain object) so
 * that names such as "constructor" cannot hit inherited properties.
 */
const MODEL_ALIASES = new Map<string, string>([
  ["ava", "en-US-AvaMultilingualNeural"],
  ["andrew", "en-US-AndrewMultilingualNeural"],
  ["emma", "en-US-EmmaMultilingualNeural"],
  ["brian", "en-US-BrianMultilingualNeural"],
  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
  ["remy", "fr-FR-RemyMultilingualNeural"],
  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
  ["florian", "de-DE-FlorianMultilingualNeural"],
  ["dmitry", "ru-RU-DmitryNeural"],
  ["svetlana", "ru-RU-SvetlanaNeural"],
]);

/** Fields read from each entry of the remote voice list. */
interface RawVoice {
  ShortName: string;
  FriendlyName: string;
  Locale: string;
}

/** Entry produced by {@link fetchVoiceList} for a single voice. */
interface VoiceEntry {
  model: string;
  name: string;
  friendlyName: string;
  locale: string;
}

/**
 * Downloads the voice list from {@link VOICES_URL} and groups it by locale.
 *
 * Both `model` and `name` of every entry are taken from the voice's
 * `ShortName` field.
 *
 * @returns An object mapping each locale to the voices that declare it.
 * @throws Error when the request is not successful or the payload is not an
 *   array.
 */
async function fetchVoiceList(): Promise<Record<string, VoiceEntry[]>> {
  const response = await fetch(VOICES_URL);
  if (!response.ok) {
    throw new Error(`Voice list request failed with status ${response.status}`);
  }

  const voices: unknown = await response.json();
  if (!Array.isArray(voices)) {
    throw new Error("Voice list response is not an array");
  }

  const byLocale: Record<string, VoiceEntry[]> = {};
  for (const voice of voices as RawVoice[]) {
    const { ShortName: shortName, FriendlyName: friendlyName, Locale: locale } = voice;
    if (!byLocale[locale]) byLocale[locale] = [];
    byLocale[locale].push({ model: shortName, name: shortName, friendlyName, locale });
  }
  return byLocale;
}

/**
 * Converts an optional raw parameter into a finite number.
 *
 * Missing, empty and non-numeric values all become 0 so that NaN is never
 * forwarded to the TTS client.
 */
function toFiniteNumber(raw: string | undefined): number {
  const value = Number(raw || 0);
  return Number.isFinite(value) ? value : 0;
}

/**
 * Extracts `rate` and `pitch` from a `key:value|key:value` string.
 *
 * Only the first two `:`-separated fields of each `|`-separated part are
 * used; when a key is repeated the last occurrence wins.
 */
function parseVoiceParams(voice: string): { rate: number; pitch: number } {
  const params = new Map<string, string | undefined>();
  for (const part of voice.split("|")) {
    const [key, value] = part.split(":");
    params.set(key, value);
  }
  return {
    rate: toFiniteNumber(params.get("rate")),
    pitch: toFiniteNumber(params.get("pitch")),
  };
}

/**
 * Synthesizes `text` with EdgeSpeechTTS and wraps the audio in a Response.
 *
 * @param model Either a short alias from {@link MODEL_ALIASES} or, when the
 *   string contains "Neural", a voice name that is used verbatim. Unknown
 *   aliases fall back to {@link DEFAULT_VOICE_NAME}.
 * @param voice `rate`/`pitch` parameters in `key:value|key:value` form. Only
 *   consulted when `model` contains "Neural"; aliases always use rate 0 and
 *   pitch 0.
 * @param text The text passed to the TTS client as `input`.
 * @returns A Response whose body is the synthesized audio, labelled
 *   `audio/mpeg`.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  let voiceName: string;
  let rate = 0;
  let pitch = 0;

  if (model.includes("Neural")) {
    voiceName = model;
    ({ rate, pitch } = parseVoiceParams(voice));
  } else {
    voiceName = MODEL_ALIASES.get(model) ?? DEFAULT_VOICE_NAME;
  }

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: { rate, pitch, voice: voiceName },
  };
  const response = await tts.create(payload);
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());

  console.log(`Successfully synthesized speech, returning audio/mpeg response`);
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}

/**
 * Checks that the request's Content-Type matches `expected`.
 *
 * Only the media type is compared: parameters such as `; charset=utf-8` are
 * ignored and the comparison is case-insensitive.
 *
 * @returns A 400 Response when the header is missing or different, otherwise
 *   `undefined`.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  const mediaType = (contentType ?? "").split(";")[0].trim().toLowerCase();
  if (mediaType !== expected.trim().toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", { status: 400 });
  }
  return undefined;
}

/**
 * Handles the debug endpoint: reads `voice`, `model` and `text` from the
 * query string and synthesizes speech from them.
 *
 * @returns A 400 Response when any of the three parameters is missing or
 *   empty, otherwise the result of {@link synthesizeSpeech}.
 */
async function handleDebugRequest(req: Request): Promise<Response> {
  const { searchParams } = new URL(req.url);
  const voice = searchParams.get("voice") || "";
  const model = searchParams.get("model") || "";
  const text = searchParams.get("text") || "";
  console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);

  if (!voice || !model || !text) {
    console.log("Missing required parameters");
    return new Response("Bad Request", { status: 400 });
  }
  return synthesizeSpeech(model, voice, text);
}

/**
 * Handles the JSON synthesis endpoint.
 *
 * Expects a POST with an `application/json` body containing `model`, `input`
 * and, optionally, `voice`.
 *
 * @returns 405 for non-POST methods; 400 for a wrong Content-Type, a body
 *   that is not a JSON object, or a missing/empty `model` or `input`;
 *   otherwise the result of {@link synthesizeSpeech}.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (req.method !== "POST") {
    console.log(`Invalid method ${req.method}, expected POST`);
    return new Response("Method Not Allowed", { status: 405 });
  }

  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    // A malformed body is a client error, not an internal failure.
    console.log("Request body is not valid JSON");
    return new Response("Bad Request", { status: 400 });
  }
  if (typeof body !== "object" || body === null) {
    console.log("Request body is not a JSON object");
    return new Response("Bad Request", { status: 400 });
  }

  const { model, input, voice } = body as Record<string, unknown>;
  console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);

  if (typeof model !== "string" || model === "" || typeof input !== "string" || input === "") {
    console.log("Missing required parameters");
    return new Response("Bad Request", { status: 400 });
  }

  // `voice` only carries optional rate/pitch parameters, so it may be omitted.
  return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
}

/**
 * Serves the demo page as `text/html`.
 *
 * NOTE(review): the page body is only the text " tts" followed by a newline;
 * confirm whether the intended demo markup is missing from this file.
 */
async function handleDemoRequest(req: Request): Promise<Response> {
  const html = " tts\n";
  return new Response(html, {
    headers: { "Content-Type": "text/html" },
  });
}

/**
 * Returns the supported short model aliases with their gender as JSON,
 * listing male voices before female voices and otherwise keeping the order
 * in which they are declared.
 */
async function handleVoiceList(): Promise<Response> {
  const voices = [
    { model: "ava", gender: "female" },
    { model: "andrew", gender: "male" },
    { model: "emma", gender: "female" },
    { model: "brian", gender: "male" },
    { model: "vivienne", gender: "female" },
    { model: "remy", gender: "male" },
    { model: "seraphina", gender: "female" },
    { model: "florian", gender: "male" },
    { model: "dmitry", gender: "male" },
    { model: "svetlana", gender: "female" },
  ];

  // Sort a copy so the declared list is not reordered in place.
  const sortedVoiceList = [...voices].sort((a, b) => {
    if (a.gender === "male" && b.gender === "female") return -1;
    if (a.gender === "female" && b.gender === "male") return 1;
    return 0;
  });

  return new Response(JSON.stringify(sortedVoiceList), {
    headers: { "Content-Type": "application/json" },
  });
}

// Route every request by path. Each handler is awaited inside the try block
// so that asynchronous failures are turned into a 500 response by the catch
// below instead of escaping as an unhandled rejection.
serve(async (req: Request): Promise<Response> => {
  try {
    const { pathname } = new URL(req.url);
    switch (pathname) {
      case "/":
        return await handleDemoRequest(req);
      case "/v1/audio/models":
        return await handleVoiceList();
      case "/tts":
        return await handleDebugRequest(req);
      case "/v1/audio/speech":
        return await handleSynthesisRequest(req);
      default:
        console.log(`Unhandled path ${pathname}`);
        return new Response("Not Found", { status: 404 });
    }
  } catch (err: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
});