Spaces:
Running
Running
import { serve } from "https://deno.land/std/http/server.ts"; | |
import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1"; | |
/**
 * Voice-list endpoint of the Bing speech platform "readaloud" service.
 * The `trustedclienttoken` query parameter is embedded in the URL.
 */
const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";

/**
 * Downloads the voice list from `VOICES_URL` and groups the entries by locale.
 *
 * Each entry's `ShortName` is exposed as both `model` and `name`,
 * `FriendlyName` as `friendlyName` and `Locale` as `locale`.
 *
 * @returns A map from locale to the voices reported for that locale, in the
 *          order the service returned them.
 * @throws Error when the service answers with a non-2xx status or when the
 *         payload is not a JSON array.
 */
async function fetchVoiceList(): Promise<
  Record<string, { model: string; name: string; friendlyName: string; locale: string }[]>
> {
  type VoiceEntry = { model: string; name: string; friendlyName: string; locale: string };
  type RawVoice = { ShortName: string; FriendlyName: string; Locale: string };

  const response = await fetch(VOICES_URL);
  if (!response.ok) {
    // Discard the unread body so the connection is not left dangling.
    await response.body?.cancel();
    throw new Error(`Voice list request failed with status ${response.status}`);
  }

  const voices: unknown = await response.json();
  if (!Array.isArray(voices)) {
    throw new Error("Voice list response is not a JSON array");
  }

  const byLocale: Record<string, VoiceEntry[]> = {};
  for (const voice of voices as RawVoice[]) {
    const { ShortName: shortName, FriendlyName: friendlyName, Locale: locale } = voice;
    if (!byLocale[locale]) byLocale[locale] = [];
    byLocale[locale].push({ model: shortName, name: shortName, friendlyName, locale });
  }
  return byLocale;
}
/**
 * Synthesizes `text` with `EdgeSpeechTTS` and wraps the audio in an HTTP response.
 *
 * @param model Either a short alias (for example "ava" or "dmitry") or a full
 *              voice identifier containing "Neural". Unknown aliases fall back
 *              to "en-US-BrianMultilingualNeural".
 * @param voice Optional prosody overrides in the form "rate:<n>|pitch:<n>".
 *              Only consulted when `model` is a full voice identifier; aliases
 *              always use rate 0 and pitch 0. Missing, non-string or
 *              non-numeric values are treated as 0.
 * @param text  The text to synthesize.
 * @returns A response with `Content-Type: audio/mpeg` carrying the bytes
 *          produced by the TTS engine.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  const fallbackVoice = "en-US-BrianMultilingualNeural";
  // A Map (rather than a plain object) so that inputs such as "constructor"
  // can never resolve to an inherited property.
  const aliasToVoice = new Map<string, string>([
    ["ava", "en-US-AvaMultilingualNeural"],
    ["andrew", "en-US-AndrewMultilingualNeural"],
    ["emma", "en-US-EmmaMultilingualNeural"],
    ["brian", "en-US-BrianMultilingualNeural"],
    ["vivienne", "fr-FR-VivienneMultilingualNeural"],
    ["remy", "fr-FR-RemyMultilingualNeural"],
    ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
    ["florian", "de-DE-FlorianMultilingualNeural"],
    ["dmitry", "ru-RU-DmitryNeural"],
    ["svetlana", "ru-RU-SvetlanaNeural"],
  ]);

  // Converts one raw parameter to a number, falling back to 0 so that NaN is
  // never forwarded to the TTS engine.
  const toFiniteNumber = (raw: string | undefined): number => {
    const value = Number(raw || 0);
    return Number.isFinite(value) ? value : 0;
  };

  let voiceName: string;
  let rate = 0;
  let pitch = 0;
  if (!model.includes("Neural")) {
    voiceName = aliasToVoice.get(model) ?? fallbackVoice;
  } else {
    voiceName = model;
    // Callers may omit `voice` (e.g. a JSON body without that field).
    const rawParams = typeof voice === "string" ? voice : "";
    const params: Record<string, string | undefined> = Object.fromEntries(
      rawParams.split("|").map((p) => p.split(":") as [string, string]),
    );
    rate = toFiniteNumber(params["rate"]);
    pitch = toFiniteNumber(params["pitch"]);
  }

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: {
      rate,
      pitch,
      voice: voiceName,
    },
  };
  const response = await tts.create(payload);
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());
  console.log(`Successfully synthesized speech, returning audio/mpeg response`);
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}
/**
 * Checks that the request declares the expected media type.
 *
 * Only the media type itself is compared: parameters such as
 * `; charset=utf-8` are ignored and the comparison is case-insensitive.
 *
 * @param req      The incoming request.
 * @param expected The required media type, e.g. "application/json".
 * @returns A 400 "Bad Request" response when the header is missing or names a
 *          different media type; `undefined` when the request is acceptable.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  const mediaType = contentType?.split(";")[0]?.trim().toLowerCase();
  if (mediaType !== expected.toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", { status: 400 });
  }
  return undefined;
}
/**
 * Handles the query-string driven debug endpoint.
 *
 * Reads `model`, `text` and `voice` from the URL's query parameters and
 * forwards them to `synthesizeSpeech`. `model` and `text` are required;
 * `voice` is optional and defaults to the empty string.
 *
 * @param req The incoming request.
 * @returns The synthesis response, or a 400 "Bad Request" response when
 *          `model` or `text` is missing or empty.
 */
async function handleDebugRequest(req: Request): Promise<Response> {
  const { searchParams } = new URL(req.url);
  const voice = searchParams.get("voice") ?? "";
  const model = searchParams.get("model") ?? "";
  const text = searchParams.get("text") ?? "";
  console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);
  if (!model || !text) {
    console.log("Missing required parameters");
    return new Response("Bad Request", { status: 400 });
  }
  return synthesizeSpeech(model, voice, text);
}
/**
 * Handles the JSON synthesis endpoint.
 *
 * Expects a POST request with a JSON body of the form
 * `{ model, input, voice }` and forwards those values to `synthesizeSpeech`.
 * `model` and `input` must be non-empty strings; `voice` is optional and
 * defaults to the empty string.
 *
 * @param req The incoming request.
 * @returns 405 for non-POST methods, the response produced by
 *          `validateContentType` when the content type is rejected, 400 when
 *          the body is not valid JSON or lacks `model`/`input`, and otherwise
 *          the synthesis response.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (req.method !== "POST") {
    console.log(`Invalid method ${req.method}, expected POST`);
    return new Response("Method Not Allowed", { status: 405 });
  }
  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    // A malformed body is the client's fault, not an internal error.
    console.log("Request body is not valid JSON");
    return new Response("Bad Request", { status: 400 });
  }

  // `?? {}` covers a literal JSON `null`; primitives simply yield undefined fields.
  const { model, input, voice } = (body ?? {}) as Record<string, unknown>;
  console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);
  if (typeof model !== "string" || !model || typeof input !== "string" || !input) {
    console.log("Missing required parameters");
    return new Response("Bad Request", { status: 400 });
  }
  return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
}
/**
 * Serves the self-contained demo page.
 *
 * The page offers a text area, a drop-down of voice aliases and a button that
 * POSTs `{ model, input, voice }` as JSON to `/v1/audio/speech`, then plays
 * the returned audio and offers it for download. The page contains no
 * rate/pitch controls, so its script always sends "rate:0.0|pitch:0.0" and
 * only touches elements that exist in the markup.
 *
 * @param req The incoming request (not inspected).
 * @returns An HTML response with `Content-Type: text/html`.
 */
async function handleDemoRequest(req: Request): Promise<Response> {
  const html = `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>tts</title>
  <style>
    body {
      background-color: #121212;
      color: #e0e0e0;
      font-family: Arial, sans-serif;
      margin: 0;
      padding: 20px;
    }
    .container {
      max-width: 800px;
      margin: 0 auto;
      padding: 20px;
      background-color: #1e1e1e;
      border-radius: 8px;
      box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
    }
    .input-area, .output-area {
      margin-bottom: 20px;
    }
    .slider-container, .textarea-container, .dropdown-container {
      margin-bottom: 20px;
    }
    label {
      display: block;
      margin-bottom: 8px;
      font-weight: bold;
    }
    input[type="range"] {
      width: 100%;
    }
    .slider-value {
      text-align: center;
      margin-top: 8px;
    }
    textarea {
      max-width: 780px;
      width: calc(100% - 20px);
      height: 100px;
      padding: 10px;
      border: 1px solid #333;
      border-radius: 4px;
      background-color: #2e2e2e;
      color: #e0e0e0;
      resize: none;
    }
    select {
      width: 100%;
      padding: 10px;
      border: 1px solid #333;
      border-radius: 4px;
      background-color: #2e2e2e;
      color: #e0e0e0;
    }
    button {
      width: 100%;
      padding: 10px;
      border: none;
      border-radius: 4px;
      background-color: #6200ea;
      color: #fff;
      font-size: 16px;
      cursor: pointer;
      transition: background-color 0.3s;
    }
    button:hover {
      background-color: #3700b3;
    }
    h1 {
      font-size: 24px;
      margin-bottom: 20px;
    }
    a {
      color: #bb86fc;
      text-decoration: none;
    }
    a:hover {
      text-decoration: underline;
    }
    #audioPlayerContainer {
      text-align: center; /* Центрируем содержимое контейнера */
    }
    audio {
      width: 100%;
      max-width: 600px; /* Ограничиваем максимальную ширину плеера */
      margin: 10px 0;
    }
    a {
      display: block;
      margin: 10px 0;
    }
  </style>
</head>
<body>
  <div class="container">
    <div class="input-area">
      <div class="textarea-container">
        <label for="inputText">текст:</label
        ><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
      </div>
      <div class="dropdown-container">
        <label for="voiceSelect">выбери голос:</label>
        <select id="voiceSelect">
          <option value="ava">ava</option>
          <option value="andrew">andrew</option>
          <option value="emma">emma</option>
          <option value="brian">brian</option>
          <option value="vivienne">vivienne</option>
          <option value="remy">remy</option>
          <option value="seraphina">seraphina</option>
          <option value="florian">florian</option>
          <option value="dmitry">dmitry</option>
          <option value="svetlana">svetlana</option>
        </select>
      </div>
      <button id="synthesizeButton">синтезировать</button>
    </div>
    <div class="output-area">
      <div id="audioPlayerContainer"></div>
    </div>
  </div>
  <script>
    let audio = null;
    let audioUrl = null;
    document.getElementById('synthesizeButton').addEventListener('click', () => {
      const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
      const rate = '0.0';
      const pitch = '0.0';
      const voice = \`rate:\${rate}|pitch:\${pitch}\`;
      const model = document.getElementById('voiceSelect').value;
      if (audio) {
        audio.pause();
        audio.currentTime = 0;
      }
      fetch('/v1/audio/speech', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ model, input: text, voice })
      })
        .then(response => {
          if (!response.ok) {
            throw new Error('Synthesis request failed with status ' + response.status);
          }
          return response.blob();
        })
        .then(blob => {
          const audioPlayerContainer = document.getElementById('audioPlayerContainer');
          // Удаляем старый аудиоплеер, если он существует
          if (audio) {
            audio.pause();
            audioPlayerContainer.innerHTML = '';
          }
          if (audioUrl) {
            URL.revokeObjectURL(audioUrl);
          }
          audioUrl = URL.createObjectURL(blob);
          // Создаем новый аудиоплеер
          audio = new Audio(audioUrl);
          audio.controls = true;
          audioPlayerContainer.appendChild(audio);
          // Создаем ссылку для скачивания
          const downloadLink = document.createElement('a');
          downloadLink.href = audioUrl;
          downloadLink.download = 'synthesized_voice.mp3';
          downloadLink.textContent = 'Скачать аудио';
          downloadLink.style.display = 'block';
          downloadLink.style.marginTop = '10px';
          // Добавляем ссылку для скачивания в контейнер
          audioPlayerContainer.appendChild(downloadLink);
          // Воспроизводим аудио
          audio.play();
        })
        .catch(error => {
          console.error(error);
        });
    });
  </script>
</body></html>`;
  return new Response(html, {
    headers: { "Content-Type": "text/html" },
  });
}
/**
 * Returns the built-in voice aliases as a JSON array of `{ model, gender }`
 * objects, with every male voice listed before every female voice and the
 * declaration order kept within each group.
 *
 * @returns A response with `Content-Type: application/json`.
 */
async function handleVoiceList() {
  const catalogue = [
    { model: 'ava', gender: 'female' },
    { model: 'andrew', gender: 'male' },
    { model: 'emma', gender: 'female' },
    { model: 'brian', gender: 'male' },
    { model: 'vivienne', gender: 'female' },
    { model: 'remy', gender: 'male' },
    { model: 'seraphina', gender: 'female' },
    { model: 'florian', gender: 'male' },
    { model: 'dmitry', gender: 'male' },
    { model: 'svetlana', gender: 'female' },
  ];

  // Partitioning keeps the relative order inside each group, which is exactly
  // what a stable male-before-female sort yields.
  const maleVoices = catalogue.filter((entry) => entry.gender === 'male');
  const femaleVoices = catalogue.filter((entry) => entry.gender !== 'male');
  const body = JSON.stringify([...maleVoices, ...femaleVoices]);

  return new Response(body, {
    headers: { "Content-Type": "application/json" },
  });
}
// HTTP entry point. Routes by URL path:
//   "/"                 -> demo page
//   "/v1/audio/models"  -> list of voice aliases
//   "/tts"              -> query-string debug synthesis
//   "/v1/audio/speech"  -> JSON synthesis
// Any other path yields 404.
serve(async (req: Request) => {
  try {
    const { pathname } = new URL(req.url);
    // Each handler is awaited inside the `try` so that a rejected promise is
    // caught below; returning the bare promise would let the rejection escape
    // this block and skip the 500 response.
    switch (pathname) {
      case "/":
        return await handleDemoRequest(req);
      case "/v1/audio/models":
        return await handleVoiceList();
      case "/tts":
        return await handleDebugRequest(req);
      case "/v1/audio/speech":
        return await handleSynthesisRequest(req);
      default:
        console.log(`Unhandled path ${pathname}`);
        return new Response("Not Found", { status: 404 });
    }
  } catch (err) {
    // The caught value is not guaranteed to be an Error instance.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
});