Spaces:
Running
Running
import {serve} from "https://deno.land/std/http/server.ts"; | |
import {EdgeSpeechTTS} from "https://esm.sh/@lobehub/tts@1"; | |
/** Short model aliases accepted by the API, mapped to the voice name sent to the TTS client. */
const VOICE_ALIASES: ReadonlyMap<string, string> = new Map([
  ["ava", "en-US-AvaMultilingualNeural"],
  ["andrew", "en-US-AndrewMultilingualNeural"],
  ["emma", "en-US-EmmaMultilingualNeural"],
  ["brian", "en-US-BrianMultilingualNeural"],
  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
  ["remy", "fr-FR-RemyMultilingualNeural"],
  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
  ["florian", "de-DE-FlorianMultilingualNeural"],
  ["dmitry", "ru-RU-DmitryNeural"],
  ["svetlana", "ru-RU-SvetlanaNeural"],
]);

/** Voice used when a short alias is not recognised. */
const FALLBACK_VOICE = "en-US-BrianMultilingualNeural";

/**
 * Parses a `"rate:<n>|pitch:<n>"` settings string.
 * A non-string input, a missing key, an empty value or a non-numeric value all yield 0.
 */
function parseVoiceSettings(voice: unknown): { rate: number; pitch: number } {
  const settings = new Map<string, string>();
  if (typeof voice === "string") {
    for (const pair of voice.split("|")) {
      // Only the first two ":"-separated fields are used; later duplicates win.
      const [key = "", value = ""] = pair.split(":");
      settings.set(key, value);
    }
  }
  const toNumber = (raw: string | undefined): number => {
    const parsed = Number(raw || 0);
    return Number.isFinite(parsed) ? parsed : 0;
  };
  return { rate: toNumber(settings.get("rate")), pitch: toNumber(settings.get("pitch")) };
}

/**
 * Synthesizes `text` with the Edge TTS client and returns the audio as an HTTP response.
 *
 * @param model Either a full voice name (any string containing "Neural", passed through
 *   unchanged) or one of the short aliases in `VOICE_ALIASES`. Unknown aliases fall back
 *   to `FALLBACK_VOICE`.
 * @param voice Settings string of the form `"rate:<n>|pitch:<n>"`. It is only applied when
 *   `model` is a full voice name; a missing or malformed value is treated as rate 0 / pitch 0
 *   instead of throwing.
 * @param text The text to synthesize.
 * @returns A response carrying the synthesized bytes with `Content-Type: audio/mpeg`.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  const isFullVoiceName = model.includes("Neural");
  const voiceName = isFullVoiceName ? model : (VOICE_ALIASES.get(model) ?? FALLBACK_VOICE);
  // NOTE(review): rate/pitch are ignored for short aliases, exactly as before, even though
  // API clients send a `voice` string alongside aliases — confirm whether that is intended.
  const { rate, pitch } = isFullVoiceName ? parseVoiceSettings(voice) : { rate: 0, pitch: 0 };

  const tts = new EdgeSpeechTTS();
  const response = await tts.create({
    input: text,
    options: { rate, pitch, voice: voiceName },
  });
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}
/**
 * Checks that the request's Content-Type matches `expected`.
 *
 * The comparison is done on the media type only: parameters such as `; charset=utf-8`
 * and letter case are ignored, so `Application/JSON; charset=utf-8` satisfies
 * `application/json`. An exact string match with `expected` is always accepted as well.
 *
 * @param req The incoming request.
 * @param expected The required media type, e.g. `"application/json"`.
 * @returns `undefined` when the header is acceptable, otherwise a 400 "Bad Request" response.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  if (contentType !== null) {
    if (contentType === expected) return undefined;
    // Drop any parameters after the first ";" before comparing.
    const [mediaType = ""] = contentType.split(";");
    if (mediaType.trim().toLowerCase() === expected.trim().toLowerCase()) {
      return undefined;
    }
  }
  console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
  return new Response("Bad Request", { status: 400 });
}
/**
 * Debug endpoint: logs a fixed model / voice / text triple and synthesizes it,
 * returning whatever `synthesizeSpeech` produces.
 */
async function handleDebugRequest() {
  const sample = {
    model: "en-US-BrianMultilingualNeural",
    voice: "rate:0.0|pitch:0.0",
    text: "Приветик! Надеюсь ты меня хорошо слышишь? Алё?!",
  };
  console.log(`model=${sample.model}, voice=${sample.voice}, text=${sample.text}`);
  return synthesizeSpeech(sample.model, sample.voice, sample.text);
}
/**
 * Handles `POST /v1/audio/speech`.
 *
 * Expects a JSON body of the form `{ model: string, input: string, voice?: string }`.
 *
 * @param req The incoming request.
 * @returns 405 for non-POST methods; the response from `validateContentType` when the
 *   Content-Type is rejected; 400 when the body is not valid JSON or `model` / `input`
 *   are not strings; otherwise the result of `synthesizeSpeech`.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (req.method !== "POST") {
    return new Response("Method Not Allowed", { status: 405 });
  }
  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    // Malformed JSON is a client error, not a server failure.
    return new Response("Bad Request", { status: 400 });
  }
  if (typeof body !== "object" || body === null) {
    return new Response("Bad Request", { status: 400 });
  }

  const { model, input, voice } = body as Record<string, unknown>;
  if (typeof model !== "string" || typeof input !== "string") {
    return new Response("Bad Request", { status: 400 });
  }
  // `voice` is optional: short model aliases never read it, so a missing or
  // non-string value is normalised to an empty settings string.
  return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
}
/**
 * Serves the single-page demo UI as `text/html`.
 *
 * The page contains a text area, a voice selector filled from `/v1/audio/models`,
 * a button that POSTs to `/v1/audio/speech` and plays the returned audio, and a
 * collapsible section with curl examples.
 *
 * Client-script behaviour worth knowing:
 * - a non-2xx synthesis or model-list response is reported to the console instead of
 *   being turned into an audio player / parsed as JSON;
 * - the object URL of the previous clip is revoked before a new one is created;
 * - both curl examples are built from `window.location.origin`.
 *
 * Inside the template, backticks and `${` belonging to the browser script are
 * backslash-escaped so they reach the page verbatim.
 *
 * @param req The incoming request (not inspected).
 * @returns The demo page.
 */
async function handleDemoRequest(req: Request) {
  const html = `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta content="width=device-width, initial-scale=1.0" name="viewport" />
  <title>tts</title>
  <style>
    body {
      background-color: #121212;
      color: #e0e0e0;
      font-family: Arial, sans-serif;
      margin: 0;
      padding: 20px;
    }
    .container {
      max-width: 800px;
      margin: 0 auto;
      padding: 20px;
      background-color: #1e1e1e;
      border-radius: 8px;
      box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
    }
    .input-area, .output-area {
      margin-bottom: 20px;
    }
    .slider-container, .textarea-container, .dropdown-container {
      margin-bottom: 20px;
    }
    label {
      display: block;
      margin-bottom: 8px;
      font-weight: bold;
    }
    input[type="range"] {
      width: 100%;
    }
    .slider-value {
      text-align: center;
      margin-top: 8px;
    }
    textarea {
      max-width: 780px;
      width: calc(100% - 20px);
      height: 100px;
      padding: 10px;
      border: 1px solid #333;
      border-radius: 4px;
      background-color: #2e2e2e;
      color: #e0e0e0;
      resize: none;
    }
    select {
      width: 100%;
      padding: 10px;
      border: 1px solid #333;
      border-radius: 4px;
      background-color: #2e2e2e;
      color: #e0e0e0;
    }
    button {
      width: 100%;
      padding: 10px;
      border: none;
      border-radius: 4px;
      background-color: #6200ea;
      color: #fff;
      font-size: 16px;
      cursor: pointer;
      transition: background-color 0.3s;
    }
    button:hover {
      background-color: #3700b3;
    }
    h1 {
      font-size: 24px;
      margin-bottom: 20px;
    }
    a {
      color: #bb86fc;
      text-decoration: none;
    }
    a:hover {
      text-decoration: underline;
    }
    #audioPlayerContainer {
      text-align: center;
    }
    audio {
      width: 100%;
      max-width: 600px;
      margin: 10px 0;
    }
    a {
      display: block;
      margin: 10px 0;
    }
    pre {
      color: #94c890;
      background: #000000;
      padding: 5px 10px;
      margin: 0;
      font-size: 1.12em;
    }
  </style>
</head>
<body>
  <div class="container">
    <div class="input-area">
      <div class="textarea-container">
        <label for="inputText">текст:</label
        ><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
      </div>
      <div class="dropdown-container">
        <label for="voiceSelect">выберите голос:</label>
        <select id="voiceSelect"></select>
      </div>
      <button id="synthesizeButton">синтезировать</button>
    </div>
    <div class="output-area">
      <div id="audioPlayerContainer"></div>
    </div>
    <details>
      <summary>api</summary>
      <p>получить список голосов:</p>
      <pre id="apiVoices"></pre>
      <p>post-запрос для синтеза голоса из текста:</p>
      <pre id="apiExamples"></pre>
    </details>
  </div>
  <script>
    let audio = null;
    let audioUrl = null;
    document.getElementById('synthesizeButton').addEventListener('click', () => {
      const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
      const rate = '0.0';
      const pitch = '0.0';
      const voice = \`rate:\${rate}|pitch:\${pitch}\`;
      const model = document.getElementById('voiceSelect').value;
      if (audio) {
        audio.pause();
        audio.currentTime = 0;
      }
      fetch('/v1/audio/speech', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ model, input: text, voice })
      })
        .then(response => {
          if (!response.ok) {
            throw new Error('HTTP ' + response.status);
          }
          return response.blob();
        })
        .then(blob => {
          const audioPlayerContainer = document.getElementById('audioPlayerContainer');
          if (audio) {
            audio.pause();
            audioPlayerContainer.innerHTML = '';
          }
          if (audioUrl) {
            URL.revokeObjectURL(audioUrl);
          }
          audioUrl = URL.createObjectURL(blob);
          audio = new Audio(audioUrl);
          audio.controls = true;
          audioPlayerContainer.appendChild(audio);
          const downloadLink = document.createElement('a');
          downloadLink.href = audioUrl;
          downloadLink.download = 'synthesized_voice.mp3';
          downloadLink.textContent = 'Скачать аудио';
          downloadLink.style.display = 'block';
          downloadLink.style.marginTop = '10px';
          audioPlayerContainer.appendChild(downloadLink);
          audio.play();
        })
        .catch(error => {
          console.error('ошибка при синтезе речи:', error);
        });
    });
    async function fetchModels() {
      try {
        const response = await fetch('/v1/audio/models');
        if (!response.ok) {
          throw new Error('HTTP ' + response.status);
        }
        const models = await response.json();
        const voiceSelect = document.getElementById('voiceSelect');
        models.forEach((model, index) => {
          const option = document.createElement('option');
          option.value = model.model;
          option.textContent = model.model;
          if (index === 1) {option.selected = true;}
          voiceSelect.appendChild(option);
        });
      } catch (error) {
        console.error('ошибка при получении списка моделей:', error);
      }
    }
    fetchModels();
    function createApiExamples() {
      const apiExamples = document.getElementById('apiExamples');
      const apiVoices = document.getElementById('apiVoices');
      const currentUrl = window.location.origin;
      const voices_pre = \`curl \${currentUrl}/v1/audio/models\`;
      const examples_pre = \`curl '\${currentUrl}/v1/audio/speech' \\\\\\\\
-H 'content-type: application/json' \\\\\\\\
--data-raw '{"model":"brian","input":"привет! хрю-хрю!","voice":"rate:0|pitch:0"}' \\\\\\\\
-o tts_voice.mp3
\`;
      apiVoices.textContent = voices_pre.replace(/\\\\\\\\/g, '\\\\');
      apiExamples.textContent = examples_pre.replace(/\\\\\\\\/g, '\\\\');
    }
    createApiExamples();
  </script>
</body></html>`;
  return new Response(html, {
    headers: { "Content-Type": "text/html" },
  });
}
/**
 * Returns the supported short model aliases as a JSON array of
 * `{model, gender}` objects, male voices first, each group in declaration order.
 */
async function handleVoiceList() {
  const roster: Array<[string, string]> = [
    ['ava', 'female'],
    ['andrew', 'male'],
    ['emma', 'female'],
    ['brian', 'male'],
    ['vivienne', 'female'],
    ['remy', 'male'],
    ['seraphina', 'female'],
    ['florian', 'male'],
    ['dmitry', 'male'],
    ['svetlana', 'female'],
  ];
  const entries = roster.map(([model, gender]) => ({ model, gender }));
  // Partitioning keeps the relative order inside each gender group.
  const malesFirst = [
    ...entries.filter((entry) => entry.gender === 'male'),
    ...entries.filter((entry) => entry.gender !== 'male'),
  ];
  return new Response(JSON.stringify(malesFirst), {
    headers: { "Content-Type": "application/json" },
  });
}
// HTTP entry point: routes each request by path.
//   "/"                 -> demo page
//   "/v1/audio/models"  -> voice list
//   "/tts"              -> fixed debug synthesis
//   "/v1/audio/speech"  -> synthesis from the JSON body
// Any other path is logged and answered with 404.
serve(async (req) => {
  try {
    const url = new URL(req.url);
    // Each handler is awaited inside the try block: returning the bare promise would
    // let a rejection skip the catch below, so the 500 response would never be built.
    switch (url.pathname) {
      case "/":
        return await handleDemoRequest(req);
      case "/v1/audio/models":
        return await handleVoiceList();
      case "/tts":
        return await handleDebugRequest();
      case "/v1/audio/speech":
        return await handleSynthesisRequest(req);
      default:
        console.log(`Unhandled path ${url.pathname}`);
        return new Response("Not Found", { status: 404 });
    }
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
});