tts / main.ts
gnilets's picture
Update main.ts
d9be26c verified
raw
history blame
11.2 kB
import { serve } from "https://deno.land/std/http/server.ts";
import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";
/**
 * Voice-list endpoint of the Bing "read aloud" speech service. The
 * `trustedclienttoken` query parameter is part of the URL itself.
 */
const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";

/** One voice entry as exposed by {@link fetchVoiceList}. */
type VoiceInfo = { model: string; name: string; friendlyName: string; locale: string };

/**
 * Downloads the voice list from {@link VOICES_URL} and groups it by locale.
 *
 * Each remote entry is expected to carry `ShortName`, `FriendlyName` and
 * `Locale` fields; `ShortName` is used for both `model` and `name`.
 *
 * @returns A map from locale (e.g. `"en-US"`) to the voices of that locale,
 *   in the order the service returned them.
 * @throws Error when the service answers with a non-2xx status or the payload
 *   is not a JSON array.
 */
async function fetchVoiceList(): Promise<Record<string, VoiceInfo[]>> {
  const response = await fetch(VOICES_URL);
  if (!response.ok) {
    // Release the unread body before bailing out so the connection is not left dangling.
    await response.body?.cancel();
    throw new Error(`Voice list request failed with status ${response.status}`);
  }

  const voices: unknown = await response.json();
  if (!Array.isArray(voices)) {
    throw new Error("Voice list response is not a JSON array");
  }

  const byLocale: Record<string, VoiceInfo[]> = {};
  for (const voice of voices) {
    const { ShortName: shortName, FriendlyName: friendlyName, Locale: locale } = voice;
    if (!byLocale[locale]) byLocale[locale] = [];
    byLocale[locale].push({ model: shortName, name: shortName, friendlyName, locale });
  }
  return byLocale;
}
/** Voice used when a short alias is not recognised. */
const DEFAULT_VOICE_NAME = "en-US-BrianMultilingualNeural";

/** Short model aliases accepted by the API, mapped to full Edge voice names. */
const VOICE_ALIASES = new Map<string, string>([
  ["ava", "en-US-AvaMultilingualNeural"],
  ["andrew", "en-US-AndrewMultilingualNeural"],
  ["emma", "en-US-EmmaMultilingualNeural"],
  ["brian", "en-US-BrianMultilingualNeural"],
  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
  ["remy", "fr-FR-RemyMultilingualNeural"],
  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
  ["florian", "de-DE-FlorianMultilingualNeural"],
  ["dmitry", "ru-RU-DmitryNeural"],
  ["svetlana", "ru-RU-SvetlanaNeural"],
]);

/**
 * Parses a `key:value|key:value` settings string (e.g. `"rate:0.1|pitch:-0.2"`)
 * into numeric rate and pitch. Missing, empty or non-numeric values become 0.
 */
function parseVoiceTuning(voice: string): { rate: number; pitch: number } {
  const settings = new Map<string, string>();
  for (const pair of voice.split("|")) {
    const [key, value] = pair.split(":");
    if (key !== undefined && value !== undefined) settings.set(key.trim(), value);
  }
  const toNumber = (raw: string | undefined): number => {
    const parsed = Number(raw || 0);
    // Never forward NaN/Infinity to the TTS engine.
    return Number.isFinite(parsed) ? parsed : 0;
  };
  return { rate: toNumber(settings.get("rate")), pitch: toNumber(settings.get("pitch")) };
}

/**
 * Synthesizes `text` with Edge TTS and returns the audio as an HTTP response.
 *
 * @param model Either a short alias (see `VOICE_ALIASES`; unknown aliases fall
 *   back to `DEFAULT_VOICE_NAME`) or a full voice name containing `"Neural"`.
 * @param voice Settings string in `rate:<n>|pitch:<n>` form. It is only
 *   consulted when `model` is a full voice name; aliases always use rate 0 and
 *   pitch 0.
 * @param text The text to speak.
 * @returns A response with `Content-Type: audio/mpeg` carrying the audio bytes.
 * @throws Error when the TTS backend answers with a non-2xx status.
 */
async function synthesizeSpeech(model: string, voice: string, text: string) {
  let voiceName: string;
  let rate = 0;
  let pitch = 0;

  if (!model.includes("Neural")) {
    voiceName = VOICE_ALIASES.get(model) ?? DEFAULT_VOICE_NAME;
  } else {
    voiceName = model;
    // `voice` comes from request JSON and may be absent at runtime despite the type.
    ({ rate, pitch } = parseVoiceTuning(typeof voice === "string" ? voice : ""));
  }

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: {
      rate: rate,
      pitch: pitch,
      voice: voiceName,
    },
  };
  const response = await tts.create(payload);
  if (!response.ok) {
    // Do not relabel an upstream error body as audio.
    throw new Error(`Speech synthesis failed with status ${response.status}`);
  }
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());
  console.log(`Successfully synthesized speech, returning audio/mpeg response`);
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}
/**
 * Checks that the request's `Content-Type` media type equals `expected`.
 *
 * Only the media type is compared: parameters such as `; charset=utf-8` are
 * ignored and the comparison is case-insensitive, so
 * `application/json; charset=utf-8` satisfies `application/json`.
 *
 * @param req The incoming request.
 * @param expected The required media type, e.g. `"application/json"`.
 * @returns A 400 "Bad Request" response when the header is missing or names a
 *   different media type; `undefined` when the header is acceptable.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  const [mediaType = ""] = (contentType ?? "").split(";");
  if (mediaType.trim().toLowerCase() !== expected.trim().toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", { status: 400 });
  }
  return undefined;
}
/**
 * Debug entry point: reads `model`, `voice` and `text` from the query string
 * and synthesizes speech from them.
 *
 * @param req The incoming request; only its URL is inspected.
 * @returns The synthesized audio response, or a 400 "Bad Request" response
 *   when any of the three query parameters is missing or empty.
 */
async function handleDebugRequest(req: Request) {
  const { searchParams } = new URL(req.url);
  // Absent parameters are treated the same as empty ones.
  const readParam = (key: string) => searchParams.get(key) || "";

  const voice = readParam("voice");
  const model = readParam("model");
  const text = readParam("text");
  console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);

  const hasAllParams = voice !== "" && model !== "" && text !== "";
  if (hasAllParams) {
    return synthesizeSpeech(model, voice, text);
  }

  console.log("Missing required parameters");
  return new Response("Bad Request", { status: 400 });
}
/**
 * Handles `POST /v1/audio/speech`: validates the request and synthesizes the
 * speech described by its JSON body.
 *
 * Expected body: `{ "model": string, "input": string, "voice"?: string }`.
 *
 * @param req The incoming request.
 * @returns The synthesized audio response on success; 405 for non-POST
 *   methods; 400 for a wrong content type, malformed JSON, a missing or
 *   non-string `model`, a missing, empty or non-string `input`, or a
 *   non-string `voice`.
 */
async function handleSynthesisRequest(req: Request) {
  if (req.method !== "POST") {
    console.log(`Invalid method ${req.method}, expected POST`);
    return new Response("Method Not Allowed", { status: 405 });
  }
  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  // A malformed body is a client error, not a server failure.
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    console.log("Request body is not valid JSON");
    return new Response("Bad Request", { status: 400 });
  }
  if (typeof body !== "object" || body === null) {
    console.log("Request body is not a JSON object");
    return new Response("Bad Request", { status: 400 });
  }

  const { model, input, voice } = body as Record<string, unknown>;
  const voiceIsValid = voice === undefined || voice === null || typeof voice === "string";
  if (typeof model !== "string" || typeof input !== "string" || input === "" || !voiceIsValid) {
    console.log("Missing or invalid model/input/voice fields");
    return new Response("Bad Request", { status: 400 });
  }

  console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);
  // `voice` is optional; an empty settings string means default rate and pitch.
  const voiceSettings = typeof voice === "string" ? voice : "";
  return synthesizeSpeech(model, voiceSettings, input);
}
/**
 * Serves the demo page: a text box, a voice selector and a button that posts
 * to `/v1/audio/speech` and plays the returned audio with a download link.
 *
 * The page script always sends `rate:0.0|pitch:0.0`; the markup contains no
 * rate or pitch sliders, so the script does not wire any up. Failed synthesis
 * requests are logged to the browser console instead of being handed to the
 * audio player.
 *
 * @param req The incoming request (not inspected).
 * @returns A response with `Content-Type: text/html` containing the page.
 */
async function handleDemoRequest(req: Request) {
  // The template body is kept flush-left: its whitespace is part of the served page.
  const html = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>tts</title>
<style>
body {
background-color: #121212;
color: #e0e0e0;
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
}
.container {
max-width: 800px;
margin: 0 auto;
padding: 20px;
background-color: #1e1e1e;
border-radius: 8px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}
.input-area, .output-area {
margin-bottom: 20px;
}
.slider-container, .textarea-container, .dropdown-container {
margin-bottom: 20px;
}
label {
display: block;
margin-bottom: 8px;
font-weight: bold;
}
input[type="range"] {
width: 100%;
}
.slider-value {
text-align: center;
margin-top: 8px;
}
textarea {
max-width: 780px;
width: calc(100% - 20px);
height: 100px;
padding: 10px;
border: 1px solid #333;
border-radius: 4px;
background-color: #2e2e2e;
color: #e0e0e0;
resize: none;
}
select {
width: 100%;
padding: 10px;
border: 1px solid #333;
border-radius: 4px;
background-color: #2e2e2e;
color: #e0e0e0;
}
button {
width: 100%;
padding: 10px;
border: none;
border-radius: 4px;
background-color: #6200ea;
color: #fff;
font-size: 16px;
cursor: pointer;
transition: background-color 0.3s;
}
button:hover {
background-color: #3700b3;
}
h1 {
font-size: 24px;
margin-bottom: 20px;
}
a {
color: #bb86fc;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
#audioPlayerContainer {
text-align: center; /* Центрируем содержимое контейнера */
}
audio {
width: 100%;
max-width: 600px; /* Ограничиваем максимальную ширину плеера */
margin: 10px 0;
}
a {
display: block;
margin: 10px 0;
}
</style>
</head>
<body>
<div class="container">
<div class="input-area">
<div class="textarea-container">
<label for="inputText">текст:</label
><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
</div>
<div class="dropdown-container">
<label for="voiceSelect">выбери голос:</label>
<select id="voiceSelect">
<option value="ava">ava</option>
<option value="andrew">andrew</option>
<option value="emma">emma</option>
<option value="brian">brian</option>
<option value="vivienne">vivienne</option>
<option value="remy">remy</option>
<option value="seraphina">seraphina</option>
<option value="florian">florian</option>
<option value="dmitry">dmitry</option>
<option value="svetlana">svetlana</option>
</select>
</div>
<button id="synthesizeButton">синтезировать</button>
</div>
<div class="output-area">
<div id="audioPlayerContainer"></div>
</div>
</div>
<script>
let audio = null;
document.getElementById('synthesizeButton').addEventListener('click', () => {
const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
const rate = '0.0';
const pitch = '0.0';
const voice = \`rate:\${rate}|pitch:\${pitch}\`;
const model = document.getElementById('voiceSelect').value;
if (audio) {
audio.pause();
audio.currentTime = 0;
}
fetch('/v1/audio/speech', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model, input: text, voice })
})
.then(response => {
if (!response.ok) {
throw new Error('Synthesis request failed with status ' + response.status);
}
return response.blob();
})
.then(blob => {
const audioUrl = URL.createObjectURL(blob);
const audioPlayerContainer = document.getElementById('audioPlayerContainer');
// Удаляем старый аудиоплеер, если он существует
if (audio) {
audio.pause();
audioPlayerContainer.innerHTML = '';
}
// Создаем новый аудиоплеер
audio = new Audio(audioUrl);
audio.controls = true;
audioPlayerContainer.appendChild(audio);
// Создаем ссылку для скачивания
const downloadLink = document.createElement('a');
downloadLink.href = audioUrl;
downloadLink.download = 'synthesized_voice.mp3';
downloadLink.textContent = 'Скачать аудио';
downloadLink.style.display = 'block';
downloadLink.style.marginTop = '10px';
// Добавляем ссылку для скачивания в контейнер
audioPlayerContainer.appendChild(downloadLink);
// Воспроизводим аудио
audio.play();
})
.catch(error => {
console.error(error);
});
});
</script>
</body></html>`;
  return new Response(html, {
    headers: { "Content-Type": "text/html" },
  });
}
/**
 * Returns the supported model aliases as JSON, male voices first and female
 * voices after them, each group keeping its declaration order.
 *
 * @returns A response with `Content-Type: application/json` whose body is an
 *   array of `{ model, gender }` objects.
 */
async function handleVoiceList() {
  const catalogue = [
    { model: 'ava', gender: 'female' },
    { model: 'andrew', gender: 'male' },
    { model: 'emma', gender: 'female' },
    { model: 'brian', gender: 'male' },
    { model: 'vivienne', gender: 'female' },
    { model: 'remy', gender: 'male' },
    { model: 'seraphina', gender: 'female' },
    { model: 'florian', gender: 'male' },
    { model: 'dmitry', gender: 'male' },
    { model: 'svetlana', gender: 'female' },
  ];

  // Lower rank sorts first; the sort is stable, so ties keep their order.
  const genderRank = (gender: string): number => (gender === 'male' ? 0 : 1);
  const ordered = [...catalogue].sort(
    (left, right) => genderRank(left.gender) - genderRank(right.gender),
  );

  const payload = JSON.stringify(ordered);
  return new Response(payload, {
    headers: { "Content-Type": "application/json" },
  });
}
// HTTP entry point. Routes:
//   /                 -> demo page
//   /v1/audio/models  -> JSON list of model aliases
//   /tts              -> debug synthesis driven by query parameters
//   /v1/audio/speech  -> JSON synthesis endpoint
// Any other path yields 404. Every handler result is awaited inside the `try`
// so that asynchronous failures reach the `catch` and produce a 500 response
// instead of escaping as an unhandled rejection.
serve(async (req) => {
  try {
    const url = new URL(req.url);
    if (url.pathname === "/") {
      return await handleDemoRequest(req);
    }
    if (url.pathname === "/v1/audio/models") {
      return await handleVoiceList();
    }
    if (url.pathname === "/tts") {
      return await handleDebugRequest(req);
    }
    if (url.pathname !== "/v1/audio/speech") {
      console.log(`Unhandled path ${url.pathname}`);
      return new Response("Not Found", { status: 404 });
    }
    return await handleSynthesisRequest(req);
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
});