Spaces:

gnilets
/

tts

Running

App Files Files Community

tts / main.ts

gnilets

Update main.ts

d9be26c verified 5 months ago

raw

history blame

11.2 kB

	import { serve } from "https://deno.land/std/http/server.ts";
	import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";


	/** Voice-list endpoint of the Bing "read aloud" speech service, including its client token. */
	const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";

	/** One voice as returned by {@link fetchVoiceList}. */
	interface VoiceEntry {
	  model: string;
	  name: string;
	  friendlyName: string;
	  locale: string;
	}

	/**
	 * Downloads the voice catalogue from {@link VOICES_URL} and groups it by locale.
	 *
	 * Both `model` and `name` carry the upstream `ShortName`.
	 *
	 * @returns A map from locale to the voices listed for that locale, in upstream order.
	 * @throws Error when the endpoint answers with a non-2xx status or the payload is not an array.
	 */
	async function fetchVoiceList(): Promise<Record<string, VoiceEntry[]>> {
	  const response = await fetch(VOICES_URL);
	  if (!response.ok) {
	    // Without this check an error page would be parsed as if it were the catalogue.
	    throw new Error(`Failed to fetch voice list: HTTP ${response.status}`);
	  }

	  const payload: unknown = await response.json();
	  if (!Array.isArray(payload)) {
	    throw new Error("Unexpected voice list payload: expected an array");
	  }

	  const byLocale: Record<string, VoiceEntry[]> = {};
	  for (const raw of payload) {
	    const { ShortName: shortName, FriendlyName: friendlyName, Locale: locale } =
	      raw as { ShortName: string; FriendlyName: string; Locale: string };
	    if (!byLocale[locale]) byLocale[locale] = [];
	    byLocale[locale].push({ model: shortName, name: shortName, friendlyName, locale });
	  }
	  return byLocale;
	}

	/** Short aliases accepted in the `model` field, mapped to full Edge voice names. */
	const VOICE_ALIASES: ReadonlyMap<string, string> = new Map([
	  ["ava", "en-US-AvaMultilingualNeural"],
	  ["andrew", "en-US-AndrewMultilingualNeural"],
	  ["emma", "en-US-EmmaMultilingualNeural"],
	  ["brian", "en-US-BrianMultilingualNeural"],
	  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
	  ["remy", "fr-FR-RemyMultilingualNeural"],
	  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
	  ["florian", "de-DE-FlorianMultilingualNeural"],
	  ["dmitry", "ru-RU-DmitryNeural"],
	  ["svetlana", "ru-RU-SvetlanaNeural"],
	]);

	/** Voice used when `model` is neither a known alias nor a full "...Neural" voice name. */
	const DEFAULT_VOICE_NAME = "en-US-BrianMultilingualNeural";

	/**
	 * Parses a `"rate:<n>|pitch:<n>"` settings string.
	 *
	 * Missing, empty or non-numeric values become 0, and a non-string input
	 * yields `{ rate: 0, pitch: 0 }` instead of throwing.
	 */
	function parseProsody(voice: string): { rate: number; pitch: number } {
	  const settings = new Map<string, string>();
	  if (typeof voice === "string") {
	    for (const part of voice.split("|")) {
	      const [key, value] = part.split(":");
	      if (key !== undefined && value !== undefined) settings.set(key.trim(), value);
	    }
	  }
	  const toNumber = (raw: string | undefined): number => {
	    const parsed = Number(raw ?? 0);
	    // NaN/Infinity must never reach the TTS engine.
	    return Number.isFinite(parsed) ? parsed : 0;
	  };
	  return { rate: toNumber(settings.get("rate")), pitch: toNumber(settings.get("pitch")) };
	}

	/**
	 * Synthesizes `text` with `EdgeSpeechTTS` and returns the audio as an HTTP response.
	 *
	 * @param model Either a full voice name containing "Neural" (used verbatim) or one
	 *   of the short aliases in {@link VOICE_ALIASES}; any other value falls back to
	 *   {@link DEFAULT_VOICE_NAME}.
	 * @param voice Settings string of the form `"rate:<n>|pitch:<n>"`. It is only
	 *   consulted when `model` is a full voice name; aliases always use rate 0 and pitch 0.
	 * @param text Text to speak.
	 * @returns A response with `Content-Type: audio/mpeg` carrying the synthesized bytes.
	 */
	async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
	  let voiceName: string;
	  let rate = 0;
	  let pitch = 0;

	  if (model.includes("Neural")) {
	    voiceName = model;
	    ({ rate, pitch } = parseProsody(voice));
	  } else {
	    voiceName = VOICE_ALIASES.get(model) ?? DEFAULT_VOICE_NAME;
	  }

	  const tts = new EdgeSpeechTTS();
	  const response = await tts.create({
	    input: text,
	    options: { rate, pitch, voice: voiceName },
	  });
	  const mp3Buffer = new Uint8Array(await response.arrayBuffer());

	  console.log(`Successfully synthesized speech, returning audio/mpeg response`);
	  return new Response(mp3Buffer, {
	    headers: { "Content-Type": "audio/mpeg" },
	  });
	}

	/**
	 * Checks that the request's `Content-Type` header names the expected media type.
	 *
	 * Only the media type is compared: parameters such as `; charset=utf-8` are
	 * ignored and the comparison is case-insensitive, so
	 * `Application/JSON; charset=utf-8` satisfies `application/json`.
	 *
	 * @param req Incoming request whose header is inspected.
	 * @param expected Media type the caller requires, e.g. `"application/json"`.
	 * @returns A 400 "Bad Request" response when the header is missing or names a
	 *   different media type; `undefined` when the header is acceptable.
	 */
	function validateContentType(req: Request, expected: string): Response | undefined {
	  const contentType = req.headers.get("Content-Type");
	  const [mediaType = ""] = (contentType ?? "").split(";");
	  if (mediaType.trim().toLowerCase() !== expected.trim().toLowerCase()) {
	    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
	    return new Response("Bad Request", { status: 400 });
	  }
	  return undefined;
	}

	/**
	 * Debug endpoint: synthesizes speech from query-string parameters.
	 *
	 * Reads `model`, `voice` and `text` from the request URL and forwards them to
	 * `synthesizeSpeech`. All three must be present and non-empty.
	 *
	 * @param req Incoming request; only its URL is inspected.
	 * @returns The synthesized audio response, or a 400 "Bad Request" response when
	 *   any of the three parameters is missing or empty.
	 */
	async function handleDebugRequest(req: Request): Promise<Response> {
	  const { searchParams } = new URL(req.url);
	  const voice = searchParams.get("voice") ?? "";
	  const model = searchParams.get("model") ?? "";
	  const text = searchParams.get("text") ?? "";

	  console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);

	  if (!voice || !model || !text) {
	    console.log("Missing required parameters");
	    return new Response("Bad Request", { status: 400 });
	  }

	  return synthesizeSpeech(model, voice, text);
	}

	/**
	 * Handles `POST` synthesis requests carrying a JSON body `{ model, input, voice }`.
	 *
	 * `model` and `input` must be non-empty strings. `voice` is optional; when it is
	 * absent or not a string an empty settings string is passed on.
	 *
	 * @param req Incoming request.
	 * @returns The synthesized audio response on success; 405 for non-POST methods;
	 *   400 for a wrong content type, a body that is not valid JSON, or a missing or
	 *   invalid `model` / `input`.
	 */
	async function handleSynthesisRequest(req: Request): Promise<Response> {
	  if (req.method !== "POST") {
	    console.log(`Invalid method ${req.method}, expected POST`);
	    // A 405 response should tell the client which methods are allowed.
	    return new Response("Method Not Allowed", { status: 405, headers: { Allow: "POST" } });
	  }

	  const invalidContentType = validateContentType(req, "application/json");
	  if (invalidContentType) return invalidContentType;

	  let body: unknown;
	  try {
	    body = await req.json();
	  } catch {
	    // Malformed JSON is a client error, not a server failure.
	    console.log("Request body is not valid JSON");
	    return new Response("Bad Request", { status: 400 });
	  }

	  if (typeof body !== "object" || body === null) {
	    console.log("Request body is not a JSON object");
	    return new Response("Bad Request", { status: 400 });
	  }

	  const { model, input, voice } = body as Record<string, unknown>;
	  console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);

	  if (typeof model !== "string" || model === "" || typeof input !== "string" || input === "") {
	    console.log("Missing or invalid model/input");
	    return new Response("Bad Request", { status: 400 });
	  }

	  return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
	}


	/**
	 * Serves the self-contained demo page: a text area, a voice selector and a
	 * button that POSTs to `/v1/audio/speech` and plays the returned audio.
	 *
	 * The page script only wires up elements that exist in the markup, reports
	 * failed synthesis requests in the player area instead of playing an error
	 * body, and revokes the previous object URL before creating a new one.
	 *
	 * @param req Incoming request (not inspected).
	 * @returns An HTML response with the demo page.
	 */
	async function handleDemoRequest(req: Request): Promise<Response> {
	  const html = `<!DOCTYPE html>
	<html lang="ru">
	<head>
	<meta charset="UTF-8" />
	<meta name="viewport" content="width=device-width, initial-scale=1.0" />
	<title>tts</title>
	<style>
	body {
	  background-color: #121212;
	  color: #e0e0e0;
	  font-family: Arial, sans-serif;
	  margin: 0;
	  padding: 20px;
	}

	.container {
	  max-width: 800px;
	  margin: 0 auto;
	  padding: 20px;
	  background-color: #1e1e1e;
	  border-radius: 8px;
	  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
	}

	.input-area, .output-area {
	  margin-bottom: 20px;
	}

	.slider-container, .textarea-container, .dropdown-container {
	  margin-bottom: 20px;
	}

	label {
	  display: block;
	  margin-bottom: 8px;
	  font-weight: bold;
	}

	input[type="range"] {
	  width: 100%;
	}

	.slider-value {
	  text-align: center;
	  margin-top: 8px;
	}

	textarea {
	  max-width: 780px;
	  width: calc(100% - 20px);
	  height: 100px;
	  padding: 10px;
	  border: 1px solid #333;
	  border-radius: 4px;
	  background-color: #2e2e2e;
	  color: #e0e0e0;
	  resize: none;
	}

	select {
	  width: 100%;
	  padding: 10px;
	  border: 1px solid #333;
	  border-radius: 4px;
	  background-color: #2e2e2e;
	  color: #e0e0e0;
	}

	button {
	  width: 100%;
	  padding: 10px;
	  border: none;
	  border-radius: 4px;
	  background-color: #6200ea;
	  color: #fff;
	  font-size: 16px;
	  cursor: pointer;
	  transition: background-color 0.3s;
	}

	button:hover {
	  background-color: #3700b3;
	}

	h1 {
	  font-size: 24px;
	  margin-bottom: 20px;
	}

	a {
	  color: #bb86fc;
	  text-decoration: none;
	}

	a:hover {
	  text-decoration: underline;
	}

	#audioPlayerContainer {
	  text-align: center; /* Центрируем содержимое контейнера */
	}

	audio {
	  width: 100%;
	  max-width: 600px; /* Ограничиваем максимальную ширину плеера */
	  margin: 10px 0;
	}

	a {
	  display: block;
	  margin: 10px 0;
	}
	</style>

	</head>
	<body>
	<div class="container">
	  <div class="input-area">
	    <div class="textarea-container">
	      <label for="inputText">текст:</label>
	      <textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
	    </div>
	    <div class="dropdown-container">
	      <label for="voiceSelect">выбери голос:</label>
	      <select id="voiceSelect">
	        <option value="ava">ava</option>
	        <option value="andrew">andrew</option>
	        <option value="emma">emma</option>
	        <option value="brian">brian</option>
	        <option value="vivienne">vivienne</option>
	        <option value="remy">remy</option>
	        <option value="seraphina">seraphina</option>
	        <option value="florian">florian</option>
	        <option value="dmitry">dmitry</option>
	        <option value="svetlana">svetlana</option>
	      </select>
	    </div>
	    <button id="synthesizeButton">синтезировать</button>
	  </div>
	  <div class="output-area">
	    <div id="audioPlayerContainer"></div>
	  </div>
	</div>
	<script>
	let audio = null;
	let audioUrl = null;

	document.getElementById('synthesizeButton').addEventListener('click', () => {
	  const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
	  const rate = '0.0';
	  const pitch = '0.0';
	  const voice = \`rate:\${rate}|pitch:\${pitch}\`;
	  const model = document.getElementById('voiceSelect').value;
	  const audioPlayerContainer = document.getElementById('audioPlayerContainer');

	  if (audio) {
	    audio.pause();
	    audio.currentTime = 0;
	  }

	  fetch('/v1/audio/speech', {
	    method: 'POST',
	    headers: { 'Content-Type': 'application/json' },
	    body: JSON.stringify({ model, input: text, voice })
	  })
	  .then(response => {
	    // An error body must not be handed to the audio player.
	    if (!response.ok) {
	      throw new Error('HTTP ' + response.status);
	    }
	    return response.blob();
	  })
	  .then(blob => {
	    // Удаляем старый аудиоплеер, если он существует
	    if (audio) {
	      audio.pause();
	    }
	    audioPlayerContainer.innerHTML = '';

	    // Release the previous blob before creating a new one.
	    if (audioUrl) {
	      URL.revokeObjectURL(audioUrl);
	    }
	    audioUrl = URL.createObjectURL(blob);

	    // Создаем новый аудиоплеер
	    audio = new Audio(audioUrl);
	    audio.controls = true;
	    audioPlayerContainer.appendChild(audio);

	    // Создаем ссылку для скачивания
	    const downloadLink = document.createElement('a');
	    downloadLink.href = audioUrl;
	    downloadLink.download = 'synthesized_voice.mp3';
	    downloadLink.textContent = 'Скачать аудио';
	    downloadLink.style.display = 'block';
	    downloadLink.style.marginTop = '10px';

	    // Добавляем ссылку для скачивания в контейнер
	    audioPlayerContainer.appendChild(downloadLink);

	    // Воспроизводим аудио
	    audio.play();
	  })
	  .catch(error => {
	    audioPlayerContainer.textContent = 'Ошибка синтеза: ' + error.message;
	  });
	});
	</script>
	</body></html>`;

	  return new Response(html, {
	    headers: { "Content-Type": "text/html; charset=utf-8" },
	  });
	}

	/**
	 * Returns the built-in voice aliases as JSON, male voices first.
	 *
	 * Within each gender the entries keep their declaration order.
	 *
	 * @returns A JSON response whose body is an array of `{ model, gender }` objects.
	 */
	async function handleVoiceList() {
	  const catalogue = [
	    { model: 'ava', gender: 'female' },
	    { model: 'andrew', gender: 'male' },
	    { model: 'emma', gender: 'female' },
	    { model: 'brian', gender: 'male' },
	    { model: 'vivienne', gender: 'female' },
	    { model: 'remy', gender: 'male' },
	    { model: 'seraphina', gender: 'female' },
	    { model: 'florian', gender: 'male' },
	    { model: 'dmitry', gender: 'male' },
	    { model: 'svetlana', gender: 'female' },
	  ];

	  // Two order-preserving passes give the same result as a stable male-before-female sort.
	  const maleVoices = catalogue.filter((entry) => entry.gender === 'male');
	  const femaleVoices = catalogue.filter((entry) => entry.gender === 'female');
	  const body = JSON.stringify([...maleVoices, ...femaleVoices]);

	  return new Response(body, {
	    headers: { "Content-Type": "application/json" },
	  });
	}


	/**
	 * HTTP entry point: routes each request by path.
	 *
	 *   /                  -> demo page
	 *   /v1/audio/models   -> built-in voice list
	 *   /tts               -> debug synthesis via query string
	 *   /v1/audio/speech   -> JSON synthesis endpoint
	 *
	 * Any other path yields 404. Every handler is awaited inside the `try` so that
	 * a rejected handler promise is caught here and turned into a 500 response.
	 */
	serve(async (req: Request) => {
	  try {
	    const url = new URL(req.url);

	    switch (url.pathname) {
	      case "/":
	        return await handleDemoRequest(req);
	      case "/v1/audio/models":
	        return await handleVoiceList();
	      case "/tts":
	        return await handleDebugRequest(req);
	      case "/v1/audio/speech":
	        return await handleSynthesisRequest(req);
	      default:
	        console.log(`Unhandled path ${url.pathname}`);
	        return new Response("Not Found", { status: 404 });
	    }
	  } catch (err: unknown) {
	    // Anything can be thrown, so only read `.message` from real Error objects.
	    const message = err instanceof Error ? err.message : String(err);
	    console.error(`Error processing request: ${message}`);
	    return new Response(`Internal Server Error\n${message}`, {
	      status: 500,
	    });
	  }
	});