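// NOTE: the three lines below are web-page residue captured with this file, not program code.
// Spaces:
// Running
// Running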
import { serve } from "https://deno.land/std/http/server.ts"; | |
import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1"; | |
const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4"; | |
async function fetchVoiceList() { | |
const response = await fetch(VOICES_URL); | |
const voices = await response.json(); | |
return voices.reduce((acc: Record<string, { model: string, name: string, friendlyName: string, locale: string }[]>, voice: any) => { | |
const { ShortName: model, ShortName: name, FriendlyName: friendlyName, Locale: locale } = voice; | |
if (!acc[locale]) acc[locale] = []; | |
acc[locale].push({ model, name, friendlyName, locale }); | |
return acc; | |
}, {}); | |
} | |
async function synthesizeSpeech(model: string, voice: string, text: string) { | |
let voiceName; | |
let rate = 0; | |
let pitch = 0; | |
if (model.includes("tts")) { | |
rate = 0.1; | |
pitch = 0.2; | |
switch (voice) { | |
case "ava": | |
voiceName = "en-US-AvaMultilingualNeural"; | |
break; | |
case "andrew": | |
voiceName = "en-US-AndrewMultilingualNeural"; | |
break; | |
case "emma": | |
voiceName = "en-US-EmmaMultilingualNeural"; | |
break; | |
case "brian": | |
voiceName = "en-US-BrianMultilingualNeural"; | |
break; | |
case "vivienne": | |
voiceName = "fr-FR-VivienneMultilingualNeural"; | |
break; | |
case "remy": | |
voiceName = "fr-FR-RemyMultilingualNeural"; | |
break; | |
case "seraphina": | |
voiceName = "de-DE-SeraphinaMultilingualNeural"; | |
break; | |
case "florian": | |
voiceName = "de-DE-FlorianMultilingualNeural"; | |
break; | |
case "dmitry": | |
voiceName = "ru-RU-DmitryNeural"; | |
break; | |
case "svetlana": | |
voiceName = "ru-RU-SvetlanaNeural"; | |
break; | |
default: | |
voiceName = "en-US-BrianMultilingualNeural"; | |
break; | |
} | |
} else { | |
voiceName = model; | |
const params = Object.fromEntries( | |
voice.split("|").map((p) => p.split(":") as [string, string]) | |
); | |
rate = Number(params["rate"] || 0); | |
pitch = Number(params["pitch"] || 0); | |
} | |
const tts = new EdgeSpeechTTS(); | |
const payload = { | |
input: text, | |
options: { | |
rate: rate, | |
pitch: pitch, | |
voice: voiceName | |
}, | |
}; | |
const response = await tts.create(payload); | |
const mp3Buffer = new Uint8Array(await response.arrayBuffer()); | |
console.log(`Successfully synthesized speech, returning audio/mpeg response`); | |
return new Response(mp3Buffer, { | |
headers: { "Content-Type": "audio/mpeg" }, | |
}); | |
} | |
function validateContentType(req: Request, expected: string) { | |
const contentType = req.headers.get("Content-Type"); | |
if (contentType !== expected) { | |
console.log(`Invalid Content-Type ${contentType}, expected ${expected}`); | |
return new Response("Bad Request", { status: 400 }); | |
} | |
} | |
async function handleDebugRequest(req: Request) { | |
const url = new URL(req.url); | |
const voice = url.searchParams.get("voice") || ""; | |
const model = url.searchParams.get("model") || ""; | |
const text = url.searchParams.get("text") || ""; | |
console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`); | |
if (!voice || !model || !text) { | |
console.log("Missing required parameters"); | |
return new Response("Bad Request", { status: 400 }); | |
} | |
return synthesizeSpeech(model, voice, text); | |
} | |
async function handleSynthesisRequest(req: Request) { | |
if (req.method !== "POST") { | |
console.log(`Invalid method ${req.method}, expected POST`); | |
return new Response("Method Not Allowed", { status: 405 }); | |
} | |
const invalidContentType = validateContentType(req, "application/json"); | |
if (invalidContentType) return invalidContentType; | |
const { model, input, voice } = await req.json(); | |
console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`); | |
return synthesizeSpeech(model, voice, input); | |
} | |
async function handleDemoRequest(req: Request) { | |
const groupedVoiceList = await fetchVoiceList(); | |
const html = `<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8" /> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
<title>tts</title> | |
<link | |
href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@400;700&display=swap" | |
rel="stylesheet" | |
/> | |
<style> | |
:root { | |
--primary-color: #6c8bd6; | |
--primary-light: #a2b3e3; | |
--primary-dark: #3d5b8f; | |
--secondary-color: #f08080; | |
--text-color: #333; | |
--text-secondary: #777; | |
--bg-color: #fff; | |
} | |
body { | |
font-family: "Noto Sans SC", "Arial", sans-serif; | |
color: var(--text-color); | |
margin: 0; | |
padding: 0; | |
display: flex; | |
justify-content: center; | |
background-color: #fafafa; | |
background-image: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); | |
position: relative; | |
overflow: hidden; | |
} | |
body::before { | |
content: ""; | |
position: absolute; | |
top: 0; | |
left: 0; | |
right: 0; | |
bottom: 0; | |
background: repeating-radial-gradient( | |
circle at 50% 50%, | |
rgba(255, 255, 255, 0.8) 0%, | |
rgba(255, 255, 255, 0.8) 2%, | |
transparent 2%, | |
transparent 4%, | |
rgba(255, 255, 255, 0.8) 4%, | |
rgba(255, 255, 255, 0.8) 6%, | |
transparent 6%, | |
transparent 8%, | |
rgba(255, 255, 255, 0.8) 8%, | |
rgba(255, 255, 255, 0.8) 10%, | |
transparent 10% | |
), | |
repeating-linear-gradient( | |
45deg, | |
#d4f4ff 0%, | |
#d4f4ff 5%, | |
#e6f9ff 5%, | |
#e6f9ff 10%, | |
#f0faff 10%, | |
#f0faff 15%, | |
#e6f9ff 15%, | |
#e6f9ff 20%, | |
#d4f4ff 20%, | |
#d4f4ff 25% | |
); | |
background-blend-mode: multiply; | |
opacity: 0.8; | |
z-index: -1; | |
animation: glitch 15s infinite; | |
} | |
.container { | |
display: flex; | |
max-width: 1200px; | |
width: 100%; | |
margin: 40px; | |
background: #fff; | |
border-radius: 12px; | |
position: relative; | |
background-color: rgba(255, 255, 255, 0.8); | |
z-index: 1; | |
} | |
@keyframes glitch { | |
0% { | |
background-position: 0 0, 0 0; | |
filter: hue-rotate(0deg); | |
} | |
50% { | |
background-position: 10px 10px, -10px 10px; | |
filter: hue-rotate(360deg); | |
} | |
100% { | |
background-position: 0 0, 0 0; | |
filter: hue-rotate(0deg); | |
} | |
} | |
.input-area, | |
.output-area { | |
padding: 30px; | |
width: 50%; | |
} | |
.input-area { | |
border-right: 1px solid #e0e0e0; | |
} | |
h1 { | |
font-size: 36px; | |
color: var(--primary-color); | |
margin-bottom: 30px; | |
} | |
.filter-section { | |
margin-bottom: 30px; | |
} | |
.filter-section label { | |
display: block; | |
font-size: 16px; | |
color: var(--text-secondary); | |
margin-bottom: 10px; | |
} | |
.filter-section input { | |
font-size: 16px; | |
padding: 10px 15px; | |
border: 2px solid var(--primary-light); | |
border-radius: 8px; | |
outline: none; | |
transition: border-color 0.3s, box-shadow 0.3s; | |
width: 100%; | |
box-sizing: border-box; | |
} | |
.filter-section input:focus { | |
border-color: var(--primary-color); | |
box-shadow: 0 0 0 2px var(--primary-light); | |
} | |
.slider-container { | |
margin-bottom: 30px; | |
} | |
.slider-container label { | |
display: block; | |
font-size: 16px; | |
color: var(--text-secondary); | |
margin-bottom: 10px; | |
} | |
.slider { | |
-webkit-appearance: none; | |
width: 100%; | |
height: 10px; | |
border-radius: 5px; | |
background: linear-gradient( | |
to right, | |
var(--secondary-color) 0%, | |
var(--primary-color) 50%, | |
var(--primary-light) 100% | |
); | |
box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1), | |
0 1px rgba(255, 255, 255, 0.1); | |
outline: none; | |
opacity: 0.7; | |
-webkit-transition: 0.2s; | |
transition: opacity 0.2s; | |
margin-bottom: 10px; | |
} | |
.slider:hover { | |
opacity: 1; | |
} | |
.slider::-webkit-slider-thumb { | |
-webkit-appearance: none; | |
appearance: none; | |
width: 20px; | |
height: 20px; | |
border-radius: 50%; | |
background: #fff; | |
border: 2px solid var(--primary-color); | |
cursor: pointer; | |
} | |
.slider::-moz-range-thumb { | |
width: 20px; | |
height: 20px; | |
border-radius: 50%; | |
background: #fff; | |
border: 2px solid var(--primary-color); | |
cursor: pointer; | |
} | |
.slider-value { | |
font-size: 14px; | |
color: var(--text-secondary); | |
} | |
.textarea-container { | |
margin-bottom: 30px; | |
} | |
.textarea-container label { | |
display: block; | |
font-size: 18px; | |
margin-bottom: 10px; | |
} | |
.textarea-container textarea { | |
width: 100%; | |
padding: 10px; | |
font-size: 16px; | |
border: 2px solid var(--primary-light); | |
border-radius: 8px; | |
outline: none; | |
resize: vertical; | |
transition: border-color 0.3s, box-shadow 0.3s; | |
box-sizing: border-box; | |
height: 200px; | |
} | |
.textarea-container textarea:focus { | |
border-color: var(--primary-color); | |
box-shadow: 0 0 0 2px var(--primary-light); | |
} | |
.voice-group { | |
margin-bottom: 20px; | |
border: 2px solid var(--primary-light); | |
border-radius: 12px; | |
overflow: hidden; | |
cursor: move; | |
background: #fff; | |
} | |
.voice-header { | |
padding: 15px 20px; | |
font-size: 18px; | |
background: var(--primary-light); | |
color: #fff; | |
cursor: pointer; | |
display: flex; | |
justify-content: space-between; | |
align-items: center; | |
} | |
.voice-header:hover { | |
background: var(--primary-color); | |
} | |
.voice-buttons { | |
padding: 20px; | |
display: none; | |
gap: 12px; | |
flex-wrap: wrap; | |
} | |
.voice-button { | |
background: var(--secondary-color); | |
color: #fff; | |
border: none; | |
padding: 10px 20px; | |
border-radius: 50px; | |
cursor: pointer; | |
transition: filter 0.3s; | |
} | |
.voice-button:hover { | |
filter: brightness(0.9); | |
} | |
.chevron { | |
transition: transform 0.3s; | |
} | |
.voice-group.open .voice-buttons { | |
display: flex; | |
} | |
.voice-group.open .chevron { | |
transform: rotate(180deg); | |
} | |
.dragging { | |
opacity: 0.5; | |
} | |
</style> | |
</head> | |
<body> | |
<div class="container"> | |
<div class="input-area"> | |
<div class="filter-section"> | |
<label for="keywords">фильтр по языкам:</label | |
><input type="text" id="keywords" value="multilingual,-RU" /> | |
</div> | |
<div class="slider-container"> | |
<label for="rate">скорость:</label | |
><input | |
type="range" | |
min="-1" | |
max="1" | |
step="0.1" | |
value="-0.1" | |
class="slider" | |
id="rate" | |
/> | |
<div class="slider-value" id="rateValue">-0.1</div> | |
<label for="pitch">тон:</label | |
><input | |
type="range" | |
min="-1" | |
max="1" | |
step="0.1" | |
value="0.1" | |
class="slider" | |
id="pitch" | |
/> | |
<div class="slider-value" id="pitchValue">0.1</div> | |
</div> | |
<div class="textarea-container"> | |
<label for="inputText">текст:</label | |
><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea> | |
</div> | |
</div> | |
<div class="output-area"> | |
<h1>голос</h1> | |
<div id="voices"></div> | |
</div> | |
</div> | |
<script>const voiceList = ${JSON.stringify(groupedVoiceList)};let audio=null;function filterVoices(){const keywords=document.getElementById('keywords').value.split(',').map(k=>k.trim().toLowerCase());const voicesDiv=document.getElementById('voices');voicesDiv.innerHTML='';const filteredVoices={};for(const[locale,voices]of Object.entries(voiceList)){const filtered=voices.filter(({name,friendlyName})=>keywords.some(keyword=>name.toLowerCase().includes(keyword)||friendlyName.toLowerCase().includes(keyword)));if(filtered.length>0){filteredVoices[locale]=filtered}}for(const[locale,voices]of Object.entries(filteredVoices)){const group=document.createElement('div');group.className='voice-group';group.draggable=true;const header=document.createElement('div');header.className='voice-header';header.textContent=locale.toUpperCase();const chevron=document.createElement('span');chevron.className='chevron';chevron.innerHTML='▼';header.appendChild(chevron);const buttonsContainer=document.createElement('div');buttonsContainer.className='voice-buttons';voices.forEach(({model,name})=>{const button=document.createElement('button');button.className='voice-button';button.textContent=name;button.onclick=()=>synthesize(model);buttonsContainer.appendChild(button)});header.onclick=()=>{group.classList.toggle('open')};group.appendChild(header);group.appendChild(buttonsContainer);voicesDiv.appendChild(group)}addDragDropListeners()}function synthesize(model){const text=document.getElementById('inputText').value||'Hello world';const rate=document.getElementById('rate').value||'-0.1';const pitch=document.getElementById('pitch').value||'0.1';const voice=\`rate:\${rate}|pitch:\${pitch}\`;if(audio){audio.pause();audio.currentTime=0}fetch('/v1/audio/speech',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({model,input:text,voice})}).then(response=>response.blob()).then(blob=>{const audioUrl=URL.createObjectURL(blob);audio=new Audio(audioUrl);audio.play()})}function 
addDragDropListeners(){const voicesDiv=document.getElementById('voices');let draggedItem=null;voicesDiv.addEventListener('dragstart',e=>{draggedItem=e.target;e.target.classList.add('dragging')});voicesDiv.addEventListener('dragend',e=>{e.target.classList.remove('dragging');draggedItem=null});voicesDiv.addEventListener('dragover',e=>{e.preventDefault();const afterElement=getDragAfterElement(voicesDiv,e.clientY);if(afterElement==null){voicesDiv.appendChild(draggedItem)}else{voicesDiv.insertBefore(draggedItem,afterElement)}})}function getDragAfterElement(container,y){const draggableElements=[...container.querySelectorAll('.voice-group:not(.dragging)')];return draggableElements.reduce((closest,child)=>{const box=child.getBoundingClientRect();const offset=y-box.top-box.height/2;if(offset<0&&offset>closest.offset){return{offset:offset,element:child}}else{return closest}},{offset:Number.NEGATIVE_INFINITY}).element}filterVoices();document.getElementById('keywords').addEventListener('input',filterVoices);const rateSlider=document.getElementById('rate');const rateValue=document.getElementById('rateValue');rateSlider.oninput=function(){rateValue.innerHTML=this.value};const pitchSlider=document.getElementById('pitch');const pitchValue=document.getElementById('pitchValue');pitchSlider.oninput=function(){pitchValue.innerHTML=this.value}</script> | |
</body></html>`; | |
return new Response(html, { | |
headers: { "Content-Type": "text/html" }, | |
}); | |
} | |
serve(async (req) => { | |
try { | |
const url = new URL(req.url); | |
if (url.pathname === "/") { | |
return handleDemoRequest(req); | |
} | |
if (url.pathname === "/tts") { | |
return handleDebugRequest(req); | |
} | |
if (url.pathname !== "/v1/audio/speech") { | |
console.log(`Unhandled path ${url.pathname}`); | |
return new Response("Not Found", { status: 404 }); | |
} | |
return handleSynthesisRequest(req); | |
} catch (err) { | |
console.error(`Error processing request: ${err.message}`); | |
return new Response(`Internal Server Error\n${err.message}`, { | |
status: 500, | |
}); | |
} | |
}); |