gnilets committed on
Commit
ecced97
·
verified ·
1 Parent(s): 33357e8

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +448 -153
main.ts CHANGED
@@ -1,154 +1,449 @@
1
- import { serve } from "https://deno.land/std/http/server.ts";
2
- import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";
3
-
4
- const AUTH_TOKEN = Deno.env.get("AUTH_TOKEN");
5
- const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
6
-
7
- async function fetchVoiceList() {
8
- const response = await fetch(VOICES_URL);
9
- const voices = await response.json();
10
- return voices.reduce((acc: Record<string, { model: string, name: string, friendlyName: string, locale: string }[]>, voice: any) => {
11
- const { ShortName: model, ShortName: name, FriendlyName: friendlyName, Locale: locale } = voice;
12
- if (!acc[locale]) acc[locale] = [];
13
- acc[locale].push({ model, name, friendlyName, locale });
14
- return acc;
15
- }, {});
16
- }
17
-
18
- async function synthesizeSpeech(model: string, voice: string, text: string) {
19
- let voiceName;
20
- let rate = 0;
21
- let pitch = 0;
22
-
23
- if (model.includes("tts")) {
24
- rate = 0.1;
25
- pitch = 0.2;
26
-
27
- switch (voice) {
28
- case "alloy":
29
- voiceName = "zh-CN-YunjianNeural";
30
- break;
31
- case "echo":
32
- voiceName = "zh-CN-YunyangNeural";
33
- break;
34
- case "fable":
35
- voiceName = "zh-CN-XiaoxiaoNeural";
36
- break;
37
- default:
38
- voiceName = "zh-CN-YunxiNeural";
39
- break;
40
- }
41
- } else {
42
- voiceName = model;
43
- const params = Object.fromEntries(
44
- voice.split("|").map((p) => p.split(":") as [string, string])
45
- );
46
- rate = Number(params["rate"] || 0);
47
- pitch = Number(params["pitch"] || 0);
48
- }
49
-
50
- const tts = new EdgeSpeechTTS();
51
-
52
- const payload = {
53
- input: text,
54
- options: {
55
- rate: rate,
56
- pitch: pitch,
57
- voice: voiceName
58
- },
59
- };
60
- const response = await tts.create(payload);
61
- const mp3Buffer = new Uint8Array(await response.arrayBuffer());
62
-
63
- console.log(`Successfully synthesized speech, returning audio/mpeg response`);
64
- return new Response(mp3Buffer, {
65
- headers: { "Content-Type": "audio/mpeg" },
66
- });
67
- }
68
-
69
- function unauthorized(req: Request) {
70
- const authHeader = req.headers.get("Authorization");
71
- return AUTH_TOKEN && authHeader !== `Bearer ${AUTH_TOKEN}`;
72
- }
73
-
74
- function validateContentType(req: Request, expected: string) {
75
- const contentType = req.headers.get("Content-Type");
76
- if (contentType !== expected) {
77
- console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
78
- return new Response("Bad Request", { status: 400 });
79
- }
80
- }
81
-
82
- async function handleDebugRequest(req: Request) {
83
- const url = new URL(req.url);
84
- const voice = url.searchParams.get("voice") || "";
85
- const model = url.searchParams.get("model") || "";
86
- const text = url.searchParams.get("text") || "";
87
-
88
- console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);
89
-
90
- if (!voice || !model || !text) {
91
- console.log("Missing required parameters");
92
- return new Response("Bad Request", { status: 400 });
93
- }
94
-
95
- return synthesizeSpeech(model, voice, text);
96
- }
97
-
98
- async function handleSynthesisRequest(req: Request) {
99
- if (unauthorized(req)) {
100
- console.log("Unauthorized request");
101
- return new Response("Unauthorized", { status: 401 });
102
- }
103
-
104
- if (req.method !== "POST") {
105
- console.log(`Invalid method ${req.method}, expected POST`);
106
- return new Response("Method Not Allowed", { status: 405 });
107
- }
108
-
109
- const invalidContentType = validateContentType(req, "application/json");
110
- if (invalidContentType) return invalidContentType;
111
-
112
- const { model, input, voice } = await req.json();
113
- console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);
114
-
115
- return synthesizeSpeech(model, voice, input);
116
- }
117
-
118
-
119
- async function handleDemoRequest(req: Request) {
120
- const groupedVoiceList = await fetchVoiceList();
121
-
122
- const html = `<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>语音合成演示</title><link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@400;700&display=swap" rel="stylesheet"><style>:root{--primary-color:#6c8bd6;--primary-light:#a2b3e3;--primary-dark:#3d5b8f;--secondary-color:#f08080;--text-color:#333;--text-secondary:#777;--bg-color:#fff}body{font-family:'Noto Sans SC','Arial',sans-serif;color:var(--text-color);margin:0;padding:0;display:flex;justify-content:center;background-color:#fafafa;background-image:linear-gradient(135deg,#f5f7fa 0%,#c3cfe2 100%);position:relative;overflow:hidden}body::before{content:"";position:absolute;top:0;left:0;right:0;bottom:0;background:repeating-radial-gradient(circle at 50% 50%,rgba(255,255,255,0.8) 0%,rgba(255,255,255,0.8) 2%,transparent 2%,transparent 4%,rgba(255,255,255,0.8) 4%,rgba(255,255,255,0.8) 6%,transparent 6%,transparent 8%,rgba(255,255,255,0.8) 8%,rgba(255,255,255,0.8) 10%,transparent 10%),repeating-linear-gradient(45deg,#D4F4FF 0%,#D4F4FF 5%,#E6F9FF 5%,#E6F9FF 10%,#F0FAFF 10%,#F0FAFF 15%,#E6F9FF 15%,#E6F9FF 20%,#D4F4FF 20%,#D4F4FF 25%);background-blend-mode:multiply;opacity:0.8;z-index:-1;animation:glitch 15s infinite}.container{display:flex;max-width:1200px;width:100%;margin:40px;background:#fff;border-radius:12px;position:relative;background-color:rgba(255,255,255,0.8);z-index:1}@keyframes glitch{0%{background-position:0 0,0 0;filter:hue-rotate(0deg)}50%{background-position:10px 10px,-10px 10px;filter:hue-rotate(360deg)}100%{background-position:0 0,0 0;filter:hue-rotate(0deg)}}.input-area,.output-area{padding:30px;width:50%}.input-area{border-right:1px solid #E0E0E0}h1{font-size:36px;color:var(--primary-color);margin-bottom:30px}.filter-section{margin-bottom:30px}.filter-section label{display:block;font-size:16px;color:var(--text-secondary);margin-bottom:10px}.filter-section input{font-size:16px;padding:10px 
15px;border:2px solid var(--primary-light);border-radius:8px;outline:none;transition:border-color .3s,box-shadow .3s;width:100%;box-sizing:border-box}.filter-section input:focus{border-color:var(--primary-color);box-shadow:0 0 0 2px var(--primary-light)}.slider-container{margin-bottom:30px}.slider-container label{display:block;font-size:16px;color:var(--text-secondary);margin-bottom:10px}.slider{-webkit-appearance:none;width:100%;height:10px;border-radius:5px;background:linear-gradient(to right,var(--secondary-color) 0%,var(--primary-color) 50%,var(--primary-light) 100%);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px rgba(255,255,255,0.1);outline:none;opacity:0.7;-webkit-transition:.2s;transition:opacity .2s;margin-bottom:10px}.slider:hover{opacity:1}.slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none;width:20px;height:20px;border-radius:50%;background:#fff;border:2px solid var(--primary-color);cursor:pointer}.slider::-moz-range-thumb{width:20px;height:20px;border-radius:50%;background:#fff;border:2px solid var(--primary-color);cursor:pointer}.slider-value{font-size:14px;color:var(--text-secondary)}.textarea-container{margin-bottom:30px}.textarea-container label{display:block;font-size:18px;margin-bottom:10px}.textarea-container textarea{width:100%;padding:10px;font-size:16px;border:2px solid var(--primary-light);border-radius:8px;outline:none;resize:vertical;transition:border-color .3s,box-shadow .3s;box-sizing:border-box;height:200px}.textarea-container textarea:focus{border-color:var(--primary-color);box-shadow:0 0 0 2px var(--primary-light)}.voice-group{margin-bottom:20px;border:2px solid var(--primary-light);border-radius:12px;overflow:hidden;cursor:move;background:#fff}.voice-header{padding:15px 
20px;font-size:18px;background:var(--primary-light);color:#fff;cursor:pointer;display:flex;justify-content:space-between;align-items:center}.voice-header:hover{background:var(--primary-color)}.voice-buttons{padding:20px;display:none;gap:12px;flex-wrap:wrap}.voice-button{background:var(--secondary-color);color:#fff;border:none;padding:10px 20px;border-radius:50px;cursor:pointer;transition:filter .3s}.voice-button:hover{filter:brightness(0.9)}.chevron{transition:transform .3s}.voice-group.open .voice-buttons{display:flex}.voice-group.open .chevron{transform:rotate(180deg)}.dragging{opacity:0.5}</style></head><body><div class="container"><div class="input-area"><h1>输入文本</h1><div class="filter-section"><label for="keywords">Speaker筛选:</label><input type="text" id="keywords" value="multilingual,-TW,-CN"></div><div class="slider-container"><label for="rate">语速:</label><input type="range" min="-1" max="1" step="0.1" value="-0.1" class="slider" id="rate"><div class="slider-value" id="rateValue">-0.1</div><label for="pitch">音调:</label><input type="range" min="-1" max="1" step="0.1" value="0.1" class="slider" id="pitch"><div class="slider-value" id="pitchValue">0.1</div></div><div class="textarea-container"><label for="inputText">输入文本:</label><textarea id="inputText">Hello world</textarea></div></div><div class="output-area"><h1>选择语音</h1><div id="voices"></div></div></div><script>const voiceList = ${JSON.stringify(groupedVoiceList)};let audio=null;function filterVoices(){const keywords=document.getElementById('keywords').value.split(',').map(k=>k.trim().toLowerCase());const voicesDiv=document.getElementById('voices');voicesDiv.innerHTML='';const filteredVoices={};for(const[locale,voices]of Object.entries(voiceList)){const filtered=voices.filter(({name,friendlyName})=>keywords.some(keyword=>name.toLowerCase().includes(keyword)||friendlyName.toLowerCase().includes(keyword)));if(filtered.length>0){filteredVoices[locale]=filtered}}for(const[locale,voices]of 
Object.entries(filteredVoices)){const group=document.createElement('div');group.className='voice-group';group.draggable=true;const header=document.createElement('div');header.className='voice-header';header.textContent=locale.toUpperCase();const chevron=document.createElement('span');chevron.className='chevron';chevron.innerHTML='&#9660;';header.appendChild(chevron);const buttonsContainer=document.createElement('div');buttonsContainer.className='voice-buttons';voices.forEach(({model,name})=>{const button=document.createElement('button');button.className='voice-button';button.textContent=name;button.onclick=()=>synthesize(model);buttonsContainer.appendChild(button)});header.onclick=()=>{group.classList.toggle('open')};group.appendChild(header);group.appendChild(buttonsContainer);voicesDiv.appendChild(group)}addDragDropListeners()}function synthesize(model){const text=document.getElementById('inputText').value||'Hello world';const rate=document.getElementById('rate').value||'-0.1';const pitch=document.getElementById('pitch').value||'0.1';const voice=\`rate:\${rate}|pitch:\${pitch}\`;if(audio){audio.pause();audio.currentTime=0}fetch('/v1/audio/speech',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({model,input:text,voice})}).then(response=>response.blob()).then(blob=>{const audioUrl=URL.createObjectURL(blob);audio=new Audio(audioUrl);audio.play()})}function addDragDropListeners(){const voicesDiv=document.getElementById('voices');let draggedItem=null;voicesDiv.addEventListener('dragstart',e=>{draggedItem=e.target;e.target.classList.add('dragging')});voicesDiv.addEventListener('dragend',e=>{e.target.classList.remove('dragging');draggedItem=null});voicesDiv.addEventListener('dragover',e=>{e.preventDefault();const afterElement=getDragAfterElement(voicesDiv,e.clientY);if(afterElement==null){voicesDiv.appendChild(draggedItem)}else{voicesDiv.insertBefore(draggedItem,afterElement)}})}function getDragAfterElement(container,y){const 
draggableElements=[...container.querySelectorAll('.voice-group:not(.dragging)')];return draggableElements.reduce((closest,child)=>{const box=child.getBoundingClientRect();const offset=y-box.top-box.height/2;if(offset<0&&offset>closest.offset){return{offset:offset,element:child}}else{return closest}},{offset:Number.NEGATIVE_INFINITY}).element}filterVoices();document.getElementById('keywords').addEventListener('input',filterVoices);const rateSlider=document.getElementById('rate');const rateValue=document.getElementById('rateValue');rateSlider.oninput=function(){rateValue.innerHTML=this.value};const pitchSlider=document.getElementById('pitch');const pitchValue=document.getElementById('pitchValue');pitchSlider.oninput=function(){pitchValue.innerHTML=this.value}</script></body></html>`;
123
-
124
- return new Response(html, {
125
- headers: { "Content-Type": "text/html" },
126
- });
127
- }
128
-
129
-
130
- serve(async (req) => {
131
- try {
132
- const url = new URL(req.url);
133
-
134
- if (url.pathname === "/") {
135
- return handleDemoRequest(req);
136
- }
137
-
138
- if (url.pathname === "/tts") {
139
- return handleDebugRequest(req);
140
- }
141
-
142
- if (url.pathname !== "/v1/audio/speech") {
143
- console.log(`Unhandled path ${url.pathname}`);
144
- return new Response("Not Found", { status: 404 });
145
- }
146
-
147
- return handleSynthesisRequest(req);
148
- } catch (err) {
149
- console.error(`Error processing request: ${err.message}`);
150
- return new Response(`Internal Server Error\n${err.message}`, {
151
- status: 500,
152
- });
153
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  });
 
1
+ import { serve } from "https://deno.land/std/http/server.ts";
2
+ import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";
3
+
4
+ const AUTH_TOKEN = Deno.env.get("AUTH_TOKEN");
5
+ const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
6
+
7
/**
 * Downloads the voice catalogue from VOICES_URL and groups it by locale.
 *
 * For every upstream entry, `ShortName` is used as both `model` and `name`,
 * `FriendlyName` becomes `friendlyName` and `Locale` becomes `locale`.
 *
 * @returns An object mapping each locale to its voices, in upstream order.
 * @throws Error when the endpoint answers with a non-2xx status or when the
 *   decoded body is not an array.
 */
async function fetchVoiceList() {
  type VoiceEntry = {
    model: string;
    name: string;
    friendlyName: string;
    locale: string;
  };

  const response = await fetch(VOICES_URL);
  if (!response.ok) {
    // Release the unread body before bailing out so the connection is not held open.
    await response.body?.cancel();
    throw new Error(`Voice list request failed with status ${response.status}`);
  }

  const voices: unknown = await response.json();
  if (!Array.isArray(voices)) {
    throw new Error("Voice list response is not an array");
  }

  const grouped: Record<string, VoiceEntry[]> = {};
  for (const voice of voices) {
    const { ShortName: model, FriendlyName: friendlyName, Locale: locale } = voice;
    if (!grouped[locale]) grouped[locale] = [];
    grouped[locale].push({ model, name: model, friendlyName, locale });
  }
  return grouped;
}
17
+
18
/**
 * Synthesizes `text` through EdgeSpeechTTS and returns the audio as a Response
 * labelled `audio/mpeg`.
 *
 * Two request styles are recognised:
 * - `model` contains "tts": `voice` is treated as an alias ("alloy", "echo",
 *   "fable"; anything else falls back to the default voice) and fixed
 *   rate/pitch values are used.
 * - otherwise: `model` itself is the voice name and `voice` is parsed as
 *   `key:value` pairs separated by `|`, from which `rate` and `pitch` are read
 *   (missing keys count as 0).
 *
 * @param model Model id containing "tts", or a concrete voice name.
 * @param voice Voice alias, or a `rate:<n>|pitch:<n>` settings string.
 * @param text  Text to synthesize.
 * @returns Response whose body is the synthesized audio bytes.
 */
async function synthesizeSpeech(model: string, voice: string, text: string) {
  let voiceName: string;
  let rate: number;
  let pitch: number;

  if (model.includes("tts")) {
    const aliasToVoice = new Map<string, string>([
      ["alloy", "zh-CN-YunjianNeural"],
      ["echo", "zh-CN-YunyangNeural"],
      ["fable", "zh-CN-XiaoxiaoNeural"],
    ]);
    voiceName = aliasToVoice.get(voice) ?? "zh-CN-YunxiNeural";
    rate = 0.1;
    pitch = 0.2;
  } else {
    voiceName = model;
    const pairs = voice
      .split("|")
      .map((segment) => segment.split(":") as [string, string]);
    const settings = Object.fromEntries(pairs);
    rate = Number(settings["rate"] || 0);
    pitch = Number(settings["pitch"] || 0);
  }

  const engine = new EdgeSpeechTTS();
  const synthesized = await engine.create({
    input: text,
    options: { rate, pitch, voice: voiceName },
  });
  const audioBytes = new Uint8Array(await synthesized.arrayBuffer());

  console.log(`Successfully synthesized speech, returning audio/mpeg response`);
  return new Response(audioBytes, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}
68
+
69
/**
 * Checks that the request's Content-Type media type equals `expected`.
 *
 * Only the media type is compared: parameters after `;` (for example
 * `charset=utf-8`) are ignored and the comparison is case-insensitive, so
 * `Application/JSON; charset=utf-8` satisfies `application/json`.
 *
 * @param req      Incoming request whose `Content-Type` header is inspected.
 * @param expected Media type the caller requires, without parameters.
 * @returns A 400 "Bad Request" response when the header is missing or names a
 *   different media type; `undefined` when the header is acceptable.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  const [rawMediaType = ""] = (contentType ?? "").split(";");
  const mediaType = rawMediaType.trim().toLowerCase();

  if (mediaType !== expected.trim().toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", { status: 400 });
  }
  return undefined;
}
76
+
77
/**
 * Handles a synthesis request whose inputs arrive as query-string parameters.
 *
 * Reads `model`, `voice` and `text` from the request URL; when any of them is
 * absent or empty a 400 response is returned, otherwise the values are handed
 * to synthesizeSpeech.
 *
 * @param req Incoming request; only its URL is inspected.
 * @returns The synthesis response, or 400 "Bad Request" on missing parameters.
 */
async function handleDebugRequest(req: Request) {
  const { searchParams } = new URL(req.url);
  // A missing parameter is treated the same as an empty one.
  const readParam = (key: string) => searchParams.get(key) || "";

  const voice = readParam("voice");
  const model = readParam("model");
  const text = readParam("text");

  console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);

  const hasAllParams = Boolean(voice && model && text);
  if (hasAllParams) {
    return synthesizeSpeech(model, voice, text);
  }

  console.log("Missing required parameters");
  return new Response("Bad Request", { status: 400 });
}
92
+
93
/**
 * Handles `POST` synthesis requests carrying a JSON body of the form
 * `{ model, input, voice }`.
 *
 * Validation performed before synthesis:
 * - method must be POST (otherwise 405 with an `Allow: POST` header);
 * - Content-Type must pass validateContentType for `application/json`;
 * - the body must be a JSON object whose `model` and `input` are strings and
 *   whose `voice`, when present, is a string (otherwise 400).
 *
 * NOTE(review): no Authorization check happens here even though AUTH_TOKEN is
 * still read at module level — confirm that leaving this endpoint open is
 * intended.
 *
 * @param req Incoming request.
 * @returns The synthesis response, or a 4xx response describing the rejection.
 */
async function handleSynthesisRequest(req: Request) {
  if (req.method !== "POST") {
    console.log(`Invalid method ${req.method}, expected POST`);
    return new Response("Method Not Allowed", {
      status: 405,
      headers: { Allow: "POST" },
    });
  }

  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    // Malformed JSON is a client error, not a server failure.
    console.log("Request body is not valid JSON");
    return new Response("Bad Request", { status: 400 });
  }

  if (typeof body !== "object" || body === null) {
    console.log("Request body is not a JSON object");
    return new Response("Bad Request", { status: 400 });
  }

  const { model, input, voice: rawVoice } = body as Record<string, unknown>;
  // An omitted voice keeps working: the "tts" path falls back to its default
  // voice and the settings path reads rate/pitch as 0.
  const voice = rawVoice ?? "";

  if (typeof model !== "string" || typeof input !== "string" || typeof voice !== "string") {
    console.log("Missing or non-string model/input/voice in request body");
    return new Response("Bad Request", { status: 400 });
  }

  console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);

  return synthesizeSpeech(model, voice, input);
}
108
+
109
+
110
/**
 * Serves the interactive demo page.
 *
 * The page embeds the voice list returned by fetchVoiceList, lets the user
 * filter voices by keyword, pick rate/pitch with sliders, and plays audio
 * obtained by POSTing `{ model, input, voice }` to `/v1/audio/speech`.
 *
 * Notes on the generated markup:
 * - the document is one template literal; no string-concatenation fragments
 *   are left inside it, so nothing but the intended markup reaches the browser;
 * - `<` inside the inlined JSON is emitted as `\u003c`, so voice data cannot
 *   close the surrounding `<script>` element;
 * - the client script avoids backticks and `$`-brace sequences so it needs no
 *   escaping inside the template literal.
 *
 * @param req Incoming request (not inspected).
 * @returns An HTML response containing the demo page.
 * @throws Whatever fetchVoiceList throws when the voice list cannot be loaded.
 */
async function handleDemoRequest(req: Request) {
  const groupedVoiceList = await fetchVoiceList();
  // Escape "<" so a value such as "</script>" cannot terminate the inline script.
  const voiceListJson = JSON.stringify(groupedVoiceList).replace(/</g, "\\u003c");

  const html = `<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>tts</title>
    <link
      href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@400;700&display=swap"
      rel="stylesheet"
    />
    <style>
      :root {
        --primary-color: #6c8bd6;
        --primary-light: #a2b3e3;
        --primary-dark: #3d5b8f;
        --secondary-color: #f08080;
        --text-color: #333;
        --text-secondary: #777;
        --bg-color: #fff;
      }
      body {
        font-family: "Noto Sans SC", "Arial", sans-serif;
        color: var(--text-color);
        margin: 0;
        padding: 0;
        display: flex;
        justify-content: center;
        background-color: #fafafa;
        background-image: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        position: relative;
        overflow: hidden;
      }
      body::before {
        content: "";
        position: absolute;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        background: repeating-radial-gradient(
            circle at 50% 50%,
            rgba(255, 255, 255, 0.8) 0%,
            rgba(255, 255, 255, 0.8) 2%,
            transparent 2%,
            transparent 4%,
            rgba(255, 255, 255, 0.8) 4%,
            rgba(255, 255, 255, 0.8) 6%,
            transparent 6%,
            transparent 8%,
            rgba(255, 255, 255, 0.8) 8%,
            rgba(255, 255, 255, 0.8) 10%,
            transparent 10%
          ),
          repeating-linear-gradient(
            45deg,
            #d4f4ff 0%,
            #d4f4ff 5%,
            #e6f9ff 5%,
            #e6f9ff 10%,
            #f0faff 10%,
            #f0faff 15%,
            #e6f9ff 15%,
            #e6f9ff 20%,
            #d4f4ff 20%,
            #d4f4ff 25%
          );
        background-blend-mode: multiply;
        opacity: 0.8;
        z-index: -1;
        animation: glitch 15s infinite;
      }
      .container {
        display: flex;
        max-width: 1200px;
        width: 100%;
        margin: 40px;
        background: #fff;
        border-radius: 12px;
        position: relative;
        background-color: rgba(255, 255, 255, 0.8);
        z-index: 1;
      }
      @keyframes glitch {
        0% {
          background-position: 0 0, 0 0;
          filter: hue-rotate(0deg);
        }
        50% {
          background-position: 10px 10px, -10px 10px;
          filter: hue-rotate(360deg);
        }
        100% {
          background-position: 0 0, 0 0;
          filter: hue-rotate(0deg);
        }
      }
      .input-area,
      .output-area {
        padding: 30px;
        width: 50%;
      }
      .input-area {
        border-right: 1px solid #e0e0e0;
      }
      h1 {
        font-size: 36px;
        color: var(--primary-color);
        margin-bottom: 30px;
      }
      .filter-section {
        margin-bottom: 30px;
      }
      .filter-section label {
        display: block;
        font-size: 16px;
        color: var(--text-secondary);
        margin-bottom: 10px;
      }
      .filter-section input {
        font-size: 16px;
        padding: 10px 15px;
        border: 2px solid var(--primary-light);
        border-radius: 8px;
        outline: none;
        transition: border-color 0.3s, box-shadow 0.3s;
        width: 100%;
        box-sizing: border-box;
      }
      .filter-section input:focus {
        border-color: var(--primary-color);
        box-shadow: 0 0 0 2px var(--primary-light);
      }
      .slider-container {
        margin-bottom: 30px;
      }
      .slider-container label {
        display: block;
        font-size: 16px;
        color: var(--text-secondary);
        margin-bottom: 10px;
      }
      .slider {
        -webkit-appearance: none;
        width: 100%;
        height: 10px;
        border-radius: 5px;
        background: linear-gradient(
          to right,
          var(--secondary-color) 0%,
          var(--primary-color) 50%,
          var(--primary-light) 100%
        );
        box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1),
          0 1px rgba(255, 255, 255, 0.1);
        outline: none;
        opacity: 0.7;
        -webkit-transition: 0.2s;
        transition: opacity 0.2s;
        margin-bottom: 10px;
      }
      .slider:hover {
        opacity: 1;
      }
      .slider::-webkit-slider-thumb {
        -webkit-appearance: none;
        appearance: none;
        width: 20px;
        height: 20px;
        border-radius: 50%;
        background: #fff;
        border: 2px solid var(--primary-color);
        cursor: pointer;
      }
      .slider::-moz-range-thumb {
        width: 20px;
        height: 20px;
        border-radius: 50%;
        background: #fff;
        border: 2px solid var(--primary-color);
        cursor: pointer;
      }
      .slider-value {
        font-size: 14px;
        color: var(--text-secondary);
      }
      .textarea-container {
        margin-bottom: 30px;
      }
      .textarea-container label {
        display: block;
        font-size: 18px;
        margin-bottom: 10px;
      }
      .textarea-container textarea {
        width: 100%;
        padding: 10px;
        font-size: 16px;
        border: 2px solid var(--primary-light);
        border-radius: 8px;
        outline: none;
        resize: vertical;
        transition: border-color 0.3s, box-shadow 0.3s;
        box-sizing: border-box;
        height: 200px;
      }
      .textarea-container textarea:focus {
        border-color: var(--primary-color);
        box-shadow: 0 0 0 2px var(--primary-light);
      }
      .voice-group {
        margin-bottom: 20px;
        border: 2px solid var(--primary-light);
        border-radius: 12px;
        overflow: hidden;
        cursor: move;
        background: #fff;
      }
      .voice-header {
        padding: 15px 20px;
        font-size: 18px;
        background: var(--primary-light);
        color: #fff;
        cursor: pointer;
        display: flex;
        justify-content: space-between;
        align-items: center;
      }
      .voice-header:hover {
        background: var(--primary-color);
      }
      .voice-buttons {
        padding: 20px;
        display: none;
        gap: 12px;
        flex-wrap: wrap;
      }
      .voice-button {
        background: var(--secondary-color);
        color: #fff;
        border: none;
        padding: 10px 20px;
        border-radius: 50px;
        cursor: pointer;
        transition: filter 0.3s;
      }
      .voice-button:hover {
        filter: brightness(0.9);
      }
      .chevron {
        transition: transform 0.3s;
      }
      .voice-group.open .voice-buttons {
        display: flex;
      }
      .voice-group.open .chevron {
        transform: rotate(180deg);
      }
      .dragging {
        opacity: 0.5;
      }
    </style>
  </head>
  <body>
    <div class="container">
      <div class="input-area">
        <div class="filter-section">
          <label for="keywords">фильтр по языкам:</label
          ><input type="text" id="keywords" value="multilingual,-RU" />
        </div>
        <div class="slider-container">
          <label for="rate">скорость:</label
          ><input
            type="range"
            min="-1"
            max="1"
            step="0.1"
            value="-0.1"
            class="slider"
            id="rate"
          />
          <div class="slider-value" id="rateValue">-0.1</div>
          <label for="pitch">тон:</label
          ><input
            type="range"
            min="-1"
            max="1"
            step="0.1"
            value="0.1"
            class="slider"
            id="pitch"
          />
          <div class="slider-value" id="pitchValue">0.1</div>
        </div>
        <div class="textarea-container">
          <label for="inputText">текст:</label
          ><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
        </div>
      </div>
      <div class="output-area">
        <h1>голос</h1>
        <div id="voices"></div>
      </div>
    </div>
    <script>
      const voiceList = ${voiceListJson};
      let audio = null;

      // Rebuilds the voice groups from the comma-separated keyword filter.
      function filterVoices() {
        const keywords = document
          .getElementById('keywords')
          .value.split(',')
          .map((k) => k.trim().toLowerCase());
        const voicesDiv = document.getElementById('voices');
        voicesDiv.innerHTML = '';
        const filteredVoices = {};
        for (const [locale, voices] of Object.entries(voiceList)) {
          const filtered = voices.filter(({ name, friendlyName }) =>
            keywords.some(
              (keyword) =>
                name.toLowerCase().includes(keyword) ||
                friendlyName.toLowerCase().includes(keyword)
            )
          );
          if (filtered.length > 0) {
            filteredVoices[locale] = filtered;
          }
        }
        for (const [locale, voices] of Object.entries(filteredVoices)) {
          const group = document.createElement('div');
          group.className = 'voice-group';
          group.draggable = true;
          const header = document.createElement('div');
          header.className = 'voice-header';
          header.textContent = locale.toUpperCase();
          const chevron = document.createElement('span');
          chevron.className = 'chevron';
          chevron.innerHTML = '&#9660;';
          header.appendChild(chevron);
          const buttonsContainer = document.createElement('div');
          buttonsContainer.className = 'voice-buttons';
          voices.forEach(({ model, name }) => {
            const button = document.createElement('button');
            button.className = 'voice-button';
            button.textContent = name;
            button.onclick = () => synthesize(model);
            buttonsContainer.appendChild(button);
          });
          header.onclick = () => {
            group.classList.toggle('open');
          };
          group.appendChild(header);
          group.appendChild(buttonsContainer);
          voicesDiv.appendChild(group);
        }
      }

      // Requests audio for the chosen voice and plays it, stopping any previous playback.
      function synthesize(model) {
        const text = document.getElementById('inputText').value || 'Hello world';
        const rate = document.getElementById('rate').value || '-0.1';
        const pitch = document.getElementById('pitch').value || '0.1';
        const voice = 'rate:' + rate + '|pitch:' + pitch;
        if (audio) {
          audio.pause();
          audio.currentTime = 0;
        }
        fetch('/v1/audio/speech', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ model, input: text, voice }),
        })
          .then((response) => response.blob())
          .then((blob) => {
            const audioUrl = URL.createObjectURL(blob);
            audio = new Audio(audioUrl);
            audio.play();
          });
      }

      // Registered once: the listeners sit on the container, which survives re-filtering.
      function addDragDropListeners() {
        const voicesDiv = document.getElementById('voices');
        let draggedItem = null;
        voicesDiv.addEventListener('dragstart', (e) => {
          draggedItem = e.target;
          e.target.classList.add('dragging');
        });
        voicesDiv.addEventListener('dragend', (e) => {
          e.target.classList.remove('dragging');
          draggedItem = null;
        });
        voicesDiv.addEventListener('dragover', (e) => {
          e.preventDefault();
          // Ignore drags that did not start on one of the voice groups.
          if (!draggedItem) {
            return;
          }
          const afterElement = getDragAfterElement(voicesDiv, e.clientY);
          if (afterElement == null) {
            voicesDiv.appendChild(draggedItem);
          } else {
            voicesDiv.insertBefore(draggedItem, afterElement);
          }
        });
      }

      // Finds the group whose vertical midpoint is closest below the pointer.
      function getDragAfterElement(container, y) {
        const draggableElements = [
          ...container.querySelectorAll('.voice-group:not(.dragging)'),
        ];
        return draggableElements.reduce(
          (closest, child) => {
            const box = child.getBoundingClientRect();
            const offset = y - box.top - box.height / 2;
            if (offset < 0 && offset > closest.offset) {
              return { offset: offset, element: child };
            } else {
              return closest;
            }
          },
          { offset: Number.NEGATIVE_INFINITY }
        ).element;
      }

      filterVoices();
      addDragDropListeners();
      document.getElementById('keywords').addEventListener('input', filterVoices);
      const rateSlider = document.getElementById('rate');
      const rateValue = document.getElementById('rateValue');
      rateSlider.oninput = function () {
        rateValue.innerHTML = this.value;
      };
      const pitchSlider = document.getElementById('pitch');
      const pitchValue = document.getElementById('pitchValue');
      pitchSlider.oninput = function () {
        pitchValue.innerHTML = this.value;
      };
    </script>
  </body>
</html>`;

  return new Response(html, {
    headers: { "Content-Type": "text/html" },
  });
}
423
+
424
+
425
// HTTP entry point. Routes by exact pathname:
//   "/"                 -> demo page
//   "/tts"              -> query-string synthesis
//   "/v1/audio/speech"  -> JSON synthesis
// Any other path yields 404. Each handler is awaited inside the try block so
// that a rejected handler promise reaches the catch below and is turned into
// the 500 response; returning the promise un-awaited would bypass the catch.
serve(async (req) => {
  try {
    const { pathname } = new URL(req.url);

    switch (pathname) {
      case "/":
        return await handleDemoRequest(req);
      case "/tts":
        return await handleDebugRequest(req);
      case "/v1/audio/speech":
        return await handleSynthesisRequest(req);
      default:
        console.log(`Unhandled path ${pathname}`);
        return new Response("Not Found", { status: 404 });
    }
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
});