gnilets committed on
Commit
a6b1dda
·
verified ·
1 Parent(s): 726bf86

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +113 -100
main.ts CHANGED
@@ -15,6 +15,119 @@ async function fetchVoiceList() {
15
  }, {});
16
  }
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  async function handleDemoRequest(req: Request) {
19
  const html = `<!DOCTYPE html>
20
  <html lang="en">
@@ -132,106 +245,6 @@ async function handleDemoRequest(req: Request) {
132
  }
133
 
134
 
135
- function validateContentType(req: Request, expected: string) {
136
- const contentType = req.headers.get("Content-Type");
137
- if (contentType !== expected) {
138
- console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
139
- return new Response("Bad Request", { status: 400 });
140
- }
141
- }
142
-
143
- async function handleDebugRequest(req: Request) {
144
- const url = new URL(req.url);
145
- const voice = url.searchParams.get("voice") || "";
146
- const model = url.searchParams.get("model") || "";
147
- const text = url.searchParams.get("text") || "";
148
-
149
- console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);
150
-
151
- if (!voice || !model || !text) {
152
- console.log("Missing required parameters");
153
- return new Response("Bad Request", { status: 400 });
154
- }
155
-
156
- return synthesizeSpeech(model, voice, text);
157
- }
158
-
159
- async function handleSynthesisRequest(req: Request) {
160
-
161
- if (req.method !== "POST") {
162
- console.log(`Invalid method ${req.method}, expected POST`);
163
- return new Response("Method Not Allowed", { status: 405 });
164
- }
165
-
166
- const invalidContentType = validateContentType(req, "application/json");
167
- if (invalidContentType) return invalidContentType;
168
-
169
- const { model, input, voice } = await req.json();
170
- console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);
171
-
172
- return synthesizeSpeech(model, voice, input);
173
- }
174
-
175
-
176
- async function handleDemoRequest(req: Request) {
177
- const groupedVoiceList = await fetchVoiceList();
178
-
179
- const html = `<!DOCTYPE html>
180
- <html lang="en">
181
- <head>
182
- <meta charset="UTF-8" />
183
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
184
- <title>tts</title>
185
- </head>
186
- <body>
187
- <div class="container">
188
- <div class="input-area">
189
- <div class="filter-section">
190
- <label for="keywords">фильтр по языкам:</label
191
- ><input type="text" id="keywords" value="multilingual,-RU" />
192
- </div>
193
- <div class="slider-container">
194
- <label for="rate">скорость:</label
195
- ><input
196
- type="range"
197
- min="-1"
198
- max="1"
199
- step="0.1"
200
- value="-0.1"
201
- class="slider"
202
- id="rate"
203
- />
204
- <div class="slider-value" id="rateValue">-0.1</div>
205
- <label for="pitch">тон:</label
206
- ><input
207
- type="range"
208
- min="-1"
209
- max="1"
210
- step="0.1"
211
- value="0.1"
212
- class="slider"
213
- id="pitch"
214
- />
215
- <div class="slider-value" id="pitchValue">0.1</div>
216
- </div>
217
- <div class="textarea-container">
218
- <label for="inputText">текст:</label
219
- ><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
220
- </div>
221
- </div>
222
- <div class="output-area">
223
- <h1>голос</h1>
224
- <div id="voices"></div>
225
- </div>
226
- </div>
227
- <script>const voiceList = ${JSON.stringify(groupedVoiceList)};let audio=null;function filterVoices(){const keywords=document.getElementById('keywords').value.split(',').map(k=>k.trim().toLowerCase());const voicesDiv=document.getElementById('voices');voicesDiv.innerHTML='';const filteredVoices={};for(const[locale,voices]of Object.entries(voiceList)){const filtered=voices.filter(({name,friendlyName})=>keywords.some(keyword=>name.toLowerCase().includes(keyword)||friendlyName.toLowerCase().includes(keyword)));if(filtered.length>0){filteredVoices[locale]=filtered}}for(const[locale,voices]of Object.entries(filteredVoices)){const group=document.createElement('div');group.className='voice-group';group.draggable=true;const header=document.createElement('div');header.className='voice-header';header.textContent=locale.toUpperCase();const chevron=document.createElement('span');chevron.className='chevron';chevron.innerHTML='&#9660;';header.appendChild(chevron);const buttonsContainer=document.createElement('div');buttonsContainer.className='voice-buttons';voices.forEach(({model,name})=>{const button=document.createElement('button');button.className='voice-button';button.textContent=name;button.onclick=()=>synthesize(model);buttonsContainer.appendChild(button)});header.onclick=()=>{group.classList.toggle('open')};group.appendChild(header);group.appendChild(buttonsContainer);voicesDiv.appendChild(group)}addDragDropListeners()}function synthesize(model){const text=document.getElementById('inputText').value||'Hello world';const rate=document.getElementById('rate').value||'-0.1';const pitch=document.getElementById('pitch').value||'0.1';const voice=\`rate:\${rate}|pitch:\${pitch}\`;if(audio){audio.pause();audio.currentTime=0}fetch('/v1/audio/speech',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({model,input:text,voice})}).then(response=>response.blob()).then(blob=>{const audioUrl=URL.createObjectURL(blob);audio=new 
Audio(audioUrl);audio.play()})}function addDragDropListeners(){const voicesDiv=document.getElementById('voices');let draggedItem=null;voicesDiv.addEventListener('dragstart',e=>{draggedItem=e.target;e.target.classList.add('dragging')});voicesDiv.addEventListener('dragend',e=>{e.target.classList.remove('dragging');draggedItem=null});voicesDiv.addEventListener('dragover',e=>{e.preventDefault();const afterElement=getDragAfterElement(voicesDiv,e.clientY);if(afterElement==null){voicesDiv.appendChild(draggedItem)}else{voicesDiv.insertBefore(draggedItem,afterElement)}})}function getDragAfterElement(container,y){const draggableElements=[...container.querySelectorAll('.voice-group:not(.dragging)')];return draggableElements.reduce((closest,child)=>{const box=child.getBoundingClientRect();const offset=y-box.top-box.height/2;if(offset<0&&offset>closest.offset){return{offset:offset,element:child}}else{return closest}},{offset:Number.NEGATIVE_INFINITY}).element}filterVoices();document.getElementById('keywords').addEventListener('input',filterVoices);const rateSlider=document.getElementById('rate');const rateValue=document.getElementById('rateValue');rateSlider.oninput=function(){rateValue.innerHTML=this.value};const pitchSlider=document.getElementById('pitch');const pitchValue=document.getElementById('pitchValue');pitchSlider.oninput=function(){pitchValue.innerHTML=this.value}</script>
228
- </body></html>`;
229
-
230
- return new Response(html, {
231
- headers: { "Content-Type": "text/html" },
232
- });
233
- }
234
-
235
 
236
  serve(async (req) => {
237
  try {
 
15
  }, {});
16
  }
17
 
18
+ async function synthesizeSpeech(model: string, voice: string, text: string) {
19
+ let voiceName;
20
+ let rate = 0;
21
+ let pitch = 0;
22
+
23
+ if (!model.includes("Neural")) {
24
+ rate = 0.1;
25
+ pitch = 0.2;
26
+
27
+ switch (model) {
28
+ case "ava":
29
+ voiceName = "en-US-AvaMultilingualNeural";
30
+ break;
31
+ case "andrew":
32
+ voiceName = "en-US-AndrewMultilingualNeural";
33
+ break;
34
+ case "emma":
35
+ voiceName = "en-US-EmmaMultilingualNeural";
36
+ break;
37
+ case "brian":
38
+ voiceName = "en-US-BrianMultilingualNeural";
39
+ break;
40
+ case "vivienne":
41
+ voiceName = "fr-FR-VivienneMultilingualNeural";
42
+ break;
43
+ case "remy":
44
+ voiceName = "fr-FR-RemyMultilingualNeural";
45
+ break;
46
+ case "seraphina":
47
+ voiceName = "de-DE-SeraphinaMultilingualNeural";
48
+ break;
49
+ case "florian":
50
+ voiceName = "de-DE-FlorianMultilingualNeural";
51
+ break;
52
+ case "dmitry":
53
+ voiceName = "ru-RU-DmitryNeural";
54
+ break;
55
+ case "svetlana":
56
+ voiceName = "ru-RU-SvetlanaNeural";
57
+ break;
58
+ default:
59
+ voiceName = "en-US-BrianMultilingualNeural";
60
+ break;
61
+ }
62
+ } else {
63
+ voiceName = model;
64
+ const params = Object.fromEntries(
65
+ voice.split("|").map((p) => p.split(":") as [string, string])
66
+ );
67
+ rate = Number(params["rate"] || 0);
68
+ pitch = Number(params["pitch"] || 0);
69
+ }
70
+
71
+ const tts = new EdgeSpeechTTS();
72
+
73
+ const payload = {
74
+ input: text,
75
+ options: {
76
+ rate: rate,
77
+ pitch: pitch,
78
+ voice: voiceName
79
+ },
80
+ };
81
+ const response = await tts.create(payload);
82
+ const mp3Buffer = new Uint8Array(await response.arrayBuffer());
83
+
84
+ console.log(`Successfully synthesized speech, returning audio/mpeg response`);
85
+ return new Response(mp3Buffer, {
86
+ headers: { "Content-Type": "audio/mpeg" },
87
+ });
88
+ }
89
+
90
+ function validateContentType(req: Request, expected: string) {
91
+ const contentType = req.headers.get("Content-Type");
92
+ if (contentType !== expected) {
93
+ console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
94
+ return new Response("Bad Request", { status: 400 });
95
+ }
96
+ }
97
+
98
+ async function handleDebugRequest(req: Request) {
99
+ const url = new URL(req.url);
100
+ const voice = url.searchParams.get("voice") || "";
101
+ const model = url.searchParams.get("model") || "";
102
+ const text = url.searchParams.get("text") || "";
103
+
104
+ console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);
105
+
106
+ if (!voice || !model || !text) {
107
+ console.log("Missing required parameters");
108
+ return new Response("Bad Request", { status: 400 });
109
+ }
110
+
111
+ return synthesizeSpeech(model, voice, text);
112
+ }
113
+
114
+ async function handleSynthesisRequest(req: Request) {
115
+
116
+ if (req.method !== "POST") {
117
+ console.log(`Invalid method ${req.method}, expected POST`);
118
+ return new Response("Method Not Allowed", { status: 405 });
119
+ }
120
+
121
+ const invalidContentType = validateContentType(req, "application/json");
122
+ if (invalidContentType) return invalidContentType;
123
+
124
+ const { model, input, voice } = await req.json();
125
+ console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);
126
+
127
+ return synthesizeSpeech(model, voice, input);
128
+ }
129
+
130
+
131
  async function handleDemoRequest(req: Request) {
132
  const html = `<!DOCTYPE html>
133
  <html lang="en">
 
245
  }
246
 
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
  serve(async (req) => {
250
  try {