// Spaces: gnilets / tts
// Status: Running
// File: tts
// File size: 10,305 Bytes

import { serve } from "https://deno.land/std/http/server.ts";
import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";


// Voice-list endpoint fetched by fetchVoiceList(); the client token travels in the query string.
const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";

/** One entry of the voice catalogue, as exposed by {@link fetchVoiceList}. */
type VoiceInfo = { model: string; name: string; friendlyName: string; locale: string };

/**
 * Downloads the voice catalogue from `VOICES_URL` and groups the entries by locale.
 *
 * Each upstream entry's `ShortName` is exposed as both `model` and `name`,
 * `FriendlyName` as `friendlyName` and `Locale` as `locale`.
 *
 * @returns A map from locale to the voices listed for that locale.
 * @throws Error when the endpoint answers with a non-2xx status or the payload
 *   is not a JSON array.
 */
async function fetchVoiceList(): Promise<Record<string, VoiceInfo[]>> {
  const response = await fetch(VOICES_URL);
  if (!response.ok) {
    // Fail with the HTTP status instead of a confusing JSON parse error further down.
    throw new Error(`Failed to fetch voice list: ${response.status} ${response.statusText}`);
  }

  const voices: unknown = await response.json();
  if (!Array.isArray(voices)) {
    throw new Error("Unexpected voice list payload: expected a JSON array");
  }

  const byLocale: Record<string, VoiceInfo[]> = {};
  for (const voice of voices) {
    const { ShortName: shortName, FriendlyName: friendlyName, Locale: locale } = voice;
    if (!byLocale[locale]) byLocale[locale] = [];
    byLocale[locale].push({ model: shortName, name: shortName, friendlyName, locale });
  }
  return byLocale;
}

/** Short model aliases accepted by synthesizeSpeech, mapped to full voice names. */
const VOICE_ALIASES: ReadonlyMap<string, string> = new Map([
  ["ava", "en-US-AvaMultilingualNeural"],
  ["andrew", "en-US-AndrewMultilingualNeural"],
  ["emma", "en-US-EmmaMultilingualNeural"],
  ["brian", "en-US-BrianMultilingualNeural"],
  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
  ["remy", "fr-FR-RemyMultilingualNeural"],
  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
  ["florian", "de-DE-FlorianMultilingualNeural"],
  ["dmitry", "ru-RU-DmitryNeural"],
  ["svetlana", "ru-RU-SvetlanaNeural"],
]);

/** Voice used when the model is neither a known alias nor a full "…Neural" name. */
const DEFAULT_VOICE = "en-US-BrianMultilingualNeural";

/**
 * Parses a `key:value|key:value` string and extracts the `rate` and `pitch` entries.
 * Missing, empty or non-numeric values fall back to 0 so NaN is never forwarded.
 */
function parseVoiceTuning(voice: string): { rate: number; pitch: number } {
  const tuning = { rate: 0, pitch: 0 };
  // Callers pass values straight from JSON, so `voice` may not be a string at runtime.
  if (typeof voice !== "string") return tuning;

  for (const part of voice.split("|")) {
    const [key, value] = part.split(":");
    if (key !== "rate" && key !== "pitch") continue;
    const parsed = Number(value || 0);
    tuning[key] = Number.isFinite(parsed) ? parsed : 0;
  }
  return tuning;
}

/**
 * Synthesizes `text` with EdgeSpeechTTS and returns the audio as an HTTP response.
 *
 * @param model Either a full voice name containing "Neural" (used verbatim) or a
 *   short alias such as "emma"; unknown aliases fall back to the default voice.
 * @param voice Tuning string of the form `rate:<n>|pitch:<n>`. It is only read
 *   when `model` is a full "Neural" voice name; aliases always use rate 0 / pitch 0.
 * @param text The text to synthesize.
 * @returns A response with `Content-Type: audio/mpeg` carrying the synthesized bytes.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  // Guard against non-string input reaching us from an unvalidated JSON body.
  const requestedModel = typeof model === "string" ? model : "";

  let voiceName: string;
  let rate = 0;
  let pitch = 0;

  if (requestedModel.includes("Neural")) {
    voiceName = requestedModel;
    ({ rate, pitch } = parseVoiceTuning(voice));
  } else {
    voiceName = VOICE_ALIASES.get(requestedModel) ?? DEFAULT_VOICE;
  }

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: { rate, pitch, voice: voiceName },
  };
  const response = await tts.create(payload);
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());

  console.log(`Successfully synthesized speech, returning audio/mpeg response`);
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}

/**
 * Checks that the request's `Content-Type` media type matches `expected`.
 *
 * Only the media type is compared: parameters such as `; charset=utf-8` are
 * ignored and the comparison is case-insensitive, so
 * `Application/JSON; charset=utf-8` satisfies `application/json`.
 *
 * @param req The incoming request.
 * @param expected The required media type, e.g. `application/json`.
 * @returns A 400 "Bad Request" response when the header is missing or does not
 *   match, otherwise `undefined`.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  const mediaType = (contentType ?? "").split(";")[0]?.trim().toLowerCase() ?? "";

  if (mediaType !== expected.trim().toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", { status: 400 });
  }
  return undefined;
}

/**
 * Handles the debug endpoint: reads `voice`, `model` and `text` from the query
 * string and synthesizes speech from them.
 *
 * @param req The incoming request; only its URL is inspected.
 * @returns The synthesis response, or 400 "Bad Request" when any of the three
 *   query parameters is missing or empty.
 */
async function handleDebugRequest(req: Request) {
  const { searchParams } = new URL(req.url);
  // Absent parameters are treated exactly like empty ones.
  const readParam = (key: string): string => searchParams.get(key) || "";

  const voice = readParam("voice");
  const model = readParam("model");
  const text = readParam("text");

  console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);

  const hasAllParams = Boolean(voice && model && text);
  if (hasAllParams) {
    return synthesizeSpeech(model, voice, text);
  }

  console.log("Missing required parameters");
  return new Response("Bad Request", { status: 400 });
}

/**
 * Handles the JSON synthesis endpoint.
 *
 * Expects a POST with a JSON object body carrying `model` (string), `input`
 * (non-empty string) and an optional `voice` string, then delegates to
 * `synthesizeSpeech`.
 *
 * @param req The incoming request.
 * @returns 405 for non-POST methods, the content-type validation response when
 *   that check fails, 400 for a malformed or incomplete body, otherwise the
 *   synthesis response.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (req.method !== "POST") {
    console.log(`Invalid method ${req.method}, expected POST`);
    return new Response("Method Not Allowed", { status: 405, headers: { Allow: "POST" } });
  }

  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    // A malformed body is the client's fault, so report 400 rather than letting it throw.
    console.log("Request body is not valid JSON");
    return new Response("Bad Request", { status: 400 });
  }

  if (typeof body !== "object" || body === null) {
    console.log("Request body is not a JSON object");
    return new Response("Bad Request", { status: 400 });
  }

  const { model, input, voice } = body as Record<string, unknown>;
  console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);

  if (typeof model !== "string" || typeof input !== "string" || input === "") {
    console.log("Missing required parameters");
    return new Response("Bad Request", { status: 400 });
  }

  // `voice` stays optional: a missing or non-string value is passed on as "".
  return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
}


/**
 * Serves the demo page: a text area, a voice selector and a button whose click
 * handler POSTs `{ model, input, voice }` to `/v1/audio/speech`, then shows an
 * audio player and a download link for the returned audio.
 *
 * The embedded script only wires rate/pitch slider handlers when those elements
 * exist, because the markup currently contains no sliders.
 *
 * @param req The incoming request (not inspected).
 * @returns An HTML response with `Content-Type: text/html`.
 */
async function handleDemoRequest(req: Request) {
    const html = `<!DOCTYPE html>
  <html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>tts</title>
    <style>
  body {
    background-color: #121212;
    color: #e0e0e0;
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 20px;
  }

  .container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    background-color: #1e1e1e;
    border-radius: 8px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
  }

  .input-area, .output-area {
    margin-bottom: 20px;
  }

  .slider-container, .textarea-container, .dropdown-container {
    margin-bottom: 20px;
  }

  label {
    display: block;
    margin-bottom: 8px;
    font-weight: bold;
  }

  input[type="range"] {
    width: 100%;
  }

  .slider-value {
    text-align: center;
    margin-top: 8px;
  }

  textarea {
    max-width: 780px;
    width: calc(100% - 20px);
    height: 100px;
    padding: 10px;
    border: 1px solid #333;
    border-radius: 4px;
    background-color: #2e2e2e;
    color: #e0e0e0;
    resize: none;
    }

  select {
    width: 100%;
    padding: 10px;
    border: 1px solid #333;
    border-radius: 4px;
    background-color: #2e2e2e;
    color: #e0e0e0;
  }

  button {
    width: 100%;
    padding: 10px;
    border: none;
    border-radius: 4px;
    background-color: #6200ea;
    color: #fff;
    font-size: 16px;
    cursor: pointer;
    transition: background-color 0.3s;
  }

  button:hover {
    background-color: #3700b3;
  }

  h1 {
    font-size: 24px;
    margin-bottom: 20px;
  }

  a {
    color: #bb86fc;
    text-decoration: none;
  }

  a:hover {
    text-decoration: underline;
  }

  #audioPlayerContainer {
    text-align: center; /* Центрируем содержимое контейнера */
  }

  audio {
    width: 100%;
    max-width: 600px; /* Ограничиваем максимальную ширину плеера */
    margin: 10px 0;
  }

  a {
    display: block;
    margin: 10px 0;
  }
</style>

  </head>
  <body>
    <div class="container">
      <div class="input-area">
        <div class="textarea-container">
          <label for="inputText">текст:</label
          ><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
        </div>
        <div class="dropdown-container">
          <label for="voiceSelect">выбери голос:</label>
          <select id="voiceSelect">
            <option value="ava">ava</option>
            <option value="andrew">andrew</option>
            <option value="emma">emma</option>
            <option value="brian">brian</option>
            <option value="vivienne">vivienne</option>
            <option value="remy">remy</option>
            <option value="seraphina">seraphina</option>
            <option value="florian">florian</option>
            <option value="dmitry">dmitry</option>
            <option value="svetlana">svetlana</option>
          </select>
        </div>
        <button id="synthesizeButton">синтезировать</button>
      </div>
      <div class="output-area">
        <div id="audioPlayerContainer"></div>
      </div>
    </div>
 <script>
    let audio = null;

    document.getElementById('synthesizeButton').addEventListener('click', () => {
        const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
        const rate = '0.0';
        const pitch = '0.0';
        const voice = \`rate:\${rate}|pitch:\${pitch}\`;
        const model = document.getElementById('voiceSelect').value;

        if (audio) {
            audio.pause();
            audio.currentTime = 0;
        }

        fetch('/v1/audio/speech', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model, input: text, voice })
        })
        .then(response => {
            // Do not try to play an error body as audio.
            if (!response.ok) {
                throw new Error('Synthesis request failed with status ' + response.status);
            }
            return response.blob();
        })
        .then(blob => {
        const audioUrl = URL.createObjectURL(blob);
        const audioPlayerContainer = document.getElementById('audioPlayerContainer');
    
        // Удаляем старый аудиоплеер, если он существует
        if (audio) {
            audio.pause();
            audioPlayerContainer.innerHTML = '';
        }
    
        // Создаем новый аудиоплеер
        audio = new Audio(audioUrl);
        audio.controls = true;
        audioPlayerContainer.appendChild(audio);
    
        // Создаем ссылку для скачивания
        const downloadLink = document.createElement('a');
        downloadLink.href = audioUrl;
        downloadLink.download = 'synthesized_voice.mp3';
        downloadLink.textContent = 'Скачать аудио';
        downloadLink.style.display = 'block';
        downloadLink.style.marginTop = '10px';
    
        // Добавляем ссылку для скачивания в контейнер
        audioPlayerContainer.appendChild(downloadLink);
    
        // Воспроизводим аудио
        audio.play();
    })
        .catch(error => {
            console.error(error);
        });

    });

    // The page has no rate/pitch sliders at the moment; wire them only if present.
    const rateSlider = document.getElementById('rate');
    const rateValue = document.getElementById('rateValue');
    if (rateSlider && rateValue) {
        rateSlider.oninput = function() {
            rateValue.innerHTML = this.value;
        };
    }

    const pitchSlider = document.getElementById('pitch');
    const pitchValue = document.getElementById('pitchValue');
    if (pitchSlider && pitchValue) {
        pitchSlider.oninput = function() {
            pitchValue.innerHTML = this.value;
        };
    }
 </script>
 </body></html>`;

    return new Response(html, {
      headers: { "Content-Type": "text/html" },
    });
}



// HTTP entry point: routes each request by path.
//   "/"                 -> demo page
//   "/tts"              -> debug synthesis via query parameters
//   "/v1/audio/speech"  -> JSON synthesis endpoint
// Any other path yields 404. Handlers are awaited inside the try block so that
// their rejections reach the catch and are reported as a 500 response.
serve(async (req) => {
  try {
    const { pathname } = new URL(req.url);

    switch (pathname) {
      case "/":
        return await handleDemoRequest(req);
      case "/tts":
        return await handleDebugRequest(req);
      case "/v1/audio/speech":
        return await handleSynthesisRequest(req);
      default:
        console.log(`Unhandled path ${pathname}`);
        return new Response("Not Found", { status: 404 });
    }
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
});