Update README.md
Browse files
README.md
CHANGED
@@ -1,6 +1,58 @@
|
|
1 |
# EGTTS V0.1
|
2 |
EGTTS V0.1 is a cutting-edge text-to-speech (TTS) model specifically designed for Egyptian Arabic. Built on the XTTS v2 architecture, it transforms written Egyptian Arabic text into natural-sounding speech, enabling seamless communication in various applications such as voice assistants, educational tools, and chatbots.
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
```bibtex
|
5 |
@misc{omarsamir,
|
6 |
author = {Omar Samir, Youssef Waleed, Youssef Tamer, and Amir Mohamed},
|
|
|
1 |
# EGTTS V0.1
|
2 |
EGTTS V0.1 is a cutting-edge text-to-speech (TTS) model specifically designed for Egyptian Arabic. Built on the XTTS v2 architecture, it transforms written Egyptian Arabic text into natural-sounding speech, enabling seamless communication in various applications such as voice assistants, educational tools, and chatbots.
|
3 |
|
4 |
+
## Quick Start
|
5 |
+
### Dependencies to install
|
6 |
+
```bash
|
7 |
+
pip install git+https://github.com/coqui-ai/TTS
|
8 |
+
|
9 |
+
pip install transformers
|
10 |
+
|
11 |
+
pip install deepspeed
|
12 |
+
```
|
13 |
+
### Inference
|
14 |
+
#### Load the model
|
15 |
+
```python
|
16 |
+
import os
|
17 |
+
import torch
|
18 |
+
import torchaudio
|
19 |
+
from TTS.tts.configs.xtts_config import XttsConfig
|
20 |
+
from TTS.tts.models.xtts import Xtts
|
21 |
+
|
22 |
+
CONFIG_FILE_PATH = 'path/to/config.json'
|
23 |
+
VOCAB_FILE_PATH = 'path/to/vocab.json'
|
24 |
+
MODEL_PATH = 'path/to/model'
|
25 |
+
SPEAKER_AUDIO_PATH = 'path/to/speaker.wav'
|
26 |
+
|
27 |
+
print("Loading model...")
|
28 |
+
config = XttsConfig()
|
29 |
+
config.load_json(CONFIG_FILE_PATH)
|
30 |
+
model = Xtts.init_from_config(config)
|
31 |
+
model.load_checkpoint(config, checkpoint_dir=MODEL_PATH, use_deepspeed=True, vocab_path=VOCAB_FILE_PATH)
|
32 |
+
model.cuda()
|
33 |
+
|
34 |
+
print("Computing speaker latents...")
|
35 |
+
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[SPEAKER_AUDIO_PATH])
|
36 |
+
```
|
37 |
+
|
38 |
+
#### Run the model
|
39 |
+
```python
|
40 |
+
from IPython.display import Audio, display
|
41 |
+
|
42 |
+
text = "صباح الخير"
|
43 |
+
print("Inference...")
|
44 |
+
out = model.inference(
|
45 |
+
text,
|
46 |
+
"ar",
|
47 |
+
gpt_cond_latent,
|
48 |
+
speaker_embedding,
|
49 |
+
temperature=0.75,
|
50 |
+
)
|
51 |
+
|
52 |
+
torchaudio.save("xtts_audio.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
|
53 |
+
display(Audio('/content/xtts_audio.wav', autoplay=True))
|
54 |
+
```
|
55 |
+
|
56 |
```bibtex
|
57 |
@misc{omarsamir,
|
58 |
author = {Omar Samir, Youssef Waleed, Youssef Tamer, and Amir Mohamed},
|