Spaces:
Running
on
A100
Running
on
A100
Update app.py
Browse files
app.py
CHANGED
@@ -67,19 +67,10 @@ def empty_output_folder(output_dir):
|
|
67 |
|
68 |
# Function to create a temporary file with string content
|
69 |
def create_temp_file(content, prefix, suffix=".txt"):
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
temp_file.write(content)
|
75 |
-
temp_file.close()
|
76 |
-
|
77 |
-
# Debug: Print file contents
|
78 |
-
print(f"\nContent written to {prefix}{suffix}:")
|
79 |
-
print(content)
|
80 |
-
print("---")
|
81 |
-
|
82 |
-
return temp_file.name
|
83 |
|
84 |
def get_last_mp3_file(output_dir):
|
85 |
# List all files in the output directory
|
@@ -121,13 +112,13 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
|
|
121 |
"python", "infer.py",
|
122 |
"--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
|
123 |
"--stage2_model", "m-a-p/YuE-s2-1B-general",
|
124 |
-
"--genre_txt", f"{genre_txt_path}",
|
125 |
-
"--lyrics_txt", f"{lyrics_txt_path}",
|
126 |
-
"--run_n_segments",
|
127 |
"--stage2_batch_size", "4",
|
128 |
-
"--output_dir", f"{output_dir}",
|
129 |
"--cuda_idx", "0",
|
130 |
-
"--max_new_tokens",
|
131 |
"--disable_offload_model"
|
132 |
]
|
133 |
|
@@ -191,16 +182,38 @@ with gr.Blocks() as demo:
|
|
191 |
""")
|
192 |
with gr.Row():
|
193 |
with gr.Column():
|
194 |
-
|
195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
|
197 |
with gr.Column():
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
num_segments = gr.Number(label="Number of Song Segments", value=2, interactive=True)
|
203 |
-
max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="24000", step=500, value=3000, interactive=True)
|
204 |
submit_btn = gr.Button("Submit")
|
205 |
music_out = gr.Audio(label="Audio Result")
|
206 |
|
|
|
67 |
|
68 |
# Function to create a temporary file with string content
|
69 |
def create_temp_file(content, prefix, suffix=".txt"):
    """Write *content* to a new temporary file and return the file's path.

    The file is created with ``tempfile.mkstemp`` (so its name is unique and
    it is not deleted automatically) and written as UTF-8 text.

    Args:
        content: Text to store in the file.
        prefix: Leading part of the generated file name.
        suffix: Trailing part of the generated file name, ".txt" by default.

    Returns:
        The path of the newly written file.

    Raises:
        TypeError: If *content* is not a string.
        OSError: If the file cannot be created or written.
    """
    fd, path = tempfile.mkstemp(prefix=prefix, suffix=suffix)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(content)
    except BaseException:
        # The write failed, so the caller never learns the path and cannot
        # clean up; remove the orphaned file here before re-raising.
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: the original error is the one to report.
            pass
        raise
    return path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
def get_last_mp3_file(output_dir):
|
76 |
# List all files in the output directory
|
|
|
112 |
"python", "infer.py",
|
113 |
"--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
|
114 |
"--stage2_model", "m-a-p/YuE-s2-1B-general",
|
115 |
+
"--genre_txt", f"'{genre_txt_path}'",
|
116 |
+
"--lyrics_txt", f"'{lyrics_txt_path}'",
|
117 |
+
"--run_n_segments", str(num_segments),
|
118 |
"--stage2_batch_size", "4",
|
119 |
+
"--output_dir", f"'{output_dir}'",
|
120 |
"--cuda_idx", "0",
|
121 |
+
"--max_new_tokens", str(max_new_tokens),
|
122 |
"--disable_offload_model"
|
123 |
]
|
124 |
|
|
|
182 |
""")
|
183 |
with gr.Row():
|
184 |
with gr.Column():
|
185 |
+
with gr.Accordion("Pro Tips", open=False):
|
186 |
+
gr.Markdown(f"""
|
187 |
+
**Tips:**
|
188 |
+
1. `genres` should include details like instruments, genre, mood, vocal timbre, and vocal gender.
|
189 |
+
2. The length of `lyrics` segments and the `--max_new_tokens` value should be matched. For example, if `--max_new_tokens` is set to 3000, the maximum duration for a segment is around 30 seconds. Ensure your lyrics fit this time frame.
|
190 |
+
|
191 |
+
|
192 |
+
**Notice:**
|
193 |
+
1. A suitable [Genre] tag consists of five components: genre, instrument, mood, gender, and timbre. All five should be included if possible, separated by spaces. The values of timbre should include "vocal" (e.g., "bright vocal").
|
194 |
+
|
195 |
+
2. Although our tags have an open vocabulary, we have provided the 200 most commonly used <a href="https://github.com/multimodal-art-projection/YuE/blob/main/top_200_tags.json" id="tags_link" target="_blank">tags</a>. It is recommended to select tags from this list for more stable results.
|
196 |
+
|
197 |
+
3. The order of the tags is flexible. For example, a stable genre control string might look like: "inspiring female uplifting pop airy vocal electronic bright vocal vocal."
|
198 |
+
|
199 |
+
4. Additionally, we have introduced the "Mandarin" and "Cantonese" tags to distinguish between Mandarin and Cantonese, as their lyrics often share similarities.
|
200 |
+
""")
|
201 |
+
genre_txt = gr.Textbox(
|
202 |
+
label="Genre",
|
203 |
+
placeholder="Example: inspiring female uplifting pop airy vocal...",
|
204 |
+
info="Text containing genre tags that describe the musical style or characteristics (e.g., instrumental, genre, mood, vocal timbre, vocal gender). This is used as part of the generation prompt."
|
205 |
+
)
|
206 |
+
lyrics_txt = gr.Textbox(
|
207 |
+
label="Lyrics", lines=12,
|
208 |
+
placeholder="Type the lyrics here...",
|
209 |
+
info="Text containing the lyrics for the music generation. These lyrics will be processed and split into structured segments to guide the generation process."
|
210 |
+
)
|
211 |
|
212 |
with gr.Column():
|
213 |
+
|
214 |
+
num_segments = gr.Number(label="Number of Segments", value=2, interactive=True)
|
215 |
+
max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="3000", step=500, value=1500, interactive=True)
|
216 |
+
|
|
|
|
|
217 |
submit_btn = gr.Button("Submit")
|
218 |
music_out = gr.Audio(label="Audio Result")
|
219 |
|