fffiloni committed on
Commit
40d0762
·
verified ·
1 Parent(s): 6481c18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -26
app.py CHANGED
@@ -67,19 +67,10 @@ def empty_output_folder(output_dir):
67
 
68
  # Function to create a temporary file with string content
69
  def create_temp_file(content, prefix, suffix=".txt"):
70
- temp_file = tempfile.NamedTemporaryFile(delete=False, mode="w", prefix=prefix, suffix=suffix)
71
- # Ensure content ends with newline and normalize line endings
72
- content = content.strip() + "\n\n" # Add extra newline at end
73
- content = content.replace("\r\n", "\n").replace("\r", "\n")
74
- temp_file.write(content)
75
- temp_file.close()
76
-
77
- # Debug: Print file contents
78
- print(f"\nContent written to {prefix}{suffix}:")
79
- print(content)
80
- print("---")
81
-
82
- return temp_file.name
83
 
84
  def get_last_mp3_file(output_dir):
85
  # List all files in the output directory
@@ -121,13 +112,13 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
121
  "python", "infer.py",
122
  "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
123
  "--stage2_model", "m-a-p/YuE-s2-1B-general",
124
- "--genre_txt", f"{genre_txt_path}",
125
- "--lyrics_txt", f"{lyrics_txt_path}",
126
- "--run_n_segments", f"{num_segments}",
127
  "--stage2_batch_size", "4",
128
- "--output_dir", f"{output_dir}",
129
  "--cuda_idx", "0",
130
- "--max_new_tokens", f"{max_new_tokens}",
131
  "--disable_offload_model"
132
  ]
133
 
@@ -191,16 +182,38 @@ with gr.Blocks() as demo:
191
  """)
192
  with gr.Row():
193
  with gr.Column():
194
- genre_txt = gr.Textbox(label="Genre")
195
- lyrics_txt = gr.Textbox(label="Lyrics")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  with gr.Column():
198
- if is_shared_ui:
199
- num_segments = gr.Number(label="Number of Segments", value=2, interactive=True)
200
- max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="3000", step=500, value=1500, interactive=True)
201
- else:
202
- num_segments = gr.Number(label="Number of Song Segments", value=2, interactive=True)
203
- max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="24000", step=500, value=3000, interactive=True)
204
  submit_btn = gr.Button("Submit")
205
  music_out = gr.Audio(label="Audio Result")
206
 
 
67
 
68
  # Function to create a temporary file with string content
69
  def create_temp_file(content, prefix, suffix=".txt"):
70
+ fd, path = tempfile.mkstemp(prefix=prefix, suffix=suffix)
71
+ with os.fdopen(fd, "w", encoding="utf-8") as f:
72
+ f.write(content)
73
+ return path
 
 
 
 
 
 
 
 
 
74
 
75
  def get_last_mp3_file(output_dir):
76
  # List all files in the output directory
 
112
  "python", "infer.py",
113
  "--stage1_model", "m-a-p/YuE-s1-7B-anneal-en-cot",
114
  "--stage2_model", "m-a-p/YuE-s2-1B-general",
115
+ "--genre_txt", f"'{genre_txt_path}'",
116
+ "--lyrics_txt", f"'{lyrics_txt_path}'",
117
+ "--run_n_segments", str(num_segments),
118
  "--stage2_batch_size", "4",
119
+ "--output_dir", f"'{output_dir}'",
120
  "--cuda_idx", "0",
121
+ "--max_new_tokens", str(max_new_tokens),
122
  "--disable_offload_model"
123
  ]
124
 
 
182
  """)
183
  with gr.Row():
184
  with gr.Column():
185
+ with gr.Accordion("Pro Tips", open=False):
186
+ gr.Markdown(f"""
187
+ **Tips:**
188
+ 1. `genres` should include details like instruments, genre, mood, vocal timbre, and vocal gender.
189
+ 2. The length of `lyrics` segments and the `--max_new_tokens` value should be matched. For example, if `--max_new_tokens` is set to 3000, the maximum duration for a segment is around 30 seconds. Ensure your lyrics fit this time frame.
190
+
191
+
192
+ **Notice:**
193
+ 1. A suitable [Genre] tag consists of five components: genre, instrument, mood, gender, and timbre. All five should be included if possible, separated by spaces. The values of timbre should include "vocal" (e.g., "bright vocal").
194
+
195
+ 2. Although our tags have an open vocabulary, we have provided the 200 most commonly used <a href="https://github.com/multimodal-art-projection/YuE/blob/main/top_200_tags.json" id="tags_link" target="_blank">tags</a>. It is recommended to select tags from this list for more stable results.
196
+
197
+ 3. The order of the tags is flexible. For example, a stable genre control string might look like: "inspiring female uplifting pop airy vocal electronic bright vocal vocal."
198
+
199
+ 4. Additionally, we have introduced the "Mandarin" and "Cantonese" tags to distinguish between Mandarin and Cantonese, as their lyrics often share similarities.
200
+ """)
201
+ genre_txt = gr.Textbox(
202
+ label="Genre",
203
+ placeholder="Example: inspiring female uplifting pop airy vocal...",
204
+ info="Text containing genre tags that describe the musical style or characteristics (e.g., instrumental, genre, mood, vocal timbre, vocal gender). This is used as part of the generation prompt."
205
+ )
206
+ lyrics_txt = gr.Textbox(
207
+ label="Lyrics", lines=12,
208
+ placeholder="Type the lyrics here...",
209
+ info="Text containing the lyrics for the music generation. These lyrics will be processed and split into structured segments to guide the generation process."
210
+ )
211
 
212
  with gr.Column():
213
+
214
+ num_segments = gr.Number(label="Number of Segments", value=2, interactive=True)
215
+ max_new_tokens = gr.Slider(label="Max New Tokens", minimum=500, maximum="3000", step=500, value=1500, interactive=True)
216
+
 
 
217
  submit_btn = gr.Button("Submit")
218
  music_out = gr.Audio(label="Audio Result")
219