Spaces:
Running
on
Zero
Running
on
Zero
ButterCream
committed on
Commit
·
a81d387
1
Parent(s):
43feca4
Attempt S-curve to further reduce artefacts
Browse files
app.py
CHANGED
@@ -201,9 +201,50 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
|
|
201 |
embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
|
202 |
speed=speed, t=0.8)
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
audio = np.concatenate((audio, synthaud))
|
208 |
scaled = np.int16(audio / np.max(np.abs(audio)) * 32767)
|
209 |
|
|
|
201 |
embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
|
202 |
speed=speed, t=0.8)
|
203 |
|
204 |
+
|
205 |
+
# S-Curve
|
206 |
+
np_log_99 = np.log(99)
|
207 |
+
def s_curve(p):
|
208 |
+
assert 0 <= p and p <= 1, p
|
209 |
+
if p == 0 or p == 1:
|
210 |
+
return p
|
211 |
+
p = (2*p - 1) * np_log_99
|
212 |
+
s = 1 / (1 + np.exp(-p))
|
213 |
+
s = (s - 0.01) * 50 / 49
|
214 |
+
assert 0 <= s and s <= 1, s
|
215 |
+
return s
|
216 |
+
|
217 |
+
# Post-Processing
|
218 |
+
thresh = np.percentile(np.abs(synthaud), 95)
|
219 |
+
CUT_SAMPLES = 10000 # max samples to cut, in practice only 4-6k are actually cut
|
220 |
+
|
221 |
+
# Leading artefact removal
|
222 |
+
left = CUT_SAMPLES + 1000
|
223 |
+
for j in range(left):
|
224 |
+
if abs(synthaud[j]) > thresh:
|
225 |
+
left = j
|
226 |
+
break
|
227 |
+
|
228 |
+
left = max(0, min(left - 1000, CUT_SAMPLES))
|
229 |
+
synthaud[:left] = 0
|
230 |
+
for k in range(1000):
|
231 |
+
s = s_curve(k / 1000)
|
232 |
+
synthaud[k + left] *= s
|
233 |
+
|
234 |
+
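# Trailing artefact removal (NOTE(review): the ramp below multiplies by s_curve(k / 1000), which rises from 0 to 1 towards `right`, so the tail fades in just before the hard cut; a fade-out would use s_curve(1 - k / 1000) - confirm intended direction)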
|
235 |
+
right = len(synthaud) - CUT_SAMPLES - 1000
|
236 |
+
for j in range(len(synthaud) - 1, right, -1):
|
237 |
+
if abs(synthaud[j]) > thresh:
|
238 |
+
right = j
|
239 |
+
break
|
240 |
+
|
241 |
+
right = min(len(synthaud), max(right + 1000, len(synthaud) - CUT_SAMPLES))
|
242 |
+
synthaud[right:] = 0
|
243 |
+
for k in range(1000):
|
244 |
+
s = s_curve(k / 1000)
|
245 |
+
synthaud[right - 1000 + k] *= s
|
246 |
+
|
247 |
+
|
248 |
audio = np.concatenate((audio, synthaud))
|
249 |
scaled = np.int16(audio / np.max(np.abs(audio)) * 32767)
|
250 |
|