ButterCream committed on
Commit
a81d387
·
1 Parent(s): 43feca4

attempt s curve to further reduce artefacts

Browse files
Files changed (1) hide show
  1. app.py +44 -3
app.py CHANGED
@@ -201,9 +201,50 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
201
  embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
202
  speed=speed, t=0.8)
203
 
204
- n_trim = int(len(synthaud) * 0.009)
205
- synthaud[:n_trim] = 0
206
- synthaud[-n_trim:] = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  audio = np.concatenate((audio, synthaud))
208
  scaled = np.int16(audio / np.max(np.abs(audio)) * 32767)
209
 
 
201
  embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
202
  speed=speed, t=0.8)
203
 
204
+
205
+ # S-Curve
206
+ np_log_99 = np.log(99)
207
+ def s_curve(p):
208
+ assert 0 <= p and p <= 1, p
209
+ if p == 0 or p == 1:
210
+ return p
211
+ p = (2*p - 1) * np_log_99
212
+ s = 1 / (1 + np.exp(-p))
213
+ s = (s - 0.01) * 50 / 49
214
+ assert 0 <= s and s <= 1, s
215
+ return s
216
+
217
+ # Post-Processing
218
+ thresh = np.percentile(np.abs(synthaud), 95)
219
+ CUT_SAMPLES = 10000 # max samples to cut, in practice only 4-6k are actually cut
220
+
221
+ # Leading artefact removal
222
+ left = CUT_SAMPLES + 1000
223
+ for j in range(left):
224
+ if abs(synthaud[j]) > thresh:
225
+ left = j
226
+ break
227
+
228
+ left = max(0, min(left - 1000, CUT_SAMPLES))
229
+ synthaud[:left] = 0
230
+ for k in range(1000):
231
+ s = s_curve(k / 1000)
232
+ synthaud[k + left] *= s
233
+
234
+ # Trailing artefact removal
235
+ right = len(synthaud) - CUT_SAMPLES - 1000
236
+ for j in range(len(synthaud) - 1, right, -1):
237
+ if abs(synthaud[j]) > thresh:
238
+ right = j
239
+ break
240
+
241
+ right = min(len(synthaud), max(right + 1000, len(synthaud) - CUT_SAMPLES))
242
+ synthaud[right:] = 0
243
+ for k in range(1000):
244
+ s = s_curve(k / 1000)
245
+ synthaud[right - 1000 + k] *= s
246
+
247
+
248
  audio = np.concatenate((audio, synthaud))
249
  scaled = np.int16(audio / np.max(np.abs(audio)) * 32767)
250