Spaces:

ShoukanLabs
/

Vokan

Running on Zero

App Files Files Community

ButterCream committed on Sep 14, 2024

Commit

a81d387

1 Parent(s): 43feca4

attempt s curve to further reduce artefacts

Browse files

Files changed (1) hide show

app.py +44 -3

app.py CHANGED Viewed

@@ -201,9 +201,50 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
                                                             embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
                                                             speed=speed, t=0.8)
-        n_trim = int(len(synthaud) * 0.009)
-        synthaud[:n_trim] = 0
-        synthaud[-n_trim:] = 0
         audio = np.concatenate((audio, synthaud))
     scaled = np.int16(audio / np.max(np.abs(audio)) * 32767)

                                                             embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
                                                             speed=speed, t=0.8)
+        # S-Curve
+        np_log_99 = np.log(99)
+        def s_curve(p):  # map p in [0, 1] onto a sigmoid-shaped value in [0, 1]
+            assert 0 <= p and p <= 1, p  # reject inputs outside [0, 1]
+            if p == 0 or p == 1:  # endpoints are returned unchanged, so they are exactly 0 and 1
+                return p
+            p = (2*p - 1) * np_log_99  # rescale p from [0, 1] to [-ln 99, +ln 99]
+            s = 1 / (1 + np.exp(-p))  # logistic function; over that range it spans 0.01..0.99
+            s = (s - 0.01) * 50 / 49  # stretch 0.01..0.99 onto 0..1 (0.98 * 50 / 49 == 1)
+            assert 0 <= s and s <= 1, s  # sanity check on the output range
+            return s
+        # Post-Processing
+        thresh = np.percentile(np.abs(synthaud), 95)
+        CUT_SAMPLES = 10000  # max samples to cut, in practice only 4-6k are actually cut
+        # Leading artefact removal
+        left = CUT_SAMPLES + 1000
+        for j in range(left):
+            if abs(synthaud[j]) > thresh:
+                left = j
+                break
+        left = max(0, min(left - 1000, CUT_SAMPLES))
+        synthaud[:left] = 0
+        for k in range(1000):
+            s = s_curve(k / 1000)
+            synthaud[k + left] *= s
+        # Trailing artefact removal
+        right = len(synthaud) - CUT_SAMPLES - 1000
+        for j in range(len(synthaud) - 1, right, -1):
+            if abs(synthaud[j]) > thresh:
+                right = j
+                break
+        right = min(len(synthaud), max(right + 1000, len(synthaud) - CUT_SAMPLES))
+        synthaud[right:] = 0
+        for k in range(1000):
+            s = s_curve(k / 1000)
+            synthaud[right - 1000 + k] *= s
         audio = np.concatenate((audio, synthaud))
     scaled = np.int16(audio / np.max(np.abs(audio)) * 32767)