Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,35 +1,45 @@
|
|
1 |
import streamlit as st
|
2 |
from faster_whisper import WhisperModel
|
3 |
from transformers import pipeline
|
4 |
-
import numpy as np
|
5 |
from pydub import AudioSegment
|
6 |
-
|
7 |
|
|
|
8 |
def initialize_model():
|
9 |
"""Initialize the Whisper model and AI detection pipeline."""
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
st.session_state.ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")
|
14 |
|
15 |
-
def
|
16 |
-
"""
|
17 |
-
# Convert uploaded file to a WAV file
|
18 |
audio = AudioSegment.from_file(uploaded_file)
|
19 |
-
audio = audio.set_frame_rate(16000).set_channels(1)
|
20 |
samples = np.array(audio.get_array_of_samples(), dtype=np.float32) / 32768.0
|
|
|
21 |
|
22 |
-
|
23 |
-
|
|
|
24 |
return [segment.text for segment in segments]
|
25 |
|
26 |
-
def
|
27 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
if len(text.split()) < 5:
|
29 |
return {"classification": "Insufficient Data", "probability": 0.0, "confidence": "Low"}
|
|
|
30 |
result = ai_detector(text)[0]
|
|
|
31 |
return {
|
32 |
-
"classification":
|
33 |
"probability": result["score"],
|
34 |
"confidence": "High" if result["score"] > 0.7 else "Medium" if result["score"] > 0.5 else "Low"
|
35 |
}
|
@@ -43,8 +53,8 @@ def run_app():
|
|
43 |
Supported audio formats: **.wav**, **.mp3**.
|
44 |
""")
|
45 |
|
46 |
-
#
|
47 |
-
initialize_model()
|
48 |
|
49 |
# File uploader
|
50 |
uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
|
@@ -52,15 +62,19 @@ def run_app():
|
|
52 |
if uploaded_file:
|
53 |
st.info("Processing audio... Please wait.")
|
54 |
try:
|
55 |
-
#
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
58 |
st.text_area("Transcription", value=full_transcript, height=300)
|
59 |
|
60 |
-
# AI Detection
|
61 |
st.subheader("AI Detection Results")
|
62 |
-
for text in
|
63 |
-
detection_result =
|
64 |
st.write(f"**Text:** {text}")
|
65 |
st.write(f"- **Classification:** {detection_result['classification']}")
|
66 |
st.write(f"- **Probability:** {detection_result['probability']:.2f}")
|
|
|
1 |
import streamlit as st
|
2 |
from faster_whisper import WhisperModel
|
3 |
from transformers import pipeline
|
|
|
4 |
from pydub import AudioSegment
|
5 |
+
import numpy as np
|
6 |
|
7 |
+
@st.cache_resource
def initialize_model():
    """Load and cache the transcription and AI-detection models.

    Cached via st.cache_resource so the heavy model loads happen once
    per Streamlit server process, not on every rerun.

    Returns:
        tuple: (faster_whisper.WhisperModel, transformers
        text-classification pipeline for AI-generated-text detection).
    """
    whisper_model = WhisperModel("medium", device="cpu", compute_type="int8")
    detector = pipeline("text-classification", model="roberta-base-openai-detector")
    return whisper_model, detector
|
|
|
13 |
|
14 |
+
def preprocess_audio(uploaded_file):
    """Preprocess an uploaded audio file into mono 16 kHz float32 samples.

    Args:
        uploaded_file: File-like object readable by pydub's
            AudioSegment.from_file (e.g. a Streamlit UploadedFile).

    Returns:
        np.ndarray: 1-D float32 waveform scaled to [-1.0, 1.0], the
        format faster-whisper's transcribe() accepts directly.
    """
    audio = AudioSegment.from_file(uploaded_file)
    # Whisper expects 16 kHz mono; normalize() evens out loudness.
    # Force 16-bit sample width so the 32768 scaling constant below is
    # correct even for 8-/24-/32-bit sources (no-op for 16-bit input) —
    # get_array_of_samples() values depend on the sample width.
    audio = (
        audio.set_frame_rate(16000)
        .set_channels(1)
        .set_sample_width(2)
        .normalize()
    )
    samples = np.array(audio.get_array_of_samples(), dtype=np.float32) / 32768.0
    return samples
|
20 |
|
21 |
+
def transcribe_audio(samples, model):
    """Transcribe preprocessed audio with a Whisper model.

    Args:
        samples: 1-D float32 waveform at 16 kHz (see preprocess_audio).
        model: A faster_whisper WhisperModel instance.

    Returns:
        list[str]: The text of each detected speech segment, in order.
    """
    segments, _info = model.transcribe(
        samples, language="en", vad_filter=True, beam_size=3
    )
    texts = []
    for segment in segments:
        texts.append(segment.text)
    return texts
|
25 |
|
26 |
+
def combine_sentences(transcriptions, group_size=3):
    """Merge consecutive transcript segments into larger chunks.

    Args:
        transcriptions: List of sentence/segment strings.
        group_size: Number of consecutive segments joined per chunk
            (default 3).

    Returns:
        list[str]: Space-joined chunks; the final chunk may contain
        fewer than group_size segments.
    """
    return [
        " ".join(transcriptions[start:start + group_size])
        for start in range(0, len(transcriptions), group_size)
    ]
|
33 |
+
|
34 |
+
def ai_detection(text, ai_detector):
    """Classify a chunk of transcript as human- or AI-written.

    Args:
        text: Text chunk to classify.
        ai_detector: transformers text-classification pipeline; for a
            single input it returns [{"label": ..., "score": ...}].

    Returns:
        dict: "classification" ("Human"/"AI"/"Insufficient Data"),
        "probability" (raw detector score), and a coarse "confidence"
        bucket ("High"/"Medium"/"Low").
    """
    # The detector is unreliable on very short inputs; bail out early.
    if len(text.split()) < 5:
        return {"classification": "Insufficient Data", "probability": 0.0, "confidence": "Low"}

    result = ai_detector(text)[0]
    # roberta-base-openai-detector emits "Real" for human-written text.
    if result["label"] == "Real":
        classification = "Human"
    else:
        classification = "AI"

    score = result["score"]
    if score > 0.7:
        confidence = "High"
    elif score > 0.5:
        confidence = "Medium"
    else:
        confidence = "Low"

    return {
        "classification": classification,
        "probability": score,
        "confidence": confidence,
    }
|
|
|
53 |
Supported audio formats: **.wav**, **.mp3**.
|
54 |
""")
|
55 |
|
56 |
+
# Load models
|
57 |
+
model, ai_detector = initialize_model()
|
58 |
|
59 |
# File uploader
|
60 |
uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
|
|
|
62 |
if uploaded_file:
|
63 |
st.info("Processing audio... Please wait.")
|
64 |
try:
|
65 |
+
# Preprocess and transcribe
|
66 |
+
samples = preprocess_audio(uploaded_file)
|
67 |
+
transcription = transcribe_audio(samples, model)
|
68 |
+
|
69 |
+
# Combine sentences
|
70 |
+
combined_transcription = combine_sentences(transcription, group_size=3)
|
71 |
+
full_transcript = "\n".join(combined_transcription)
|
72 |
st.text_area("Transcription", value=full_transcript, height=300)
|
73 |
|
74 |
+
# AI Detection on combined sentences
|
75 |
st.subheader("AI Detection Results")
|
76 |
+
for text in combined_transcription:
|
77 |
+
detection_result = ai_detection(text, ai_detector)
|
78 |
st.write(f"**Text:** {text}")
|
79 |
st.write(f"- **Classification:** {detection_result['classification']}")
|
80 |
st.write(f"- **Probability:** {detection_result['probability']:.2f}")
|