Spaces:
Sleeping
Sleeping
File size: 2,727 Bytes
ffde8eb 9aaba9b ffde8eb c2024d5 fcb35fa bda9523 0bcd58d 90d99db 1b59031 90d99db 1b59031 90d99db c2024d5 5561bb0 c2024d5 90d99db d9b10e7 c2024d5 ffde8eb 0f893bd 0bcd58d bda9523 c2024d5 1b59031 ffde8eb b893b69 ad23ad1 ee00b19 ad23ad1 ee00b19 459f414 ffde8eb 9e391aa ee00b19 90d99db c2024d5 0bcd58d 459f414 ffde8eb 339c973 ffde8eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import gradio as gr
import onnxruntime
from transformers import AutoTokenizer
import torch
import os
from transformers import pipeline
import subprocess
import moviepy.editor as mp
import base64
# --- Topic classifier setup (ONNX) ---
# The FinBERT tokenizer is used only for its vocabulary/encoding; the actual
# classification head is the quantized ONNX model loaded below.
# NOTE(review): the variable name `token` holds a tokenizer, not a token.
token = AutoTokenizer.from_pretrained('ProsusAI/finbert')
# Quantized classifier exported to ONNX; file must sit next to this script.
inf_session = onnxruntime.InferenceSession('classifier-quantized2.onnx')
input_name = inf_session.get_inputs()[0].name
output_name = inf_session.get_outputs()[0].name
# Output labels — presumably index-aligned with the ONNX model's logits
# (cannot be confirmed from this file alone).
classes = ['Art', 'Astrology', 'Biology', 'Chemistry', 'Economics', 'History', 'Literature', 'Philosophy', 'Physics', 'Politics', 'Psychology', 'Sociology']
### --- Audio/Video to txt ---###
# Prefer GPU when available; both pipelines share the same device.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Whisper tiny (English-only) ASR; chunk_length_s lets it handle long audio
# by transcribing 30-second windows.
pipe = pipeline("automatic-speech-recognition",
model="openai/whisper-tiny.en",
chunk_length_s=30, device=device)
### --- Text Summary --- ###
# DistilBART CNN summarizer used on the transcription before classification.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
def video_identity(video):
    """Transcribe an audio/video file path to text via the Whisper ASR pipeline.

    Returns the full transcription string produced by the module-level `pipe`.
    """
    result = pipe(video)
    return result["text"]
def _chunk_text(text, max_chunk=500):
    """Greedily pack the sentences of *text* into chunks of <= max_chunk words.

    The text is split on '.' and consecutive sentences are merged while the
    running word count stays within *max_chunk*; a sentence that would
    overflow starts a new chunk. Returns a list of space-joined strings.
    NOTE(review): sentence delimiters ('.') are dropped in the output, as in
    the original implementation.
    """
    chunks = []
    current_chunk = 0
    for sentence in text.split('.'):
        words = sentence.split(' ')
        if len(chunks) == current_chunk + 1:
            if len(chunks[current_chunk]) + len(words) <= max_chunk:
                chunks[current_chunk].extend(words)
            else:
                current_chunk += 1
                chunks.append(words)
        else:
            # First iteration: seed the initial chunk.
            chunks.append(words)
    return [' '.join(chunk) for chunk in chunks]

def summary(text):
    """Summarize *text* with the DistilBART pipeline.

    The text is chunked to respect the summarizer's input limit, then every
    chunk is summarized in one batched call. Returns the pipeline's list of
    {'summary_text': ...} dicts, one per chunk.
    """
    chunks = _chunk_text(text)
    return summarizer(chunks, max_length=100)
def classify(video_file, encoded_video):
    """Transcribe, summarize, and topic-classify a video.

    Parameters:
        video_file: path to an uploaded video file (Gradio 'video' input).
        encoded_video: optional base64-encoded video; when non-empty it is
            decoded to a temp file and takes precedence over video_file.

    Returns a dict with 'text' (full transcription), 'summary'
    (summarized transcription), and 'label' (predicted topic from `classes`).
    """
    if encoded_video != "":
        decoded_file_data = base64.b64decode(encoded_video)
        with open("temp_video.mp4", "wb") as f:
            f.write(decoded_file_data)
        video_file = "temp_video.mp4"
    # Extract the audio track to a wav file for the ASR pipeline.
    # FIX: close the clip so moviepy's ffmpeg reader handles are released
    # (the original leaked them on every call).
    clip = mp.VideoFileClip(video_file)
    try:
        clip.audio.write_audiofile(r"audio.wav")
    finally:
        clip.close()
    full_text = video_identity(r"audio.wav")
    # FIX: renamed local from `sum` — it shadowed the builtin.
    summary_text = summary(full_text)[0]['summary_text']
    # Truncate to the encoder's 512-token limit before the ONNX run.
    input_ids = token(summary_text)['input_ids'][:512]
    logits = inf_session.run([output_name], {input_name: [input_ids]})[0]
    # Sigmoid per-class scores (model appears multi-label — TODO confirm);
    # pick the highest-scoring class.
    probs = torch.sigmoid(torch.FloatTensor(logits))[0]
    label = classes[int(torch.argmax(probs))]
    return {
        'text': full_text,
        'summary': summary_text,
        'label': label,
    }
# --- Gradio UI wiring ---
# NOTE(review): text1/text2 are created but never passed to the interface;
# kept to preserve the original module's behavior exactly.
text1 = gr.Textbox(label="Text")
text2 = gr.Textbox(label="Summary")
iface = gr.Interface(
    fn=classify,
    inputs=['video', 'text'],
    outputs=['json'],
)
iface.launch(inline=False)
|