"""Gradio front end for the smolagents ``TranscriptTool``.

Builds a small Blocks UI with a file upload, a "Transcribe" button, and two
text boxes: one for the transcription and one for the captured logs.
"""

import gradio as gr

from logging_config import log_buffer
from transcription_tool import TranscriptTool  # TranscriptTool lives in `transcription_tool.py`

# smolagent transcription tool, created once and shared by every request.
transcript_tool = TranscriptTool()


def _read_logs():
    """Return everything currently held in ``log_buffer``."""
    # Rewind first so the whole buffer is read; the read leaves the position
    # at the end again.
    log_buffer.seek(0)
    return log_buffer.read()


def transcribe_and_stream_logs(file):
    """Transcribe the uploaded file and return the result with the logs.

    Args:
        file: Value supplied by the ``gr.File`` input. Accepted forms are
            ``None`` (nothing uploaded), a filesystem path string, or an
            object whose ``name`` attribute is the path of the uploaded file.

    Returns:
        A ``(transcription, logs)`` pair matching the two output text boxes.
        When no file was uploaded, the first element is a short message
        asking for one instead of a transcription.
    """
    if file is None:
        # Clicking the button with no upload would otherwise raise
        # AttributeError on ``file.name``.
        return "No file uploaded. Please upload an audio or video file.", _read_logs()

    # Depending on the Gradio version/configuration the component passes
    # either a path string or a temp-file wrapper exposing ``.name``;
    # a plain ``str`` has no ``name`` attribute, so it falls through as-is.
    temp_file_path = getattr(file, "name", file)

    # Perform transcription
    transcription_result = transcript_tool.forward(temp_file_path)

    # Logs are collected only after transcription finishes.
    return transcription_result, _read_logs()


with gr.Blocks() as app:
    gr.Markdown("# TranscriptTool: Transcribe Audio/Video")
    gr.Markdown("TranscriptTool is a smolagent tool used to transcribe audio and video files into text. Leveraging OpenAI's Whisper and `ffmpeg`, this tool empowers agents to process multimedia inputs efficiently. It supports robust file handling, dynamic device selection (CPU or GPU), and easy use within smolagents via the Hugging Face API.")

    file_input = gr.File(label="Upload Audio/Video File", file_types=["audio", "video"])
    transcribe_button = gr.Button("Transcribe")
    transcription_output = gr.Textbox(label="Transcription", lines=10)
    log_output = gr.Textbox(label="Logs", lines=15)

    transcribe_button.click(
        fn=transcribe_and_stream_logs,
        inputs=file_input,
        outputs=[transcription_output, log_output],
    )

if __name__ == "__main__":
    app.launch()