Spaces:
Running
Running
Sharan Thakur
committed on
Commit
·
f8c4214
1
Parent(s):
6e522f0
Add initial implementation of YouTube audio summarizer with Gemini API integration
Browse files- .gitignore +3 -0
- README.md +1 -3
- ai_client.py +80 -0
- app.py +51 -0
- extract_audio.py +101 -0
- models.py +31 -0
- requirements.txt +86 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
venv/
|
2 |
+
output/
|
3 |
+
*.m4a
|
README.md
CHANGED
@@ -7,7 +7,5 @@ sdk: gradio
|
|
7 |
sdk_version: 5.12.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
short_description:
|
11 |
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
7 |
sdk_version: 5.12.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
short_description: YouTube Summarizer is a tool that helps you quickly get the gist of YouTube videos by providing concise summaries.
|
11 |
---
|
|
|
|
ai_client.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
import os
|
3 |
+
import google.generativeai as genai
|
4 |
+
from typing import Generator
|
5 |
+
from logging import getLogger
|
6 |
+
|
7 |
+
logger = getLogger(__name__)
|
8 |
+
|
9 |
+
|
10 |
+
class Gemini:
    """Small client that uploads an audio file to Gemini and streams a summary."""

    def __init__(self):
        """Configure the Gemini SDK and build the generative model.

        The API key is read from the ``GEMINI_API_KEY`` environment variable
        after loading a local ``.env`` file, if there is one.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` is missing or empty.
        """
        load_dotenv()
        api_key = os.getenv("GEMINI_API_KEY")
        # An empty value is as unusable as a missing one, so reject both here
        # instead of failing later on the first API call.
        if not api_key:
            raise ValueError("GEMINI_API_KEY is not set in the environment variables")
        genai.configure(api_key=api_key)

        # Sampling settings applied to every generation request.
        self.generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }

        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-pro",
            generation_config=self.generation_config,
        )

    def generate_text(
        self, local_file: str, id: str, uploader: str
    ) -> Generator[str, None, None]:
        """Upload ``local_file`` and stream a podcast-style summary of it.

        Args:
            local_file: Path of the audio file to upload.
            id: Identifier of the source video; forwarded to the upload helper.
            uploader: Name of the uploader, interpolated into the prompt.

        Yields:
            The text of each streamed response chunk, in arrival order.
        """
        responses = self.model.generate_content(
            [
                {
                    "role": "user",
                    "parts": [
                        self.__upload_to_gemini(
                            id=id, path=local_file, mime_type="audio/m4a"
                        ),
                        f"""
                        Summarize the audio's content to sound like a podcast.\n
                        Add fun facts to the summary too.\n
                        The uploader of the audio is the following: {uploader}\n
                        Add a nice title to the summary too.\n
                        """,
                    ],
                },
            ],
            stream=True,
        )
        for response in responses:
            yield response.text

    def __upload_to_gemini(
        self, id: str, path: str, mime_type=None
    ) -> "genai.types.File":
        """Uploads the given file to Gemini.

        See https://ai.google.dev/gemini-api/docs/prompting_with_media

        Args:
            id: Identifier of the source video. Currently unused; kept so the
                call signature stays stable.
            path: Local path of the file to upload.
            mime_type: MIME type passed through to the upload call.

        Returns:
            The uploaded file object returned by ``genai.upload_file`` (not a
            string), suitable for use as a prompt part.
        """
        file = genai.upload_file(
            path,
            mime_type=mime_type,
        )
        # Lazy %-style arguments: the message is only formatted if INFO is on.
        logger.info("Uploaded file '%s' as: %s", file.display_name, file.uri)
        return file
|
68 |
+
|
69 |
+
|
70 |
+
if __name__ == "__main__":
    from extract_audio import simple_download_audio_from_youtube

    # Manual smoke test: ask for a link, fetch its audio, print the streamed
    # summary chunk by chunk.
    client = Gemini()
    link = input("Enter YouTube link: ")
    result = simple_download_audio_from_youtube(link)

    summary_stream = client.generate_text(
        result.get_local_file_path(), result.id, result.uploader
    )
    for piece in summary_stream:
        print(piece)
|
app.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ai_client import Gemini
|
2 |
+
from extract_audio import simple_download_audio_from_youtube
|
3 |
+
from models import YTResultWithTranscript
|
4 |
+
import gradio as gr
|
5 |
+
import os
|
6 |
+
|
7 |
+
# Shared Gemini client, created once at import time and reused by every request.
gemini = Gemini()
|
8 |
+
|
9 |
+
|
10 |
+
def summarize_audio(youtube_link: str):
    """Download a video's audio and stream a Gemini-written summary of it.

    Args:
        youtube_link: Link of the video to summarize.

    Yields:
        After every streamed chunk, the full output row
        ``[id, title, thumbnail_link, uploader, summary_so_far]`` so the UI
        can refresh progressively.

    Raises:
        gr.Error: If the download step reported a non-zero error code.
    """
    yt_res = simple_download_audio_from_youtube(youtube_link)
    # error_code stays None when a cached file was reused; only a non-zero
    # code signals a failed download, in which case there is nothing to upload.
    if yt_res.error_code:
        raise gr.Error(
            f"Could not download the audio (error code {yt_res.error_code})."
        )

    # Start from an empty string: the accumulated text is displayed verbatim,
    # so any placeholder here would be prepended to every summary.
    yt_transcript = YTResultWithTranscript(**yt_res.model_dump(), transcript="")
    for chunk in gemini.generate_text(
        yt_res.get_local_file_path(),
        yt_res.id,
        yt_res.uploader,
    ):
        yt_transcript.transcript += chunk
        yield yt_transcript.model_outputs()
|
22 |
+
|
23 |
+
|
24 |
+
# Gradio UI: one text input and five outputs, listed in the same order as
# YTResultWithTranscript.model_outputs() returns its values.
demo = gr.Interface(
    fn=summarize_audio,
    inputs=gr.Textbox(label="YouTube Link"),
    outputs=[
        # id
        gr.Textbox(lines=1, label="ID"),
        # title
        gr.Textbox(lines=1, label="Title"),
        # thumbnail_link
        gr.Image(label="Thumbnail Link", type='filepath', show_download_button=True),
        # uploader
        gr.Textbox(lines=1, label="Uploader"),
        # transcript -- `lines` is a Textbox option, not a Markdown one, so it
        # is not passed here.
        gr.Markdown(label="Transcript", show_copy_button=True),
    ],
    title="Summarize Audio",
    description="Summarize the content of an audio from a YouTube link.",
    flagging_mode="never",
    api_name="summarize",
)
|
43 |
+
|
44 |
+
|
45 |
+
def auth_handler(usr, pwd) -> bool:
    """Check login credentials against the ``USERNAME``/``PASSWORD`` env vars.

    Args:
        usr: Username supplied by the login form.
        pwd: Password supplied by the login form.

    Returns:
        True only when both values match the configured credentials. Always
        False when either environment variable is unset or empty, or when a
        supplied value is not a string.
    """
    import hmac  # local import: only this function needs it

    expected_user = os.environ.get("USERNAME")
    expected_password = os.environ.get("PASSWORD")

    # Fail closed: without configured credentials a plain `==` would let
    # (None, None) through because None == None.
    if not expected_user or not expected_password:
        return False
    if not isinstance(usr, str) or not isinstance(pwd, str):
        return False

    # compare_digest avoids leaking match position through timing; encode to
    # bytes because it rejects non-ASCII str arguments.
    user_ok = hmac.compare_digest(
        usr.encode("utf-8"), expected_user.encode("utf-8")
    )
    password_ok = hmac.compare_digest(
        pwd.encode("utf-8"), expected_password.encode("utf-8")
    )
    return user_ok and password_ok
|
49 |
+
|
50 |
+
|
51 |
+
# Start the app at import time; logins are checked by auth_handler.
demo.launch(auth=auth_handler, pwa=True)
|
extract_audio.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from logging import getLogger
|
2 |
+
from typing import Generator, Optional
|
3 |
+
from models import YTResult
|
4 |
+
import yt_dlp
|
5 |
+
|
6 |
+
logger = getLogger(__name__)
|
7 |
+
|
8 |
+
|
9 |
+
def __get_audio(result: YTResult) -> Optional[YTResult]:
    """Return ``result`` if its audio file is already on disk, else ``None``.

    Args:
        result: Video metadata whose local audio path should be checked.

    Returns:
        The same ``result`` object when the file exists, otherwise ``None``.
    """
    import os.path  # local import: only needed for the existence check

    # Ask the model for the path so the on-disk naming scheme lives in one
    # place, and test for existence without opening a file handle.
    if os.path.isfile(result.get_local_file_path()):
        return result
    return None
|
15 |
+
|
16 |
+
|
17 |
+
def __my_hook(d):
    """Progress hook for yt-dlp: log the state of the current download.

    Args:
        d: Status dictionary passed by yt-dlp; ``status`` selects the branch
            and the byte counters are read while downloading.
    """
    status = d.get("status")
    if status == "error":
        logger.error("Error downloading video")
    elif status == "downloading":
        # The counters may be absent or None, so fall back with `or` rather
        # than a .get() default, which does not cover an explicit None.
        downloaded_bytes = d.get("downloaded_bytes") or 0
        # Prefer the exact size and fall back to the estimate.
        total_bytes = d.get("total_bytes") or d.get("total_bytes_estimate")
        if total_bytes:
            percent = downloaded_bytes / total_bytes * 100
            logger.info(f"Downloaded {percent:.2f}%")
        else:
            # Size unknown: report raw progress instead of a bogus percentage.
            logger.info(f"Downloaded {downloaded_bytes} bytes")
    elif status == "finished":
        logger.info("Download finished")
|
27 |
+
|
28 |
+
|
29 |
+
def __get_options():
    """Build the yt-dlp options used by every function in this module."""
    options = {}
    # Format selector string handed to yt-dlp unchanged.
    options["format"] = "m4a/bestaudio/best"
    # Files are written under output/, named after the video id.
    options["outtmpl"] = "output/%(id)s.%(ext)s"
    options["progress_hooks"] = [__my_hook]
    return options
|
35 |
+
|
36 |
+
|
37 |
+
def extract_info(link: str) -> YTResult:
    """Look up a video's metadata without downloading any media.

    Args:
        link: Link of the video to inspect.

    Returns:
        A ``YTResult`` holding the id, title, thumbnail link and uploader.
    """
    with yt_dlp.YoutubeDL(__get_options()) as downloader:
        metadata = downloader.sanitize_info(
            downloader.extract_info(link, download=False)
        )

        fields = {
            "id": metadata["id"],
            "title": metadata["title"],
            "thumbnail_link": metadata["thumbnail"],
            "uploader": metadata["uploader"],
        }
        return YTResult(**fields)
|
48 |
+
|
49 |
+
|
50 |
+
def simple_download_audio_from_youtube(link: str) -> YTResult:
    """Fetch a video's metadata and make sure its audio is available locally.

    If the audio file is already on disk, the download is skipped and
    ``error_code`` stays ``None``; otherwise it holds the value returned by
    the download call.

    Args:
        link: Link of the video to download.

    Returns:
        The ``YTResult`` describing the video.
    """
    with yt_dlp.YoutubeDL(__get_options()) as downloader:
        raw_info = downloader.extract_info(link, download=False)
        metadata = downloader.sanitize_info(raw_info)

        result = YTResult(
            id=metadata["id"],
            title=metadata["title"],
            thumbnail_link=metadata["thumbnail"],
            uploader=metadata["uploader"],
        )

        # Only download when no previously saved copy was found.
        if not __get_audio(result):
            result.error_code = downloader.download([link])
        return result
|
70 |
+
|
71 |
+
|
72 |
+
def download_audio_from_youtube(link: str) -> Generator[YTResult, None, None]:
    """Download a video's audio, reporting metadata first and the outcome last.

    Args:
        link: Link of the video to download.

    Yields:
        First a ``YTResult`` with the video metadata only, then, once the
        download call has returned, a second ``YTResult`` that also carries
        its ``error_code``.
    """
    with yt_dlp.YoutubeDL(__get_options()) as downloader:
        metadata = downloader.sanitize_info(
            downloader.extract_info(link, download=False)
        )
        common = {
            "id": metadata["id"],
            "title": metadata["title"],
            "thumbnail_link": metadata["thumbnail"],
            "uploader": metadata["uploader"],
        }

        # First yield: metadata only, before any download has started.
        yield YTResult(**common)

        # Blocking download; its return value goes into the final result.
        outcome = downloader.download([link])
        yield YTResult(**common, error_code=outcome)
|
95 |
+
|
96 |
+
|
97 |
+
# Example Usage
|
98 |
+
if __name__ == "__main__":
    import logging

    # With no handler configured, INFO records are dropped (the root logger
    # defaults to WARNING), so this example would otherwise print nothing.
    logging.basicConfig(level=logging.INFO)

    yt_link = "https://www.youtube.com/watch?v=vf7bI5nZyi8"
    for update in download_audio_from_youtube(yt_link):
        logger.info(f"Video Info: {update}")
|
models.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional
|
2 |
+
|
3 |
+
from pydantic import BaseModel, Field
|
4 |
+
|
5 |
+
|
6 |
+
class YTRequest(BaseModel):
    """Request payload carrying the link of the video to process."""

    yt_link: str = Field(..., description="The YouTube video link to be processed")
|
8 |
+
|
9 |
+
|
10 |
+
class YTResult(BaseModel):
    """Metadata of a processed video plus the outcome of its download."""

    id: str = Field(..., description="The YouTube video ID")
    title: str = Field(..., description="The YouTube video title")
    thumbnail_link: str = Field(..., description="The YouTube video thumbnail link")
    uploader: str = Field(..., description="The YouTube video uploader")
    # Left as None until a download call has reported a value.
    error_code: Optional[int] = Field(default=None, description="The error code if any")

    def get_local_file_path(self) -> str:
        """Return the path of this video's audio file under ``output/``."""
        return "output/{}.m4a".format(self.id)
|
19 |
+
|
20 |
+
|
21 |
+
class YTResultWithTranscript(YTResult):
    """A ``YTResult`` extended with the text generated for the video."""

    transcript: str = Field(..., description="The YouTube video transcript")

    def model_outputs(self) -> list:
        """Return the field values as a list, in a fixed display order."""
        ordered_fields = ("id", "title", "thumbnail_link", "uploader", "transcript")
        return [getattr(self, field_name) for field_name in ordered_fields]
|
requirements.txt
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
annotated-types==0.7.0
|
3 |
+
anthropic==0.43.1
|
4 |
+
anyio==4.8.0
|
5 |
+
cachetools==5.5.0
|
6 |
+
certifi==2024.12.14
|
7 |
+
charset-normalizer==3.4.1
|
8 |
+
click==8.1.8
|
9 |
+
colorama==0.4.6
|
10 |
+
distro==1.9.0
|
11 |
+
eval_type_backport==0.2.2
|
12 |
+
fastapi==0.115.6
|
13 |
+
ffmpy==0.5.0
|
14 |
+
filelock==3.16.1
|
15 |
+
fsspec==2024.12.0
|
16 |
+
google-ai-generativelanguage==0.6.10
|
17 |
+
google-api-core==2.24.0
|
18 |
+
google-api-python-client==2.157.0
|
19 |
+
google-auth==2.37.0
|
20 |
+
google-auth-httplib2==0.2.0
|
21 |
+
google-generativeai==0.8.3
|
22 |
+
googleapis-common-protos==1.66.0
|
23 |
+
gradio==5.12.0
|
24 |
+
gradio_client==1.5.4
|
25 |
+
griffe==1.5.5
|
26 |
+
groq==0.15.0
|
27 |
+
grpcio==1.69.0
|
28 |
+
grpcio-status==1.69.0
|
29 |
+
h11==0.14.0
|
30 |
+
httpcore==1.0.7
|
31 |
+
httplib2==0.22.0
|
32 |
+
httpx==0.28.1
|
33 |
+
huggingface-hub==0.27.1
|
34 |
+
idna==3.10
|
35 |
+
Jinja2==3.1.5
|
36 |
+
jiter==0.8.2
|
37 |
+
jsonpath-python==1.0.6
|
38 |
+
logfire-api==3.2.0
|
39 |
+
markdown-it-py==3.0.0
|
40 |
+
MarkupSafe==2.1.5
|
41 |
+
mdurl==0.1.2
|
42 |
+
mistralai==1.4.0
|
43 |
+
mypy-extensions==1.0.0
|
44 |
+
numpy==2.2.2
|
45 |
+
openai==1.59.9
|
46 |
+
orjson==3.10.15
|
47 |
+
packaging==24.2
|
48 |
+
pandas==2.2.3
|
49 |
+
pillow==11.1.0
|
50 |
+
proto-plus==1.25.0
|
51 |
+
protobuf==5.29.3
|
52 |
+
pyasn1==0.6.1
|
53 |
+
pyasn1_modules==0.4.1
|
54 |
+
pydantic==2.10.5
|
55 |
+
pydantic-ai-slim==0.0.19
|
56 |
+
pydantic-graph==0.0.19
|
57 |
+
pydantic_core==2.27.2
|
58 |
+
pydub==0.25.1
|
59 |
+
Pygments==2.19.1
|
60 |
+
pyparsing==3.2.1
|
61 |
+
python-dateutil==2.9.0.post0
|
62 |
+
python-dotenv==1.0.1
|
63 |
+
python-multipart==0.0.20
|
64 |
+
pytz==2024.2
|
65 |
+
PyYAML==6.0.2
|
66 |
+
requests==2.32.3
|
67 |
+
rich==13.9.4
|
68 |
+
rsa==4.9
|
69 |
+
ruff==0.9.2
|
70 |
+
safehttpx==0.1.6
|
71 |
+
semantic-version==2.10.0
|
72 |
+
shellingham==1.5.4
|
73 |
+
six==1.17.0
|
74 |
+
sniffio==1.3.1
|
75 |
+
starlette==0.41.3
|
76 |
+
tomlkit==0.13.2
|
77 |
+
tqdm==4.67.1
|
78 |
+
typer==0.15.1
|
79 |
+
typing-inspect==0.9.0
|
80 |
+
typing_extensions==4.12.2
|
81 |
+
tzdata==2024.2
|
82 |
+
uritemplate==4.1.1
|
83 |
+
urllib3==2.3.0
|
84 |
+
uvicorn==0.34.0
|
85 |
+
websockets==14.2
|
86 |
+
yt-dlp==2025.1.15
|