Sharan Thakur committed on
Commit
f8c4214
·
1 Parent(s): 6e522f0

Add initial implementation of YouTube audio summarizer with Gemini API integration

Browse files
Files changed (7) hide show
  1. .gitignore +3 -0
  2. README.md +1 -3
  3. ai_client.py +80 -0
  4. app.py +51 -0
  5. extract_audio.py +101 -0
  6. models.py +31 -0
  7. requirements.txt +86 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ venv/
2
+ output/
3
+ *.m4a
README.md CHANGED
@@ -7,7 +7,5 @@ sdk: gradio
7
  sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: A summarizer for youtube videos using GeminiAPI
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
7
  sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
+ short_description: YouTube Summarizer is a tool that helps you quickly get the gist of YouTube videos by providing concise summaries.
11
  ---
 
 
ai_client.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+ import google.generativeai as genai
4
+ from typing import Generator
5
+ from logging import getLogger
6
+
7
+ logger = getLogger(__name__)
8
+
9
+
10
class Gemini:
    """Small wrapper around a Gemini generative model used to summarize audio.

    Construction calls ``load_dotenv()``, reads the API key from the
    ``GEMINI_API_KEY`` environment variable, configures the SDK and creates a
    ``gemini-1.5-pro`` model with a fixed generation config.
    """

    def __init__(self):
        """Configure the SDK and build the model.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` is missing or empty.
        """
        load_dotenv()
        api_key = os.getenv("GEMINI_API_KEY")
        # An empty key is as unusable as a missing one; fail early with the
        # same error instead of failing later inside the SDK.
        if not api_key:
            raise ValueError("GEMINI_API_KEY is not set in the environment variables")
        genai.configure(api_key=api_key)

        # Create the model
        self.generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }

        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-pro",
            generation_config=self.generation_config,
        )

    def generate_text(
        self, local_file: str, id: str, uploader: str
    ) -> Generator[str, None, None]:
        """Upload an audio file and stream a podcast-style summary of it.

        Args:
            local_file: Path of the audio file to upload.
            id: Identifier forwarded to the upload helper.
            uploader: Uploader name that is interpolated into the prompt.

        Yields:
            The text of each streamed response chunk, in arrival order.
        """
        audio_file = self.__upload_to_gemini(
            id=id, path=local_file, mime_type="audio/m4a"
        )
        prompt = f"""
                        Summarize the audio's content to sound like a podcast.\n
                        Add fun facts to the summary too.\n
                        The uploader of the audio is the following: {uploader}\n
                        Add a nice title to the summary too.\n
                        """
        responses = self.model.generate_content(
            [
                {
                    "role": "user",
                    "parts": [audio_file, prompt],
                },
            ],
            stream=True,
        )
        for response in responses:
            try:
                text = response.text
            except ValueError:
                # ``.text`` raises ValueError when a chunk carries no text part
                # (for example an empty or blocked chunk); skip it rather than
                # aborting the whole stream.
                logger.warning("Skipping a response chunk without text content")
                continue
            yield text

    def __upload_to_gemini(
        self, id: str, path: str, mime_type=None
    ) -> "genai.types.File":
        """Uploads the given file to Gemini.

        See https://ai.google.dev/gemini-api/docs/prompting_with_media

        Args:
            id: Accepted for the caller's convenience; not used by the upload.
            path: Path of the local file to upload.
            mime_type: MIME type passed through to ``genai.upload_file``.

        Returns:
            The file object returned by ``genai.upload_file``.
        """
        file = genai.upload_file(
            path,
            mime_type=mime_type,
        )
        logger.info("Uploaded file '%s' as: %s", file.display_name, file.uri)
        return file
68
+
69
+
70
if __name__ == "__main__":
    # Manual smoke test: ask for a link, download its audio and stream the
    # generated summary to stdout.
    from extract_audio import simple_download_audio_from_youtube

    gemini = Gemini()
    yt_link = input("Enter YouTube link: ")
    yt_res = simple_download_audio_from_youtube(yt_link)

    for chunk in gemini.generate_text(
        yt_res.get_local_file_path(), yt_res.id, yt_res.uploader
    ):
        # Chunks are consecutive fragments of one text, so they are written
        # back-to-back; a newline per chunk would split sentences.
        print(chunk, end="", flush=True)
    print()
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ai_client import Gemini
2
+ from extract_audio import simple_download_audio_from_youtube
3
+ from models import YTResultWithTranscript
4
+ import gradio as gr
5
+ import os
6
+
7
+ gemini = Gemini()
8
+
9
+
10
def summarize_audio(youtube_link: str):
    """Download a video's audio and stream a generated summary of it.

    Written as a generator: after every chunk received from
    ``gemini.generate_text`` the full output row is yielded again with the
    summary accumulated so far.

    Args:
        youtube_link: Link of the YouTube video to summarize.

    Yields:
        The list produced by ``YTResultWithTranscript.model_outputs()``:
        id, title, thumbnail link, uploader and the summary text so far.

    Raises:
        gr.Error: If the download step reported a non-zero error code.
    """
    yt_res = simple_download_audio_from_youtube(youtube_link)
    if yt_res.error_code:
        # Without the audio file the upload step would fail with an unrelated
        # error; surface the real cause to the user instead.
        raise gr.Error(
            f"Could not download the audio (error code {yt_res.error_code})."
        )

    # Start from an empty summary so that no placeholder text is prepended to
    # what the user sees.
    yt_transcript = YTResultWithTranscript(**yt_res.model_dump(), transcript="")
    for chunk in gemini.generate_text(
        yt_res.get_local_file_path(),
        yt_res.id,
        yt_res.uploader,
    ):
        yt_transcript.transcript += chunk
        yield yt_transcript.model_outputs()
22
+
23
+
24
# Gradio interface: one text input (the link) and five outputs, listed in the
# same order as the values yielded by ``summarize_audio``.
demo = gr.Interface(
    fn=summarize_audio,
    inputs=gr.Textbox(label="YouTube Link"),
    outputs=[
        # id
        gr.Textbox(lines=1, label="ID"),
        # title
        gr.Textbox(lines=1, label="Title"),
        # thumbnail_link
        gr.Image(label="Thumbnail Link", type="filepath", show_download_button=True),
        # uploader
        gr.Textbox(lines=1, label="Uploader"),
        # transcript -- ``lines`` is a Textbox option and is not passed to
        # Markdown, which sizes itself to its content.
        gr.Markdown(label="Transcript", show_copy_button=True),
    ],
    title="Summarize Audio",
    description="Summarize the content of an audio from a YouTube link.",
    flagging_mode="never",
    api_name="summarize",
)
43
+
44
+
45
def auth_handler(usr, pwd) -> bool:
    """Check submitted credentials against the configured ones.

    The expected values are read from the ``USERNAME`` and ``PASSWORD``
    environment variables on every call.

    Args:
        usr: Username submitted by the client.
        pwd: Password submitted by the client.

    Returns:
        ``True`` only when both values match the configured credentials.
        Always ``False`` when either environment variable is unset or empty,
        or when a submitted value is not a string.
    """
    import hmac

    expected_user = os.environ.get("USERNAME")
    expected_pwd = os.environ.get("PASSWORD")

    # Refuse to authenticate anyone when no credentials are configured;
    # otherwise missing values on both sides could compare equal.
    if not expected_user or not expected_pwd:
        return False
    if not isinstance(usr, str) or not isinstance(pwd, str):
        return False

    # Compare as bytes (compare_digest rejects non-ASCII str) and evaluate
    # both checks before combining them so timing does not reveal which
    # field was wrong.
    user_ok = hmac.compare_digest(usr.encode("utf-8"), expected_user.encode("utf-8"))
    pwd_ok = hmac.compare_digest(pwd.encode("utf-8"), expected_pwd.encode("utf-8"))
    return user_ok and pwd_ok
49
+
50
+
51
+ demo.launch(auth=auth_handler, pwa=True)
extract_audio.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from logging import getLogger
2
+ from typing import Generator, Optional
3
+ from models import YTResult
4
+ import yt_dlp
5
+
6
+ logger = getLogger(__name__)
7
+
8
+
9
def __get_audio(result: YTResult) -> Optional[YTResult]:
    """Return ``result`` when its audio file already exists locally.

    Args:
        result: Video metadata whose ``get_local_file_path()`` names the
            expected audio file.

    Returns:
        ``result`` itself if the file exists, otherwise ``None``.
    """
    import os.path

    # Ask the model for the path so the location is defined in one place, and
    # test for existence without opening a file handle.
    if os.path.isfile(result.get_local_file_path()):
        return result
    return None
15
+
16
+
17
def __my_hook(d):
    """Progress hook for yt-dlp that logs the state of a download.

    Args:
        d: Progress dictionary supplied by yt-dlp. Only ``status``,
            ``downloaded_bytes``, ``total_bytes`` and
            ``total_bytes_estimate`` are read.
    """
    status = d.get("status")
    if status == "error":
        logger.error("Error downloading video")
    elif status == "downloading":
        downloaded_bytes = d.get("downloaded_bytes") or 0
        # Prefer the exact size; fall back to the estimate. Either key may be
        # absent or None, in which case no percentage can be computed.
        total_bytes = d.get("total_bytes") or d.get("total_bytes_estimate")
        if total_bytes:
            percent = downloaded_bytes / total_bytes * 100
            logger.info("Downloaded %.2f%%", percent)
        else:
            logger.info("Downloaded %d bytes", downloaded_bytes)
    elif status == "finished":
        logger.info("Download finished")
27
+
28
+
29
def __get_options():
    """Build the option dictionary passed to ``yt_dlp.YoutubeDL``.

    Returns:
        A new dict with the format selector, the output filename template
        and the progress hook list.
    """
    options = dict(
        format="m4a/bestaudio/best",
        outtmpl="output/%(id)s.%(ext)s",
        progress_hooks=[__my_hook],
    )
    return options
35
+
36
+
37
def _fetch_info(ydl, link: str) -> dict:
    """Return the sanitized metadata of ``link`` without downloading it."""
    info = ydl.extract_info(link, download=False)
    return ydl.sanitize_info(info)


def _result_from_info(info_dict: dict, error_code: Optional[int] = None) -> YTResult:
    """Build a ``YTResult`` from a sanitized yt-dlp info dictionary."""
    return YTResult(
        id=info_dict["id"],
        title=info_dict["title"],
        thumbnail_link=info_dict["thumbnail"],
        uploader=info_dict["uploader"],
        error_code=error_code,
    )


def extract_info(link: str) -> YTResult:
    """Fetch a video's metadata without downloading any media.

    Args:
        link: Link of the video.

    Returns:
        A ``YTResult`` holding id, title, thumbnail link and uploader.
    """
    with yt_dlp.YoutubeDL(__get_options()) as ydl:
        return _result_from_info(_fetch_info(ydl, link))


def simple_download_audio_from_youtube(link: str) -> YTResult:
    """Fetch a video's metadata and make sure its audio file is on disk.

    Args:
        link: Link of the video.

    Returns:
        The video's ``YTResult``. When the audio file was already present
        the download is skipped and ``error_code`` keeps its default;
        otherwise ``error_code`` holds the value returned by the download.
    """
    with yt_dlp.YoutubeDL(__get_options()) as ydl:
        res = _result_from_info(_fetch_info(ydl, link))

        # Reuse a previously downloaded file instead of fetching it again.
        if __get_audio(res):
            return res

        res.error_code = ydl.download([link])
        return res


def download_audio_from_youtube(link: str) -> Generator[YTResult, None, None]:
    """Download a video's audio, reporting before and after the download.

    Args:
        link: Link of the video.

    Yields:
        First a ``YTResult`` with the metadata only, then, once the download
        call has returned, a second ``YTResult`` that also carries its
        ``error_code``.
    """
    with yt_dlp.YoutubeDL(__get_options()) as ydl:
        info_dict = _fetch_info(ydl, link)

        # Yield video metadata
        yield _result_from_info(info_dict)

        # Run the download; intermediate progress is reported only through
        # the progress hook's log output, not through this generator.
        error_code = ydl.download([link])

        yield _result_from_info(info_dict, error_code=error_code)
95
+
96
+
97
+ # Example Usage
98
if __name__ == "__main__":
    import logging

    # The example reports through the module logger; without a configured
    # handler and level, INFO records would be dropped and nothing printed.
    logging.basicConfig(level=logging.INFO)

    yt_link = "https://www.youtube.com/watch?v=vf7bI5nZyi8"
    for update in download_audio_from_youtube(yt_link):
        logger.info("Video Info: %s", update)
models.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
class YTRequest(BaseModel):
    """Request payload that carries the link of the video to process."""

    yt_link: str = Field(
        ...,
        description="The YouTube video link to be processed",
    )
8
+
9
+
10
class YTResult(BaseModel):
    """Metadata of a YouTube video plus an optional download error code."""

    id: str = Field(..., description="The YouTube video ID")
    title: str = Field(..., description="The YouTube video title")
    thumbnail_link: str = Field(..., description="The YouTube video thumbnail link")
    uploader: str = Field(..., description="The YouTube video uploader")
    error_code: Optional[int] = Field(default=None, description="The error code if any")

    def get_local_file_path(self) -> str:
        """Return the relative path of this video's audio file.

        The path is derived from the video id only; the file is not checked
        for existence.
        """
        return "output/{}.m4a".format(self.id)
19
+
20
+
21
class YTResultWithTranscript(YTResult):
    """A ``YTResult`` extended with a transcript text."""

    transcript: str = Field(..., description="The YouTube video transcript")

    def model_outputs(self) -> list:
        """Return the display values as a list in a fixed order.

        Returns:
            ``[id, title, thumbnail_link, uploader, transcript]``.
        """
        ordered_fields = ("id", "title", "thumbnail_link", "uploader", "transcript")
        return [getattr(self, field_name) for field_name in ordered_fields]
requirements.txt ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anthropic==0.43.1
4
+ anyio==4.8.0
5
+ cachetools==5.5.0
6
+ certifi==2024.12.14
7
+ charset-normalizer==3.4.1
8
+ click==8.1.8
9
+ colorama==0.4.6
10
+ distro==1.9.0
11
+ eval_type_backport==0.2.2
12
+ fastapi==0.115.6
13
+ ffmpy==0.5.0
14
+ filelock==3.16.1
15
+ fsspec==2024.12.0
16
+ google-ai-generativelanguage==0.6.10
17
+ google-api-core==2.24.0
18
+ google-api-python-client==2.157.0
19
+ google-auth==2.37.0
20
+ google-auth-httplib2==0.2.0
21
+ google-generativeai==0.8.3
22
+ googleapis-common-protos==1.66.0
23
+ gradio==5.12.0
24
+ gradio_client==1.5.4
25
+ griffe==1.5.5
26
+ groq==0.15.0
27
+ grpcio==1.69.0
28
+ grpcio-status==1.69.0
29
+ h11==0.14.0
30
+ httpcore==1.0.7
31
+ httplib2==0.22.0
32
+ httpx==0.28.1
33
+ huggingface-hub==0.27.1
34
+ idna==3.10
35
+ Jinja2==3.1.5
36
+ jiter==0.8.2
37
+ jsonpath-python==1.0.6
38
+ logfire-api==3.2.0
39
+ markdown-it-py==3.0.0
40
+ MarkupSafe==2.1.5
41
+ mdurl==0.1.2
42
+ mistralai==1.4.0
43
+ mypy-extensions==1.0.0
44
+ numpy==2.2.2
45
+ openai==1.59.9
46
+ orjson==3.10.15
47
+ packaging==24.2
48
+ pandas==2.2.3
49
+ pillow==11.1.0
50
+ proto-plus==1.25.0
51
+ protobuf==5.29.3
52
+ pyasn1==0.6.1
53
+ pyasn1_modules==0.4.1
54
+ pydantic==2.10.5
55
+ pydantic-ai-slim==0.0.19
56
+ pydantic-graph==0.0.19
57
+ pydantic_core==2.27.2
58
+ pydub==0.25.1
59
+ Pygments==2.19.1
60
+ pyparsing==3.2.1
61
+ python-dateutil==2.9.0.post0
62
+ python-dotenv==1.0.1
63
+ python-multipart==0.0.20
64
+ pytz==2024.2
65
+ PyYAML==6.0.2
66
+ requests==2.32.3
67
+ rich==13.9.4
68
+ rsa==4.9
69
+ ruff==0.9.2
70
+ safehttpx==0.1.6
71
+ semantic-version==2.10.0
72
+ shellingham==1.5.4
73
+ six==1.17.0
74
+ sniffio==1.3.1
75
+ starlette==0.41.3
76
+ tomlkit==0.13.2
77
+ tqdm==4.67.1
78
+ typer==0.15.1
79
+ typing-inspect==0.9.0
80
+ typing_extensions==4.12.2
81
+ tzdata==2024.2
82
+ uritemplate==4.1.1
83
+ urllib3==2.3.0
84
+ uvicorn==0.34.0
85
+ websockets==14.2
86
+ yt-dlp==2025.1.15