eHemink committed on
Commit
11b8b80
·
1 Parent(s): cdeccc8

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -85
app.py DELETED
@@ -1,85 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """app.py.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1zk7xuWSf7ii7zowOqNVLy0FwXYVHYE2V
8
- """
9
-
10
- # Imports
11
- ! pip install PyPDF2
12
- import PyPDF2
13
- import re
14
- ! pip install transformers
15
- import transformers
16
- from transformers import pipeline
17
- ! pip install git+https://github.com/suno-ai/bark.git
18
- from bark import SAMPLE_RATE, generate_audio, preload_models
19
- from scipy.io.wavfile import write as write_wav
20
- from IPython.display import Audio
21
- import gradio as gr
22
-
23
- # Code
24
-
25
def abstract_to_audio(insert_pdf):
    """Turn the abstract of an uploaded PDF article into spoken audio.

    The function extracts the text between the "Abstract" heading and the
    "Introduction" heading of the PDF, summarizes it with the
    "lidiya/bart-large-xsum-samsum" summarization model, and synthesizes the
    summary to speech with Bark.

    Args:
        insert_pdf: The uploaded PDF, given either as a filesystem path or as
            a file-like object that exposes its path through a ``name``
            attribute (NOTE(review): which of the two Gradio's 'file' input
            delivers depends on the installed Gradio version -- confirm).

    Returns:
        A ``(SAMPLE_RATE, audio_array)`` tuple, which is a format the Gradio
        'audio' output accepts. The previous ``IPython.display.Audio`` return
        value only renders inside a notebook and is not usable by Gradio.

    Raises:
        ValueError: If no file was supplied, or if no non-empty abstract
            could be found in the PDF.
    """
    if insert_pdf is None:
        raise ValueError("No PDF file was provided.")

    # Accept both a plain path and an uploaded-file object carrying `.name`.
    pdf_path = getattr(insert_pdf, "name", insert_pdf)

    def find_abstract(page_text):
        """Return the text following an "Abstract" heading, or None if absent."""
        heading = re.search(r'\bAbstract\b', page_text, re.IGNORECASE)
        if heading is None:
            return None
        body = page_text[heading.end():]
        # The abstract ends at the next section, indicated by "Introduction";
        # if that heading is missing, keep everything up to the end of the page.
        next_section = re.search(r'\bIntroduction\b', body)
        if next_section is not None:
            body = body[:next_section.start()]
        return body

    def extract_abstract(pdf_file):
        """Return the stripped abstract from the first page that has one, else ''."""
        with open(pdf_file, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                # Guard against pages that yield no text at all.
                found = find_abstract(page.extract_text() or '')
                if found is not None:
                    # Stop at the first page containing the heading.
                    return found.strip()
        return ''

    abstract = extract_abstract(pdf_path)
    if not abstract:
        # Fail with a clear message instead of summarizing an empty string.
        raise ValueError("Could not find an 'Abstract' section in the PDF.")

    # Summarize the extracted abstract.
    model = "lidiya/bart-large-xsum-samsum"
    summarizer = pipeline(task="summarization", model=model)
    summary_text = summarizer(abstract)[0]['summary_text']
    print(summary_text)

    # Download and load all Bark models, then speak the summary.
    preload_models()
    audio_array = generate_audio(summary_text)

    return SAMPLE_RATE, audio_array
80
-
81
-
82
-
83
-
84
# Build the Gradio UI: a single file upload feeds abstract_to_audio and the
# result is shown in a single audio output.
my_app = gr.Interface(
    fn=abstract_to_audio,
    inputs='file',
    outputs='audio',
)

# Start the app; share=True is forwarded to Gradio's launch().
my_app.launch(share=True)