Spaces:
Sleeping
Sleeping
Commit
·
17f036f
1
Parent(s):
1b48acb
Upload 3 files
Browse files- app.py +67 -0
- requirements.txt +0 -0
- scraper.py +15 -0
app.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# streamlit app

import streamlit as st
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline
from scraper import get_latest_news

# Load FinBERT model and tokenizer
# (num_labels=3: the finbert-tone checkpoint classifies financial text
# into three tones — positive / negative / neutral)
finbert = BertForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone", num_labels=3)
tokenizer = BertTokenizer.from_pretrained("yiyanghkust/finbert-tone")

# Create sentiment analysis pipeline
# NOTE(review): the model is (re)loaded at module import on every cold
# start — presumably cacheable with st.cache_resource; confirm the pinned
# Streamlit version supports it before changing.
nlp = pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)
# Function to perform sentiment analysis
def analyze_sentiment(text):
    """Run the FinBERT pipeline on *text* and return the top result's label string."""
    # nlp(...) yields a list of {"label": ..., "score": ...} dicts; only
    # the label of the first (and, for a single input, only) entry is used.
    top_result, *_ = nlp(text)
    return top_result["label"]
# Function to get sentiment labels for a list of headlines
def get_sentiment_labels(headlines_list):
    """Return the sentiment label for each headline in *headlines_list*.

    Labels are produced by analyze_sentiment, in the same order as the
    input; an empty input list naturally yields [].
    """
    # Comprehension replaces the manual append loop — same order, same
    # one-pipeline-call-per-headline behavior.
    return [analyze_sentiment(headline) for headline in headlines_list]
# Function to print a Streamlit table with news headlines and sentiment labels
def display_news_sentiment_table(headlines_list, sentiment_labels):
    """Render a Streamlit table of headlines color-coded by sentiment.

    *headlines_list* and *sentiment_labels* are parallel lists: each row
    pairs one headline with its sentiment label.
    """
    df = pd.DataFrame({
        "Headlines": headlines_list,
        "Sentiment": sentiment_labels
    })

    # Function to apply background colors based on sentiment labels
    def style_func(val):
        color_dict = {
            "negative": 'red',
            "positive": 'green',
            "neutral": 'gray'
        }
        # .get() instead of indexing: an unexpected label would otherwise
        # raise KeyError inside the Styler and break the whole table.
        color = color_dict.get(val.lower())
        return f"background-color: {color}" if color else ""

    # Display the table
    # NOTE(review): Styler.applymap is deprecated in pandas >= 2.1 in
    # favor of Styler.map — confirm the pinned pandas version before
    # switching.
    st.dataframe(df.set_index("Headlines").style.applymap(style_func, subset=["Sentiment"]))
# Streamlit app
st.title("Financial News Sentiment Analysis")

# Get the latest news headlines and sentiment labels using the scraper
latest_news_headlines = get_latest_news()
sentiment_labels = get_sentiment_labels(latest_news_headlines)

# Display the table in the Streamlit app
display_news_sentiment_table(latest_news_headlines, sentiment_labels)

# Refresh button — rerunning the script re-scrapes and re-classifies.
if st.button("Refresh"):
    # NOTE(review): st.experimental_rerun was deprecated and later removed
    # in favor of st.rerun — confirm the Streamlit version pinned in
    # requirements.txt still provides it.
    st.experimental_rerun()

# App Description
st.markdown("---")
st.subheader("Description")
st.info("This app uses the [FinBERT](https://huggingface.co/yiyanghkust/finbert-tone) model from Hugging Face to perform sentiment analysis on financial news headlines. The headlines are scraped in real-time from [Finviz](https://finviz.com/). The news headlines displayed on the web app are the latest, and you can click the 'Refresh' button to update the headlines and sentiment analysis.")
st.markdown("---")
requirements.txt
ADDED
Binary file (7.67 kB). View file
|
|
scraper.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# scraper.py
|
2 |
+
|
3 |
+
# Scrape news headlines using BeautifulSoup
|
4 |
+
from urllib.request import urlopen, Request
|
5 |
+
from bs4 import BeautifulSoup
|
6 |
+
|
def get_latest_news(limit=15):
    """Scrape the newest headlines from the Finviz news page.

    Parameters
    ----------
    limit : int, optional
        Maximum number of headlines to return. Defaults to 15, matching
        the previously hard-coded behavior.

    Returns
    -------
    list[str]
        Stripped headline texts, in page order.
    """
    url = "https://finviz.com/news.ashx"  # Link to news webpage
    # Custom user-agent header — presumably Finviz rejects the default
    # urllib agent; verify before removing.
    req = Request(url=url, headers={"user-agent": "my-app"})
    # Context manager closes the HTTP response even if reading/parsing
    # raises (the original leaked the connection); the timeout prevents a
    # hung request from blocking the app indefinitely.
    with urlopen(req, timeout=10) as response:
        html_content = response.read()
    soup = BeautifulSoup(html_content, "html.parser")
    headline_elements = soup.find_all("td", class_="news_link-cell")
    latest_headlines = []
    for headline_element in headline_elements[:limit]:
        link = headline_element.find("a", class_="tab-link")
        # Skip cells missing the expected anchor instead of raising
        # AttributeError on link.text.
        if link is not None:
            latest_headlines.append(link.text.strip())
    return latest_headlines