cyberosa
committed on
Commit
·
4f94489
1
Parent(s):
ee43c61
adding plots for the first app and requirements
Browse files- README.md +3 -3
- app.py +75 -0
- requirements.txt +15 -0
- tabs/__pycache__/market_plots.cpython-311.pyc +0 -0
- tabs/market_plots.py +126 -0
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
title: Prediction Markets Ranking
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.14.0
|
8 |
app_file: app.py
|
|
|
1 |
---
|
2 |
title: Prediction Markets Ranking
|
3 |
+
emoji: 🌬️
|
4 |
+
colorFrom: white
|
5 |
+
colorTo: amber
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.14.0
|
8 |
app_file: app.py
|
app.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
from tabs.market_plots import (
|
4 |
+
plot_top_10_ranking_by_nr_trades,
|
5 |
+
plot_trades_and_traders_ranking,
|
6 |
+
plot_wordcloud_topics,
|
7 |
+
)
|
8 |
+
import logging
|
9 |
+
from huggingface_hub import hf_hub_download
|
10 |
+
|
11 |
+
|
12 |
+
def get_logger():
    """Return this module's logger, set up to emit DEBUG records to a stream.

    The logger is looked up by ``__name__``, so repeated calls return the
    same object. A stream handler is attached only when the logger has no
    handler yet; attaching one on every call would make each record be
    printed once per call.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # Guard against stacking duplicate handlers on repeated calls.
    if not logger.handlers:
        # stream handler and formatter
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

    return logger
|
24 |
+
|
25 |
+
|
26 |
+
def load_data():
    """Download the closed-market metrics file and load it as a DataFrame.

    ``closed_market_metrics.parquet`` is fetched from the
    ``valory/Olas-predict-dataset`` dataset repository with
    ``hf_hub_download`` and the result is read with ``pd.read_parquet``.

    Returns:
        The DataFrame produced by ``pd.read_parquet``.
    """
    # closed_markets metrics: the download result is what pandas reads from
    parquet_file = hf_hub_download(
        repo_type="dataset",
        repo_id="valory/Olas-predict-dataset",
        filename="closed_market_metrics.parquet",
    )
    return pd.read_parquet(parquet_file)
|
36 |
+
|
37 |
+
|
38 |
+
# Module-level start-up: set up logging, load the metrics once, then build
# the Gradio layout and launch it.
logger = get_logger()
logger.info("Loading data from Olas predict dataset")
market_metrics = load_data()


def _heading_row(text):
    """Render *text* as a Markdown component inside its own row."""
    with gr.Row():
        gr.Markdown(text)


with gr.Blocks() as demo:
    gr.HTML("<h1>Prediction markets popularity dashboard </h1>")
    gr.Markdown(
        """This app shows the popularity ranking of prediction markets in Olas Predict. Popularity based on two main metrics:
* number of generated trades on the market
* number of traders active on the market.

These are computed only for closed markets."""
    )

    with gr.Tabs():
        with gr.TabItem("🔥 Popularity metrics"):
            # Each section is a heading row followed by a row holding the plot.
            _heading_row("# 🔝 Top 10 markets based on number of trades")
            with gr.Row():
                top_10_plot = plot_top_10_ranking_by_nr_trades(
                    market_metrics=market_metrics
                )

            _heading_row(
                "# 🏁 Classification based on nr of trades and nr of traders"
            )
            with gr.Row():
                scatterplot = plot_trades_and_traders_ranking(
                    market_metrics=market_metrics
                )

            _heading_row(
                "# ☁️ Wordcloud composed with words from most popular markets"
            )
            with gr.Row():
                wordcloud = plot_wordcloud_topics(market_metrics=market_metrics)

demo.queue(default_concurrency_limit=40).launch()
|
requirements.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
seaborn
|
3 |
+
matplotlib
|
4 |
+
huggingface-hub
|
5 |
+
pyarrow
|
6 |
+
requests
|
7 |
+
gradio==5.0.0
|
8 |
+
plotly
|
9 |
+
pydantic
|
10 |
+
pydantic_core
|
11 |
+
nbformat
|
12 |
+
pytz
|
13 |
+
duckdb
|
14 |
+
wordcloud
|
15 |
+
scikit-learn
|
tabs/__pycache__/market_plots.cpython-311.pyc
ADDED
Binary file (4.87 kB). View file
|
|
tabs/market_plots.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import gradio as gr
|
3 |
+
import plotly.express as px
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
from wordcloud import WordCloud
|
6 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
7 |
+
import numpy as np
|
8 |
+
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
|
9 |
+
|
10 |
+
# Custom words to remove: lowercase month names and years.
months = (
    "january february march april may june "
    "july august september october november december"
).split()
years = ["2024", "2025"]
# Months first, then years, in a single flat list.
filter_words = [*months, *years]
|
29 |
+
|
30 |
+
|
31 |
+
def plot_top_10_ranking_by_nr_trades(market_metrics: pd.DataFrame) -> gr.Plot:
    """Build a bar chart of the ten markets with the highest number of trades.

    Args:
        market_metrics: Frame providing the columns read here:
            ``market_id``, ``title``, ``nr_trades`` and ``total_traders``.

    Returns:
        A ``gr.Plot`` wrapping the Plotly bar figure.
    """
    top_10_markets = market_metrics.sort_values(
        by="nr_trades", ascending=False
    ).head(10)

    # Hover text combines the market title with its number of traders.
    # assign() returns a new frame, so the column is not written onto a
    # slice of the caller's data (which triggers SettingWithCopyWarning).
    top_10_markets = top_10_markets.assign(
        hover_text=(
            top_10_markets["title"]
            + "<br>Number of Traders: "
            + top_10_markets["total_traders"].astype(str)
        )
    )

    fig = px.bar(
        top_10_markets,
        x="market_id",
        y="nr_trades",
        hover_data=["hover_text"],
        title="Ranking of Markets by Number of Trades",
    )

    fig.update_layout(
        xaxis_title="Markets",
        yaxis_title="Number of Trades",
        # Tick labels on the market_id axis are hidden.
        xaxis={"showticklabels": False},
    )

    return gr.Plot(
        value=fig,
    )
|
61 |
+
|
62 |
+
|
63 |
+
def plot_trades_and_traders_ranking(market_metrics: pd.DataFrame) -> gr.Plot:
    """Build a scatter plot of number of trades against number of traders.

    Args:
        market_metrics: Frame providing the columns read here:
            ``total_traders``, ``nr_trades`` and ``title``.

    Returns:
        A ``gr.Plot`` wrapping the Plotly scatter figure.
    """
    # The title travels as custom data so the hover template can show it.
    hover_template = (
        "Title: %{customdata[0]}<br>"
        "Nr trades: %{y}<br>"
        "Total traders: %{x}<br>"
    )
    # Adjust margins for better spacing
    plot_margins = dict(l=50, r=50, t=70, b=50)

    scatter_fig = px.scatter(
        market_metrics,
        x="total_traders",
        y="nr_trades",
        color="nr_trades",
        color_continuous_scale="viridis",
        custom_data=["title"],
    )
    scatter_fig.update_layout(
        xaxis_title="Total Number of Traders",
        yaxis_title="Total Number of Trades",
        margin=plot_margins,
    )
    scatter_fig.update_traces(hovertemplate=hover_template)

    return gr.Plot(value=scatter_fig)
|
87 |
+
|
88 |
+
|
89 |
+
def plot_wordcloud_topics(market_metrics: pd.DataFrame) -> gr.Plot:
    """Build a word cloud from the titles of the most-traded markets.

    Titles of the 100 markets with the most trades are vectorised with
    TF-IDF; each term's average score across those titles is used as its
    weight in the word cloud.

    Args:
        market_metrics: Frame providing the columns read here:
            ``nr_trades`` and ``title``.

    Returns:
        A ``gr.Plot`` wrapping the Matplotlib figure that shows the cloud.
    """
    # Sort the data by 'nr_trades' in descending order
    market_metrics_sorted = market_metrics.sort_values(by="nr_trades", ascending=False)
    # Get the titles of the top 100 markets. Missing titles are dropped
    # first because TfidfVectorizer rejects NaN documents.
    top_100_titles = market_metrics_sorted["title"].dropna().head(100)
    # Combine standard English stop words with custom filter words
    all_stop_words = list(set(ENGLISH_STOP_WORDS).union(filter_words))

    # Create and configure TF-IDF Vectorizer
    tfidf = TfidfVectorizer(
        stop_words=all_stop_words, max_features=100, max_df=0.95, min_df=1
    )
    # Fit and transform the titles
    tfidf_matrix = tfidf.fit_transform(top_100_titles)

    # Get feature names (terms)
    terms = tfidf.get_feature_names_out()
    # Calculate average TF-IDF scores for each term
    avg_scores = np.mean(tfidf_matrix.toarray(), axis=0)
    word_scores = dict(zip(terms, avg_scores))

    # Create and generate a word cloud
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color="white",
        max_words=50,
        prefer_horizontal=0.7,
    ).generate_from_frequencies(word_scores)

    # Draw on an explicit figure/axes pair rather than pyplot's implicit
    # "current figure", so the drawing cannot land on some other figure.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    ax.set_title("Word Cloud of Market Titles")
    return gr.Plot(
        value=fig,
    )
|