cyberosa committed on
Commit
4f94489
·
1 Parent(s): ee43c61

adding plots for the first app and requirements

Browse files
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Prediction Markets Ranking
3
- emoji: 📚
4
- colorFrom: pink
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.14.0
8
  app_file: app.py
 
1
  ---
2
  title: Prediction Markets Ranking
3
+ emoji: 🌬️
4
+ colorFrom: white
5
+ colorTo: amber
6
  sdk: gradio
7
  sdk_version: 5.14.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from tabs.market_plots import (
4
+ plot_top_10_ranking_by_nr_trades,
5
+ plot_trades_and_traders_ranking,
6
+ plot_wordcloud_topics,
7
+ )
8
+ import logging
9
+ from huggingface_hub import hf_hub_download
10
+
11
+
12
def get_logger():
    """Return this module's logger, set up to emit DEBUG records to a stream.

    The logger is looked up by ``__name__`` and its level is set to DEBUG.
    A ``StreamHandler`` with a timestamp/name/level/message format is attached
    the first time the function runs; later calls return the same logger
    without adding another handler, so records are never emitted twice.

    Returns:
        The configured ``logging.Logger`` for this module.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # logging.getLogger hands back the same object for the same name, so an
    # unconditional addHandler would stack one more handler per call.
    if not logger.handlers:
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(logging.DEBUG)
        stream_handler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            )
        )
        logger.addHandler(stream_handler)

    return logger
24
+
25
+
26
def load_data():
    """Download the closed-market metrics file and load it as a DataFrame.

    The file ``closed_market_metrics.parquet`` is fetched from the
    ``valory/Olas-predict-dataset`` dataset repository via
    ``hf_hub_download`` and then read with pandas.

    Returns:
        The DataFrame read from the downloaded parquet file.
    """
    # hf_hub_download returns the local path of the fetched file.
    parquet_path = hf_hub_download(
        repo_type="dataset",
        repo_id="valory/Olas-predict-dataset",
        filename="closed_market_metrics.parquet",
    )
    return pd.read_parquet(parquet_path)
36
+
37
+
38
# Application entry point: load the metrics once at import time, lay out the
# dashboard inside a Blocks context, then start the queued server.
logger = get_logger()
logger.info("Loading data from Olas predict dataset")
market_metrics = load_data()

with gr.Blocks() as demo:
    gr.HTML("<h1>Prediction markets popularity dashboard </h1>")
    gr.Markdown(
        """This app shows the popularity ranking of prediction markets in Olas Predict. Popularity based on two main metrics:
* number of generated trades on the market
* number of traders active on the market.

These are computed only for closed markets."""
    )

    # Single tab holding three heading/plot pairs, each in its own row.
    with gr.Tabs(), gr.TabItem("🔥 Popularity metrics"):
        with gr.Row():
            gr.Markdown("# 🔝 Top 10 markets based on number of trades")
        with gr.Row():
            top_10_plot = plot_top_10_ranking_by_nr_trades(
                market_metrics=market_metrics
            )

        with gr.Row():
            gr.Markdown(
                "# 🏁 Classification based on nr of trades and nr of traders"
            )
        with gr.Row():
            scatterplot = plot_trades_and_traders_ranking(
                market_metrics=market_metrics
            )

        with gr.Row():
            gr.Markdown(
                "# ☁️ Wordcloud composed with words from most popular markets"
            )
        with gr.Row():
            wordcloud = plot_wordcloud_topics(market_metrics=market_metrics)

demo.queue(default_concurrency_limit=40).launch()
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ seaborn
3
+ matplotlib
4
+ huggingface-hub
5
+ pyarrow
6
+ requests
7
+ gradio==5.0.0
8
+ plotly
9
+ pydantic
10
+ pydantic_core
11
+ nbformat
12
+ pytz
13
+ duckdb
14
+ wordcloud
15
+ scikit-learn
tabs/__pycache__/market_plots.cpython-311.pyc ADDED
Binary file (4.87 kB). View file
 
tabs/market_plots.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import plotly.express as px
4
+ import matplotlib.pyplot as plt
5
+ from wordcloud import WordCloud
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ import numpy as np
8
+ from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
9
+
10
# Extra stop words for the word cloud: month names and year strings are
# filtered out of market titles in addition to the standard English stop words.
months = [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
]
years = ["2024", "2025"]
# Months first, then years, as one flat list.
filter_words = [*months, *years]
29
+
30
+
31
def plot_top_10_ranking_by_nr_trades(market_metrics: pd.DataFrame) -> gr.Plot:
    """Build a bar chart of the ten markets with the most trades.

    Args:
        market_metrics: Frame with one row per market. The columns read here
            are ``nr_trades``, ``title``, ``total_traders`` and ``market_id``.

    Returns:
        A ``gr.Plot`` wrapping a Plotly bar chart with ``market_id`` on the x
        axis (tick labels hidden) and ``nr_trades`` on the y axis.
    """
    # .copy() gives an independent frame, so adding the hover column below is
    # not a chained assignment on a slice (avoids SettingWithCopyWarning).
    top_10_markets = (
        market_metrics.sort_values(by="nr_trades", ascending=False).head(10).copy()
    )

    # Hover text combines the market title with its number of traders.
    top_10_markets["hover_text"] = (
        top_10_markets["title"]
        + "<br>Number of Traders: "
        + top_10_markets["total_traders"].astype(str)
    )

    fig = px.bar(
        top_10_markets,
        x="market_id",
        y="nr_trades",
        hover_data=["hover_text"],
        title="Ranking of Markets by Number of Trades",
    )

    fig.update_layout(
        xaxis_title="Markets",
        yaxis_title="Number of Trades",
        # Market ids are hidden on the axis; the hover text carries the title.
        xaxis={"showticklabels": False},
    )

    return gr.Plot(value=fig)
61
+
62
+
63
def plot_trades_and_traders_ranking(market_metrics: pd.DataFrame) -> gr.Plot:
    """Build a scatter plot of traders against trades, one point per market.

    Args:
        market_metrics: Frame providing the ``total_traders``, ``nr_trades``
            and ``title`` columns.

    Returns:
        A ``gr.Plot`` wrapping a Plotly scatter chart; points are coloured by
        ``nr_trades`` and the hover box shows title, trades and traders.
    """
    # The title travels as custom data so the hover template can show it.
    hover_template = (
        "Title: %{customdata[0]}<br>"
        "Nr trades: %{y}<br>"
        "Total traders: %{x}<br>"
    )
    # Margins adjusted for better spacing around the plot area.
    plot_margins = {"l": 50, "r": 50, "t": 70, "b": 50}

    scatter_fig = px.scatter(
        market_metrics,
        x="total_traders",
        y="nr_trades",
        color="nr_trades",
        color_continuous_scale="viridis",
        custom_data=["title"],
    )
    scatter_fig.update_layout(
        xaxis_title="Total Number of Traders",
        yaxis_title="Total Number of Trades",
        margin=plot_margins,
    )
    scatter_fig.update_traces(hovertemplate=hover_template)

    return gr.Plot(value=scatter_fig)
87
+
88
+
89
def plot_wordcloud_topics(market_metrics: pd.DataFrame) -> gr.Plot:
    """Build a word cloud from the titles of the most traded markets.

    Titles of the top 100 markets by ``nr_trades`` are vectorized with TF-IDF
    (English stop words plus the module's ``filter_words`` removed), and each
    term's average TF-IDF score is used as its weight in the cloud.

    Args:
        market_metrics: Frame providing the ``nr_trades`` and ``title`` columns.

    Returns:
        A ``gr.Plot`` wrapping a Matplotlib figure that shows the word cloud.
    """
    # Sort the data by 'nr_trades' in descending order
    market_metrics_sorted = market_metrics.sort_values(by="nr_trades", ascending=False)
    # Titles of the top 100 markets; missing titles are dropped first because
    # the vectorizer rejects non-string documents.
    top_100_titles = market_metrics_sorted["title"].dropna().head(100)
    # Combine standard English stop words with custom filter words
    all_stop_words = list(set(ENGLISH_STOP_WORDS).union(filter_words))

    # Create and configure TF-IDF Vectorizer
    tfidf = TfidfVectorizer(
        stop_words=all_stop_words, max_features=100, max_df=0.95, min_df=1
    )
    # Fit and transform the titles
    tfidf_matrix = tfidf.fit_transform(top_100_titles)

    # Average TF-IDF score per term, keyed by the term itself
    terms = tfidf.get_feature_names_out()
    avg_scores = np.mean(tfidf_matrix.toarray(), axis=0)
    word_scores = dict(zip(terms, avg_scores))

    # Create and generate a word cloud
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color="white",
        max_words=50,
        prefer_horizontal=0.7,
    ).generate_from_frequencies(word_scores)

    # Draw on an unmanaged Figure with explicit axes instead of the pyplot
    # state machine: pyplot keeps every figure it creates alive until it is
    # closed, and its "current axes" are global state.
    fig = plt.Figure(figsize=(10, 5))
    ax = fig.subplots()
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    ax.set_title("Word Cloud of Market Titles")
    return gr.Plot(value=fig)