Spaces:

valory
/

prediction_markets_ranking

Running

App Files Files Community

cyberosa committed on 14 days ago

Commit

4f94489

1 Parent(s): ee43c61

adding plots for the first app and requirements

Browse files

Files changed (5) hide show

README.md +3 -3
app.py +75 -0
requirements.txt +15 -0
tabs/__pycache__/market_plots.cpython-311.pyc +0 -0
tabs/market_plots.py +126 -0

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: Prediction Markets Ranking
-emoji: 📚
-colorFrom: pink
-colorTo: blue
 sdk: gradio
 sdk_version: 5.14.0
 app_file: app.py

 ---
 title: Prediction Markets Ranking
+emoji: 🌬️
+colorFrom: white
+colorTo: amber
 sdk: gradio
 sdk_version: 5.14.0
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import gradio as gr
+import pandas as pd
+from tabs.market_plots import (
+    plot_top_10_ranking_by_nr_trades,
+    plot_trades_and_traders_ranking,
+    plot_wordcloud_topics,
+)
+import logging
+from huggingface_hub import hf_hub_download
+def get_logger():
+    logger = logging.getLogger(__name__)
+    logger.setLevel(logging.DEBUG)
+    # stream handler and formatter
+    stream_handler = logging.StreamHandler()
+    stream_handler.setLevel(logging.DEBUG)
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    stream_handler.setFormatter(formatter)
+    logger.addHandler(stream_handler)
+    return logger
+def load_data():
+    # closed_markets metrics
+    closed_markets_df = hf_hub_download(
+        repo_id="valory/Olas-predict-dataset",
+        filename="closed_market_metrics.parquet",
+        repo_type="dataset",
+    )
+    df = pd.read_parquet(closed_markets_df)
+    return df
+# ---- start-up: everything below runs at module level when the script is executed ----
+logger = get_logger()
+logger.info("Loading data from Olas predict dataset")
+# loaded once; the same DataFrame is passed to all three plot helpers below
+market_metrics = load_data()
+# ---- UI layout: components created inside the `with` blocks belong to that container ----
+demo = gr.Blocks()
+with demo:
+    gr.HTML("<h1>Prediction markets popularity dashboard </h1>")
+    gr.Markdown(
+        """This app shows the popularity ranking of prediction markets in Olas Predict. Popularity based on two main metrics:
+                * number of generated trades on the market
+                * number of traders active on the market.
+                These are computed only for closed markets."""
+    )
+    with gr.Tabs():
+        with gr.TabItem("🔥 Popularity metrics"):
+            # each section is a heading row followed by a row holding one plot
+            with gr.Row():
+                gr.Markdown("# 🔝 Top 10 markets based on number of trades")
+            with gr.Row():
+                # the helper builds and returns a gr.Plot while this row is the active context
+                top_10_plot = plot_top_10_ranking_by_nr_trades(
+                    market_metrics=market_metrics
+                )
+            with gr.Row():
+                gr.Markdown(
+                    "# 🏁 Classification based on nr of trades and nr of traders"
+                )
+            with gr.Row():
+                # scatter of nr_trades against total_traders for every market in the frame
+                scatterplot = plot_trades_and_traders_ranking(
+                    market_metrics=market_metrics
+                )
+            with gr.Row():
+                gr.Markdown(
+                    "# ☁️ Wordcloud composed with words from most popular markets"
+                )
+            with gr.Row():
+                # word cloud computed from the titles of the most traded markets
+                wordcloud = plot_wordcloud_topics(market_metrics=market_metrics)
+# NOTE(review): default_concurrency_limit=40 presumably caps concurrent runs per event
+# listener — confirm against the Gradio queue documentation
+demo.queue(default_concurrency_limit=40).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+pandas
+seaborn
+matplotlib
+huggingface-hub
+pyarrow
+requests
+gradio==5.0.0
+plotly
+pydantic
+pydantic_core
+nbformat
+pytz
+duckdb
+wordcloud
+scikit-learn

tabs/__pycache__/market_plots.cpython-311.pyc ADDED Viewed

Binary file (4.87 kB). View file

tabs/market_plots.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import pandas as pd
+import gradio as gr
+import plotly.express as px
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+from sklearn.feature_extraction.text import TfidfVectorizer
+import numpy as np
+from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
+# words to remove
+months = [
+    "january",
+    "february",
+    "march",
+    "april",
+    "may",
+    "june",
+    "july",
+    "august",
+    "september",
+    "october",
+    "november",
+    "december",
+]
+years = ["2024", "2025"]
+filter_words = []
+filter_words.extend(months)
+filter_words.extend(years)
+def plot_top_10_ranking_by_nr_trades(market_metrics: pd.DataFrame) -> gr.Plot:
+    market_metrics_sorted_by_trades = market_metrics.sort_values(
+        by="nr_trades", ascending=False
+    )
+    top_10_markets = market_metrics_sorted_by_trades.head(10)
+    # Create a hover text column that combines market and nr_trades
+    top_10_markets["hover_text"] = (
+        top_10_markets["title"]
+        + "<br>Number of Traders: "
+        + top_10_markets["total_traders"].astype(str)
+    )
+    fig = px.bar(
+        top_10_markets,
+        x="market_id",
+        y="nr_trades",
+        hover_data=["hover_text"],
+        title="Ranking of Markets by Number of Trades",
+    )
+    fig.update_layout(
+        xaxis_title="Markets",
+        yaxis_title="Number of Trades",
+        xaxis={"showticklabels": False},
+    )
+    return gr.Plot(
+        value=fig,
+    )
+def plot_trades_and_traders_ranking(market_metrics: pd.DataFrame) -> gr.Plot:
+    fig = px.scatter(
+        market_metrics,
+        x="total_traders",
+        y="nr_trades",
+        color="nr_trades",
+        color_continuous_scale="viridis",
+        custom_data=["title"],
+    )
+    fig.update_layout(
+        xaxis_title="Total Number of Traders",
+        yaxis_title="Total Number of Trades",
+        margin=dict(l=50, r=50, t=70, b=50),  # Adjust margins for better spacing
+    )
+    fig.update_traces(
+        hovertemplate="Title: %{customdata[0]}<br>"
+        + "Nr trades: %{y}<br>"
+        + "Total traders: %{x}<br>",
+    )
+    return gr.Plot(
+        value=fig,
+    )
+def plot_wordcloud_topics(market_metrics: pd.DataFrame) -> gr.Plot:
+    # Sort the data by 'nr_trades' in descending order
+    market_metrics_sorted = market_metrics.sort_values(by="nr_trades", ascending=False)
+    # Get the titles of the top 100 markets
+    top_100_titles = market_metrics_sorted["title"].head(100)
+    # Combine standard English stop words with custom filter words
+    all_stop_words = list(set(ENGLISH_STOP_WORDS).union(filter_words))
+    # Create and configure TF-IDF Vectorizer
+    tfidf = TfidfVectorizer(
+        stop_words=all_stop_words, max_features=100, max_df=0.95, min_df=1
+    )
+    # Fit and transform the titles
+    tfidf_matrix = tfidf.fit_transform(top_100_titles)
+    # Get feature names (terms)
+    terms = tfidf.get_feature_names_out()
+    # Calculate average TF-IDF scores for each term
+    avg_scores = np.mean(tfidf_matrix.toarray(), axis=0)
+    word_scores = dict(zip(terms, avg_scores))
+    # Create and generate a word cloud
+    wordcloud = WordCloud(
+        width=800,
+        height=400,
+        background_color="white",
+        max_words=50,
+        prefer_horizontal=0.7,
+    ).generate_from_frequencies(word_scores)
+    # Display the word cloud
+    fig = plt.figure(figsize=(10, 5))
+    plt.imshow(wordcloud, interpolation="bilinear")
+    plt.axis("off")
+    plt.title("Word Cloud of Market Titles")
+    return gr.Plot(
+        value=fig,
+    )