Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,797 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import pandas as pd
|
3 |
+
from datetime import datetime
|
4 |
+
import gradio as gr
|
5 |
+
import pickle
|
6 |
+
from sentence_transformers import SentenceTransformer, util
|
7 |
+
from wordcloud import WordCloud
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import base64
|
10 |
+
from io import BytesIO
|
11 |
+
import json
|
12 |
+
from openai import OpenAI
|
13 |
+
from graphviz import Source
|
14 |
+
import re
|
15 |
+
from PIL import Image
|
16 |
+
import os
|
17 |
+
import uuid
|
18 |
+
|
19 |
+
# --- Runtime configuration and shared module-level state ---

# Fixed directory where rendered diagram PNG images are written.
IMAGE_DIR = "/content/images"
os.makedirs(IMAGE_DIR, exist_ok=True)  # create on startup if missing

# GitHub search endpoint and auth header. The PAT comes from the environment
# (Hugging Face Space secret "github_pat"); fail fast if it is absent.
GITHUB_API_URL = "https://api.github.com/search/repositories"
ACCESS_TOKEN = os.getenv("github_pat")
if not ACCESS_TOKEN:
    raise ValueError("Missing GitHub Personal Access Token.")
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

# OpenAI API key, also provided as an environment secret; fail fast if absent.
OPENAI_API_KEY = os.getenv("openai_key")
if not OPENAI_API_KEY:
    raise ValueError("Missing OpenAI API Key. Please set it as a secret in Hugging Face.")

# Single shared OpenAI client reused by every LLM helper below.
client = OpenAI(api_key=OPENAI_API_KEY)

# File extensions surfaced in the Code Analysis file browser.
ALLOWED_EXTENSIONS = [".py", ".js", ".md", ".toml", ".yaml"]

# Pre-computed GitHub topic embeddings (pickled dict with "topics" and
# "embeddings" keys) used for topic-similarity suggestions.
with open("github_topics_embeddings.pkl", "rb") as f:
    topic_data = pickle.load(f)

topics = topic_data["topics"]
embeddings = topic_data["embeddings"]

# Repositories found by the discovery/org tabs, as "owner/repo_name" strings;
# feeds the repository dropdown in the Code Analysis tab.
discovered_repos = []  # Format: ["owner/repo_name", ...]
49 |
+
# Function to search for similar topics
def search_similar_topics(input_text):
    """Suggest GitHub topics similar to *input_text* via embedding similarity.

    Returns a comma-separated string of the top 10 matching topics from the
    pre-loaded `topics`/`embeddings` tables, a guidance message for blank
    input, or an error string if anything fails.
    """
    if not input_text.strip():
        return "Enter topics to see suggestions."
    try:
        # Bug fix: the model was previously re-loaded on every call, which is
        # slow and memory-heavy. Cache it on the function after first use.
        model = getattr(search_similar_topics, "_model", None)
        if model is None:
            model = SentenceTransformer('all-MiniLM-L6-v2')
            search_similar_topics._model = model
        query_embedding = model.encode(input_text, convert_to_tensor=True)
        similarities = util.pytorch_cos_sim(query_embedding, embeddings)
        top_indices = similarities[0].argsort(descending=True)[:10]  # Top 10 matches (old comment said 5)
        return ", ".join([topics[i] for i in top_indices])
    except Exception as e:
        return f"Error in generating suggestions: {str(e)}"
|
62 |
+
# Function to fetch repositories with pagination
def search_repositories(query, sort="stars", order="desc", total_repos=10):
    """Fetch up to *total_repos* repositories from the GitHub search API.

    Args:
        query: GitHub search query string (e.g. "topic:ml stars:10..1000").
        sort: Sort field ("stars", "forks", "updated").
        order: Sort direction ("desc" or "asc").
        total_repos: Maximum number of repositories to return.

    Returns:
        List of repository payload dicts (at most total_repos entries).

    Raises:
        Exception: when GitHub responds with a non-200 status.
    """
    if total_repos <= 0:
        # Bug fix: total_repos == 0 previously caused ZeroDivisionError below.
        return []

    all_repos = []
    per_page = min(total_repos, 100)  # GitHub caps per_page at 100
    # Bug fix: ceiling division; the old `total // per_page + 1` issued one
    # extra request whenever total_repos was an exact multiple of per_page.
    total_pages = -(-total_repos // per_page)

    for page in range(1, total_pages + 1):
        params = {
            "q": query,
            "sort": sort,
            "order": order,
            "per_page": per_page,
            "page": page,
        }
        response = requests.get(GITHUB_API_URL, headers=HEADERS, params=params)
        print(f"Query: {query}, Status Code: {response.status_code}")

        # Bug fix: check the status BEFORE parsing JSON — error bodies are not
        # guaranteed to be JSON, and the old debug print of the full payload
        # could itself raise and also flooded the logs.
        if response.status_code != 200:
            raise Exception(f"GitHub API error: {response.status_code} {response.text}")

        items = response.json().get("items", [])
        if not items:
            break  # no more results

        all_repos.extend(items)
        if len(all_repos) >= total_repos:
            break

    return all_repos[:total_repos]
93 |
+
# Function to calculate additional metrics
def calculate_additional_metrics(repo):
    """Derive scoring and metadata columns for one GitHub repository payload.

    Computes age-based velocity scores, heuristic "hidden gem" flags, and the
    real watcher count (fetched from the repo detail endpoint, since the
    search API's watchers_count mirrors the star count).

    Args:
        repo: Repository dict as returned by the GitHub search API.

    Returns:
        Dict of derived metric columns, merged into table rows by callers.
    """
    created_date = datetime.strptime(repo["created_at"], "%Y-%m-%dT%H:%M:%SZ")
    updated_date = datetime.strptime(repo["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
    # Timestamps above are naive UTC, so compare against utcnow() (also naive).
    days_since_creation = (datetime.utcnow() - created_date).days
    days_since_update = (datetime.utcnow() - updated_date).days

    star_velocity = repo["stargazers_count"] / days_since_creation if days_since_creation > 0 else 0
    fork_to_star_ratio = (repo["forks_count"] / repo["stargazers_count"] * 100) if repo["stargazers_count"] > 0 else 0
    hidden_gem = "Yes" if repo["stargazers_count"] < 500 and repo["forks_count"] < 50 else "No"
    hidden_gem_trend = "Rising" if star_velocity > 1 else "Stable"

    # Heuristic weights — tuned by eye, not derived from data.
    rising_score = ((star_velocity * 10) +
                    (repo["forks_count"] * 0.2) +
                    (repo.get("watchers_count", 0) * 0.3) +
                    (1 / (days_since_update + 1) * 20) -
                    (repo["open_issues_count"] * 0.01))
    legacy_score = (repo["stargazers_count"] * 0.6) + \
                   (repo["forks_count"] * 0.3) + \
                   (repo.get("watchers_count", 0) * 0.1) - \
                   (repo["open_issues_count"] * 0.05)

    # One extra API call per repo to get subscribers_count (true watchers).
    owner, repo_name = repo["owner"]["login"], repo["name"]
    repo_details_url = f"https://api.github.com/repos/{owner}/{repo_name}"
    response = requests.get(repo_details_url, headers=HEADERS)
    if response.status_code == 200:
        actual_watchers = response.json().get("subscribers_count", 0)
    else:
        actual_watchers = 0
    watcher_to_stars_ratio = (actual_watchers / repo["stargazers_count"]) * 100 if repo["stargazers_count"] > 0 else 0

    return {
        "Rising Score": round(rising_score, 2),
        "Legacy Score": round(legacy_score, 2),
        "Star Velocity (Stars/Day)": round(star_velocity, 2),
        "Fork-to-Star Ratio (%)": round(fork_to_star_ratio, 2),
        "Watchers": actual_watchers,
        "Watcher-to-Stars Ratio (%)": round(watcher_to_stars_ratio, 2),
        # Bug fix: the API sends "language": null, so .get("language", "N/A")
        # returned None — the default only applies when the key is absent.
        "Language": repo.get("language") or "N/A",
        "Topics": ", ".join(repo.get("topics", [])),
        "Hidden Gem": hidden_gem,
        "Hidden Gem Trend": hidden_gem_trend,
        "Open Issues": repo["open_issues_count"],
        "Created At": repo["created_at"],
        "Last Updated": repo["pushed_at"],
        "days_since_creation": round(days_since_creation, 2),
        "days_since_update": round(days_since_update, 2),
        "URL": repo["html_url"],
    }
141 |
+
# Repository Discovery Interface
def _repo_row(repo):
    """Flatten one GitHub repo payload into a table row (basics + metrics)."""
    row = {
        "Name": repo["name"],
        "Owner": repo["owner"]["login"],
        "Stars": repo["stargazers_count"],
        "Forks": repo["forks_count"],
        "Description": repo.get("description", "N/A"),
    }
    row.update(calculate_additional_metrics(repo))
    return row


def gradio_interface(topics, start_date, language_filter, stars_min, stars_max, forks_min, forks_max, total_repos, sort_order):
    """Discover repositories by topic and/or date with star/fork filters.

    Returns (DataFrame, csv_path) on success, or (empty DataFrame, message)
    on validation failure, no results, or error. Also records every result in
    the module-level `discovered_repos` list for the Code Analysis tab.
    """
    global discovered_repos

    if not topics.strip() and not start_date.strip():
        # Require at least one of topics / start date to bound the search.
        return pd.DataFrame(), "Please provide at least a topic or a start date."

    topics_list = [topic.strip() for topic in topics.split(",") if topic.strip()]
    all_repos_data = []
    df = pd.DataFrame()

    try:
        # Shared filter tail used by both the topic and filter-only queries.
        base_filters = f"stars:{stars_min}..{stars_max} forks:{forks_min}..{forks_max}"
        if start_date.strip():
            base_filters += f" created:>{start_date.strip()}"
        if language_filter:
            base_filters += f" language:{language_filter}"

        # One query per topic, or a single filter-only query when no topics
        # were given. (The two branches previously duplicated ~20 lines.)
        queries = [f"topic:{topic} {base_filters}" for topic in topics_list] or [base_filters]

        for query in queries:
            repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
            for repo in repos:
                all_repos_data.append(_repo_row(repo))
                # Bug fix: the filter-only branch previously never appended to
                # discovered_repos, so date-based discoveries could not be
                # selected in the Code Analysis tab.
                discovered_repos.append(f"{repo['owner']['login']}/{repo['name']}")

        if not all_repos_data:
            return pd.DataFrame(), "No repositories found matching the criteria."

        # Remove duplicates from discovered_repos
        discovered_repos = list(set(discovered_repos))

        df = pd.DataFrame(all_repos_data)

    except Exception as e:
        print(f"Error: {e}")
        return pd.DataFrame(), f"Error fetching repositories: {str(e)}"

    csv_file = None
    if not df.empty:
        csv_file = "discovered_repositories.csv"
        df.to_csv(csv_file, index=False)
    return df, csv_file
|
222 |
+
# Organization Watch Interface
def fetch_org_repositories(org_names, language_filter, stars_min, stars_max, forks_min, forks_max, sort_order, total_repos):
    """Fetch repositories belonging to one or more GitHub organizations.

    Returns (DataFrame, csv_path) on success, or (empty DataFrame, message)
    when no organizations were given, nothing matched, or an error occurred.
    """
    try:
        orgs = [name.strip() for name in org_names.split(",") if name.strip()]
        if not orgs:
            return pd.DataFrame(), "Enter at least one organization."

        rows = []
        for org in orgs:
            # One search query per organization, constrained by the filters.
            query = f"user:{org} stars:{stars_min}..{stars_max} forks:{forks_min}..{forks_max}"
            if language_filter:
                query += f" language:{language_filter}"

            for repo in search_repositories(query=query, sort=sort_order, total_repos=total_repos):
                row = {
                    "Name": repo["name"],
                    "Owner": repo["owner"]["login"],
                    "Stars": repo["stargazers_count"],
                    "Forks": repo["forks_count"],
                    "Description": repo.get("description", "N/A"),
                }
                row.update(calculate_additional_metrics(repo))
                rows.append(row)

        if not rows:
            return pd.DataFrame(), "No repositories found for the specified organizations."

        df = pd.DataFrame(rows)
        csv_file = "organization_repositories.csv"
        df.to_csv(csv_file, index=False)
        return df, csv_file

    except Exception as e:
        print(f"Error in fetch_org_repositories: {e}")
        return pd.DataFrame(), f"Error: {str(e)}"
|
262 |
+
# Function to fetch discovered repositories for the dropdown
def get_discovered_repos():
    """Return the repositories collected so far, as "owner/name" strings."""
    # Read-only access; no `global` declaration is needed for a lookup.
    return discovered_repos
|
267 |
+
def process_readme(owner, repo, branch):
    """Fetch the README of owner/repo from *branch* and analyze it with OpenAI.

    Returns a tuple (keywords, entities, summary, plot): the first three are
    strings, `plot` is matplotlib's pyplot module carrying the word-cloud
    figure. On fetch failure returns (error_message, "", "", None).
    """
    # Fetch README content for the chosen branch via raw.githubusercontent.
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/README.md"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        readme_content = response.text
    else:
        return f"Failed to fetch README content from branch {branch}.", "", "", None

    # Ask the model for keywords/entities/summary in a strict JSON envelope.
    MODEL = "gpt-4o-mini"

    completion = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that extracts keywords, named entities, and generates summaries from text."},
            {"role": "user", "content": f"""
    Perform the following tasks on the following README file:
    1. Extract the top 25 most important keywords from the text only.
    2. Extract named entities (e.g., people, organizations, technologies).
    3. Summarize the content in one paragraph.

    Return the results in the following JSON format:
    {{
        "keywords": ["keyword1", "keyword2", ...],
        "entities": ["entity1", "entity2", ...],
        "summary": "A concise summary of the README."
    }}

    README file:
    {readme_content}
    """}
        ],
        # json_object response_format makes the reply parseable JSON.
        response_format={"type": "json_object"}
    )

    result = completion.choices[0].message.content
    result_json = json.loads(result)

    keywords = ", ".join(result_json["keywords"])
    entities = ", ".join(result_json["entities"])
    summary = result_json["summary"]

    # Generate a word cloud from the extracted keywords for the UI plot.
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(keywords)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')

    # NOTE(review): returning the plt module relies on pyplot's global figure
    # state; concurrent calls could clash — confirm acceptable for this Space.
    return keywords, entities, summary, plt
|
320 |
+
# Function to get all branches of a repository
def get_branches(owner, repo):
    """Return the branch names of owner/repo, or [] when the lookup fails."""
    url = f"https://api.github.com/repos/{owner}/{repo}/branches"
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        return []
    return [entry["name"] for entry in response.json()]
|
330 |
+
# Function to get the default branch of a repository
def get_default_branch(owner, repo):
    """Return the default branch name of owner/repo, or None on failure."""
    url = f"https://api.github.com/repos/{owner}/{repo}"
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        return None
    return response.json()["default_branch"]
|
340 |
+
def fetch_files(owner, repo, path=""):
    """Recursively list files in a repository, filtered by ALLOWED_EXTENSIONS.

    Args:
        owner: Repository owner login.
        repo: Repository name.
        path: Subdirectory to start from ("" = repository root).

    Returns:
        List of dicts with "name", "path" and "download_url" keys; [] when a
        directory listing fails.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" if path else f"https://api.github.com/repos/{owner}/{repo}/contents"
    response = requests.get(url, headers=HEADERS)

    if response.status_code != 200:
        # Bug fix: this used to return a (message, []) tuple, but the
        # recursive call site below does files.extend(result) — extending
        # with a tuple spliced the error string and an empty list into the
        # file list. Log and return an empty list instead.
        print(f"Failed to fetch files: {response.status_code}")
        return []

    files = []
    for item in response.json():
        if item["type"] == "file":
            # Keep only extensions the Code Analysis tab can display.
            if any(item["name"].endswith(ext) for ext in ALLOWED_EXTENSIONS):
                files.append({
                    "name": item["name"],
                    "path": item["path"],
                    "download_url": item["download_url"]
                })
        elif item["type"] == "dir":
            # Recursively fetch files in subdirectories.
            files.extend(fetch_files(owner, repo, item["path"]))
    return files
|
366 |
+
# Function to fetch the content of a specific file
def fetch_file_content(owner, repo, branch, file_path):
    """Return the raw text of a file at the given branch, or an error string."""
    file_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{file_path}"
    response = requests.get(file_url)
    if response.status_code != 200:
        return f"Failed to fetch file content: {response.status_code}"
    return response.text
|
376 |
+
# Function to query GPT-4o-mini
def ask_code_question(code_content, question):
    """Send *question* about *code_content* to GPT-4o-mini; return the answer.

    Returns a guidance message when either input is blank, or an error string
    when the API call fails.
    """
    # Guard clauses: nothing to do without both code and a question.
    if not code_content.strip():
        return "No code content available to analyze."
    if not question.strip():
        return "Please enter a question about the code."

    # Prompt = file contents followed by the user's question.
    prompt = f"""
    Here is a Python file from a GitHub repository:

    {code_content}

    Please answer the following question about this file:
    - {question}
    """

    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant skilled in understanding code."},
                {"role": "user", "content": prompt}
            ]
        )
        answer = completion.choices[0].message.content
        return answer.strip()
    except Exception as e:
        return f"Error querying GPT-4o-mini: {str(e)}"
|
407 |
+
from graphviz import Source
|
408 |
+
import re
|
409 |
+
|
410 |
+
# Function to generate and clean Graphviz diagrams using the OpenAI API
def generate_dot_code_from_code(code_content, diagram_type):
    """Ask the LLM for Graphviz DOT source describing *code_content*.

    Args:
        code_content: Source code to visualize.
        diagram_type: Human-readable diagram kind (e.g. "Call Graph").

    Returns:
        A DOT `digraph { ... }` string, or an error message string.
    """
    if not code_content.strip():
        return "No code content available to analyze."

    # Bug fixes in the prompt text: the original f-string contained
    # {"brackets"}, which rendered as the stray word "brackets" instead of
    # the literal braces, and had typos ("explainatory commentry").
    prompt = f"""
    Here is some Python code from a GitHub repository:

    {code_content}

    Please generate a {diagram_type} for this code in Graphviz DOT/digraph format. Ensure the DOT code is valid and renderable.
    Don't include any other text. Don't provide any other explanatory commentary.
    Ensure the DOT code includes all necessary opening and closing brackets {{ }} for graphs and subgraphs.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that generates Graphviz DOT code for visualizing Python code. You are restricted to only generate Graphviz Code starting with digraph & ending with }"},
                {"role": "user", "content": prompt}
            ]
        )
        raw_dot_code = response.choices[0].message.content.strip()
        validated_dot_code = validate_and_fix_dot_code(raw_dot_code)  # append any missing closing braces

        # Keep only the `digraph ... }` body, dropping markdown fences or chatter.
        pattern = r"digraph\b[\s\S]*?^\}"
        match = re.search(pattern, validated_dot_code, re.MULTILINE | re.DOTALL)
        if not match:
            return "Failed to extract valid Graphviz code."
        return match.group(0)
    except Exception as e:
        # Bug fix: the message previously said "GPT-4o-mini" although the
        # request above uses the gpt-4o model.
        return f"Error querying GPT-4o: {str(e)}"
|
449 |
+
def validate_and_fix_dot_code(dot_code):
    """Balance DOT braces by appending any missing '}' at the end.

    Only repairs a surplus of opening braces (the common LLM truncation
    failure); extra closing braces are left untouched.
    """
    deficit = dot_code.count("{") - dot_code.count("}")
    if deficit > 0:
        dot_code += "}" * deficit
    return dot_code
|
462 |
+
def render_dot_code(dot_code, filename=None):
    """
    Renders Graphviz DOT code and saves it as a PNG image.

    Args:
        dot_code (str): The DOT code to render.
        filename (str): Name for the output PNG file (without extension);
            a unique name is generated when omitted.

    Returns:
        str: Path to the generated PNG image, or an error message string.
    """
    # Ensure the images directory exists.
    output_dir = "/content/images"
    os.makedirs(output_dir, exist_ok=True)

    # Bug fix: the `filename` argument was previously ignored (the output
    # path was a hard-coded constant), so concurrent renders overwrote each
    # other. Use it, falling back to a unique name.
    if not filename:
        filename = f"diagram_{uuid.uuid4().hex}"
    output_path = os.path.join(output_dir, filename)

    try:
        src = Source(dot_code, format="png")
        # render() writes "<output_path>.png" and returns that path;
        # cleanup=True removes the intermediate DOT source file.
        rendered_path = src.render(output_path, cleanup=True)
        return rendered_path
    except Exception as e:
        return f"Error rendering diagram: {str(e)}"
|
493 |
+
import time
|
494 |
+
|
495 |
+
def handle_generate_diagram(code_content, diagram_type, retries=5, wait_time=1):
    """
    Handles diagram generation and returns the rendered image for display.

    Args:
        code_content (str): The source code to analyze.
        diagram_type (str): Type of diagram to generate.
        retries (int): Number of times to retry checking for the file.
        wait_time (float): Time (in seconds) to wait between retries.

    Returns:
        PIL.Image.Image or str: The generated diagram or an error message.
    """
    print("Code content received:", code_content)  # Debugging print

    # Generate and render the diagram. NOTE(review): on render failure this
    # is an error string rather than a path; os.path.exists() is then False
    # and the retry loop simply times out.
    image_path = generate_and_render_diagram(code_content, diagram_type)
    print(f"Generated image path: {image_path}")  # Debugging print

    # Poll for the rendered PNG: the file may not be flushed to disk
    # immediately after rendering, so check a few times before giving up.
    for attempt in range(retries):
        if os.path.exists(image_path):
            try:
                return Image.open(image_path)  # Return the image if found
            except Exception as e:
                print(f"Error opening image on attempt {attempt + 1}: {e}")
        else:
            print(f"Image not found. Retrying... ({attempt + 1}/{retries})")
            time.sleep(wait_time)  # Wait before the next check

    # If the image is still not found after retries
    print(f"Failed to generate image after {retries} retries: {image_path}")
    return f"Failed to generate image: {image_path}"
|
528 |
+
|
529 |
+
# Gradio Interface
|
530 |
+
with gr.Blocks() as demo:
|
531 |
+
# Tab 1: Repository Discovery
|
532 |
+
with gr.Tab("Repository Discovery"):
|
533 |
+
with gr.Row():
|
534 |
+
topics_input = gr.Textbox(
|
535 |
+
label="Topics (comma-separated, leave empty to fetch by date only)",
|
536 |
+
placeholder="e.g., machine-learning, deep-learning (leave empty for date-based search)"
|
537 |
+
)
|
538 |
+
similar_topics = gr.Textbox(
|
539 |
+
label="Similar Topics (based on embeddings)",
|
540 |
+
interactive=False
|
541 |
+
)
|
542 |
+
gr.Button("Get Similar Topics").click(
|
543 |
+
search_similar_topics,
|
544 |
+
inputs=[topics_input],
|
545 |
+
outputs=[similar_topics]
|
546 |
+
)
|
547 |
+
|
548 |
+
with gr.Row():
|
549 |
+
start_date_input = gr.Textbox(
|
550 |
+
label="Start Date (YYYY-MM-DD, leave empty if not filtering by date)",
|
551 |
+
placeholder="Set to filter recent repositories by date or leave empty"
|
552 |
+
)
|
553 |
+
language_filter = gr.Dropdown(
|
554 |
+
choices=["", "Python", "JavaScript", "Java", "C++", "Ruby", "Go"],
|
555 |
+
label="Language Filter",
|
556 |
+
value=""
|
557 |
+
)
|
558 |
+
stars_min = gr.Number(label="Stars Min", value=10)
|
559 |
+
stars_max = gr.Number(label="Stars Max", value=1000)
|
560 |
+
with gr.Row():
|
561 |
+
forks_min = gr.Number(label="Forks Min", value=0)
|
562 |
+
forks_max = gr.Number(label="Forks Max", value=500)
|
563 |
+
total_repos = gr.Number(label="Total Repositories", value=10, step=10)
|
564 |
+
sort_order = gr.Dropdown(
|
565 |
+
choices=["stars", "forks", "updated"],
|
566 |
+
label="Sort Order",
|
567 |
+
value="stars"
|
568 |
+
)
|
569 |
+
with gr.Row():
|
570 |
+
output_data = gr.Dataframe(label="Discovered Repositories")
|
571 |
+
output_file = gr.File(label="Download CSV", file_count="single")
|
572 |
+
gr.Button("Discover Repositories").click(
|
573 |
+
gradio_interface,
|
574 |
+
inputs=[
|
575 |
+
topics_input, start_date_input, language_filter, stars_min, stars_max,
|
576 |
+
forks_min, forks_max, total_repos, sort_order
|
577 |
+
],
|
578 |
+
outputs=[output_data, output_file]
|
579 |
+
)
|
580 |
+
|
581 |
+
# Tab 2: Organization Watch
|
582 |
+
with gr.Tab("Organization Watch"):
|
583 |
+
with gr.Row():
|
584 |
+
org_input = gr.Textbox(
|
585 |
+
label="Organizations (comma-separated)",
|
586 |
+
placeholder="e.g., facebookresearch, openai"
|
587 |
+
)
|
588 |
+
with gr.Row():
|
589 |
+
language_filter = gr.Dropdown(
|
590 |
+
choices=["", "Python", "JavaScript", "Java", "C++", "Ruby", "Go"],
|
591 |
+
label="Language Filter",
|
592 |
+
value=""
|
593 |
+
)
|
594 |
+
stars_min = gr.Number(label="Stars Min", value=10)
|
595 |
+
stars_max = gr.Number(label="Stars Max", value=1000)
|
596 |
+
with gr.Row():
|
597 |
+
forks_min = gr.Number(label="Forks Min", value=0)
|
598 |
+
forks_max = gr.Number(label="Forks Max", value=500)
|
599 |
+
total_repos = gr.Number(label="Total Repositories", value=10, step=10)
|
600 |
+
sort_order = gr.Dropdown(
|
601 |
+
choices=["stars", "forks", "updated"],
|
602 |
+
label="Sort Order",
|
603 |
+
value="stars"
|
604 |
+
)
|
605 |
+
with gr.Row():
|
606 |
+
output_data = gr.Dataframe(label="Repositories by Organizations")
|
607 |
+
output_file = gr.File(label="Download CSV", file_count="single")
|
608 |
+
gr.Button("Fetch Organization Repositories").click(
|
609 |
+
fetch_org_repositories,
|
610 |
+
inputs=[
|
611 |
+
org_input, language_filter, stars_min, stars_max, forks_min, forks_max,
|
612 |
+
sort_order, total_repos
|
613 |
+
],
|
614 |
+
outputs=[output_data, output_file]
|
615 |
+
)
|
616 |
+
|
617 |
+
# Tab 3: Code Analysis
|
618 |
+
# Gradio Interface for Code Analysis (Updated)
|
619 |
+
with gr.Tab("Code Analysis"):
|
620 |
+
with gr.Row():
|
621 |
+
repo_dropdown = gr.Dropdown(
|
622 |
+
label="Select Repository",
|
623 |
+
choices=[],
|
624 |
+
interactive=True
|
625 |
+
)
|
626 |
+
refresh_button = gr.Button("Refresh Repositories")
|
627 |
+
with gr.Row():
|
628 |
+
branch_dropdown = gr.Dropdown(
|
629 |
+
label="Select Branch",
|
630 |
+
choices=[],
|
631 |
+
interactive=True
|
632 |
+
)
|
633 |
+
with gr.Row():
|
634 |
+
keywords_output = gr.Textbox(label="Keywords")
|
635 |
+
entities_output = gr.Textbox(label="Entities")
|
636 |
+
with gr.Row():
|
637 |
+
summary_output = gr.Textbox(label="Summary")
|
638 |
+
wordcloud_output = gr.Plot(label="Word Cloud") # Use Plot instead of Image
|
639 |
+
|
640 |
+
# New components for displaying files
|
641 |
+
with gr.Row():
|
642 |
+
files_list = gr.Dropdown(
|
643 |
+
label="Files in Repository",
|
644 |
+
choices=[],
|
645 |
+
interactive=True
|
646 |
+
)
|
647 |
+
|
648 |
+
with gr.Row():
|
649 |
+
file_content_box = gr.Textbox(
|
650 |
+
label="File Content",
|
651 |
+
lines=20,
|
652 |
+
interactive=True
|
653 |
+
)
|
654 |
+
|
655 |
+
|
656 |
+
|
657 |
+
with gr.Row(): # Combine question input and button in the same row
|
658 |
+
question_input = gr.Textbox(
|
659 |
+
label="Ask a Question",
|
660 |
+
placeholder="Enter your question about the code...",
|
661 |
+
lines=1
|
662 |
+
)
|
663 |
+
question_button = gr.Button("Get Answer")
|
664 |
+
|
665 |
+
with gr.Row():
|
666 |
+
answer_output = gr.Textbox(label="Bot's Answer", lines=10, interactive=False)
|
667 |
+
|
668 |
+
# Diagram generation interface
|
669 |
+
with gr.Row():
|
670 |
+
diagram_type = gr.Dropdown(
|
671 |
+
label="Select Diagram Type",
|
672 |
+
choices=["Call Graph", "Data Flow Diagram", "Sequence Diagram", "Class Diagram", "Component Diagram", "Workflow Diagram"],
|
673 |
+
value="Call Graph"
|
674 |
+
)
|
675 |
+
generate_diagram_button = gr.Button("Generate Diagram")
|
676 |
+
with gr.Row():
|
677 |
+
#diagram_output = gr.Image(label="Generated Diagram", type="pil")
|
678 |
+
diagram_output = gr.Image(
|
679 |
+
label="Generated Diagram",
|
680 |
+
type="pil", # Ensures compatibility with PIL.Image.Image
|
681 |
+
elem_id="diagram_output", # Add an ID for custom styling if needed
|
682 |
+
interactive=False, # No need for user interaction on the output
|
683 |
+
show_label=True,
|
684 |
+
height=600, # Set a larger default height
|
685 |
+
width=800, # Set a larger default width
|
686 |
+
)
|
687 |
+
|
688 |
+
|
689 |
+
# Hook up the question button to ask_code_question
|
690 |
+
question_button.click(
|
691 |
+
ask_code_question,
|
692 |
+
inputs=[file_content_box, question_input], # Inputs: Code content and user question
|
693 |
+
outputs=[answer_output] # Output: Answer from LLM
|
694 |
+
)
|
695 |
+
|
696 |
+
# NOTE(review): the Generate Diagram button is wired to handle_generate_diagram,
# not to this helper — confirm whether this function is dead code.
def generate_and_render_diagram(code_content, diagram_type):
    """Produce a diagram image for *code_content*.

    Obtains Graphviz DOT source for the requested *diagram_type*,
    rejects output that is not a digraph, and renders the rest to a
    uniquely named file.
    """
    dot_source = generate_dot_code_from_code(code_content, diagram_type)

    # Anything that does not open with "digraph" is not renderable DOT.
    if not dot_source.strip().startswith("digraph"):
        return "Invalid DOT code generated."

    # Unique name so concurrent renders never overwrite each other.
    out_name = f"diagram_{uuid.uuid4().hex}"
    return render_dot_code(dot_source, filename=out_name)
|
707 |
+
|
708 |
+
|
709 |
+
# Render a diagram from the current file content when the button is pressed.
generate_diagram_button.click(
    fn=handle_generate_diagram,
    inputs=[file_content_box, diagram_type],
    outputs=[diagram_output],
)
|
714 |
+
|
715 |
+
# Re-scan for repositories and swap the fresh list into the dropdown.
refresh_button.click(
    fn=lambda: gr.update(choices=get_discovered_repos()),
    inputs=[],
    outputs=[repo_dropdown],
)
|
721 |
+
|
722 |
+
# Keep the branch dropdown in sync with the selected repository.
def update_branches(repo):
    """Return a dropdown update listing *repo*'s branches.

    *repo* is an "owner/name" string; the repository's default branch
    is pre-selected. With no repository chosen the dropdown is emptied.
    """
    if not repo:
        return gr.update(choices=[], value=None)
    owner, repo_name = repo.split("/")
    return gr.update(
        choices=get_branches(owner, repo_name),
        value=get_default_branch(owner, repo_name),
    )
|
730 |
+
|
731 |
+
# Repopulate the branch choices whenever a different repository is picked.
repo_dropdown.change(
    fn=update_branches,
    inputs=[repo_dropdown],
    outputs=[branch_dropdown],
)
|
736 |
+
|
737 |
+
# Analyze the README of the selected repository/branch combination.
def analyze_readme(repo, branch):
    """Run README analysis for the chosen repo and branch.

    Delegates to process_readme, whose result feeds the keyword,
    entity, summary and word-cloud outputs. Returns placeholder values
    when either selection is missing.
    """
    if not (repo and branch):
        return "No repository or branch selected.", "", "", None
    owner, repo_name = repo.split("/")
    return process_readme(owner, repo_name, branch)
|
744 |
+
|
745 |
+
# Re-run README analysis when the repository selection changes.
# NOTE(review): branch_dropdown may still hold the previous repo's branch at
# this instant — confirm event ordering relative to update_branches.
repo_dropdown.change(
    fn=analyze_readme,
    inputs=[repo_dropdown, branch_dropdown],
    outputs=[keywords_output, entities_output, summary_output, wordcloud_output],
)
|
750 |
+
|
751 |
+
# Re-run README analysis when the branch selection changes.
branch_dropdown.change(
    fn=analyze_readme,
    inputs=[repo_dropdown, branch_dropdown],
    outputs=[keywords_output, entities_output, summary_output, wordcloud_output],
)
|
756 |
+
|
757 |
+
# Fetch files in the selected repository and refresh the file dropdown.
def update_files(repo):
    """Refresh the file dropdown for *repo* ("owner/name").

    Side effect: caches the fetched file metadata in the module-level
    files_data list so later callbacks can resolve paths from it.
    """
    global files_data
    if not repo:
        files_data = []  # nothing selected -> clear the cache
        return gr.update(choices=[], value=None)
    owner, repo_name = repo.split("/")
    print("Selected repository:", repo)
    files_data = fetch_files(owner, repo_name)  # default path="" (repo root)
    # Dropdown labels carry both name and path: "name (path)".
    labels = [f"{file['name']} ({file['path']})" for file in files_data]
    print("Fetched files:", files_data)
    print("File names for dropdown:", labels)
    return gr.update(choices=labels, value=None)
|
771 |
+
|
772 |
+
|
773 |
+
|
774 |
+
# Refresh the repository file list whenever the repo selection changes.
# update_files already has the right (repo) signature, so it is passed
# directly — the previous `lambda repo: update_files(repo)` wrapper was
# redundant. Only files_list is wired as an output here.
repo_dropdown.change(
    update_files,
    inputs=[repo_dropdown],
    outputs=[files_list],
)
|
779 |
+
|
780 |
+
# Fetch and display the content of the file chosen in the dropdown.
def display_file_content(repo, branch, selected_file):
    """Return the text of the chosen file, or a placeholder message.

    *selected_file* is a dropdown label of the form "name (path)". The
    path is taken from the LAST " (" group so file names that themselves
    contain " (" do not break the parse — the previous
    ``split(" (")[1][:-1]`` grabbed the first group, yielding a wrong
    path (or raising IndexError when " (" was absent).
    """
    if repo and branch and selected_file:
        owner, repo_name = repo.split("/")
        # rpartition keeps everything after the final " ("; drop the
        # trailing ")" that closes the label.
        file_path = selected_file.rpartition(" (")[2][:-1]
        return fetch_file_content(owner, repo_name, branch, file_path)
    return "No file selected."
|
788 |
+
|
789 |
+
# Load the selected file's source into the editor box.
files_list.change(
    fn=display_file_content,
    inputs=[repo_dropdown, branch_dropdown, files_list],
    outputs=[file_content_box],
)
|
794 |
+
|
795 |
+
|
796 |
+
|
797 |
+
# Start the Gradio app server.
demo.launch()
|