import requests
import pandas as pd
from datetime import datetime
import gradio as gr
import pickle
from sentence_transformers import SentenceTransformer, util
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import json
from openai import OpenAI
from graphviz import Source
import re
import time
from PIL import Image
import os
import uuid

# Fixed directory for saving the generated diagram PNG images
IMAGE_DIR = "/content/images"
os.makedirs(IMAGE_DIR, exist_ok=True)  # Create the directory if it doesn't exist
# Constants for the GitHub API
GITHUB_API_URL = "https://api.github.com/search/repositories"
ACCESS_TOKEN = os.getenv("github_pat")
if not ACCESS_TOKEN:
    raise ValueError("Missing GitHub Personal Access Token. Please set it as a secret in Hugging Face.")
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

# Access the OpenAI API key from secrets
OPENAI_API_KEY = os.getenv("openai_key")
if not OPENAI_API_KEY:
    raise ValueError("Missing OpenAI API Key. Please set it as a secret in Hugging Face.")

# Initialize the OpenAI client once
client = OpenAI(api_key=OPENAI_API_KEY)

# File extensions to include when fetching repository files
ALLOWED_EXTENSIONS = [".py", ".js", ".md", ".toml", ".yaml"]

# Load precomputed topic embeddings
with open("github_topics_embeddings.pkl", "rb") as f:
    topic_data = pickle.load(f)
topics = topic_data["topics"]
embeddings = topic_data["embeddings"]
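# Illustrative note (assumption): the pickle file is expected to hold a dict of the form
#   {"topics": ["machine-learning", "deep-learning", ...],
#    "embeddings": <tensor/array of shape (num_topics, embedding_dim), aligned with "topics">}
# produced with the same SentenceTransformer model ('all-MiniLM-L6-v2') used below.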
discovered_repos = []  # Format: ["owner/repo_name", ...]

# Function to search for similar topics
def search_similar_topics(input_text):
    if not input_text.strip():
        return "Enter topics to see suggestions."
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        query_embedding = model.encode(input_text, convert_to_tensor=True)
        similarities = util.pytorch_cos_sim(query_embedding, embeddings)
        top_indices = similarities[0].argsort(descending=True)[:10]  # Top 10 matches
        return ", ".join([topics[i] for i in top_indices])
    except Exception as e:
        return f"Error in generating suggestions: {str(e)}"
# Function to fetch repositories with pagination
def search_repositories(query, sort="stars", order="desc", total_repos=10):
    all_repos = []
    per_page = 100 if total_repos > 100 else total_repos
    total_pages = (total_repos // per_page) + 1
    for page in range(1, total_pages + 1):
        params = {
            "q": query,
            "sort": sort,
            "order": order,
            "per_page": per_page,
            "page": page,
        }
        response = requests.get(GITHUB_API_URL, headers=HEADERS, params=params)
        print(f"Query: {query}, Status Code: {response.status_code}")
        print(f"Response: {response.json()}")
        if response.status_code != 200:
            raise Exception(f"GitHub API error: {response.status_code} {response.text}")
        items = response.json().get("items", [])
        if not items:
            break
        all_repos.extend(items)
        if len(all_repos) >= total_repos:
            break
    return all_repos[:total_repos]
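# Example (illustrative, not executed): a query string combines GitHub search qualifiers,
# and search_repositories pages through the results up to 100 items at a time:
#   query = "topic:machine-learning stars:10..1000 forks:0..500 created:>2024-01-01 language:Python"
#   repos = search_repositories(query=query, sort="stars", order="desc", total_repos=250)
# Note: the GitHub Search API caps per_page at 100 and returns at most 1000 results per query.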
# Function to calculate additional metrics for a repository
def calculate_additional_metrics(repo):
    created_date = datetime.strptime(repo["created_at"], "%Y-%m-%dT%H:%M:%SZ")
    updated_date = datetime.strptime(repo["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
    days_since_creation = (datetime.utcnow() - created_date).days
    days_since_update = (datetime.utcnow() - updated_date).days
    star_velocity = repo["stargazers_count"] / days_since_creation if days_since_creation > 0 else 0
    fork_to_star_ratio = (repo["forks_count"] / repo["stargazers_count"] * 100) if repo["stargazers_count"] > 0 else 0
    hidden_gem = "Yes" if repo["stargazers_count"] < 500 and repo["forks_count"] < 50 else "No"
    hidden_gem_trend = "Rising" if star_velocity > 1 else "Stable"
    rising_score = ((star_velocity * 10) +
                    (repo["forks_count"] * 0.2) +
                    (repo.get("watchers_count", 0) * 0.3) +
                    (1 / (days_since_update + 1) * 20) -
                    (repo["open_issues_count"] * 0.01))
    legacy_score = (repo["stargazers_count"] * 0.6) + \
                   (repo["forks_count"] * 0.3) + \
                   (repo.get("watchers_count", 0) * 0.1) - \
                   (repo["open_issues_count"] * 0.05)
    # The search payload's "watchers_count" mirrors the star count, so fetch the repo
    # details to get the actual subscriber (watcher) count.
    owner, repo_name = repo["owner"]["login"], repo["name"]
    repo_details_url = f"https://api.github.com/repos/{owner}/{repo_name}"
    response = requests.get(repo_details_url, headers=HEADERS)
    if response.status_code == 200:
        repo_details = response.json()
        actual_watchers = repo_details.get("subscribers_count", 0)
    else:
        actual_watchers = 0
    watcher_to_stars_ratio = (actual_watchers / repo["stargazers_count"]) * 100 if repo["stargazers_count"] > 0 else 0
    return {
        "Rising Score": round(rising_score, 2),
        "Legacy Score": round(legacy_score, 2),
        "Star Velocity (Stars/Day)": round(star_velocity, 2),
        "Fork-to-Star Ratio (%)": round(fork_to_star_ratio, 2),
        "Watchers": actual_watchers,
        "Watcher-to-Stars Ratio (%)": round(watcher_to_stars_ratio, 2),
        "Language": repo.get("language") or "N/A",
        "Topics": ", ".join(repo.get("topics", [])),
        "Hidden Gem": hidden_gem,
        "Hidden Gem Trend": hidden_gem_trend,
        "Open Issues": repo["open_issues_count"],
        "Created At": repo["created_at"],
        "Last Updated": repo["pushed_at"],
        "days_since_creation": round(days_since_creation, 2),
        "days_since_update": round(days_since_update, 2),
        "URL": repo["html_url"],
    }
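# Worked example (illustrative numbers): a repo with star_velocity = 2 stars/day, 100 forks,
# a raw watchers_count of 50, last updated 4 days ago, and 20 open issues scores
#   Rising Score = (2 * 10) + (100 * 0.2) + (50 * 0.3) + (1 / (4 + 1) * 20) - (20 * 0.01)
#                = 20 + 20 + 15 + 4 - 0.2 = 58.8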
# Repository Discovery interface
def gradio_interface(topics, start_date, language_filter, stars_min, stars_max, forks_min, forks_max, total_repos, sort_order):
    global discovered_repos
    if not topics.strip() and not start_date.strip():
        # If neither topics nor a start date is provided, return a validation error
        return pd.DataFrame(), "Please provide at least a topic or a start date."
    topics_list = [topic.strip() for topic in topics.split(",") if topic.strip()]
    stars_range = (stars_min, stars_max)
    forks_range = (forks_min, forks_max)
    df = pd.DataFrame()
    all_repos_data = []
    try:
        # If no topics are provided, fetch repositories by filters only
        if not topics_list:
            query = f"stars:{stars_range[0]}..{stars_range[1]} forks:{forks_range[0]}..{forks_range[1]}"
            if start_date.strip():
                query += f" created:>{start_date.strip()}"
            if language_filter:
                query += f" language:{language_filter}"
            # Fetch repositories
            repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
            for repo in repos:
                repo_data = {
                    "Name": repo["name"],
                    "Owner": repo["owner"]["login"],
                    "Stars": repo["stargazers_count"],
                    "Forks": repo["forks_count"],
                    "Description": repo.get("description") or "N/A",
                }
                repo_data.update(calculate_additional_metrics(repo))
                all_repos_data.append(repo_data)
                # Add the repository to discovered_repos so it appears in the Code Analysis tab
                discovered_repos.append(f"{repo['owner']['login']}/{repo['name']}")
        else:
            for topic in topics_list:
                # Construct the query for this topic
                query = f"topic:{topic} stars:{stars_range[0]}..{stars_range[1]} forks:{forks_range[0]}..{forks_range[1]}"
                if start_date.strip():
                    query += f" created:>{start_date.strip()}"
                if language_filter:
                    query += f" language:{language_filter}"
                # Fetch repositories
                repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
                for repo in repos:
                    repo_data = {
                        "Name": repo["name"],
                        "Owner": repo["owner"]["login"],
                        "Stars": repo["stargazers_count"],
                        "Forks": repo["forks_count"],
                        "Description": repo.get("description") or "N/A",
                    }
                    repo_data.update(calculate_additional_metrics(repo))
                    all_repos_data.append(repo_data)
                    # Add the repository to discovered_repos
                    discovered_repos.append(f"{repo['owner']['login']}/{repo['name']}")
        if not all_repos_data:
            return pd.DataFrame(), "No repositories found matching the criteria."
        # Remove duplicates from discovered_repos
        discovered_repos = list(set(discovered_repos))
        # Create the DataFrame
        df = pd.DataFrame(all_repos_data)
    except Exception as e:
        print(f"Error: {e}")
        return pd.DataFrame(), f"Error fetching repositories: {str(e)}"
    csv_file = None
    if not df.empty:
        csv_file = "discovered_repositories.csv"
        df.to_csv(csv_file, index=False)
    return df, csv_file
# Organization Watch interface
def fetch_org_repositories(org_names, language_filter, stars_min, stars_max, forks_min, forks_max, sort_order, total_repos):
    try:
        org_list = [org.strip() for org in org_names.split(",") if org.strip()]
        if not org_list:
            return pd.DataFrame(), "Enter at least one organization."
        all_repos_data = []
        for org in org_list:
            # Query repositories for each organization
            query = f"user:{org} stars:{stars_min}..{stars_max} forks:{forks_min}..{forks_max}"
            if language_filter:
                query += f" language:{language_filter}"
            repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
            for repo in repos:
                repo_data = {
                    "Name": repo["name"],
                    "Owner": repo["owner"]["login"],
                    "Stars": repo["stargazers_count"],
                    "Forks": repo["forks_count"],
                    "Description": repo.get("description") or "N/A",
                }
                repo_data.update(calculate_additional_metrics(repo))
                all_repos_data.append(repo_data)
        if not all_repos_data:
            return pd.DataFrame(), "No repositories found for the specified organizations."
        # Create the DataFrame and save it as CSV
        df = pd.DataFrame(all_repos_data)
        csv_file = "organization_repositories.csv"
        df.to_csv(csv_file, index=False)
        return df, csv_file
    except Exception as e:
        print(f"Error in fetch_org_repositories: {e}")
        return pd.DataFrame(), f"Error: {str(e)}"
# Function that returns the discovered repositories for the dropdown
def get_discovered_repos():
    global discovered_repos
    return discovered_repos
def process_readme(owner, repo, branch):
    # Fetch the README content from the specified branch
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/README.md"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        readme_content = response.text
    else:
        return f"Failed to fetch README content from branch {branch}.", "", "", None
    # Process the README content with OpenAI
    MODEL = "gpt-4o-mini"
    completion = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that extracts keywords, named entities, and generates summaries from text."},
            {"role": "user", "content": f"""
            Perform the following tasks on the following README file:
            1. Extract the top 25 most important keywords from the text only.
            2. Extract named entities (e.g., people, organizations, technologies).
            3. Summarize the content in one paragraph.
            Return the results in the following JSON format:
            {{
                "keywords": ["keyword1", "keyword2", ...],
                "entities": ["entity1", "entity2", ...],
                "summary": "A concise summary of the README."
            }}
            README file:
            {readme_content}
            """}
        ],
        response_format={"type": "json_object"}
    )
    result = completion.choices[0].message.content
    result_json = json.loads(result)
    keywords = ", ".join(result_json["keywords"])
    entities = ", ".join(result_json["entities"])
    summary = result_json["summary"]
    # Generate a word cloud from the extracted keywords
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(keywords)
    fig = plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    return keywords, entities, summary, fig
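# Example (illustrative, not executed):
#   keywords, entities, summary, fig = process_readme("huggingface", "transformers", "main")
# On failure, the first return value is an error message and the figure is None.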
# Function to get all branches of a repository
def get_branches(owner, repo):
    url = f"https://api.github.com/repos/{owner}/{repo}/branches"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        branches = [branch["name"] for branch in response.json()]
        return branches
    else:
        return []

# Function to get the default branch of a repository
def get_default_branch(owner, repo):
    url = f"https://api.github.com/repos/{owner}/{repo}"
    response = requests.get(url, headers=HEADERS)
    if response.status_code == 200:
        repo_data = response.json()
        return repo_data["default_branch"]
    else:
        return None
# Function to recursively fetch files in a repository (filtered by ALLOWED_EXTENSIONS)
def fetch_files(owner, repo, path=""):
    # Base URL for the GitHub contents API
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" if path else f"https://api.github.com/repos/{owner}/{repo}/contents"
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        # Return an empty list so callers (and recursive calls) always receive a list
        print(f"Failed to fetch files: {response.status_code}")
        return []
    files = []
    for item in response.json():
        if item["type"] == "file":  # Only add files
            # Use the globally defined allowed extensions
            if any(item["name"].endswith(ext) for ext in ALLOWED_EXTENSIONS):
                files.append({
                    "name": item["name"],
                    "path": item["path"],
                    "download_url": item["download_url"]
                })
        elif item["type"] == "dir":
            # Recursively fetch files in subdirectories
            sub_files = fetch_files(owner, repo, item["path"])
            files.extend(sub_files)
    return files
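# Example (illustrative, not executed): fetch_files walks the repository tree recursively and
# returns a flat list of dicts, one per file with an allowed extension, e.g.
#   [{"name": "app.py", "path": "src/app.py", "download_url": "https://raw.githubusercontent.com/..."},
#    {"name": "README.md", "path": "README.md", "download_url": "..."}]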
# Function to fetch the content of a specific file
def fetch_file_content(owner, repo, branch, file_path):
    file_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{file_path}"
    response = requests.get(file_url)
    if response.status_code == 200:
        return response.text
    else:
        return f"Failed to fetch file content: {response.status_code}"
# Function to query GPT-4o-mini about a file's content
def ask_code_question(code_content, question):
    if not code_content.strip():
        return "No code content available to analyze."
    if not question.strip():
        return "Please enter a question about the code."
    # Construct the prompt
    prompt = f"""
    Here is a file from a GitHub repository:
    {code_content}
    Please answer the following question about this file:
    - {question}
    """
    try:
        # Query GPT-4o-mini
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant skilled in understanding code."},
                {"role": "user", "content": prompt}
            ]
        )
        # Extract and return the model's response
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error querying GPT-4o-mini: {str(e)}"
# Function to generate and clean Graphviz diagrams using GPT-4o
def generate_dot_code_from_code(code_content, diagram_type):
    if not code_content.strip():
        return "No code content available to analyze."
    # Construct the prompt dynamically based on the diagram type
    prompt = f"""
    Here is some code from a GitHub repository:
    {code_content}
    Please generate a {diagram_type} for this code in Graphviz DOT/digraph format. Ensure the DOT code is valid and renderable.
    Don't include any other text. Don't provide any other explanatory commentary.
    Ensure the DOT code includes all necessary opening and closing brackets {{ }} for graphs and subgraphs.
    """
    try:
        # Query GPT-4o
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that generates Graphviz DOT code for visualizing Python code. You are restricted to only generate Graphviz code starting with digraph and ending with }"},
                {"role": "user", "content": prompt}
            ]
        )
        raw_dot_code = response.choices[0].message.content.strip()
        validated_dot_code = validate_and_fix_dot_code(raw_dot_code)  # Fix any missing brackets
        # Keep only the digraph { ... } block from the model output
        pattern = r"digraph\b[\s\S]*?^\}"
        match = re.search(pattern, validated_dot_code, re.MULTILINE | re.DOTALL)
        if match:
            validated_dot_code = match.group(0)  # Extract the matched content
        else:
            return "Failed to extract valid Graphviz code."
        return validated_dot_code
    except Exception as e:
        return f"Error querying GPT-4o: {str(e)}"
def validate_and_fix_dot_code(dot_code):
    # Check for unbalanced brackets
    open_brackets = dot_code.count("{")
    close_brackets = dot_code.count("}")
    # If there are missing closing brackets, add them at the end
    if open_brackets > close_brackets:
        missing_brackets = open_brackets - close_brackets
        dot_code += "}" * missing_brackets
    return dot_code
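# Example (illustrative): validate_and_fix_dot_code only balances trailing braces, e.g.
#   validate_and_fix_dot_code("digraph G { a -> b; subgraph cluster_0 { b -> c; }")
# returns the same string with one "}" appended; it does not repair other DOT syntax errors.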
def render_dot_code(dot_code, filename=None):
    """
    Renders Graphviz DOT code and saves it as a PNG image.

    Args:
        dot_code (str): The DOT code to render.
        filename (str): Name for the output PNG file (without extension).

    Returns:
        str: Path to the generated PNG image, or an error message.
    """
    # Ensure the images directory exists
    os.makedirs(IMAGE_DIR, exist_ok=True)
    # Save and render the diagram
    output_path = os.path.join(IMAGE_DIR, f"{filename}")
    try:
        src = Source(dot_code, format="png")
        # Source.render returns the path of the rendered file, which already ends in ".png"
        png_path = src.render(output_path, cleanup=True)
        return png_path
    except Exception as e:
        return f"Error rendering diagram: {str(e)}"
def handle_generate_diagram(code_content, diagram_type, retries=5, wait_time=1):
    """
    Handles diagram generation and returns the rendered image for display.

    Args:
        code_content (str): The source code to analyze.
        diagram_type (str): Type of diagram to generate.
        retries (int): Number of times to retry checking for the file.
        wait_time (float): Time (in seconds) to wait between retries.

    Returns:
        PIL.Image.Image or str: The generated diagram or an error message.
    """
    print("Code content received:", code_content)  # Debugging print
    # Generate and render the diagram
    image_path = generate_and_render_diagram(code_content, diagram_type)
    print(f"Generated image path: {image_path}")  # Debugging print
    # If generation failed, image_path holds an error message rather than a file path
    if not image_path.endswith(".png"):
        return image_path
    # Retry logic for checking file existence
    for attempt in range(retries):
        if os.path.exists(image_path):
            try:
                return Image.open(image_path)  # Return the image if found
            except Exception as e:
                print(f"Error opening image on attempt {attempt + 1}: {e}")
        else:
            print(f"Image not found. Retrying... ({attempt + 1}/{retries})")
        time.sleep(wait_time)  # Wait before the next check
    # If the image is still not found after retries
    print(f"Failed to generate image after {retries} retries: {image_path}")
    return f"Failed to generate image: {image_path}"
# Gradio Interface
with gr.Blocks() as demo:
    # Tab 1: Repository Discovery
    with gr.Tab("Repository Discovery"):
        with gr.Row():
            topics_input = gr.Textbox(
                label="Topics (comma-separated, leave empty to fetch by date only)",
                placeholder="e.g., machine-learning, deep-learning (leave empty for date-based search)"
            )
            similar_topics = gr.Textbox(
                label="Similar Topics (based on embeddings)",
                interactive=False
            )
        gr.Button("Get Similar Topics").click(
            search_similar_topics,
            inputs=[topics_input],
            outputs=[similar_topics]
        )
        with gr.Row():
            start_date_input = gr.Textbox(
                label="Start Date (YYYY-MM-DD, leave empty if not filtering by date)",
                placeholder="Set to filter recent repositories by date or leave empty"
            )
            language_filter = gr.Dropdown(
                choices=["", "Python", "JavaScript", "Java", "C++", "Ruby", "Go"],
                label="Language Filter",
                value=""
            )
            stars_min = gr.Number(label="Stars Min", value=10)
            stars_max = gr.Number(label="Stars Max", value=1000)
        with gr.Row():
            forks_min = gr.Number(label="Forks Min", value=0)
            forks_max = gr.Number(label="Forks Max", value=500)
            total_repos = gr.Number(label="Total Repositories", value=10, step=10)
            sort_order = gr.Dropdown(
                choices=["stars", "forks", "updated"],
                label="Sort Order",
                value="stars"
            )
        with gr.Row():
            output_data = gr.Dataframe(label="Discovered Repositories")
            output_file = gr.File(label="Download CSV", file_count="single")
        gr.Button("Discover Repositories").click(
            gradio_interface,
            inputs=[
                topics_input, start_date_input, language_filter, stars_min, stars_max,
                forks_min, forks_max, total_repos, sort_order
            ],
            outputs=[output_data, output_file]
        )
    # Tab 2: Organization Watch
    with gr.Tab("Organization Watch"):
        with gr.Row():
            org_input = gr.Textbox(
                label="Organizations (comma-separated)",
                placeholder="e.g., facebookresearch, openai"
            )
        with gr.Row():
            language_filter = gr.Dropdown(
                choices=["", "Python", "JavaScript", "Java", "C++", "Ruby", "Go"],
                label="Language Filter",
                value=""
            )
            stars_min = gr.Number(label="Stars Min", value=10)
            stars_max = gr.Number(label="Stars Max", value=1000)
        with gr.Row():
            forks_min = gr.Number(label="Forks Min", value=0)
            forks_max = gr.Number(label="Forks Max", value=500)
            total_repos = gr.Number(label="Total Repositories", value=10, step=10)
            sort_order = gr.Dropdown(
                choices=["stars", "forks", "updated"],
                label="Sort Order",
                value="stars"
            )
        with gr.Row():
            output_data = gr.Dataframe(label="Repositories by Organizations")
            output_file = gr.File(label="Download CSV", file_count="single")
        gr.Button("Fetch Organization Repositories").click(
            fetch_org_repositories,
            inputs=[
                org_input, language_filter, stars_min, stars_max, forks_min, forks_max,
                sort_order, total_repos
            ],
            outputs=[output_data, output_file]
        )
    # Tab 3: Code Analysis
    with gr.Tab("Code Analysis"):
        with gr.Row():
            repo_dropdown = gr.Dropdown(
                label="Select Repository",
                choices=[],
                interactive=True
            )
            refresh_button = gr.Button("Refresh Repositories")
        with gr.Row():
            branch_dropdown = gr.Dropdown(
                label="Select Branch",
                choices=[],
                interactive=True
            )
        with gr.Row():
            keywords_output = gr.Textbox(label="Keywords")
            entities_output = gr.Textbox(label="Entities")
        with gr.Row():
            summary_output = gr.Textbox(label="Summary")
            wordcloud_output = gr.Plot(label="Word Cloud")  # Use Plot instead of Image
        # Components for browsing repository files
        with gr.Row():
            files_list = gr.Dropdown(
                label="Files in Repository",
                choices=[],
                interactive=True
            )
        with gr.Row():
            file_content_box = gr.Textbox(
                label="File Content",
                lines=20,
                interactive=True
            )
        with gr.Row():  # Combine the question input and button in the same row
            question_input = gr.Textbox(
                label="Ask a Question",
                placeholder="Enter your question about the code...",
                lines=1
            )
            question_button = gr.Button("Get Answer")
        with gr.Row():
            answer_output = gr.Textbox(label="Bot's Answer", lines=10, interactive=False)
        # Diagram generation interface
        with gr.Row():
            diagram_type = gr.Dropdown(
                label="Select Diagram Type",
                choices=["Call Graph", "Data Flow Diagram", "Sequence Diagram", "Class Diagram", "Component Diagram", "Workflow Diagram"],
                value="Call Graph"
            )
            generate_diagram_button = gr.Button("Generate Diagram")
        with gr.Row():
            diagram_output = gr.Image(
                label="Generated Diagram",
                type="pil",  # Ensures compatibility with PIL.Image.Image
                elem_id="diagram_output",  # ID for custom styling if needed
                interactive=False,  # No user interaction needed on the output
                show_label=True,
                height=600,  # Larger default height
                width=800,  # Larger default width
            )
        # Hook up the question button to ask_code_question
        question_button.click(
            ask_code_question,
            inputs=[file_content_box, question_input],  # Inputs: code content and user question
            outputs=[answer_output]  # Output: answer from the LLM
        )
        # Callback to generate and render the diagram
        def generate_and_render_diagram(code_content, diagram_type):
            # Generate the DOT code
            dot_code = generate_dot_code_from_code(code_content, diagram_type)
            # Check for valid DOT code
            if not dot_code.strip().startswith("digraph"):
                return "Invalid DOT code generated."
            unique_filename = f"diagram_{uuid.uuid4().hex}"  # Generate a unique filename
            return render_dot_code(dot_code, filename=unique_filename)  # Render the diagram

        generate_diagram_button.click(
            handle_generate_diagram,
            inputs=[file_content_box, diagram_type],  # Use file_content_box as the code source
            outputs=[diagram_output]  # Output: the rendered diagram image
        )

        # Refresh the repository list
        refresh_button.click(
            lambda: gr.update(choices=get_discovered_repos()),
            inputs=[],
            outputs=[repo_dropdown]
        )

        # Update the branch dropdown when a repository is selected
        def update_branches(repo):
            if repo:
                owner, repo_name = repo.split("/")
                branches = get_branches(owner, repo_name)
                default_branch = get_default_branch(owner, repo_name)
                return gr.update(choices=branches, value=default_branch)
            return gr.update(choices=[], value=None)

        repo_dropdown.change(
            update_branches,
            inputs=[repo_dropdown],
            outputs=[branch_dropdown]
        )

        # Analyze the README based on the selected repository and branch
        def analyze_readme(repo, branch):
            if repo and branch:
                owner, repo_name = repo.split("/")  # Extract the owner and repo name
                # Pass the branch to analyze that branch's README
                return process_readme(owner, repo_name, branch)
            return "No repository or branch selected.", "", "", None

        repo_dropdown.change(
            analyze_readme,
            inputs=[repo_dropdown, branch_dropdown],
            outputs=[keywords_output, entities_output, summary_output, wordcloud_output]
        )
        branch_dropdown.change(
            analyze_readme,  # Function to call when the branch changes
            inputs=[repo_dropdown, branch_dropdown],  # Pass both repo and branch as inputs
            outputs=[keywords_output, entities_output, summary_output, wordcloud_output]  # Update outputs
        )
        # Fetch the files in the selected repository
        def update_files(repo):
            global files_data  # Stores the fetched files for later use
            if repo:
                owner, repo_name = repo.split("/")  # Extract owner and repo
                print("Selected repository:", repo)
                files = fetch_files(owner, repo_name)  # Call with the default path=""
                files_data = files  # Store the fetched files for later use
                file_names = [f"{file['name']} ({file['path']})" for file in files]  # Prepare dropdown labels
                print("Fetched files:", files)  # Debugging: ensure files are fetched correctly
                print("File names for dropdown:", file_names)  # Debugging: ensure dropdown labels are created
                return gr.update(choices=file_names, value=None)  # Update the dropdown
            files_data = []  # Clear files_data if no repo is selected
            return gr.update(choices=[], value=None)

        repo_dropdown.change(
            update_files,
            inputs=[repo_dropdown],
            outputs=[files_list]  # Update the files dropdown
        )

        # Fetch and display the content of the selected file
        def display_file_content(repo, branch, selected_file):
            if repo and branch and selected_file:
                owner, repo_name = repo.split("/")
                file_path = selected_file.split(" (")[1][:-1]  # Extract the file path from the dropdown label
                content = fetch_file_content(owner, repo_name, branch, file_path)
                return content
            return "No file selected."

        files_list.change(
            display_file_content,
            inputs=[repo_dropdown, branch_dropdown, files_list],
            outputs=[file_content_box]
        )

demo.launch()