PuristanLabs1 committed (verified)
Commit db569a8 · Parent(s): a67e492

Create app.py

Files changed (1)
  1. app.py +797 -0
app.py ADDED
@@ -0,0 +1,797 @@
+ import requests
+ import pandas as pd
+ from datetime import datetime
+ import gradio as gr
+ import pickle
+ from sentence_transformers import SentenceTransformer, util
+ from wordcloud import WordCloud
+ import matplotlib.pyplot as plt
+ import json
+ import time
+ from openai import OpenAI
+ from graphviz import Source
+ import re
+ from PIL import Image
+ import os
+ import uuid
+
+ # Fixed directory for saving the generated diagram PNG images
+ IMAGE_DIR = "/content/images"
+ os.makedirs(IMAGE_DIR, exist_ok=True)  # Create the directory if it doesn't exist
+
+ # Constants for the GitHub API
+ GITHUB_API_URL = "https://api.github.com/search/repositories"
+ ACCESS_TOKEN = os.getenv("github_pat")
+ if not ACCESS_TOKEN:
+     raise ValueError("Missing GitHub Personal Access Token. Please set it as a secret in Hugging Face.")
+ HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
+
+ # Access the OpenAI API key from secrets
+ OPENAI_API_KEY = os.getenv("openai_key")
+ if not OPENAI_API_KEY:
+     raise ValueError("Missing OpenAI API Key. Please set it as a secret in Hugging Face.")
+
+ # Initialize the OpenAI client once
+ client = OpenAI(api_key=OPENAI_API_KEY)
+
+ # File extensions listed in the Code Analysis tab
+ ALLOWED_EXTENSIONS = [".py", ".js", ".md", ".toml", ".yaml"]
+
+ # Load precomputed topic embeddings
+ with open("github_topics_embeddings.pkl", "rb") as f:
+     topic_data = pickle.load(f)
+
+ topics = topic_data["topics"]
+ embeddings = topic_data["embeddings"]
+
+ discovered_repos = []  # Format: ["owner/repo_name", ...]
+
+ # Function to search for similar topics
+ def search_similar_topics(input_text):
+     if not input_text.strip():
+         return "Enter topics to see suggestions."
+     try:
+         model = SentenceTransformer('all-MiniLM-L6-v2')
+         query_embedding = model.encode(input_text, convert_to_tensor=True)
+         similarities = util.pytorch_cos_sim(query_embedding, embeddings)
+         top_indices = similarities[0].argsort(descending=True)[:10]  # Top 10 matches
+         return ", ".join([topics[i] for i in top_indices])
+     except Exception as e:
+         return f"Error in generating suggestions: {str(e)}"
+
+ # Function to fetch repositories with pagination
+ def search_repositories(query, sort="stars", order="desc", total_repos=10):
+     total_repos = int(total_repos)  # gr.Number inputs arrive as floats
+     all_repos = []
+     per_page = 100 if total_repos > 100 else total_repos
+     total_pages = (total_repos // per_page) + 1
+
+     for page in range(1, total_pages + 1):
+         params = {
+             "q": query,
+             "sort": sort,
+             "order": order,
+             "per_page": per_page,
+             "page": page,
+         }
+         response = requests.get(GITHUB_API_URL, headers=HEADERS, params=params)
+         print(f"Query: {query}, Status Code: {response.status_code}")
+         print(f"Response: {response.json()}")
+
+         if response.status_code != 200:
+             raise Exception(f"GitHub API error: {response.status_code} {response.text}")
+
+         items = response.json().get("items", [])
+         if not items:
+             break
+
+         all_repos.extend(items)
+         if len(all_repos) >= total_repos:
+             break
+
+     return all_repos[:total_repos]
+
+ # Function to calculate additional metrics
+ def calculate_additional_metrics(repo):
+     created_date = datetime.strptime(repo["created_at"], "%Y-%m-%dT%H:%M:%SZ")
+     updated_date = datetime.strptime(repo["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
+     days_since_creation = (datetime.utcnow() - created_date).days
+     days_since_update = (datetime.utcnow() - updated_date).days
+     star_velocity = repo["stargazers_count"] / days_since_creation if days_since_creation > 0 else 0
+     fork_to_star_ratio = (repo["forks_count"] / repo["stargazers_count"] * 100) if repo["stargazers_count"] > 0 else 0
+     hidden_gem = "Yes" if repo["stargazers_count"] < 500 and repo["forks_count"] < 50 else "No"
+     hidden_gem_trend = "Rising" if star_velocity > 1 else "Stable"
+     rising_score = ((star_velocity * 10) +
+                     (repo["forks_count"] * 0.2) +
+                     (repo.get("watchers_count", 0) * 0.3) +
+                     (1 / (days_since_update + 1) * 20) -
+                     (repo["open_issues_count"] * 0.01))
+     legacy_score = ((repo["stargazers_count"] * 0.6) +
+                     (repo["forks_count"] * 0.3) +
+                     (repo.get("watchers_count", 0) * 0.1) -
+                     (repo["open_issues_count"] * 0.05))
+
+     # The search API's "watchers_count" mirrors the star count, so fetch the repository
+     # details and use "subscribers_count" for the actual number of watchers
+     owner, repo_name = repo["owner"]["login"], repo["name"]
+     repo_details_url = f"https://api.github.com/repos/{owner}/{repo_name}"
+     response = requests.get(repo_details_url, headers=HEADERS)
+     if response.status_code == 200:
+         repo_details = response.json()
+         actual_watchers = repo_details.get("subscribers_count", 0)
+     else:
+         actual_watchers = 0
+     watcher_to_stars_ratio = (actual_watchers / repo["stargazers_count"]) * 100 if repo["stargazers_count"] > 0 else 0
+
+     return {
+         "Rising Score": round(rising_score, 2),
+         "Legacy Score": round(legacy_score, 2),
+         "Star Velocity (Stars/Day)": round(star_velocity, 2),
+         "Fork-to-Star Ratio (%)": round(fork_to_star_ratio, 2),
+         "Watchers": actual_watchers,
+         "Watcher-to-Stars Ratio (%)": round(watcher_to_stars_ratio, 2),
+         "Language": repo.get("language", "N/A"),
+         "Topics": ", ".join(repo.get("topics", [])),
+         "Hidden Gem": hidden_gem,
+         "Hidden Gem Trend": hidden_gem_trend,
+         "Open Issues": repo["open_issues_count"],
+         "Created At": repo["created_at"],
+         "Last Updated": repo["pushed_at"],
+         "days_since_creation": round(days_since_creation, 2),
+         "days_since_update": round(days_since_update, 2),
+         "URL": repo["html_url"],
+     }
+
+ # Repository Discovery interface
+ def gradio_interface(topics, start_date, language_filter, stars_min, stars_max, forks_min, forks_max, total_repos, sort_order):
+     global discovered_repos
+
+     if not topics.strip() and not start_date.strip():
+         # If neither topics nor a start date are provided, return a validation error
+         return pd.DataFrame(), "Please provide at least a topic or a start date."
+
+     topics_list = [topic.strip() for topic in topics.split(",") if topic.strip()]
+     stars_range = (stars_min, stars_max)
+     forks_range = (forks_min, forks_max)
+     df = pd.DataFrame()
+     all_repos_data = []
+
+     try:
+         # If no topics are provided, fetch repositories by filters only
+         if not topics_list:
+             query = f"stars:{stars_range[0]}..{stars_range[1]} forks:{forks_range[0]}..{forks_range[1]}"
+             if start_date.strip():
+                 query += f" created:>{start_date.strip()}"
+             if language_filter:
+                 query += f" language:{language_filter}"
+
+             # Fetch repositories
+             repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
+             for repo in repos:
+                 repo_data = {
+                     "Name": repo["name"],
+                     "Owner": repo["owner"]["login"],
+                     "Stars": repo["stargazers_count"],
+                     "Forks": repo["forks_count"],
+                     "Description": repo.get("description", "N/A"),
+                 }
+                 repo_data.update(calculate_additional_metrics(repo))
+                 all_repos_data.append(repo_data)
+                 # Add the repository to discovered_repos for the Code Analysis tab
+                 discovered_repos.append(f"{repo['owner']['login']}/{repo['name']}")
+         else:
+             for topic in topics_list:
+                 # Construct the query for this topic
+                 query = f"topic:{topic} stars:{stars_range[0]}..{stars_range[1]} forks:{forks_range[0]}..{forks_range[1]}"
+                 if start_date.strip():
+                     query += f" created:>{start_date.strip()}"
+                 if language_filter:
+                     query += f" language:{language_filter}"
+
+                 # Fetch repositories
+                 repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
+                 for repo in repos:
+                     repo_data = {
+                         "Name": repo["name"],
+                         "Owner": repo["owner"]["login"],
+                         "Stars": repo["stargazers_count"],
+                         "Forks": repo["forks_count"],
+                         "Description": repo.get("description", "N/A"),
+                     }
+                     repo_data.update(calculate_additional_metrics(repo))
+                     all_repos_data.append(repo_data)
+                     # Add the repository to discovered_repos for the Code Analysis tab
+                     discovered_repos.append(f"{repo['owner']['login']}/{repo['name']}")
+
+         if not all_repos_data:
+             return pd.DataFrame(), "No repositories found matching the criteria."
+
+         # Remove duplicates from discovered_repos
+         discovered_repos = list(set(discovered_repos))
+
+         # Create the DataFrame
+         df = pd.DataFrame(all_repos_data)
+
+     except Exception as e:
+         print(f"Error: {e}")
+         return pd.DataFrame(), f"Error fetching repositories: {str(e)}"
+
+     csv_file = None
+     if not df.empty:
+         csv_file = "discovered_repositories.csv"
+         df.to_csv(csv_file, index=False)
+     return df, csv_file
+
+ # Organization Watch interface
+ def fetch_org_repositories(org_names, language_filter, stars_min, stars_max, forks_min, forks_max, sort_order, total_repos):
+     try:
+         org_list = [org.strip() for org in org_names.split(",") if org.strip()]
+         if not org_list:
+             return pd.DataFrame(), "Enter at least one organization."
+
+         all_repos_data = []
+         for org in org_list:
+             # Query repositories for each organization
+             query = f"user:{org} stars:{stars_min}..{stars_max} forks:{forks_min}..{forks_max}"
+             if language_filter:
+                 query += f" language:{language_filter}"
+
+             repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
+
+             for repo in repos:
+                 repo_data = {
+                     "Name": repo["name"],
+                     "Owner": repo["owner"]["login"],
+                     "Stars": repo["stargazers_count"],
+                     "Forks": repo["forks_count"],
+                     "Description": repo.get("description", "N/A"),
+                 }
+                 repo_data.update(calculate_additional_metrics(repo))
+                 all_repos_data.append(repo_data)
+
+         if not all_repos_data:
+             return pd.DataFrame(), "No repositories found for the specified organizations."
+
+         # Create DataFrame
+         df = pd.DataFrame(all_repos_data)
+         csv_file = "organization_repositories.csv"
+         df.to_csv(csv_file, index=False)
+         return df, csv_file
+
+     except Exception as e:
+         print(f"Error in fetch_org_repositories: {e}")
+         return pd.DataFrame(), f"Error: {str(e)}"
+
+ # Function to fetch discovered repositories for the dropdown
+ def get_discovered_repos():
+     global discovered_repos
+     return discovered_repos
+
+ def process_readme(owner, repo, branch):
+     # Fetch README content from the specified branch
+     url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/README.md"
+     response = requests.get(url, headers=HEADERS)
+     if response.status_code == 200:
+         readme_content = response.text
+     else:
+         return f"Failed to fetch README content from branch {branch}.", "", "", None
+
+     # Process README content with OpenAI
+     MODEL = "gpt-4o-mini"
+
+     completion = client.chat.completions.create(
+         model=MODEL,
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant that extracts keywords, named entities, and generates summaries from text."},
+             {"role": "user", "content": f"""
+             Perform the following tasks on the following README file:
+             1. Extract the top 25 most important keywords from the text only.
+             2. Extract named entities (e.g., people, organizations, technologies).
+             3. Summarize the content in one paragraph.
+
+             Return the results in the following JSON format:
+             {{
+                 "keywords": ["keyword1", "keyword2", ...],
+                 "entities": ["entity1", "entity2", ...],
+                 "summary": "A concise summary of the README."
+             }}
+
+             README file:
+             {readme_content}
+             """}
+         ],
+         response_format={"type": "json_object"}
+     )
+
+     result = completion.choices[0].message.content
+     result_json = json.loads(result)
+
+     keywords = ", ".join(result_json["keywords"])
+     entities = ", ".join(result_json["entities"])
+     summary = result_json["summary"]
+
+     # Generate word cloud
+     wordcloud = WordCloud(width=800, height=400, background_color='white').generate(keywords)
+     plt.figure(figsize=(10, 5))
+     plt.imshow(wordcloud, interpolation='bilinear')
+     plt.axis('off')
+
+     return keywords, entities, summary, plt
+
+ # Function to get all branches of a repository
+ def get_branches(owner, repo):
+     url = f"https://api.github.com/repos/{owner}/{repo}/branches"
+     response = requests.get(url, headers=HEADERS)
+     if response.status_code == 200:
+         branches = [branch["name"] for branch in response.json()]
+         return branches
+     else:
+         return []
+
+ # Function to get the default branch of a repository
+ def get_default_branch(owner, repo):
+     url = f"https://api.github.com/repos/{owner}/{repo}"
+     response = requests.get(url, headers=HEADERS)
+     if response.status_code == 200:
+         repo_data = response.json()
+         return repo_data["default_branch"]
+     else:
+         return None
+
+ # Function to recursively list repository files, filtered by ALLOWED_EXTENSIONS
+ def fetch_files(owner, repo, path=""):
+     # Base URL for the GitHub contents API
+     url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" if path else f"https://api.github.com/repos/{owner}/{repo}/contents"
+     response = requests.get(url, headers=HEADERS)
+
+     if response.status_code != 200:
+         # Always return a list so recursive calls and callers can extend/iterate safely
+         print(f"Failed to fetch files: {response.status_code}")
+         return []
+
+     files = []
+     for item in response.json():
+         if item["type"] == "file":  # Only add files
+             # Use the globally defined allowed extensions
+             if any(item["name"].endswith(ext) for ext in ALLOWED_EXTENSIONS):
+                 files.append({
+                     "name": item["name"],
+                     "path": item["path"],
+                     "download_url": item["download_url"]
+                 })
+         elif item["type"] == "dir":
+             # Recursively fetch files in subdirectories
+             sub_files = fetch_files(owner, repo, item["path"])
+             files.extend(sub_files)
+     return files
+
+ # Function to fetch the content of a specific file
+ def fetch_file_content(owner, repo, branch, file_path):
+     file_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{file_path}"
+     response = requests.get(file_url)
+
+     if response.status_code == 200:
+         return response.text
+     else:
+         return f"Failed to fetch file content: {response.status_code}"
+
+ # Function to query GPT-4o-mini
+ def ask_code_question(code_content, question):
+     if not code_content.strip():
+         return "No code content available to analyze."
+     if not question.strip():
+         return "Please enter a question about the code."
+
+     # Construct the prompt
+     prompt = f"""
+     Here is a Python file from a GitHub repository:
+
+     {code_content}
+
+     Please answer the following question about this file:
+     - {question}
+     """
+
+     try:
+         # Query GPT-4o-mini
+         response = client.chat.completions.create(
+             model="gpt-4o-mini",
+             messages=[
+                 {"role": "system", "content": "You are a helpful assistant skilled in understanding code."},
+                 {"role": "user", "content": prompt}
+             ]
+         )
+         # Extract and return GPT's response
+         return response.choices[0].message.content.strip()
+     except Exception as e:
+         return f"Error querying GPT-4o-mini: {str(e)}"
+
+ # Function to generate and clean Graphviz diagrams using GPT-4o
+ def generate_dot_code_from_code(code_content, diagram_type):
+     if not code_content.strip():
+         return "No code content available to analyze."
+
+     # Construct the prompt dynamically based on the diagram type
+     prompt = f"""
+     Here is some Python code from a GitHub repository:
+
+     {code_content}
+
+     Please generate a {diagram_type} for this code in Graphviz DOT/digraph format. Ensure the DOT code is valid and renderable.
+     Don't include any other text. Don't provide any other explanatory commentary.
+     Ensure the DOT code includes all necessary opening and closing braces {{ and }} for graphs and subgraphs.
+     """
+     try:
+         # Query GPT-4o
+         response = client.chat.completions.create(
+             model="gpt-4o",
+             messages=[
+                 {"role": "system", "content": "You are a helpful assistant that generates Graphviz DOT code for visualizing Python code. You are restricted to only generate Graphviz code starting with digraph and ending with }"},
+                 {"role": "user", "content": prompt}
+             ]
+         )
+         raw_dot_code = response.choices[0].message.content.strip()
+         validated_dot_code = validate_and_fix_dot_code(raw_dot_code)  # Fix any missing brackets
+
+         # Keep only the digraph { ... } block from the model output
+         pattern = r"digraph\b[\s\S]*?^\}"
+         match = re.search(pattern, validated_dot_code, re.MULTILINE | re.DOTALL)
+         if match:
+             validated_dot_code = match.group(0)  # Extract the matched content
+         else:
+             return "Failed to extract valid Graphviz code."
+
+         return validated_dot_code
+     except Exception as e:
+         return f"Error querying GPT-4o: {str(e)}"
+
+ def validate_and_fix_dot_code(dot_code):
+     # Check for unbalanced brackets
+     open_brackets = dot_code.count("{")
+     close_brackets = dot_code.count("}")
+
+     # If there are missing closing brackets, add them at the end
+     if open_brackets > close_brackets:
+         missing_brackets = open_brackets - close_brackets
+         dot_code += "}" * missing_brackets
+
+     return dot_code
+
+ def render_dot_code(dot_code, filename=None):
+     """
+     Renders Graphviz DOT code and saves it as a PNG image.
+
+     Args:
+         dot_code (str): The DOT code to render.
+         filename (str): Name for the output PNG file (without extension).
+
+     Returns:
+         str: Path to the generated PNG image, or an error message.
+     """
+     # Ensure the images directory exists
+     os.makedirs(IMAGE_DIR, exist_ok=True)
+
+     # Save and render the diagram
+     output_path = os.path.join(IMAGE_DIR, f"{filename}")
+     try:
+         src = Source(dot_code, format="png")
+         # graphviz appends the ".png" extension to the rendered file path
+         rendered_path = src.render(output_path, cleanup=True)
+         return rendered_path
+     except Exception as e:
+         return f"Error rendering diagram: {str(e)}"
+
+ def handle_generate_diagram(code_content, diagram_type, retries=5, wait_time=1):
+     """
+     Handles diagram generation and returns the rendered image for display.
+
+     Args:
+         code_content (str): The source code to analyze.
+         diagram_type (str): Type of diagram to generate.
+         retries (int): Number of times to retry checking for the file.
+         wait_time (float): Time (in seconds) to wait between retries.
+
+     Returns:
+         PIL.Image.Image or None: The generated diagram, or None if generation failed.
+     """
+     print("Code content received:", code_content)  # Debugging print
+
+     # Generate and render the diagram
+     image_path = generate_and_render_diagram(code_content, diagram_type)
+     print(f"Generated image path: {image_path}")  # Debugging print
+
+     # Retry logic for checking file existence
+     for attempt in range(retries):
+         if os.path.exists(image_path):
+             try:
+                 return Image.open(image_path)  # Return the image if found
+             except Exception as e:
+                 print(f"Error opening image on attempt {attempt + 1}: {e}")
+         else:
+             print(f"Image not found. Retrying... ({attempt + 1}/{retries})")
+             time.sleep(wait_time)  # Wait before the next check
+
+     # If the image is still not found after retries, return None so the Image output stays empty
+     print(f"Failed to generate image after {retries} retries: {image_path}")
+     return None
+
+ # Gradio interface
+ with gr.Blocks() as demo:
+     # Tab 1: Repository Discovery
+     with gr.Tab("Repository Discovery"):
+         with gr.Row():
+             topics_input = gr.Textbox(
+                 label="Topics (comma-separated, leave empty to fetch by date only)",
+                 placeholder="e.g., machine-learning, deep-learning (leave empty for date-based search)"
+             )
+             similar_topics = gr.Textbox(
+                 label="Similar Topics (based on embeddings)",
+                 interactive=False
+             )
+             gr.Button("Get Similar Topics").click(
+                 search_similar_topics,
+                 inputs=[topics_input],
+                 outputs=[similar_topics]
+             )
+
+         with gr.Row():
+             start_date_input = gr.Textbox(
+                 label="Start Date (YYYY-MM-DD, leave empty if not filtering by date)",
+                 placeholder="Set to filter recent repositories by date or leave empty"
+             )
+             language_filter = gr.Dropdown(
+                 choices=["", "Python", "JavaScript", "Java", "C++", "Ruby", "Go"],
+                 label="Language Filter",
+                 value=""
+             )
+             stars_min = gr.Number(label="Stars Min", value=10)
+             stars_max = gr.Number(label="Stars Max", value=1000)
+         with gr.Row():
+             forks_min = gr.Number(label="Forks Min", value=0)
+             forks_max = gr.Number(label="Forks Max", value=500)
+             total_repos = gr.Number(label="Total Repositories", value=10, step=10)
+             sort_order = gr.Dropdown(
+                 choices=["stars", "forks", "updated"],
+                 label="Sort Order",
+                 value="stars"
+             )
+         with gr.Row():
+             output_data = gr.Dataframe(label="Discovered Repositories")
+             output_file = gr.File(label="Download CSV", file_count="single")
+         gr.Button("Discover Repositories").click(
+             gradio_interface,
+             inputs=[
+                 topics_input, start_date_input, language_filter, stars_min, stars_max,
+                 forks_min, forks_max, total_repos, sort_order
+             ],
+             outputs=[output_data, output_file]
+         )
+
+     # Tab 2: Organization Watch
+     with gr.Tab("Organization Watch"):
+         with gr.Row():
+             org_input = gr.Textbox(
+                 label="Organizations (comma-separated)",
+                 placeholder="e.g., facebookresearch, openai"
+             )
+         with gr.Row():
+             language_filter = gr.Dropdown(
+                 choices=["", "Python", "JavaScript", "Java", "C++", "Ruby", "Go"],
+                 label="Language Filter",
+                 value=""
+             )
+             stars_min = gr.Number(label="Stars Min", value=10)
+             stars_max = gr.Number(label="Stars Max", value=1000)
+         with gr.Row():
+             forks_min = gr.Number(label="Forks Min", value=0)
+             forks_max = gr.Number(label="Forks Max", value=500)
+             total_repos = gr.Number(label="Total Repositories", value=10, step=10)
+             sort_order = gr.Dropdown(
+                 choices=["stars", "forks", "updated"],
+                 label="Sort Order",
+                 value="stars"
+             )
+         with gr.Row():
+             output_data = gr.Dataframe(label="Repositories by Organizations")
+             output_file = gr.File(label="Download CSV", file_count="single")
+         gr.Button("Fetch Organization Repositories").click(
+             fetch_org_repositories,
+             inputs=[
+                 org_input, language_filter, stars_min, stars_max, forks_min, forks_max,
+                 sort_order, total_repos
+             ],
+             outputs=[output_data, output_file]
+         )
+
+     # Tab 3: Code Analysis
+     with gr.Tab("Code Analysis"):
+         with gr.Row():
+             repo_dropdown = gr.Dropdown(
+                 label="Select Repository",
+                 choices=[],
+                 interactive=True
+             )
+             refresh_button = gr.Button("Refresh Repositories")
+         with gr.Row():
+             branch_dropdown = gr.Dropdown(
+                 label="Select Branch",
+                 choices=[],
+                 interactive=True
+             )
+         with gr.Row():
+             keywords_output = gr.Textbox(label="Keywords")
+             entities_output = gr.Textbox(label="Entities")
+         with gr.Row():
+             summary_output = gr.Textbox(label="Summary")
+             wordcloud_output = gr.Plot(label="Word Cloud")  # Use Plot instead of Image
+
+         # New components for displaying files
+         with gr.Row():
+             files_list = gr.Dropdown(
+                 label="Files in Repository",
+                 choices=[],
+                 interactive=True
+             )
+
+         with gr.Row():
+             file_content_box = gr.Textbox(
+                 label="File Content",
+                 lines=20,
+                 interactive=True
+             )
+
+         with gr.Row():  # Combine question input and button in the same row
+             question_input = gr.Textbox(
+                 label="Ask a Question",
+                 placeholder="Enter your question about the code...",
+                 lines=1
+             )
+             question_button = gr.Button("Get Answer")
+
+         with gr.Row():
+             answer_output = gr.Textbox(label="Bot's Answer", lines=10, interactive=False)
+
+         # Diagram generation interface
+         with gr.Row():
+             diagram_type = gr.Dropdown(
+                 label="Select Diagram Type",
+                 choices=["Call Graph", "Data Flow Diagram", "Sequence Diagram", "Class Diagram", "Component Diagram", "Workflow Diagram"],
+                 value="Call Graph"
+             )
+             generate_diagram_button = gr.Button("Generate Diagram")
+         with gr.Row():
+             diagram_output = gr.Image(
+                 label="Generated Diagram",
+                 type="pil",  # Ensures compatibility with PIL.Image.Image
+                 elem_id="diagram_output",  # Add an ID for custom styling if needed
+                 interactive=False,  # No need for user interaction on the output
+                 show_label=True,
+                 height=600,  # Set a larger default height
+                 width=800,  # Set a larger default width
+             )
+
+         # Hook up the question button to ask_code_question
+         question_button.click(
+             ask_code_question,
+             inputs=[file_content_box, question_input],  # Inputs: code content and user question
+             outputs=[answer_output]  # Output: answer from the LLM
+         )
+
+     # Callback to generate and render the diagram
+     def generate_and_render_diagram(code_content, diagram_type):
+         # Generate DOT code
+         dot_code = generate_dot_code_from_code(code_content, diagram_type)
+
+         # Check for valid DOT code
+         if not dot_code.strip().startswith("digraph"):
+             return "Invalid DOT code generated."
+
+         unique_filename = f"diagram_{uuid.uuid4().hex}"  # Generate a unique filename
+         return render_dot_code(dot_code, filename=unique_filename)  # Render the diagram
+
+     generate_diagram_button.click(
+         handle_generate_diagram,
+         inputs=[file_content_box, diagram_type],  # Use file_content_box instead of answer_output
+         outputs=[diagram_output]  # Output: the rendered diagram image
+     )
+
+     # Refresh the repository list
+     refresh_button.click(
+         lambda: gr.update(choices=get_discovered_repos()),
+         inputs=[],
+         outputs=[repo_dropdown]
+     )
+
+     # Update the branch dropdown when a repository is selected
+     def update_branches(repo):
+         if repo:
+             owner, repo_name = repo.split("/")
+             branches = get_branches(owner, repo_name)
+             default_branch = get_default_branch(owner, repo_name)
+             return gr.update(choices=branches, value=default_branch)
+         return gr.update(choices=[], value=None)
+
+     repo_dropdown.change(
+         update_branches,
+         inputs=[repo_dropdown],
+         outputs=[branch_dropdown]
+     )
+
+     # Analyze README content based on the selected repository and branch
+     def analyze_readme(repo, branch):
+         if repo and branch:
+             owner, repo_name = repo.split("/")  # Extract the owner and repo name
+             # Pass the branch to analyze that branch's README
+             return process_readme(owner, repo_name, branch)
+         return "No repository or branch selected.", "", "", None
+
+     repo_dropdown.change(
+         analyze_readme,
+         inputs=[repo_dropdown, branch_dropdown],
+         outputs=[keywords_output, entities_output, summary_output, wordcloud_output]
+     )
+
+     branch_dropdown.change(
+         analyze_readme,  # Function to call when the branch changes
+         inputs=[repo_dropdown, branch_dropdown],  # Pass both repo and branch as inputs
+         outputs=[keywords_output, entities_output, summary_output, wordcloud_output]  # Update outputs
+     )
+
+     # Fetch files in the selected repository
+     def update_files(repo):
+         global files_data  # To store fetched files for later use
+         if repo:
+             owner, repo_name = repo.split("/")  # Extract owner and repo
+             print("Selected repository:", repo)
+             files = fetch_files(owner, repo_name)  # Call with default path=""
+             files_data = files  # Store the fetched files for later use
+             file_names = [f"{file['name']} ({file['path']})" for file in files]  # Prepare dropdown labels
+             print("Fetched files:", files)  # Debugging: ensure files are fetched correctly
+             print("File names for dropdown:", file_names)  # Debugging: ensure dropdown labels are created
+             return gr.update(choices=file_names, value=None)  # Update the dropdown
+         files_data = []  # Clear files_data if no repo is selected
+         return gr.update(choices=[], value=None)
+
+     repo_dropdown.change(
+         update_files,
+         inputs=[repo_dropdown],
+         outputs=[files_list]  # Update the files dropdown
+     )
+
+     # Fetch and display file content
+     def display_file_content(repo, branch, selected_file):
+         if repo and branch and selected_file:
+             owner, repo_name = repo.split("/")
+             file_path = selected_file.split(" (")[1][:-1]  # Extract the file path from the dropdown label
+             content = fetch_file_content(owner, repo_name, branch, file_path)
+             return content
+         return "No file selected."
+
+     files_list.change(
+         display_file_content,
+         inputs=[repo_dropdown, branch_dropdown, files_list],
+         outputs=[file_content_box]
+     )
+
+ demo.launch()