PuristanLabs1 committed
Commit 4f99cf2 · verified · 1 Parent(s): 7a9c66c

Update app.py

Files changed (1):
  1. app.py +758 -2

app.py CHANGED
@@ -1,4 +1,484 @@
- How do i add directory name /temp to this file name in this function. The file is being generated in Huggingface spaces and HF saves all dynamic files in tmp directory..
+ import requests
+ import pandas as pd
+ from datetime import datetime
+ import gradio as gr
+ import pickle
+ from sentence_transformers import SentenceTransformer, util
+ from wordcloud import WordCloud
+ import matplotlib.pyplot as plt
+ import base64
+ from io import BytesIO
+ import json
+ from openai import OpenAI
+ from graphviz import Source
+ import re
+ from PIL import Image
+ import os
+ import uuid
+
+
+ #IMAGE_DIR = "./images"
+ IMAGE_DIR = "/tmp"
+ os.makedirs(IMAGE_DIR, exist_ok=True)
+ IMGUR_CLIENT_ID = "429c0410bdece6a"
+
+ GITHUB_API_URL = "https://api.github.com/search/repositories"
+ ACCESS_TOKEN = os.getenv("github_pat")
+ if not ACCESS_TOKEN:
+     raise ValueError("Missing GitHub Personal Access Token.")
+ HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
+
+
+ OPENAI_API_KEY = os.getenv("openai_key")
+ if not OPENAI_API_KEY:
+     raise ValueError("Missing OpenAI API Key. Please set it as a secret in Hugging Face.")
+
+ client = OpenAI(api_key=OPENAI_API_KEY)
+
+
+ ALLOWED_EXTENSIONS = [".py", ".js", ".md", ".toml", ".yaml"]
+
+
+ with open("github_topics_embeddings.pkl", "rb") as f:
+     topic_data = pickle.load(f)
+
+ topics = topic_data["topics"]
+ embeddings = topic_data["embeddings"]
+
+ discovered_repos = []
+
+
+ def search_similar_topics(input_text):
+     if not input_text.strip():
+         return "Enter topics to see suggestions."
+     try:
+         model = SentenceTransformer('all-MiniLM-L6-v2')
+         query_embedding = model.encode(input_text, convert_to_tensor=True)
+         similarities = util.pytorch_cos_sim(query_embedding, embeddings)
+         top_indices = similarities[0].argsort(descending=True)[:10]
+         return ", ".join([topics[i] for i in top_indices])
+     except Exception as e:
+         return f"Error in generating suggestions: {str(e)}"
+
+
+ def search_repositories(query, sort="stars", order="desc", total_repos=10):
+     all_repos = []
+     per_page = 100 if total_repos > 100 else total_repos
+     total_pages = (total_repos // per_page) + 1
+
+     for page in range(1, total_pages + 1):
+         params = {
+             "q": query,
+             "sort": sort,
+             "order": order,
+             "per_page": per_page,
+             "page": page,
+         }
+         response = requests.get(GITHUB_API_URL, headers=HEADERS, params=params)
+
+         if response.status_code != 200:
+             raise Exception(f"GitHub API error: {response.status_code} {response.text}")
+
+         items = response.json().get("items", [])
+         if not items:
+             break
+
+         all_repos.extend(items)
+         if len(all_repos) >= total_repos:
+             break
+
+     return all_repos[:total_repos]
+
+
+ def calculate_additional_metrics(repo):
+     created_date = datetime.strptime(repo["created_at"], "%Y-%m-%dT%H:%M:%SZ")
+     updated_date = datetime.strptime(repo["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
+     days_since_creation = (datetime.utcnow() - created_date).days
+     days_since_update = (datetime.utcnow() - updated_date).days
+     star_velocity = repo["stargazers_count"] / days_since_creation if days_since_creation > 0 else 0
+     fork_to_star_ratio = (repo["forks_count"] / repo["stargazers_count"] * 100) if repo["stargazers_count"] > 0 else 0
+     hidden_gem = "Yes" if repo["stargazers_count"] < 500 and repo["forks_count"] < 50 else "No"
+     hidden_gem_trend = "Rising" if star_velocity > 1 else "Stable"
+     rising_score = ((star_velocity * 10) +
+                     (repo["forks_count"] * 0.2) +
+                     (repo.get("watchers_count", 0) * 0.3) +
+                     (1 / (days_since_update + 1) * 20) -
+                     (repo["open_issues_count"] * 0.01))
+     legacy_score = (repo["stargazers_count"] * 0.6) + \
+                    (repo["forks_count"] * 0.3) + \
+                    (repo.get("watchers_count", 0) * 0.1) - \
+                    (repo["open_issues_count"] * 0.05)
+     owner, repo_name = repo["owner"]["login"], repo["name"]
+     repo_details_url = f"https://api.github.com/repos/{owner}/{repo_name}"
+     response = requests.get(repo_details_url, headers=HEADERS)
+     if response.status_code == 200:
+         repo_details = response.json()
+         actual_watchers = repo_details.get("subscribers_count", 0)
+     else:
+         actual_watchers = 0
+     watcher_to_stars_ratio = (actual_watchers / repo["stargazers_count"]) * 100 if repo["stargazers_count"] > 0 else 0
+
+     return {
+         "Rising Score": round(rising_score, 2),
+         "Legacy Score": round(legacy_score, 2),
+         "Star Velocity (Stars/Day)": round(star_velocity, 2),
+         "Fork-to-Star Ratio (%)": round(fork_to_star_ratio, 2),
+         "Watchers": actual_watchers,
+         "Watcher-to-Stars Ratio (%)": round(watcher_to_stars_ratio, 2),
+         "Language": repo.get("language", "N/A"),
+         "Topics": ", ".join(repo.get("topics", [])),
+         "Hidden Gem": hidden_gem,
+         "Hidden Gem Trend": hidden_gem_trend,
+         "Open Issues": repo["open_issues_count"],
+         "Created At": repo["created_at"],
+         "Last Updated": repo["pushed_at"],
+         "days_since_creation": round(days_since_creation, 2),
+         "days_since_update": round(days_since_update, 2),
+         "URL": repo["html_url"],
+     }
+
+
+ def gradio_interface(topics, start_date, language_filter, stars_min, stars_max, forks_min, forks_max, total_repos, sort_order):
+     global discovered_repos
+
+     if not topics.strip() and not start_date.strip():
+
+         return pd.DataFrame(), "Please provide at least a topic or a start date."
+
+     topics_list = [topic.strip() for topic in topics.split(",") if topic.strip()]
+     stars_range = (stars_min, stars_max)
+     forks_range = (forks_min, forks_max)
+     df = pd.DataFrame()
+     all_repos_data = []
+
+     try:
+
+         if not topics_list:
+             query = f"stars:{stars_range[0]}..{stars_range[1]} forks:{forks_range[0]}..{forks_range[1]}"
+             if start_date.strip():
+                 query += f" created:>{start_date.strip()}"
+             if language_filter:
+                 query += f" language:{language_filter}"
+
+
+             repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
+             for repo in repos:
+                 repo_data = {
+                     "Name": repo["name"],
+                     "Owner": repo["owner"]["login"],
+                     "Stars": repo["stargazers_count"],
+                     "Forks": repo["forks_count"],
+                     "Description": repo.get("description", "N/A"),
+                 }
+                 repo_data.update(calculate_additional_metrics(repo))
+                 all_repos_data.append(repo_data)
+         else:
+             for topic in topics_list:
+
+                 query = f"topic:{topic} stars:{stars_range[0]}..{stars_range[1]} forks:{forks_range[0]}..{forks_range[1]}"
+                 if start_date.strip():
+                     query += f" created:>{start_date.strip()}"
+                 if language_filter:
+                     query += f" language:{language_filter}"
+
+
+                 repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
+                 for repo in repos:
+                     repo_data = {
+                         "Name": repo["name"],
+                         "Owner": repo["owner"]["login"],
+                         "Stars": repo["stargazers_count"],
+                         "Forks": repo["forks_count"],
+                         "Description": repo.get("description", "N/A"),
+                     }
+                     repo_data.update(calculate_additional_metrics(repo))
+                     all_repos_data.append(repo_data)
+
+                     discovered_repos.append(f"{repo['owner']['login']}/{repo['name']}")
+
+         if not all_repos_data:
+             return pd.DataFrame(), "No repositories found matching the criteria."
+
+
+
+         discovered_repos = list(set(discovered_repos))
+
+
+         df = pd.DataFrame(all_repos_data)
+
+     except Exception as e:
+         print(f"Error: {e}")
+         return pd.DataFrame(), f"Error fetching repositories: {str(e)}"
+
+     csv_file = None
+     if not df.empty:
+         csv_file = "discovered_repositories.csv"
+         df.to_csv(csv_file, index=False)
+     return df, csv_file
+
+
+
+ def fetch_org_repositories(org_names, language_filter, stars_min, stars_max, forks_min, forks_max, sort_order, total_repos):
+     try:
+         org_list = [org.strip() for org in org_names.split(",") if org.strip()]
+         if not org_list:
+             return pd.DataFrame(), "Enter at least one organization."
+
+         all_repos_data = []
+         for org in org_list:
+
+             query = f"user:{org} stars:{stars_min}..{stars_max} forks:{forks_min}..{forks_max}"
+             if language_filter:
+                 query += f" language:{language_filter}"
+
+             repos = search_repositories(query=query, sort=sort_order, total_repos=total_repos)
+
+             for repo in repos:
+                 repo_data = {
+                     "Name": repo["name"],
+                     "Owner": repo["owner"]["login"],
+                     "Stars": repo["stargazers_count"],
+                     "Forks": repo["forks_count"],
+                     "Description": repo.get("description", "N/A"),
+                 }
+                 repo_data.update(calculate_additional_metrics(repo))
+                 all_repos_data.append(repo_data)
+
+         if not all_repos_data:
+             return pd.DataFrame(), "No repositories found for the specified organizations."
+
+
+         df = pd.DataFrame(all_repos_data)
+         csv_file = "organization_repositories.csv"
+         df.to_csv(csv_file, index=False)
+         return df, csv_file
+
+     except Exception as e:
+         print(f"Error in fetch_org_repositories: {e}")
+         return pd.DataFrame(), f"Error: {str(e)}"
+
+
+ def get_discovered_repos():
+     global discovered_repos
+     return discovered_repos
+
+ def process_readme(owner, repo, branch):
+
+     url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/README.md"
+     response = requests.get(url, headers=HEADERS)
+     if response.status_code == 200:
+         readme_content = response.text
+     else:
+
+         return f"Failed to fetch README content from branch {branch}.", "", "", None
+
+
+     MODEL = "gpt-4o-mini"
+
+     completion = client.chat.completions.create(
+         model=MODEL,
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant that extracts keywords, named entities, and generates summaries from text."},
+             {"role": "user", "content": f"""
+             Perform the following tasks on the following README file:
+             1. Extract the top 25 most important keywords from the text only.
+             2. Extract All Major named entities (e.g., people, organizations, technologies).
+             3. Summarize the content in one paragraph.
+
+             Return the results in the following JSON format:
+             {{
+                 "keywords": ["keyword1", "keyword2", ...],
+                 "entities": ["entity1", "entity2", ...],
+                 "summary": "A concise summary of the README."
+             }}
+
+             README file:
+             {readme_content}
+             """}
+         ],
+         response_format={"type": "json_object"}
+     )
+
+     result = completion.choices[0].message.content
+     result_json = json.loads(result)
+
+     keywords = ", ".join(result_json["keywords"])
+     entities = ", ".join(result_json["entities"])
+     summary = result_json["summary"]
+
+
+     wordcloud = WordCloud(width=800, height=400, background_color='white').generate(keywords)
+     plt.figure(figsize=(10, 5))
+     plt.imshow(wordcloud, interpolation='bilinear')
+     plt.axis('off')
+
+     return keywords, entities, summary, plt
+
+
+ def get_branches(owner, repo):
+     url = f"https://api.github.com/repos/{owner}/{repo}/branches"
+     response = requests.get(url, headers=HEADERS)
+     if response.status_code == 200:
+         branches = [branch["name"] for branch in response.json()]
+         return branches
+     else:
+         return []
+
+
+ def get_default_branch(owner, repo):
+     url = f"https://api.github.com/repos/{owner}/{repo}"
+     response = requests.get(url, headers=HEADERS)
+     if response.status_code == 200:
+         repo_data = response.json()
+         return repo_data["default_branch"]
+     else:
+         return None
+
+ def fetch_files(owner, repo, path=""):
+
+
+     url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" if path else f"https://api.github.com/repos/{owner}/{repo}/contents"
+     response = requests.get(url, headers=HEADERS)
+
+     if response.status_code != 200:
+         return []  # return an empty list so callers (and the recursive call below) can safely iterate
+
+     files = []
+     for item in response.json():
+         if item["type"] == "file":  # Only add files
+
+             if any(item["name"].endswith(ext) for ext in ALLOWED_EXTENSIONS):
+                 files.append({
+                     "name": item["name"],
+                     "path": item["path"],
+                     "download_url": item["download_url"]
+                 })
+         elif item["type"] == "dir":
+
+             sub_files = fetch_files(owner, repo, item["path"])
+             files.extend(sub_files)
+     return files
+
+
+
+ def fetch_file_content(owner, repo, branch, file_path):
+     file_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{file_path}"
+     response = requests.get(file_url)
+
+     if response.status_code == 200:
+         return response.text
+     else:
+         return f"Failed to fetch file content: {response.status_code}"
+
+
+ def ask_code_question(code_content, question):
+     if not code_content.strip():
+         return "No code content available to analyze."
+     if not question.strip():
+         return "Please enter a question about the code."
+
+
+     prompt = f"""
+     Here is a Python file from a GitHub repository:
+
+     {code_content}
+
+     Please answer the following question about this file:
+     - {question}
+     """
+
+     try:
+
+         response = client.chat.completions.create(
+             model="gpt-4o-mini",
+             messages=[
+                 {"role": "system", "content": "You are a helpful assistant skilled in understanding code."},
+                 {"role": "user", "content": prompt}
+             ]
+         )
+
+         return response.choices[0].message.content.strip()
+     except Exception as e:
+         return f"Error querying the LLM: {str(e)}"
+
+
+ def upload_image_to_imgur(image_path):
+     """
+     Upload an image to Imgur and return the hosted URL.
+
+     Args:
+         image_path (str): Path to the image file to upload.
+
+     Returns:
+         str: The URL of the uploaded image or an error message.
+     """
+     url = "https://api.imgur.com/3/image"
+     headers = {
+         "Authorization": f"Client-ID {IMGUR_CLIENT_ID}"
+     }
+     with open(image_path, "rb") as image_file:
+         payload = {
+             "image": image_file,
+             "type": "file"
+         }
+         try:
+             response = requests.post(url, headers=headers, files=payload)
+             if response.status_code == 200:
+                 data = response.json()
+                 return data["data"]["link"]  # URL of the uploaded image
+             else:
+                 return f"Failed to upload image. Status code: {response.status_code}, Response: {response.text}"
+         except Exception as e:
+             return f"Error uploading image to Imgur: {str(e)}"
+
+ def generate_dot_code_from_code(code_content, diagram_type):
+     if not code_content.strip():
+         return "No code content available to analyze."
+
+
+     prompt = f"""
+     Here is some Python code from a GitHub repository:
+
+     {code_content}
+
+     Please generate a {diagram_type} for this code in Graphviz DOT/digraph format. Ensure the DOT code is valid and renderable.
+     Don't include any other text. Don't provide any other explanatory commentary.
+     Ensure the DOT code includes all necessary opening and closing brackets {{ }} for graphs and subgraphs.
+     """
+
+     try:
+
+         response = client.chat.completions.create(
+             model="gpt-4o",
+             messages=[
+                 {"role": "system", "content": "You are a helpful assistant that generates Graphviz DOT code for visualizing Python code. You are restricted to only generate Graphviz Code starting with digraph & ending with }"},
+                 {"role": "user", "content": prompt}
+             ]
+         )
+         raw_dot_code = response.choices[0].message.content.strip()
+         validated_dot_code = validate_and_fix_dot_code(raw_dot_code)  # Fix any missing brackets
+
+         pattern = r"digraph\b[\s\S]*?^\}"
+         match = re.search(pattern, validated_dot_code, re.MULTILINE | re.DOTALL)
+         if match:
+             validated_dot_code = match.group(0)  # Extract the matched content
+         else:
+             return "Failed to extract valid Graphviz code."
+
+         return validated_dot_code
+     except Exception as e:
+         return f"Error querying GPT-4o: {str(e)}"
+
+ def validate_and_fix_dot_code(dot_code):
+
+     open_brackets = dot_code.count("{")
+     close_brackets = dot_code.count("}")
+
+     if open_brackets > close_brackets:
+         missing_brackets = open_brackets - close_brackets
+         dot_code += "}" * missing_brackets
+
+     return dot_code
 
  def render_dot_code(dot_code, filename=None):
      """
@@ -36,4 +516,280 @@ def render_dot_code(dot_code, filename=None):
 
      except Exception as e:
          print(f"Error rendering or uploading diagram: {e}")
-         return f"Error rendering or uploading diagram: {e}"
+         return f"Error rendering or uploading diagram: {e}"
+
+ import time
+
+ def handle_generate_diagram(code_content, diagram_type, retries=5, wait_time=1):
+
+     imgur_url = render_dot_code(generate_dot_code_from_code(code_content, diagram_type))
+     if imgur_url.startswith("http"):  # Check if the response is a valid URL
+         return f'<img src="{imgur_url}" alt="Generated Diagram" style="max-width: 100%; height: auto;">'
+     else:
+         return f"<p>Error: {imgur_url}</p>"  # Return the error message in HTML format
+
+
+ # Gradio Interface
+ with gr.Blocks() as demo:
+     # Tab 1: Repository Discovery
+     with gr.Tab("Repository Discovery"):
+         with gr.Row():
+             topics_input = gr.Textbox(
+                 label="Topics (comma-separated, leave empty to fetch by date only)",
+                 placeholder="e.g., machine-learning, deep-learning (leave empty for date-based search)"
+             )
+             similar_topics = gr.Textbox(
+                 label="Similar Topics (based on embeddings)",
+                 interactive=False
+             )
+             gr.Button("Get Similar Topics").click(
+                 search_similar_topics,
+                 inputs=[topics_input],
+                 outputs=[similar_topics]
+             )
+
+         with gr.Row():
+             start_date_input = gr.Textbox(
+                 label="Start Date (YYYY-MM-DD, leave empty if not filtering by date)",
+                 placeholder="Set to filter recent repositories by date or leave empty"
+             )
+             language_filter = gr.Dropdown(
+                 choices=["", "Python", "JavaScript", "Java", "C++", "Ruby", "Go"],
+                 label="Language Filter",
+                 value=""
+             )
+             stars_min = gr.Number(label="Stars Min", value=10)
+             stars_max = gr.Number(label="Stars Max", value=1000)
+         with gr.Row():
+             forks_min = gr.Number(label="Forks Min", value=0)
+             forks_max = gr.Number(label="Forks Max", value=500)
+             total_repos = gr.Number(label="Total Repositories", value=10, step=10)
+             sort_order = gr.Dropdown(
+                 choices=["stars", "forks", "updated"],
+                 label="Sort Order",
+                 value="stars"
+             )
+         with gr.Row():
+             output_data = gr.Dataframe(label="Discovered Repositories")
+             output_file = gr.File(label="Download CSV", file_count="single")
+         gr.Button("Discover Repositories").click(
+             gradio_interface,
+             inputs=[
+                 topics_input, start_date_input, language_filter, stars_min, stars_max,
+                 forks_min, forks_max, total_repos, sort_order
+             ],
+             outputs=[output_data, output_file]
+         )
+
+     # Tab 2: Organization Watch
+     with gr.Tab("Organization Watch"):
+         with gr.Row():
+             org_input = gr.Textbox(
+                 label="Organizations (comma-separated)",
+                 placeholder="e.g., facebookresearch, openai"
+             )
+         with gr.Row():
+             language_filter = gr.Dropdown(
+                 choices=["", "Python", "JavaScript", "Java", "C++", "Ruby", "Go"],
+                 label="Language Filter",
+                 value=""
+             )
+             stars_min = gr.Number(label="Stars Min", value=10)
+             stars_max = gr.Number(label="Stars Max", value=1000)
+         with gr.Row():
+             forks_min = gr.Number(label="Forks Min", value=0)
+             forks_max = gr.Number(label="Forks Max", value=500)
+             total_repos = gr.Number(label="Total Repositories", value=10, step=10)
+             sort_order = gr.Dropdown(
+                 choices=["stars", "forks", "updated"],
+                 label="Sort Order",
+                 value="stars"
+             )
+         with gr.Row():
+             output_data = gr.Dataframe(label="Repositories by Organizations")
+             output_file = gr.File(label="Download CSV", file_count="single")
+         gr.Button("Fetch Organization Repositories").click(
+             fetch_org_repositories,
+             inputs=[
+                 org_input, language_filter, stars_min, stars_max, forks_min, forks_max,
+                 sort_order, total_repos
+             ],
+             outputs=[output_data, output_file]
+         )
+
+     # Tab 3: Code Analysis
+
+     with gr.Tab("Code Analysis"):
+         with gr.Row():
+             repo_dropdown = gr.Dropdown(
+                 label="Select Repository",
+                 choices=[],
+                 interactive=True
+             )
+             refresh_button = gr.Button("Refresh Repositories")
+         with gr.Row():
+             branch_dropdown = gr.Dropdown(
+                 label="Select Branch",
+                 choices=[],
+                 interactive=True
+             )
+         with gr.Row():
+             keywords_output = gr.Textbox(label="Keywords")
+             entities_output = gr.Textbox(label="Entities")
+         with gr.Row():
+             summary_output = gr.Textbox(label="Summary")
+             wordcloud_output = gr.Plot(label="Word Cloud")
+
+
+         with gr.Row():
+             files_list = gr.Dropdown(
+                 label="Files in Repository",
+                 choices=[],
+                 interactive=True
+             )
+
+         with gr.Row():
+             file_content_box = gr.Textbox(
+                 label="File Content",
+                 lines=20,
+                 interactive=True
+             )
+
+
+
+         with gr.Row():
+             question_input = gr.Textbox(
+                 label="Ask a Question",
+                 placeholder="Enter your question about the code...",
+                 lines=1
+             )
+             question_button = gr.Button("Get Answer")
+
+         with gr.Row():
+             answer_output = gr.Textbox(label="Bot's Answer", lines=10, interactive=False)
+
+
+         with gr.Row():
+             diagram_type = gr.Dropdown(
+                 label="Select Diagram Type",
+                 choices=["Call Graph", "Data Flow Diagram", "Sequence Diagram", "Class Diagram", "Component Diagram", "Workflow Diagram"],
+                 value="Call Graph"
+             )
+             generate_diagram_button = gr.Button("Generate Diagram")
+         with gr.Row():
+
+             diagram_output = gr.HTML(
+                 label="Generated Diagram",
+
+             )
+
+
+
+         question_button.click(
+             ask_code_question,
+             inputs=[file_content_box, question_input],
+             outputs=[answer_output]
+         )
+
+
+     def generate_and_render_diagram(code_content, diagram_type):
+
+         dot_code = generate_dot_code_from_code(code_content, diagram_type)
+
+
+         if not dot_code.strip().startswith("digraph"):
+             return "Invalid DOT code generated."
+
+         unique_filename = f"diagram_{uuid.uuid4().hex}"
+         return render_dot_code(dot_code, filename=unique_filename)
+
+
+     generate_diagram_button.click(
+         handle_generate_diagram,
+         inputs=[file_content_box, diagram_type],
+         outputs=[diagram_output]
+     )
+
+
+     refresh_button.click(
+         lambda: gr.update(choices=get_discovered_repos()),
+         inputs=[],
+         outputs=[repo_dropdown]
+     )
+
+
+     def update_branches(repo):
+         if repo:
+             owner, repo_name = repo.split("/")
+             branches = get_branches(owner, repo_name)
+             default_branch = get_default_branch(owner, repo_name)
+             return gr.update(choices=branches, value=default_branch)
+         return gr.update(choices=[], value=None)
+
+     repo_dropdown.change(
+         update_branches,
+         inputs=[repo_dropdown],
+         outputs=[branch_dropdown]
+     )
+
+
+     def analyze_readme(repo, branch):
+         if repo and branch:
+             owner, repo_name = repo.split("/")
+
+             return process_readme(owner, repo_name, branch)
+         return "No repository or branch selected.", "", "", None
+
+     repo_dropdown.change(
+         analyze_readme,
+         inputs=[repo_dropdown, branch_dropdown],
+         outputs=[keywords_output, entities_output, summary_output, wordcloud_output]
+     )
+
+     branch_dropdown.change(
+         analyze_readme,
+         inputs=[repo_dropdown, branch_dropdown],
+         outputs=[keywords_output, entities_output, summary_output, wordcloud_output]
+     )
+
+
+     def update_files(repo):
+         global files_data
+         if repo:
+             owner, repo_name = repo.split("/")
+
+             files = fetch_files(owner, repo_name)
+             files_data = files
+             file_names = [f"{file['name']} ({file['path']})" for file in files]
+             return gr.update(choices=file_names, value=None)
+         files_data = []
+         return gr.update(choices=[], value=None)
+
+
+
+     repo_dropdown.change(
+         lambda repo: update_files(repo),
+         inputs=[repo_dropdown],
+         outputs=[files_list]
+     )
+
+
+     def display_file_content(repo, branch, selected_file):
+         if repo and branch and selected_file:
+             owner, repo_name = repo.split("/")
+             file_path = selected_file.split(" (")[1][:-1]
+             content = fetch_file_content(owner, repo_name, branch, file_path)
+             return content
+         return "No file selected."
+
+     files_list.change(
+         display_file_content,
+         inputs=[repo_dropdown, branch_dropdown, files_list],
+         outputs=[file_content_box]
+     )
+
+
+
+ #demo.launch()
+ #demo.launch(share=True, server_name="0.0.0.0", server_port=7860, static_dirs={"images": "./images"})
+ demo.launch(share=True)
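
For context on the change this commit makes: the line removed at the top of the diff asks how to prepend the Space's /tmp directory to a generated file name, and the new code answers it by writing generated images under IMAGE_DIR = "/tmp", the directory that is reliably writable on Hugging Face Spaces. A minimal sketch of that pattern, shown with a hypothetical build_output_path helper that is not part of this commit:

    import os
    import uuid

    IMAGE_DIR = "/tmp"  # writable location on Hugging Face Spaces
    os.makedirs(IMAGE_DIR, exist_ok=True)

    def build_output_path(prefix="diagram"):
        # Hypothetical helper: join IMAGE_DIR onto a unique file name so the
        # rendered output lands somewhere the Space is allowed to write.
        return os.path.join(IMAGE_DIR, f"{prefix}_{uuid.uuid4().hex}")

The uuid-based file names mirror what the diff's own generate_and_render_diagram does before handing the path to render_dot_code.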