""" | |
Module which updates any of the issues to reflect changes in the issue state | |
""" | |
import json | |
import datetime | |
from defaults import TOKEN, OWNER, REPO | |
GITHUB_API_VERSION = "2022-11-28" | |
# Get the issues that have been updated since the last update | |
import json | |
import argparse | |
import requests | |
import os | |
import numpy as np | |
import json | |
import datetime | |
import logging | |
logging.basicConfig(level=logging.INFO) | |
logger = logging.getLogger(__name__) | |
today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") | |
OWNER = "huggingface" | |
REPO = "transformers" | |
GITHUB_API_VERSION = "2022-11-28" | |
TOKEN = os.environ.get("GITHUB_TOKEN") | |
JSON_FILE = f"issues.json" | |
def get_issues(
    input_filename=JSON_FILE,
    output_filename=JSON_FILE,
    github_api_version=GITHUB_API_VERSION,
    owner=OWNER,
    repo=REPO,
    token=TOKEN,
    n_pages=-1,
):
    """
    Get the issues from the repo that have been updated since the last download and merge them
    into the existing JSON file.
    """
    # Load the cached issues dict to find the most recent `updated_at` timestamp
    with open("issues_dict.json", "r") as f:
        issues = json.load(f)

    updated_at = [issue["updated_at"] for issue in issues.values()]
    most_recent = max(updated_at)

    # The output file is updated in place, so it must already exist
    if not os.path.exists(output_filename):
        raise ValueError(f"File {output_filename} does not exist")
    # Define the URL and headers
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "X-GitHub-Api-Version": f"{github_api_version}",
        "User-Agent": "amyeroberts",
    }
    per_page = 100
    page = 1
    query_params = {
        "state": "all",
        # Only request issues updated since the most recently stored update
        "since": most_recent,
        "sort": "created",
        "direction": "asc",
        "per_page": per_page,
        "page": page,
    }

    new_lines = []
    page_limit = (n_pages + page) if n_pages > 0 else float("inf")
    while True:
        if page >= page_limit:
            break

        # Send the GET request
        response = requests.get(url, headers=headers, params=query_params)
        if response.status_code != 200:
            raise ValueError(
                f"Request failed with status code {response.status_code} and message {response.text}"
            )

        json_response = response.json()
        logger.info(f"Page: {page}, number of issues: {len(json_response)}")

        # If we get an empty response, we've reached the end of the issues
        if len(json_response) == 0:
            break

        new_lines.extend(json_response)

        # If we get fewer issues than the page size, we've reached the end of the issues
        if len(json_response) < per_page:
            break

        page += 1
        query_params["page"] = page

    # Map issue number -> updated issue payload
    issue_lines_map = {issue["number"]: issue for issue in new_lines}

    # Rewrite the JSON-lines file, replacing any issue that was updated since the last download
    with open(input_filename, "r") as f:
        with open("tmp_" + output_filename, "w") as g:
            for line in f:
                issue = json.loads(line)
                number = issue["number"]
                if number in issue_lines_map:
                    g.write(json.dumps(issue_lines_map[number]))
                    g.write("\n")
                else:
                    g.write(line)

    os.rename("tmp_" + output_filename, output_filename)

    # Also dump the updated issues on their own for inspection
    with open("updated_issues.json", "w") as f:
        json.dump(issue_lines_map, f, indent=4, sort_keys=True)

    return output_filename
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_filename", type=str, default=JSON_FILE)
    parser.add_argument("--output_filename", type=str, default=JSON_FILE)
    parser.add_argument("--github_api_version", type=str, default=GITHUB_API_VERSION)
    parser.add_argument("--owner", type=str, default=OWNER)
    parser.add_argument("--repo", type=str, default=REPO)
    parser.add_argument("--token", type=str, default=TOKEN)
    parser.add_argument("--n_pages", type=int, default=-1)
    args = parser.parse_args()
    get_issues(**vars(args))
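
# Example invocation (a minimal sketch; the filename `update_issues.py` is an assumption, and
# GITHUB_TOKEN must be exported so the default --token value picks it up; issues.json and
# issues_dict.json are expected to exist from a previous full download):
#
#   GITHUB_TOKEN=<your-token> python update_issues.py --n_pages 5
#
# This fetches up to 5 pages (5 * 100 issues) updated since the most recent locally stored
# update and merges them into issues.json.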