import gradio as gr
import pandas as pd
from accelerate.utils import convert_bytes
from hub_utils import check_for_discussion, report_results
from huggingface_hub.utils import HfHubHTTPError
from model_utils import calculate_memory, get_model
def get_results(model_name: str, library: str, options: list, access_token: str):
    """Compute and render memory-usage results for a Hub model.

    Args:
        model_name: Hugging Face Hub model id to inspect.
        library: Framework the model belongs to (forwarded to ``get_model``).
        options: Selected dtypes/precisions; assumed to align one-to-one with
            the rows returned by ``calculate_memory`` — TODO confirm.
        access_token: Hub token forwarded to ``get_model`` (gated/private models).

    Returns:
        A 4-element list of Gradio outputs: the markdown title, the results
        table update, the training-explanation markdown update, and the
        per-stage memory table update (the last two hidden when no training
        numbers are available).
    """
    model = get_model(model_name, library, access_token)
    title = f"## Memory usage for '{model_name}'"
    data = calculate_memory(model, options)

    # Collect raw per-stage training-memory values (bytes; -1 means "N/A")
    # and collapse each row's peak-vRAM dict into a single display string.
    stages = {"model": [], "gradients": [], "optimizer": [], "step": []}
    for option in data:
        training = option["Training using Adam (Peak vRAM)"]
        for stage in stages:
            stages[stage].append(training[stage])
        peak = max(training.values())
        # -1 is the upstream sentinel for "not available"; otherwise render
        # the byte count in human-readable form.
        option["Training using Adam (Peak vRAM)"] = (
            "N/A" if peak == -1 else convert_bytes(peak)
        )

    if any(model_bytes != -1 for model_bytes in stages["model"]):
        out_explain = "## Training using Adam explained:\n"
        out_explain += "When training on a batch size of 1, each stage of the training process is expected to have near the following memory results for each precision you selected:\n"
        # One row per selected dtype that actually has training numbers.
        memory_values = pd.DataFrame(
            columns=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"]
        )
        for i, dtype in enumerate(options):
            if stages["model"][i] != -1:
                memory_values.loc[len(memory_values.index)] = [
                    dtype,
                    convert_bytes(stages["model"][i]),
                    convert_bytes(stages["gradients"][i]),
                    convert_bytes(stages["optimizer"][i]),
                    convert_bytes(stages["step"][i]),
                ]
        return [
            title,
            gr.update(visible=True, value=pd.DataFrame(data)),
            gr.update(visible=True, value=out_explain),
            gr.update(visible=True, value=memory_values),
        ]
    # No training numbers at all: show the results table, hide the rest.
    return [
        title,
        gr.update(visible=True, value=pd.DataFrame(data)),
        gr.update(visible=False, value=""),
        gr.update(visible=False, value=pd.DataFrame()),
    ]
with gr.Blocks() as demo:
with gr.Column():
gr.Markdown(
"""