|
{ |
|
"base_current_gpu_type": "NVIDIA A100-PCIE-40GB", |
|
"base_current_gpu_total_memory": 40339.3125, |
|
"base_memory_inference_first": 690.0, |
|
"base_memory_inference": 570.0, |
|
"base_token_generation_latency_sync": 25.232858657836914, |
|
"base_token_generation_latency_async": 25.168074667453766, |
|
"base_token_generation_throughput_sync": 0.03963086440423651, |
|
"base_token_generation_throughput_async": 0.03973287640048031, |
|
"base_token_generation_CO2_emissions": 6.916667409165086e-06, |
|
"base_token_generation_energy_consumption": 0.001975681904854854, |
|
"base_inference_latency_sync": 25.73680648803711, |
|
"base_inference_latency_async": 25.754165649414062, |
|
"base_inference_throughput_sync": 0.03885485949722692, |
|
"base_inference_throughput_async": 0.03882867003391939, |
|
"base_inference_CO2_emissions": 8.20508156037289e-06, |
|
"base_inference_energy_consumption": 1.885578995579329e-05, |
|
"smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", |
|
"smashed_current_gpu_total_memory": 40339.3125, |
|
"smashed_memory_inference_first": 104.0, |
|
"smashed_memory_inference": 106.0, |
|
"smashed_token_generation_latency_sync": 53.81842727661133, |
|
"smashed_token_generation_latency_async": 53.83266881108284, |
|
"smashed_token_generation_throughput_sync": 0.01858099633533113, |
|
"smashed_token_generation_throughput_async": 0.018576080697565642, |
|
"smashed_token_generation_CO2_emissions": 1.4030177588319765e-05, |
|
"smashed_token_generation_energy_consumption": 0.004235501136593081, |
|
"smashed_inference_latency_sync": 53.602509307861325, |
|
"smashed_inference_latency_async": 53.591203689575195, |
|
"smashed_inference_throughput_sync": 0.018655843036313607, |
|
"smashed_inference_throughput_async": 0.018659778679211203, |
|
"smashed_inference_CO2_emissions": 1.382929576343033e-05, |
|
"smashed_inference_energy_consumption": 3.627959529056468e-05 |
|
} |