811f151d8b0d3af6d25a9f65c3c549ea5171b7c7629ddb0b24d15e4a82c3fa05
Browse files- base_results.json +18 -0
- plots.png +0 -0
- smashed_results.json +18 -0
base_results.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"current_gpu_type": "NVIDIA A10G",
|
3 |
+
"current_gpu_total_memory": 22716.5,
|
4 |
+
"memory_inference_first": 9396.0,
|
5 |
+
"memory_inference": 9396.0,
|
6 |
+
"token_generation_latency_sync": 41.4751594543457,
|
7 |
+
"token_generation_latency_async": 41.440220922231674,
|
8 |
+
"token_generation_throughput_sync": 0.024110817490666007,
|
9 |
+
"token_generation_throughput_async": 0.024131145484881433,
|
10 |
+
"token_generation_CO2_emissions": 2.7722372785106468e-06,
|
11 |
+
"token_generation_energy_consumption": 0.0013460853224844924,
|
12 |
+
"inference_latency_sync": 34.25094375610352,
|
13 |
+
"inference_latency_async": 27.751636505126953,
|
14 |
+
"inference_throughput_sync": 0.02919627579084737,
|
15 |
+
"inference_throughput_async": 0.036033911002518924,
|
16 |
+
"inference_CO2_emissions": 1.7203736862892723e-06,
|
17 |
+
"inference_energy_consumption": 9.17469136023194e-06
|
18 |
+
}
|
plots.png
ADDED
![]() |
smashed_results.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"current_gpu_type": "NVIDIA A10G",
|
3 |
+
"current_gpu_total_memory": 22716.5,
|
4 |
+
"memory_inference_first": 4286.0,
|
5 |
+
"memory_inference": 4208.0,
|
6 |
+
"token_generation_latency_sync": 64.72505264282226,
|
7 |
+
"token_generation_latency_async": 64.4813310354948,
|
8 |
+
"token_generation_throughput_sync": 0.015449968121592494,
|
9 |
+
"token_generation_throughput_async": 0.015508364730398223,
|
10 |
+
"token_generation_CO2_emissions": 4.161721770576098e-06,
|
11 |
+
"token_generation_energy_consumption": 0.0031534097319298876,
|
12 |
+
"inference_latency_sync": 47.56480026245117,
|
13 |
+
"inference_latency_async": 47.13015556335449,
|
14 |
+
"inference_throughput_sync": 0.0210239503683867,
|
15 |
+
"inference_throughput_async": 0.02121783787994832,
|
16 |
+
"inference_CO2_emissions": 3.2781412719702414e-06,
|
17 |
+
"inference_energy_consumption": 1.0538887149448775e-05
|
18 |
+
}
|