mindef-overdracht/llm-throughput-tests-mindef-metadateren/results/results_openai_gpt-oss-120b_services/benchmark_results.json
2026-06-02 11:46:24 +02:00

1254 lines
30 KiB
JSON

{
"timestamp": "2026-01-22T14:46:31.276437",
"model_name": "openai/gpt-oss-120b",
"results": [
{
"config": {
"input_tokens": 1000,
"output_tokens": 512,
"batch_size": 1,
"num_batches": 1,
"total_requests": 1,
"actual_input_tokens": 1268
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 1,
"failed_requests": 0
},
"latency": {
"mean": 3.25,
"std": 0.0,
"min": 3.25,
"max": 3.25,
"p50": 3.25,
"p95": 3.25,
"p99": 3.25,
"ci_95_lower": 3.25,
"ci_95_upper": 3.25
},
"ttft": {
"mean": 1.228,
"std": 0.0,
"p50": 1.228,
"p90": 1.228
},
"tokens": {
"total_generated": 512,
"content_tokens": 512,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 157.56,
"concurrent_content_tps": 157.56,
"requests_per_second": 0.31,
"actual_wall_time": 3.25,
"efficiency_percent": 100.0
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 1.0,
"avg_batch_throughput": 157.56,
"min_batch_throughput": 157.56,
"max_batch_throughput": 157.56
}
},
{
"config": {
"input_tokens": 1000,
"output_tokens": 512,
"batch_size": 8,
"num_batches": 1,
"total_requests": 8,
"actual_input_tokens": 1268
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 8,
"failed_requests": 0
},
"latency": {
"mean": 6.513,
"std": 0.018,
"min": 6.467,
"max": 6.524,
"p50": 6.52,
"p95": 6.523,
"p99": 6.524,
"ci_95_lower": 6.501,
"ci_95_upper": 6.525
},
"ttft": {
"mean": 3.362,
"std": 1.481,
"p50": 2.592,
"p90": 5.281
},
"tokens": {
"total_generated": 4096,
"content_tokens": 4096,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 627.74,
"concurrent_content_tps": 627.74,
"requests_per_second": 1.23,
"actual_wall_time": 6.525,
"efficiency_percent": 99.81
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 8.0,
"avg_batch_throughput": 627.74,
"min_batch_throughput": 627.74,
"max_batch_throughput": 627.74
}
},
{
"config": {
"input_tokens": 1000,
"output_tokens": 512,
"batch_size": 16,
"num_batches": 1,
"total_requests": 16,
"actual_input_tokens": 1268
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 16,
"failed_requests": 0
},
"latency": {
"mean": 8.828,
"std": 0.013,
"min": 8.798,
"max": 8.844,
"p50": 8.825,
"p95": 8.842,
"p99": 8.844,
"ci_95_lower": 8.821,
"ci_95_upper": 8.834
},
"ttft": {
"mean": 3.498,
"std": 1.405,
"p50": 2.913,
"p90": 6.162
},
"tokens": {
"total_generated": 8192,
"content_tokens": 8192,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 925.82,
"concurrent_content_tps": 925.82,
"requests_per_second": 1.81,
"actual_wall_time": 8.848,
"efficiency_percent": 99.77
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 16.0,
"avg_batch_throughput": 925.82,
"min_batch_throughput": 925.82,
"max_batch_throughput": 925.82
}
},
{
"config": {
"input_tokens": 1000,
"output_tokens": 512,
"batch_size": 24,
"num_batches": 1,
"total_requests": 24,
"actual_input_tokens": 1268
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 24,
"failed_requests": 0
},
"latency": {
"mean": 10.38,
"std": 0.031,
"min": 10.299,
"max": 10.414,
"p50": 10.393,
"p95": 10.404,
"p99": 10.412,
"ci_95_lower": 10.367,
"ci_95_upper": 10.392
},
"ttft": {
"mean": 4.489,
"std": 2.111,
"p50": 3.683,
"p90": 8.168
},
"tokens": {
"total_generated": 12288,
"content_tokens": 12288,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1179.3,
"concurrent_content_tps": 1179.3,
"requests_per_second": 2.3,
"actual_wall_time": 10.42,
"efficiency_percent": 99.62
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 24.0,
"avg_batch_throughput": 1179.3,
"min_batch_throughput": 1179.3,
"max_batch_throughput": 1179.3
}
},
{
"config": {
"input_tokens": 1000,
"output_tokens": 512,
"batch_size": 32,
"num_batches": 1,
"total_requests": 32,
"actual_input_tokens": 1268
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 32,
"failed_requests": 0
},
"latency": {
"mean": 11.608,
"std": 0.043,
"min": 11.464,
"max": 11.642,
"p50": 11.632,
"p95": 11.64,
"p99": 11.641,
"ci_95_lower": 11.593,
"ci_95_upper": 11.623
},
"ttft": {
"mean": 4.908,
"std": 2.125,
"p50": 4.134,
"p90": 8.842
},
"tokens": {
"total_generated": 16384,
"content_tokens": 16384,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1405.72,
"concurrent_content_tps": 1405.72,
"requests_per_second": 2.75,
"actual_wall_time": 11.655,
"efficiency_percent": 99.6
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 32.0,
"avg_batch_throughput": 1405.72,
"min_batch_throughput": 1405.72,
"max_batch_throughput": 1405.72
}
},
{
"config": {
"input_tokens": 1000,
"output_tokens": 512,
"batch_size": 64,
"num_batches": 1,
"total_requests": 64,
"actual_input_tokens": 1268
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 64,
"failed_requests": 0
},
"latency": {
"mean": 15.496,
"std": 0.096,
"min": 15.255,
"max": 15.789,
"p50": 15.516,
"p95": 15.597,
"p99": 15.67,
"ci_95_lower": 15.473,
"ci_95_upper": 15.52
},
"ttft": {
"mean": 5.896,
"std": 2.086,
"p50": 5.354,
"p90": 8.428
},
"tokens": {
"total_generated": 32768,
"content_tokens": 32768,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 2072.97,
"concurrent_content_tps": 2072.97,
"requests_per_second": 4.05,
"actual_wall_time": 15.807,
"efficiency_percent": 98.03
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 64.0,
"avg_batch_throughput": 2072.97,
"min_batch_throughput": 2072.97,
"max_batch_throughput": 2072.97
}
},
{
"config": {
"input_tokens": 2500,
"output_tokens": 512,
"batch_size": 1,
"num_batches": 1,
"total_requests": 1,
"actual_input_tokens": 3053
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 1,
"failed_requests": 0
},
"latency": {
"mean": 3.44,
"std": 0.0,
"min": 3.44,
"max": 3.44,
"p50": 3.44,
"p95": 3.44,
"p99": 3.44,
"ci_95_lower": 3.44,
"ci_95_upper": 3.44
},
"ttft": {
"mean": 1.375,
"std": 0.0,
"p50": 1.375,
"p90": 1.375
},
"tokens": {
"total_generated": 512,
"content_tokens": 512,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 148.84,
"concurrent_content_tps": 148.84,
"requests_per_second": 0.29,
"actual_wall_time": 3.44,
"efficiency_percent": 100.0
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 1.0,
"avg_batch_throughput": 148.84,
"min_batch_throughput": 148.84,
"max_batch_throughput": 148.84
}
},
{
"config": {
"input_tokens": 2500,
"output_tokens": 512,
"batch_size": 8,
"num_batches": 1,
"total_requests": 8,
"actual_input_tokens": 3053
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 8,
"failed_requests": 0
},
"latency": {
"mean": 6.921,
"std": 0.006,
"min": 6.905,
"max": 6.927,
"p50": 6.922,
"p95": 6.927,
"p99": 6.927,
"ci_95_lower": 6.916,
"ci_95_upper": 6.925
},
"ttft": {
"mean": 2.777,
"std": 0.805,
"p50": 2.474,
"p90": 3.941
},
"tokens": {
"total_generated": 4096,
"content_tokens": 4096,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 591.17,
"concurrent_content_tps": 591.17,
"requests_per_second": 1.15,
"actual_wall_time": 6.929,
"efficiency_percent": 99.89
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 8.0,
"avg_batch_throughput": 591.17,
"min_batch_throughput": 591.17,
"max_batch_throughput": 591.17
}
},
{
"config": {
"input_tokens": 2500,
"output_tokens": 512,
"batch_size": 16,
"num_batches": 1,
"total_requests": 16,
"actual_input_tokens": 3053
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 16,
"failed_requests": 0
},
"latency": {
"mean": 9.419,
"std": 0.01,
"min": 9.404,
"max": 9.431,
"p50": 9.425,
"p95": 9.43,
"p99": 9.431,
"ci_95_lower": 9.414,
"ci_95_upper": 9.424
},
"ttft": {
"mean": 3.433,
"std": 0.733,
"p50": 3.253,
"p90": 4.342
},
"tokens": {
"total_generated": 8192,
"content_tokens": 8192,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 868.43,
"concurrent_content_tps": 868.43,
"requests_per_second": 1.7,
"actual_wall_time": 9.433,
"efficiency_percent": 99.85
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 16.0,
"avg_batch_throughput": 868.43,
"min_batch_throughput": 868.43,
"max_batch_throughput": 868.43
}
},
{
"config": {
"input_tokens": 2500,
"output_tokens": 512,
"batch_size": 24,
"num_batches": 1,
"total_requests": 24,
"actual_input_tokens": 3053
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 24,
"failed_requests": 0
},
"latency": {
"mean": 11.23,
"std": 0.054,
"min": 10.977,
"max": 11.251,
"p50": 11.245,
"p95": 11.25,
"p99": 11.25,
"ci_95_lower": 11.208,
"ci_95_upper": 11.252
},
"ttft": {
"mean": 3.744,
"std": 1.427,
"p50": 3.333,
"p90": 4.496
},
"tokens": {
"total_generated": 12288,
"content_tokens": 12288,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1091.4,
"concurrent_content_tps": 1091.4,
"requests_per_second": 2.13,
"actual_wall_time": 11.259,
"efficiency_percent": 99.74
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 24.0,
"avg_batch_throughput": 1091.4,
"min_batch_throughput": 1091.4,
"max_batch_throughput": 1091.4
}
},
{
"config": {
"input_tokens": 2500,
"output_tokens": 512,
"batch_size": 32,
"num_batches": 1,
"total_requests": 32,
"actual_input_tokens": 3053
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 32,
"failed_requests": 0
},
"latency": {
"mean": 12.53,
"std": 0.038,
"min": 12.424,
"max": 12.571,
"p50": 12.546,
"p95": 12.568,
"p99": 12.57,
"ci_95_lower": 12.517,
"ci_95_upper": 12.544
},
"ttft": {
"mean": 4.884,
"std": 1.795,
"p50": 4.274,
"p90": 6.106
},
"tokens": {
"total_generated": 16384,
"content_tokens": 16384,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1302.77,
"concurrent_content_tps": 1302.77,
"requests_per_second": 2.54,
"actual_wall_time": 12.576,
"efficiency_percent": 99.63
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 32.0,
"avg_batch_throughput": 1302.77,
"min_batch_throughput": 1302.77,
"max_batch_throughput": 1302.77
}
},
{
"config": {
"input_tokens": 2500,
"output_tokens": 512,
"batch_size": 64,
"num_batches": 1,
"total_requests": 64,
"actual_input_tokens": 3053
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 64,
"failed_requests": 0
},
"latency": {
"mean": 16.78,
"std": 0.07,
"min": 16.507,
"max": 16.953,
"p50": 16.778,
"p95": 16.934,
"p99": 16.95,
"ci_95_lower": 16.763,
"ci_95_upper": 16.797
},
"ttft": {
"mean": 6.451,
"std": 2.606,
"p50": 5.536,
"p90": 10.157
},
"tokens": {
"total_generated": 32768,
"content_tokens": 32768,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1931.74,
"concurrent_content_tps": 1931.74,
"requests_per_second": 3.77,
"actual_wall_time": 16.963,
"efficiency_percent": 98.92
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 64.0,
"avg_batch_throughput": 1931.74,
"min_batch_throughput": 1931.74,
"max_batch_throughput": 1931.74
}
},
{
"config": {
"input_tokens": 5000,
"output_tokens": 512,
"batch_size": 1,
"num_batches": 1,
"total_requests": 1,
"actual_input_tokens": 6024
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 1,
"failed_requests": 0
},
"latency": {
"mean": 3.725,
"std": 0.0,
"min": 3.725,
"max": 3.725,
"p50": 3.725,
"p95": 3.725,
"p99": 3.725,
"ci_95_lower": 3.725,
"ci_95_upper": 3.725
},
"ttft": {
"mean": 1.855,
"std": 0.0,
"p50": 1.855,
"p90": 1.855
},
"tokens": {
"total_generated": 512,
"content_tokens": 512,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 137.46,
"concurrent_content_tps": 137.46,
"requests_per_second": 0.27,
"actual_wall_time": 3.725,
"efficiency_percent": 100.0
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 1.0,
"avg_batch_throughput": 137.46,
"min_batch_throughput": 137.46,
"max_batch_throughput": 137.46
}
},
{
"config": {
"input_tokens": 5000,
"output_tokens": 512,
"batch_size": 8,
"num_batches": 1,
"total_requests": 8,
"actual_input_tokens": 6024
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 8,
"failed_requests": 0
},
"latency": {
"mean": 7.418,
"std": 0.058,
"min": 7.269,
"max": 7.448,
"p50": 7.444,
"p95": 7.447,
"p99": 7.448,
"ci_95_lower": 7.378,
"ci_95_upper": 7.458
},
"ttft": {
"mean": 3.301,
"std": 1.58,
"p50": 2.914,
"p90": 4.562
},
"tokens": {
"total_generated": 4096,
"content_tokens": 4096,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 549.89,
"concurrent_content_tps": 549.89,
"requests_per_second": 1.07,
"actual_wall_time": 7.449,
"efficiency_percent": 99.58
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 8.0,
"avg_batch_throughput": 549.89,
"min_batch_throughput": 549.89,
"max_batch_throughput": 549.89
}
},
{
"config": {
"input_tokens": 5000,
"output_tokens": 512,
"batch_size": 16,
"num_batches": 1,
"total_requests": 16,
"actual_input_tokens": 6024
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 16,
"failed_requests": 0
},
"latency": {
"mean": 9.992,
"std": 0.024,
"min": 9.937,
"max": 10.019,
"p50": 10.001,
"p95": 10.016,
"p99": 10.019,
"ci_95_lower": 9.98,
"ci_95_upper": 10.003
},
"ttft": {
"mean": 3.948,
"std": 1.636,
"p50": 3.491,
"p90": 5.599
},
"tokens": {
"total_generated": 8192,
"content_tokens": 8192,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 817.4,
"concurrent_content_tps": 817.4,
"requests_per_second": 1.6,
"actual_wall_time": 10.022,
"efficiency_percent": 99.7
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 16.0,
"avg_batch_throughput": 817.4,
"min_batch_throughput": 817.4,
"max_batch_throughput": 817.4
}
},
{
"config": {
"input_tokens": 5000,
"output_tokens": 512,
"batch_size": 24,
"num_batches": 1,
"total_requests": 24,
"actual_input_tokens": 6024
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 24,
"failed_requests": 0
},
"latency": {
"mean": 12.189,
"std": 0.038,
"min": 12.013,
"max": 12.21,
"p50": 12.197,
"p95": 12.209,
"p99": 12.21,
"ci_95_lower": 12.174,
"ci_95_upper": 12.204
},
"ttft": {
"mean": 4.238,
"std": 1.059,
"p50": 3.938,
"p90": 5.769
},
"tokens": {
"total_generated": 12288,
"content_tokens": 12288,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1005.93,
"concurrent_content_tps": 1005.93,
"requests_per_second": 1.96,
"actual_wall_time": 12.216,
"efficiency_percent": 99.78
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 24.0,
"avg_batch_throughput": 1005.93,
"min_batch_throughput": 1005.93,
"max_batch_throughput": 1005.93
}
},
{
"config": {
"input_tokens": 5000,
"output_tokens": 512,
"batch_size": 32,
"num_batches": 1,
"total_requests": 32,
"actual_input_tokens": 6024
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 32,
"failed_requests": 0
},
"latency": {
"mean": 13.535,
"std": 0.07,
"min": 13.146,
"max": 13.563,
"p50": 13.546,
"p95": 13.56,
"p99": 13.563,
"ci_95_lower": 13.511,
"ci_95_upper": 13.559
},
"ttft": {
"mean": 4.996,
"std": 1.647,
"p50": 4.524,
"p90": 6.854
},
"tokens": {
"total_generated": 16384,
"content_tokens": 16384,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1207.9,
"concurrent_content_tps": 1207.9,
"requests_per_second": 2.36,
"actual_wall_time": 13.564,
"efficiency_percent": 99.78
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 32.0,
"avg_batch_throughput": 1207.9,
"min_batch_throughput": 1207.9,
"max_batch_throughput": 1207.9
}
},
{
"config": {
"input_tokens": 5000,
"output_tokens": 512,
"batch_size": 64,
"num_batches": 1,
"total_requests": 64,
"actual_input_tokens": 6024
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 64,
"failed_requests": 0
},
"latency": {
"mean": 18.236,
"std": 0.071,
"min": 17.676,
"max": 18.258,
"p50": 18.245,
"p95": 18.257,
"p99": 18.258,
"ci_95_lower": 18.218,
"ci_95_upper": 18.253
},
"ttft": {
"mean": 6.521,
"std": 2.744,
"p50": 5.802,
"p90": 8.623
},
"tokens": {
"total_generated": 32768,
"content_tokens": 32768,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1793.73,
"concurrent_content_tps": 1793.73,
"requests_per_second": 3.5,
"actual_wall_time": 18.268,
"efficiency_percent": 99.82
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 64.0,
"avg_batch_throughput": 1793.73,
"min_batch_throughput": 1793.73,
"max_batch_throughput": 1793.73
}
},
{
"config": {
"input_tokens": 9000,
"output_tokens": 512,
"batch_size": 1,
"num_batches": 1,
"total_requests": 1,
"actual_input_tokens": 10777
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 1,
"failed_requests": 0
},
"latency": {
"mean": 4.17,
"std": 0.0,
"min": 4.17,
"max": 4.17,
"p50": 4.17,
"p95": 4.17,
"p99": 4.17,
"ci_95_lower": 4.17,
"ci_95_upper": 4.17
},
"ttft": {
"mean": 1.79,
"std": 0.0,
"p50": 1.79,
"p90": 1.79
},
"tokens": {
"total_generated": 512,
"content_tokens": 512,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 122.79,
"concurrent_content_tps": 122.79,
"requests_per_second": 0.24,
"actual_wall_time": 4.17,
"efficiency_percent": 100.0
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 1.0,
"avg_batch_throughput": 122.79,
"min_batch_throughput": 122.79,
"max_batch_throughput": 122.79
}
},
{
"config": {
"input_tokens": 9000,
"output_tokens": 512,
"batch_size": 8,
"num_batches": 1,
"total_requests": 8,
"actual_input_tokens": 10777
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 8,
"failed_requests": 0
},
"latency": {
"mean": 7.837,
"std": 0.011,
"min": 7.808,
"max": 7.846,
"p50": 7.84,
"p95": 7.845,
"p99": 7.846,
"ci_95_lower": 7.829,
"ci_95_upper": 7.845
},
"ttft": {
"mean": 2.73,
"std": 0.413,
"p50": 2.727,
"p90": 3.176
},
"tokens": {
"total_generated": 4096,
"content_tokens": 4096,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 521.94,
"concurrent_content_tps": 521.94,
"requests_per_second": 1.02,
"actual_wall_time": 7.848,
"efficiency_percent": 99.86
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 8.0,
"avg_batch_throughput": 521.94,
"min_batch_throughput": 521.94,
"max_batch_throughput": 521.94
}
},
{
"config": {
"input_tokens": 9000,
"output_tokens": 512,
"batch_size": 16,
"num_batches": 1,
"total_requests": 16,
"actual_input_tokens": 10777
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 16,
"failed_requests": 0
},
"latency": {
"mean": 10.825,
"std": 0.051,
"min": 10.645,
"max": 10.858,
"p50": 10.843,
"p95": 10.856,
"p99": 10.858,
"ci_95_lower": 10.8,
"ci_95_upper": 10.85
},
"ttft": {
"mean": 3.809,
"std": 0.481,
"p50": 3.923,
"p90": 4.335
},
"tokens": {
"total_generated": 8192,
"content_tokens": 8192,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 754.44,
"concurrent_content_tps": 754.44,
"requests_per_second": 1.47,
"actual_wall_time": 10.858,
"efficiency_percent": 99.69
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 16.0,
"avg_batch_throughput": 754.44,
"min_batch_throughput": 754.44,
"max_batch_throughput": 754.44
}
},
{
"config": {
"input_tokens": 9000,
"output_tokens": 512,
"batch_size": 24,
"num_batches": 1,
"total_requests": 24,
"actual_input_tokens": 10777
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 24,
"failed_requests": 0
},
"latency": {
"mean": 13.617,
"std": 0.082,
"min": 13.31,
"max": 13.728,
"p50": 13.61,
"p95": 13.726,
"p99": 13.727,
"ci_95_lower": 13.585,
"ci_95_upper": 13.65
},
"ttft": {
"mean": 5.393,
"std": 2.261,
"p50": 4.893,
"p90": 8.595
},
"tokens": {
"total_generated": 12288,
"content_tokens": 12288,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 895.03,
"concurrent_content_tps": 895.03,
"requests_per_second": 1.75,
"actual_wall_time": 13.729,
"efficiency_percent": 99.18
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 24.0,
"avg_batch_throughput": 895.03,
"min_batch_throughput": 895.03,
"max_batch_throughput": 895.03
}
},
{
"config": {
"input_tokens": 9000,
"output_tokens": 512,
"batch_size": 32,
"num_batches": 1,
"total_requests": 32,
"actual_input_tokens": 10777
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 32,
"failed_requests": 0
},
"latency": {
"mean": 15.071,
"std": 0.058,
"min": 14.788,
"max": 15.128,
"p50": 15.075,
"p95": 15.121,
"p99": 15.127,
"ci_95_lower": 15.051,
"ci_95_upper": 15.091
},
"ttft": {
"mean": 6.012,
"std": 2.11,
"p50": 5.568,
"p90": 9.381
},
"tokens": {
"total_generated": 16384,
"content_tokens": 16384,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1082.91,
"concurrent_content_tps": 1082.91,
"requests_per_second": 2.12,
"actual_wall_time": 15.13,
"efficiency_percent": 99.61
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 32.0,
"avg_batch_throughput": 1082.91,
"min_batch_throughput": 1082.91,
"max_batch_throughput": 1082.91
}
},
{
"config": {
"input_tokens": 9000,
"output_tokens": 512,
"batch_size": 64,
"num_batches": 1,
"total_requests": 64,
"actual_input_tokens": 10777
},
"success_metrics": {
"success_rate": 100.0,
"successful_requests": 64,
"failed_requests": 0
},
"latency": {
"mean": 20.583,
"std": 0.133,
"min": 19.9,
"max": 20.765,
"p50": 20.581,
"p95": 20.731,
"p99": 20.762,
"ci_95_lower": 20.55,
"ci_95_upper": 20.615
},
"ttft": {
"mean": 7.616,
"std": 2.21,
"p50": 7.112,
"p90": 9.38
},
"tokens": {
"total_generated": 32768,
"content_tokens": 32768,
"reasoning_tokens": 0,
"avg_per_request": 512.0
},
"throughput": {
"concurrent_total_tps": 1575.94,
"concurrent_content_tps": 1575.94,
"requests_per_second": 3.08,
"actual_wall_time": 20.793,
"efficiency_percent": 98.99
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 64.0,
"avg_batch_throughput": 1575.94,
"min_batch_throughput": 1575.94,
"max_batch_throughput": 1575.94
}
}
]
}