1254 lines
30 KiB
JSON
1254 lines
30 KiB
JSON
{
|
|
"timestamp": "2026-01-22T14:46:31.276437",
|
|
"model_name": "openai/gpt-oss-120b",
|
|
"results": [
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 1,
|
|
"num_batches": 1,
|
|
"total_requests": 1,
|
|
"actual_input_tokens": 1268
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 1,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 3.25,
|
|
"std": 0.0,
|
|
"min": 3.25,
|
|
"max": 3.25,
|
|
"p50": 3.25,
|
|
"p95": 3.25,
|
|
"p99": 3.25,
|
|
"ci_95_lower": 3.25,
|
|
"ci_95_upper": 3.25
|
|
},
|
|
"ttft": {
|
|
"mean": 1.228,
|
|
"std": 0.0,
|
|
"p50": 1.228,
|
|
"p90": 1.228
|
|
},
|
|
"tokens": {
|
|
"total_generated": 512,
|
|
"content_tokens": 512,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 157.56,
|
|
"concurrent_content_tps": 157.56,
|
|
"requests_per_second": 0.31,
|
|
"actual_wall_time": 3.25,
|
|
"efficiency_percent": 100.0
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 1.0,
|
|
"avg_batch_throughput": 157.56,
|
|
"min_batch_throughput": 157.56,
|
|
"max_batch_throughput": 157.56
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 8,
|
|
"num_batches": 1,
|
|
"total_requests": 8,
|
|
"actual_input_tokens": 1268
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 8,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 6.513,
|
|
"std": 0.018,
|
|
"min": 6.467,
|
|
"max": 6.524,
|
|
"p50": 6.52,
|
|
"p95": 6.523,
|
|
"p99": 6.524,
|
|
"ci_95_lower": 6.501,
|
|
"ci_95_upper": 6.525
|
|
},
|
|
"ttft": {
|
|
"mean": 3.362,
|
|
"std": 1.481,
|
|
"p50": 2.592,
|
|
"p90": 5.281
|
|
},
|
|
"tokens": {
|
|
"total_generated": 4096,
|
|
"content_tokens": 4096,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 627.74,
|
|
"concurrent_content_tps": 627.74,
|
|
"requests_per_second": 1.23,
|
|
"actual_wall_time": 6.525,
|
|
"efficiency_percent": 99.81
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 8.0,
|
|
"avg_batch_throughput": 627.74,
|
|
"min_batch_throughput": 627.74,
|
|
"max_batch_throughput": 627.74
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 16,
|
|
"num_batches": 1,
|
|
"total_requests": 16,
|
|
"actual_input_tokens": 1268
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 16,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 8.828,
|
|
"std": 0.013,
|
|
"min": 8.798,
|
|
"max": 8.844,
|
|
"p50": 8.825,
|
|
"p95": 8.842,
|
|
"p99": 8.844,
|
|
"ci_95_lower": 8.821,
|
|
"ci_95_upper": 8.834
|
|
},
|
|
"ttft": {
|
|
"mean": 3.498,
|
|
"std": 1.405,
|
|
"p50": 2.913,
|
|
"p90": 6.162
|
|
},
|
|
"tokens": {
|
|
"total_generated": 8192,
|
|
"content_tokens": 8192,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 925.82,
|
|
"concurrent_content_tps": 925.82,
|
|
"requests_per_second": 1.81,
|
|
"actual_wall_time": 8.848,
|
|
"efficiency_percent": 99.77
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 16.0,
|
|
"avg_batch_throughput": 925.82,
|
|
"min_batch_throughput": 925.82,
|
|
"max_batch_throughput": 925.82
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 24,
|
|
"num_batches": 1,
|
|
"total_requests": 24,
|
|
"actual_input_tokens": 1268
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 24,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 10.38,
|
|
"std": 0.031,
|
|
"min": 10.299,
|
|
"max": 10.414,
|
|
"p50": 10.393,
|
|
"p95": 10.404,
|
|
"p99": 10.412,
|
|
"ci_95_lower": 10.367,
|
|
"ci_95_upper": 10.392
|
|
},
|
|
"ttft": {
|
|
"mean": 4.489,
|
|
"std": 2.111,
|
|
"p50": 3.683,
|
|
"p90": 8.168
|
|
},
|
|
"tokens": {
|
|
"total_generated": 12288,
|
|
"content_tokens": 12288,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1179.3,
|
|
"concurrent_content_tps": 1179.3,
|
|
"requests_per_second": 2.3,
|
|
"actual_wall_time": 10.42,
|
|
"efficiency_percent": 99.62
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 24.0,
|
|
"avg_batch_throughput": 1179.3,
|
|
"min_batch_throughput": 1179.3,
|
|
"max_batch_throughput": 1179.3
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 32,
|
|
"num_batches": 1,
|
|
"total_requests": 32,
|
|
"actual_input_tokens": 1268
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 32,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 11.608,
|
|
"std": 0.043,
|
|
"min": 11.464,
|
|
"max": 11.642,
|
|
"p50": 11.632,
|
|
"p95": 11.64,
|
|
"p99": 11.641,
|
|
"ci_95_lower": 11.593,
|
|
"ci_95_upper": 11.623
|
|
},
|
|
"ttft": {
|
|
"mean": 4.908,
|
|
"std": 2.125,
|
|
"p50": 4.134,
|
|
"p90": 8.842
|
|
},
|
|
"tokens": {
|
|
"total_generated": 16384,
|
|
"content_tokens": 16384,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1405.72,
|
|
"concurrent_content_tps": 1405.72,
|
|
"requests_per_second": 2.75,
|
|
"actual_wall_time": 11.655,
|
|
"efficiency_percent": 99.6
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 32.0,
|
|
"avg_batch_throughput": 1405.72,
|
|
"min_batch_throughput": 1405.72,
|
|
"max_batch_throughput": 1405.72
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 64,
|
|
"num_batches": 1,
|
|
"total_requests": 64,
|
|
"actual_input_tokens": 1268
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 64,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 15.496,
|
|
"std": 0.096,
|
|
"min": 15.255,
|
|
"max": 15.789,
|
|
"p50": 15.516,
|
|
"p95": 15.597,
|
|
"p99": 15.67,
|
|
"ci_95_lower": 15.473,
|
|
"ci_95_upper": 15.52
|
|
},
|
|
"ttft": {
|
|
"mean": 5.896,
|
|
"std": 2.086,
|
|
"p50": 5.354,
|
|
"p90": 8.428
|
|
},
|
|
"tokens": {
|
|
"total_generated": 32768,
|
|
"content_tokens": 32768,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 2072.97,
|
|
"concurrent_content_tps": 2072.97,
|
|
"requests_per_second": 4.05,
|
|
"actual_wall_time": 15.807,
|
|
"efficiency_percent": 98.03
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 64.0,
|
|
"avg_batch_throughput": 2072.97,
|
|
"min_batch_throughput": 2072.97,
|
|
"max_batch_throughput": 2072.97
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 2500,
|
|
"output_tokens": 512,
|
|
"batch_size": 1,
|
|
"num_batches": 1,
|
|
"total_requests": 1,
|
|
"actual_input_tokens": 3053
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 1,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 3.44,
|
|
"std": 0.0,
|
|
"min": 3.44,
|
|
"max": 3.44,
|
|
"p50": 3.44,
|
|
"p95": 3.44,
|
|
"p99": 3.44,
|
|
"ci_95_lower": 3.44,
|
|
"ci_95_upper": 3.44
|
|
},
|
|
"ttft": {
|
|
"mean": 1.375,
|
|
"std": 0.0,
|
|
"p50": 1.375,
|
|
"p90": 1.375
|
|
},
|
|
"tokens": {
|
|
"total_generated": 512,
|
|
"content_tokens": 512,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 148.84,
|
|
"concurrent_content_tps": 148.84,
|
|
"requests_per_second": 0.29,
|
|
"actual_wall_time": 3.44,
|
|
"efficiency_percent": 100.0
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 1.0,
|
|
"avg_batch_throughput": 148.84,
|
|
"min_batch_throughput": 148.84,
|
|
"max_batch_throughput": 148.84
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 2500,
|
|
"output_tokens": 512,
|
|
"batch_size": 8,
|
|
"num_batches": 1,
|
|
"total_requests": 8,
|
|
"actual_input_tokens": 3053
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 8,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 6.921,
|
|
"std": 0.006,
|
|
"min": 6.905,
|
|
"max": 6.927,
|
|
"p50": 6.922,
|
|
"p95": 6.927,
|
|
"p99": 6.927,
|
|
"ci_95_lower": 6.916,
|
|
"ci_95_upper": 6.925
|
|
},
|
|
"ttft": {
|
|
"mean": 2.777,
|
|
"std": 0.805,
|
|
"p50": 2.474,
|
|
"p90": 3.941
|
|
},
|
|
"tokens": {
|
|
"total_generated": 4096,
|
|
"content_tokens": 4096,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 591.17,
|
|
"concurrent_content_tps": 591.17,
|
|
"requests_per_second": 1.15,
|
|
"actual_wall_time": 6.929,
|
|
"efficiency_percent": 99.89
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 8.0,
|
|
"avg_batch_throughput": 591.17,
|
|
"min_batch_throughput": 591.17,
|
|
"max_batch_throughput": 591.17
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 2500,
|
|
"output_tokens": 512,
|
|
"batch_size": 16,
|
|
"num_batches": 1,
|
|
"total_requests": 16,
|
|
"actual_input_tokens": 3053
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 16,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 9.419,
|
|
"std": 0.01,
|
|
"min": 9.404,
|
|
"max": 9.431,
|
|
"p50": 9.425,
|
|
"p95": 9.43,
|
|
"p99": 9.431,
|
|
"ci_95_lower": 9.414,
|
|
"ci_95_upper": 9.424
|
|
},
|
|
"ttft": {
|
|
"mean": 3.433,
|
|
"std": 0.733,
|
|
"p50": 3.253,
|
|
"p90": 4.342
|
|
},
|
|
"tokens": {
|
|
"total_generated": 8192,
|
|
"content_tokens": 8192,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 868.43,
|
|
"concurrent_content_tps": 868.43,
|
|
"requests_per_second": 1.7,
|
|
"actual_wall_time": 9.433,
|
|
"efficiency_percent": 99.85
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 16.0,
|
|
"avg_batch_throughput": 868.43,
|
|
"min_batch_throughput": 868.43,
|
|
"max_batch_throughput": 868.43
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 2500,
|
|
"output_tokens": 512,
|
|
"batch_size": 24,
|
|
"num_batches": 1,
|
|
"total_requests": 24,
|
|
"actual_input_tokens": 3053
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 24,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 11.23,
|
|
"std": 0.054,
|
|
"min": 10.977,
|
|
"max": 11.251,
|
|
"p50": 11.245,
|
|
"p95": 11.25,
|
|
"p99": 11.25,
|
|
"ci_95_lower": 11.208,
|
|
"ci_95_upper": 11.252
|
|
},
|
|
"ttft": {
|
|
"mean": 3.744,
|
|
"std": 1.427,
|
|
"p50": 3.333,
|
|
"p90": 4.496
|
|
},
|
|
"tokens": {
|
|
"total_generated": 12288,
|
|
"content_tokens": 12288,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1091.4,
|
|
"concurrent_content_tps": 1091.4,
|
|
"requests_per_second": 2.13,
|
|
"actual_wall_time": 11.259,
|
|
"efficiency_percent": 99.74
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 24.0,
|
|
"avg_batch_throughput": 1091.4,
|
|
"min_batch_throughput": 1091.4,
|
|
"max_batch_throughput": 1091.4
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 2500,
|
|
"output_tokens": 512,
|
|
"batch_size": 32,
|
|
"num_batches": 1,
|
|
"total_requests": 32,
|
|
"actual_input_tokens": 3053
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 32,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 12.53,
|
|
"std": 0.038,
|
|
"min": 12.424,
|
|
"max": 12.571,
|
|
"p50": 12.546,
|
|
"p95": 12.568,
|
|
"p99": 12.57,
|
|
"ci_95_lower": 12.517,
|
|
"ci_95_upper": 12.544
|
|
},
|
|
"ttft": {
|
|
"mean": 4.884,
|
|
"std": 1.795,
|
|
"p50": 4.274,
|
|
"p90": 6.106
|
|
},
|
|
"tokens": {
|
|
"total_generated": 16384,
|
|
"content_tokens": 16384,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1302.77,
|
|
"concurrent_content_tps": 1302.77,
|
|
"requests_per_second": 2.54,
|
|
"actual_wall_time": 12.576,
|
|
"efficiency_percent": 99.63
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 32.0,
|
|
"avg_batch_throughput": 1302.77,
|
|
"min_batch_throughput": 1302.77,
|
|
"max_batch_throughput": 1302.77
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 2500,
|
|
"output_tokens": 512,
|
|
"batch_size": 64,
|
|
"num_batches": 1,
|
|
"total_requests": 64,
|
|
"actual_input_tokens": 3053
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 64,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 16.78,
|
|
"std": 0.07,
|
|
"min": 16.507,
|
|
"max": 16.953,
|
|
"p50": 16.778,
|
|
"p95": 16.934,
|
|
"p99": 16.95,
|
|
"ci_95_lower": 16.763,
|
|
"ci_95_upper": 16.797
|
|
},
|
|
"ttft": {
|
|
"mean": 6.451,
|
|
"std": 2.606,
|
|
"p50": 5.536,
|
|
"p90": 10.157
|
|
},
|
|
"tokens": {
|
|
"total_generated": 32768,
|
|
"content_tokens": 32768,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1931.74,
|
|
"concurrent_content_tps": 1931.74,
|
|
"requests_per_second": 3.77,
|
|
"actual_wall_time": 16.963,
|
|
"efficiency_percent": 98.92
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 64.0,
|
|
"avg_batch_throughput": 1931.74,
|
|
"min_batch_throughput": 1931.74,
|
|
"max_batch_throughput": 1931.74
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 5000,
|
|
"output_tokens": 512,
|
|
"batch_size": 1,
|
|
"num_batches": 1,
|
|
"total_requests": 1,
|
|
"actual_input_tokens": 6024
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 1,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 3.725,
|
|
"std": 0.0,
|
|
"min": 3.725,
|
|
"max": 3.725,
|
|
"p50": 3.725,
|
|
"p95": 3.725,
|
|
"p99": 3.725,
|
|
"ci_95_lower": 3.725,
|
|
"ci_95_upper": 3.725
|
|
},
|
|
"ttft": {
|
|
"mean": 1.855,
|
|
"std": 0.0,
|
|
"p50": 1.855,
|
|
"p90": 1.855
|
|
},
|
|
"tokens": {
|
|
"total_generated": 512,
|
|
"content_tokens": 512,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 137.46,
|
|
"concurrent_content_tps": 137.46,
|
|
"requests_per_second": 0.27,
|
|
"actual_wall_time": 3.725,
|
|
"efficiency_percent": 100.0
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 1.0,
|
|
"avg_batch_throughput": 137.46,
|
|
"min_batch_throughput": 137.46,
|
|
"max_batch_throughput": 137.46
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 5000,
|
|
"output_tokens": 512,
|
|
"batch_size": 8,
|
|
"num_batches": 1,
|
|
"total_requests": 8,
|
|
"actual_input_tokens": 6024
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 8,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 7.418,
|
|
"std": 0.058,
|
|
"min": 7.269,
|
|
"max": 7.448,
|
|
"p50": 7.444,
|
|
"p95": 7.447,
|
|
"p99": 7.448,
|
|
"ci_95_lower": 7.378,
|
|
"ci_95_upper": 7.458
|
|
},
|
|
"ttft": {
|
|
"mean": 3.301,
|
|
"std": 1.58,
|
|
"p50": 2.914,
|
|
"p90": 4.562
|
|
},
|
|
"tokens": {
|
|
"total_generated": 4096,
|
|
"content_tokens": 4096,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 549.89,
|
|
"concurrent_content_tps": 549.89,
|
|
"requests_per_second": 1.07,
|
|
"actual_wall_time": 7.449,
|
|
"efficiency_percent": 99.58
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 8.0,
|
|
"avg_batch_throughput": 549.89,
|
|
"min_batch_throughput": 549.89,
|
|
"max_batch_throughput": 549.89
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 5000,
|
|
"output_tokens": 512,
|
|
"batch_size": 16,
|
|
"num_batches": 1,
|
|
"total_requests": 16,
|
|
"actual_input_tokens": 6024
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 16,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 9.992,
|
|
"std": 0.024,
|
|
"min": 9.937,
|
|
"max": 10.019,
|
|
"p50": 10.001,
|
|
"p95": 10.016,
|
|
"p99": 10.019,
|
|
"ci_95_lower": 9.98,
|
|
"ci_95_upper": 10.003
|
|
},
|
|
"ttft": {
|
|
"mean": 3.948,
|
|
"std": 1.636,
|
|
"p50": 3.491,
|
|
"p90": 5.599
|
|
},
|
|
"tokens": {
|
|
"total_generated": 8192,
|
|
"content_tokens": 8192,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 817.4,
|
|
"concurrent_content_tps": 817.4,
|
|
"requests_per_second": 1.6,
|
|
"actual_wall_time": 10.022,
|
|
"efficiency_percent": 99.7
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 16.0,
|
|
"avg_batch_throughput": 817.4,
|
|
"min_batch_throughput": 817.4,
|
|
"max_batch_throughput": 817.4
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 5000,
|
|
"output_tokens": 512,
|
|
"batch_size": 24,
|
|
"num_batches": 1,
|
|
"total_requests": 24,
|
|
"actual_input_tokens": 6024
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 24,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 12.189,
|
|
"std": 0.038,
|
|
"min": 12.013,
|
|
"max": 12.21,
|
|
"p50": 12.197,
|
|
"p95": 12.209,
|
|
"p99": 12.21,
|
|
"ci_95_lower": 12.174,
|
|
"ci_95_upper": 12.204
|
|
},
|
|
"ttft": {
|
|
"mean": 4.238,
|
|
"std": 1.059,
|
|
"p50": 3.938,
|
|
"p90": 5.769
|
|
},
|
|
"tokens": {
|
|
"total_generated": 12288,
|
|
"content_tokens": 12288,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1005.93,
|
|
"concurrent_content_tps": 1005.93,
|
|
"requests_per_second": 1.96,
|
|
"actual_wall_time": 12.216,
|
|
"efficiency_percent": 99.78
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 24.0,
|
|
"avg_batch_throughput": 1005.93,
|
|
"min_batch_throughput": 1005.93,
|
|
"max_batch_throughput": 1005.93
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 5000,
|
|
"output_tokens": 512,
|
|
"batch_size": 32,
|
|
"num_batches": 1,
|
|
"total_requests": 32,
|
|
"actual_input_tokens": 6024
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 32,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 13.535,
|
|
"std": 0.07,
|
|
"min": 13.146,
|
|
"max": 13.563,
|
|
"p50": 13.546,
|
|
"p95": 13.56,
|
|
"p99": 13.563,
|
|
"ci_95_lower": 13.511,
|
|
"ci_95_upper": 13.559
|
|
},
|
|
"ttft": {
|
|
"mean": 4.996,
|
|
"std": 1.647,
|
|
"p50": 4.524,
|
|
"p90": 6.854
|
|
},
|
|
"tokens": {
|
|
"total_generated": 16384,
|
|
"content_tokens": 16384,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1207.9,
|
|
"concurrent_content_tps": 1207.9,
|
|
"requests_per_second": 2.36,
|
|
"actual_wall_time": 13.564,
|
|
"efficiency_percent": 99.78
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 32.0,
|
|
"avg_batch_throughput": 1207.9,
|
|
"min_batch_throughput": 1207.9,
|
|
"max_batch_throughput": 1207.9
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 5000,
|
|
"output_tokens": 512,
|
|
"batch_size": 64,
|
|
"num_batches": 1,
|
|
"total_requests": 64,
|
|
"actual_input_tokens": 6024
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 64,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 18.236,
|
|
"std": 0.071,
|
|
"min": 17.676,
|
|
"max": 18.258,
|
|
"p50": 18.245,
|
|
"p95": 18.257,
|
|
"p99": 18.258,
|
|
"ci_95_lower": 18.218,
|
|
"ci_95_upper": 18.253
|
|
},
|
|
"ttft": {
|
|
"mean": 6.521,
|
|
"std": 2.744,
|
|
"p50": 5.802,
|
|
"p90": 8.623
|
|
},
|
|
"tokens": {
|
|
"total_generated": 32768,
|
|
"content_tokens": 32768,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1793.73,
|
|
"concurrent_content_tps": 1793.73,
|
|
"requests_per_second": 3.5,
|
|
"actual_wall_time": 18.268,
|
|
"efficiency_percent": 99.82
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 64.0,
|
|
"avg_batch_throughput": 1793.73,
|
|
"min_batch_throughput": 1793.73,
|
|
"max_batch_throughput": 1793.73
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 9000,
|
|
"output_tokens": 512,
|
|
"batch_size": 1,
|
|
"num_batches": 1,
|
|
"total_requests": 1,
|
|
"actual_input_tokens": 10777
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 1,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 4.17,
|
|
"std": 0.0,
|
|
"min": 4.17,
|
|
"max": 4.17,
|
|
"p50": 4.17,
|
|
"p95": 4.17,
|
|
"p99": 4.17,
|
|
"ci_95_lower": 4.17,
|
|
"ci_95_upper": 4.17
|
|
},
|
|
"ttft": {
|
|
"mean": 1.79,
|
|
"std": 0.0,
|
|
"p50": 1.79,
|
|
"p90": 1.79
|
|
},
|
|
"tokens": {
|
|
"total_generated": 512,
|
|
"content_tokens": 512,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 122.79,
|
|
"concurrent_content_tps": 122.79,
|
|
"requests_per_second": 0.24,
|
|
"actual_wall_time": 4.17,
|
|
"efficiency_percent": 100.0
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 1.0,
|
|
"avg_batch_throughput": 122.79,
|
|
"min_batch_throughput": 122.79,
|
|
"max_batch_throughput": 122.79
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 9000,
|
|
"output_tokens": 512,
|
|
"batch_size": 8,
|
|
"num_batches": 1,
|
|
"total_requests": 8,
|
|
"actual_input_tokens": 10777
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 8,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 7.837,
|
|
"std": 0.011,
|
|
"min": 7.808,
|
|
"max": 7.846,
|
|
"p50": 7.84,
|
|
"p95": 7.845,
|
|
"p99": 7.846,
|
|
"ci_95_lower": 7.829,
|
|
"ci_95_upper": 7.845
|
|
},
|
|
"ttft": {
|
|
"mean": 2.73,
|
|
"std": 0.413,
|
|
"p50": 2.727,
|
|
"p90": 3.176
|
|
},
|
|
"tokens": {
|
|
"total_generated": 4096,
|
|
"content_tokens": 4096,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 521.94,
|
|
"concurrent_content_tps": 521.94,
|
|
"requests_per_second": 1.02,
|
|
"actual_wall_time": 7.848,
|
|
"efficiency_percent": 99.86
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 8.0,
|
|
"avg_batch_throughput": 521.94,
|
|
"min_batch_throughput": 521.94,
|
|
"max_batch_throughput": 521.94
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 9000,
|
|
"output_tokens": 512,
|
|
"batch_size": 16,
|
|
"num_batches": 1,
|
|
"total_requests": 16,
|
|
"actual_input_tokens": 10777
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 16,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 10.825,
|
|
"std": 0.051,
|
|
"min": 10.645,
|
|
"max": 10.858,
|
|
"p50": 10.843,
|
|
"p95": 10.856,
|
|
"p99": 10.858,
|
|
"ci_95_lower": 10.8,
|
|
"ci_95_upper": 10.85
|
|
},
|
|
"ttft": {
|
|
"mean": 3.809,
|
|
"std": 0.481,
|
|
"p50": 3.923,
|
|
"p90": 4.335
|
|
},
|
|
"tokens": {
|
|
"total_generated": 8192,
|
|
"content_tokens": 8192,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 754.44,
|
|
"concurrent_content_tps": 754.44,
|
|
"requests_per_second": 1.47,
|
|
"actual_wall_time": 10.858,
|
|
"efficiency_percent": 99.69
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 16.0,
|
|
"avg_batch_throughput": 754.44,
|
|
"min_batch_throughput": 754.44,
|
|
"max_batch_throughput": 754.44
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 9000,
|
|
"output_tokens": 512,
|
|
"batch_size": 24,
|
|
"num_batches": 1,
|
|
"total_requests": 24,
|
|
"actual_input_tokens": 10777
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 24,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 13.617,
|
|
"std": 0.082,
|
|
"min": 13.31,
|
|
"max": 13.728,
|
|
"p50": 13.61,
|
|
"p95": 13.726,
|
|
"p99": 13.727,
|
|
"ci_95_lower": 13.585,
|
|
"ci_95_upper": 13.65
|
|
},
|
|
"ttft": {
|
|
"mean": 5.393,
|
|
"std": 2.261,
|
|
"p50": 4.893,
|
|
"p90": 8.595
|
|
},
|
|
"tokens": {
|
|
"total_generated": 12288,
|
|
"content_tokens": 12288,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 895.03,
|
|
"concurrent_content_tps": 895.03,
|
|
"requests_per_second": 1.75,
|
|
"actual_wall_time": 13.729,
|
|
"efficiency_percent": 99.18
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 24.0,
|
|
"avg_batch_throughput": 895.03,
|
|
"min_batch_throughput": 895.03,
|
|
"max_batch_throughput": 895.03
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 9000,
|
|
"output_tokens": 512,
|
|
"batch_size": 32,
|
|
"num_batches": 1,
|
|
"total_requests": 32,
|
|
"actual_input_tokens": 10777
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 32,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 15.071,
|
|
"std": 0.058,
|
|
"min": 14.788,
|
|
"max": 15.128,
|
|
"p50": 15.075,
|
|
"p95": 15.121,
|
|
"p99": 15.127,
|
|
"ci_95_lower": 15.051,
|
|
"ci_95_upper": 15.091
|
|
},
|
|
"ttft": {
|
|
"mean": 6.012,
|
|
"std": 2.11,
|
|
"p50": 5.568,
|
|
"p90": 9.381
|
|
},
|
|
"tokens": {
|
|
"total_generated": 16384,
|
|
"content_tokens": 16384,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1082.91,
|
|
"concurrent_content_tps": 1082.91,
|
|
"requests_per_second": 2.12,
|
|
"actual_wall_time": 15.13,
|
|
"efficiency_percent": 99.61
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 32.0,
|
|
"avg_batch_throughput": 1082.91,
|
|
"min_batch_throughput": 1082.91,
|
|
"max_batch_throughput": 1082.91
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 9000,
|
|
"output_tokens": 512,
|
|
"batch_size": 64,
|
|
"num_batches": 1,
|
|
"total_requests": 64,
|
|
"actual_input_tokens": 10777
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 64,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 20.583,
|
|
"std": 0.133,
|
|
"min": 19.9,
|
|
"max": 20.765,
|
|
"p50": 20.581,
|
|
"p95": 20.731,
|
|
"p99": 20.762,
|
|
"ci_95_lower": 20.55,
|
|
"ci_95_upper": 20.615
|
|
},
|
|
"ttft": {
|
|
"mean": 7.616,
|
|
"std": 2.21,
|
|
"p50": 7.112,
|
|
"p90": 9.38
|
|
},
|
|
"tokens": {
|
|
"total_generated": 32768,
|
|
"content_tokens": 32768,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1575.94,
|
|
"concurrent_content_tps": 1575.94,
|
|
"requests_per_second": 3.08,
|
|
"actual_wall_time": 20.793,
|
|
"efficiency_percent": 98.99
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 64.0,
|
|
"avg_batch_throughput": 1575.94,
|
|
"min_batch_throughput": 1575.94,
|
|
"max_batch_throughput": 1575.94
|
|
}
|
|
}
|
|
]
|
|
} |