630 lines
15 KiB
JSON
630 lines
15 KiB
JSON
{
|
|
"timestamp": "2026-03-11T11:10:08.245541",
|
|
"model_name": "QuantTrio/Qwen3.5-35B-A3B-AWQ",
|
|
"results": [
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 1,
|
|
"num_batches": 2,
|
|
"total_requests": 2,
|
|
"actual_input_tokens": 1140
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 2,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 9.155,
|
|
"std": 5.968,
|
|
"min": 3.187,
|
|
"max": 15.123,
|
|
"p50": 9.155,
|
|
"p95": 14.526,
|
|
"p99": 15.003,
|
|
"ci_95_lower": 0.884,
|
|
"ci_95_upper": 17.426
|
|
},
|
|
"ttft": {
|
|
"mean": 9.155,
|
|
"std": 5.968,
|
|
"p50": 9.155,
|
|
"p90": 13.929
|
|
},
|
|
"tokens": {
|
|
"total_generated": 1024,
|
|
"content_tokens": 1024,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 55.62,
|
|
"concurrent_content_tps": 55.62,
|
|
"requests_per_second": 0.11,
|
|
"actual_wall_time": 18.412,
|
|
"efficiency_percent": 57.18
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 1.0,
|
|
"avg_batch_throughput": 97.26,
|
|
"min_batch_throughput": 33.86,
|
|
"max_batch_throughput": 160.67
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 8,
|
|
"num_batches": 2,
|
|
"total_requests": 16,
|
|
"actual_input_tokens": 1003
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 16,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 8.081,
|
|
"std": 2.287,
|
|
"min": 5.772,
|
|
"max": 10.373,
|
|
"p50": 8.085,
|
|
"p95": 10.372,
|
|
"p99": 10.373,
|
|
"ci_95_lower": 6.961,
|
|
"ci_95_upper": 9.202
|
|
},
|
|
"ttft": {
|
|
"mean": 8.081,
|
|
"std": 2.287,
|
|
"p50": 8.085,
|
|
"p90": 10.37
|
|
},
|
|
"tokens": {
|
|
"total_generated": 8192,
|
|
"content_tokens": 8192,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 503.04,
|
|
"concurrent_content_tps": 503.04,
|
|
"requests_per_second": 0.98,
|
|
"actual_wall_time": 16.285,
|
|
"efficiency_percent": 91.31
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 8.0,
|
|
"avg_batch_throughput": 549.93,
|
|
"min_batch_throughput": 394.83,
|
|
"max_batch_throughput": 705.03
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 32,
|
|
"num_batches": 2,
|
|
"total_requests": 64,
|
|
"actual_input_tokens": 1028
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 64,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 8.686,
|
|
"std": 0.017,
|
|
"min": 8.636,
|
|
"max": 8.732,
|
|
"p50": 8.688,
|
|
"p95": 8.71,
|
|
"p99": 8.721,
|
|
"ci_95_lower": 8.682,
|
|
"ci_95_upper": 8.691
|
|
},
|
|
"ttft": {
|
|
"mean": 8.595,
|
|
"std": 0.727,
|
|
"p50": 8.687,
|
|
"p90": 8.707
|
|
},
|
|
"tokens": {
|
|
"total_generated": 32768,
|
|
"content_tokens": 32768,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 1865.45,
|
|
"concurrent_content_tps": 1865.45,
|
|
"requests_per_second": 3.64,
|
|
"actual_wall_time": 17.566,
|
|
"efficiency_percent": 98.9
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 32.0,
|
|
"avg_batch_throughput": 1876.54,
|
|
"min_batch_throughput": 1870.97,
|
|
"max_batch_throughput": 1882.11
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 1000,
|
|
"output_tokens": 512,
|
|
"batch_size": 64,
|
|
"num_batches": 2,
|
|
"total_requests": 128,
|
|
"actual_input_tokens": 1028
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 128,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 12.207,
|
|
"std": 0.04,
|
|
"min": 12.108,
|
|
"max": 12.283,
|
|
"p50": 12.211,
|
|
"p95": 12.263,
|
|
"p99": 12.273,
|
|
"ci_95_lower": 12.2,
|
|
"ci_95_upper": 12.214
|
|
},
|
|
"ttft": {
|
|
"mean": 12.044,
|
|
"std": 1.066,
|
|
"p50": 12.205,
|
|
"p90": 12.257
|
|
},
|
|
"tokens": {
|
|
"total_generated": 65536,
|
|
"content_tokens": 65536,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 2654.48,
|
|
"concurrent_content_tps": 2654.48,
|
|
"requests_per_second": 5.18,
|
|
"actual_wall_time": 24.689,
|
|
"efficiency_percent": 98.89
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 64.0,
|
|
"avg_batch_throughput": 2665.65,
|
|
"min_batch_throughput": 2658.45,
|
|
"max_batch_throughput": 2672.85
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 10000,
|
|
"output_tokens": 512,
|
|
"batch_size": 1,
|
|
"num_batches": 2,
|
|
"total_requests": 2,
|
|
"actual_input_tokens": 8871
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 2,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 3.533,
|
|
"std": 0.026,
|
|
"min": 3.507,
|
|
"max": 3.559,
|
|
"p50": 3.533,
|
|
"p95": 3.557,
|
|
"p99": 3.559,
|
|
"ci_95_lower": 3.497,
|
|
"ci_95_upper": 3.569
|
|
},
|
|
"ttft": {
|
|
"mean": 3.533,
|
|
"std": 0.026,
|
|
"p50": 3.533,
|
|
"p90": 3.554
|
|
},
|
|
"tokens": {
|
|
"total_generated": 1024,
|
|
"content_tokens": 1024,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 142.85,
|
|
"concurrent_content_tps": 142.85,
|
|
"requests_per_second": 0.28,
|
|
"actual_wall_time": 7.168,
|
|
"efficiency_percent": 98.57
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 1.0,
|
|
"avg_batch_throughput": 144.92,
|
|
"min_batch_throughput": 143.85,
|
|
"max_batch_throughput": 145.99
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 10000,
|
|
"output_tokens": 512,
|
|
"batch_size": 8,
|
|
"num_batches": 2,
|
|
"total_requests": 16,
|
|
"actual_input_tokens": 8895
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 16,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 7.325,
|
|
"std": 0.144,
|
|
"min": 7.142,
|
|
"max": 7.493,
|
|
"p50": 7.333,
|
|
"p95": 7.489,
|
|
"p99": 7.492,
|
|
"ci_95_lower": 7.254,
|
|
"ci_95_upper": 7.395
|
|
},
|
|
"ttft": {
|
|
"mean": 7.325,
|
|
"std": 0.144,
|
|
"p50": 7.333,
|
|
"p90": 7.487
|
|
},
|
|
"tokens": {
|
|
"total_generated": 8192,
|
|
"content_tokens": 8192,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 550.76,
|
|
"concurrent_content_tps": 550.76,
|
|
"requests_per_second": 1.08,
|
|
"actual_wall_time": 14.874,
|
|
"efficiency_percent": 98.45
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 8.0,
|
|
"avg_batch_throughput": 554.82,
|
|
"min_batch_throughput": 543.43,
|
|
"max_batch_throughput": 566.21
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 10000,
|
|
"output_tokens": 512,
|
|
"batch_size": 32,
|
|
"num_batches": 2,
|
|
"total_requests": 64,
|
|
"actual_input_tokens": 8842
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 64,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 16.085,
|
|
"std": 2.082,
|
|
"min": 13.822,
|
|
"max": 18.383,
|
|
"p50": 16.109,
|
|
"p95": 18.273,
|
|
"p99": 18.329,
|
|
"ci_95_lower": 15.575,
|
|
"ci_95_upper": 16.595
|
|
},
|
|
"ttft": {
|
|
"mean": 15.996,
|
|
"std": 2.114,
|
|
"p50": 14.22,
|
|
"p90": 18.248
|
|
},
|
|
"tokens": {
|
|
"total_generated": 32768,
|
|
"content_tokens": 32768,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 995.46,
|
|
"concurrent_content_tps": 995.46,
|
|
"requests_per_second": 1.94,
|
|
"actual_wall_time": 32.917,
|
|
"efficiency_percent": 96.09
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 32.0,
|
|
"avg_batch_throughput": 1015.38,
|
|
"min_batch_throughput": 885.0,
|
|
"max_batch_throughput": 1145.76
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 10000,
|
|
"output_tokens": 512,
|
|
"batch_size": 64,
|
|
"num_batches": 2,
|
|
"total_requests": 128,
|
|
"actual_input_tokens": 8842
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 128,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 14.781,
|
|
"std": 0.143,
|
|
"min": 14.277,
|
|
"max": 15.099,
|
|
"p50": 14.781,
|
|
"p95": 15.032,
|
|
"p99": 15.096,
|
|
"ci_95_lower": 14.756,
|
|
"ci_95_upper": 14.806
|
|
},
|
|
"ttft": {
|
|
"mean": 14.781,
|
|
"std": 0.143,
|
|
"p50": 14.781,
|
|
"p90": 14.972
|
|
},
|
|
"tokens": {
|
|
"total_generated": 65536,
|
|
"content_tokens": 65536,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 2166.53,
|
|
"concurrent_content_tps": 2166.53,
|
|
"requests_per_second": 4.23,
|
|
"actual_wall_time": 30.249,
|
|
"efficiency_percent": 97.72
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 64.0,
|
|
"avg_batch_throughput": 2174.01,
|
|
"min_batch_throughput": 2164.24,
|
|
"max_batch_throughput": 2183.78
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 50000,
|
|
"output_tokens": 512,
|
|
"batch_size": 1,
|
|
"num_batches": 2,
|
|
"total_requests": 2,
|
|
"actual_input_tokens": 42229
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 2,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 6.101,
|
|
"std": 0.019,
|
|
"min": 6.082,
|
|
"max": 6.12,
|
|
"p50": 6.101,
|
|
"p95": 6.118,
|
|
"p99": 6.12,
|
|
"ci_95_lower": 6.074,
|
|
"ci_95_upper": 6.128
|
|
},
|
|
"ttft": {
|
|
"mean": 6.101,
|
|
"std": 0.019,
|
|
"p50": 6.101,
|
|
"p90": 6.117
|
|
},
|
|
"tokens": {
|
|
"total_generated": 1024,
|
|
"content_tokens": 1024,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 83.22,
|
|
"concurrent_content_tps": 83.22,
|
|
"requests_per_second": 0.16,
|
|
"actual_wall_time": 12.305,
|
|
"efficiency_percent": 99.16
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 1.0,
|
|
"avg_batch_throughput": 83.92,
|
|
"min_batch_throughput": 83.66,
|
|
"max_batch_throughput": 84.19
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 50000,
|
|
"output_tokens": 512,
|
|
"batch_size": 8,
|
|
"num_batches": 2,
|
|
"total_requests": 16,
|
|
"actual_input_tokens": 42048
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 16,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 22.685,
|
|
"std": 2.474,
|
|
"min": 20.003,
|
|
"max": 25.463,
|
|
"p50": 22.588,
|
|
"p95": 25.387,
|
|
"p99": 25.448,
|
|
"ci_95_lower": 21.473,
|
|
"ci_95_upper": 23.897
|
|
},
|
|
"ttft": {
|
|
"mean": 22.685,
|
|
"std": 2.474,
|
|
"p50": 22.588,
|
|
"p90": 25.295
|
|
},
|
|
"tokens": {
|
|
"total_generated": 8192,
|
|
"content_tokens": 8192,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 177.76,
|
|
"concurrent_content_tps": 177.76,
|
|
"requests_per_second": 0.35,
|
|
"actual_wall_time": 46.085,
|
|
"efficiency_percent": 97.28
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 8.0,
|
|
"avg_batch_throughput": 180.32,
|
|
"min_batch_throughput": 160.6,
|
|
"max_batch_throughput": 200.04
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 50000,
|
|
"output_tokens": 512,
|
|
"batch_size": 32,
|
|
"num_batches": 2,
|
|
"total_requests": 64,
|
|
"actual_input_tokens": 41752
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 100.0,
|
|
"successful_requests": 64,
|
|
"failed_requests": 0
|
|
},
|
|
"latency": {
|
|
"mean": 70.626,
|
|
"std": 18.722,
|
|
"min": 48.439,
|
|
"max": 90.756,
|
|
"p50": 70.358,
|
|
"p95": 90.447,
|
|
"p99": 90.677,
|
|
"ci_95_lower": 66.039,
|
|
"ci_95_upper": 75.213
|
|
},
|
|
"ttft": {
|
|
"mean": 70.626,
|
|
"std": 18.722,
|
|
"p50": 70.358,
|
|
"p90": 90.064
|
|
},
|
|
"tokens": {
|
|
"total_generated": 32768,
|
|
"content_tokens": 32768,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 225.4,
|
|
"concurrent_content_tps": 225.4,
|
|
"requests_per_second": 0.44,
|
|
"actual_wall_time": 145.377,
|
|
"efficiency_percent": 90.31
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 32.0,
|
|
"avg_batch_throughput": 241.37,
|
|
"min_batch_throughput": 179.6,
|
|
"max_batch_throughput": 303.14
|
|
}
|
|
},
|
|
{
|
|
"config": {
|
|
"input_tokens": 50000,
|
|
"output_tokens": 512,
|
|
"batch_size": 64,
|
|
"num_batches": 2,
|
|
"total_requests": 128,
|
|
"actual_input_tokens": 41810
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 63.28,
|
|
"successful_requests": 81,
|
|
"failed_requests": 47
|
|
},
|
|
"latency": {
|
|
"mean": 111.228,
|
|
"std": 2.973,
|
|
"min": 106.149,
|
|
"max": 115.385,
|
|
"p50": 112.37,
|
|
"p95": 114.998,
|
|
"p99": 115.289,
|
|
"ci_95_lower": 110.581,
|
|
"ci_95_upper": 111.876
|
|
},
|
|
"ttft": {
|
|
"mean": 111.228,
|
|
"std": 2.973,
|
|
"p50": 112.37,
|
|
"p90": 114.818
|
|
},
|
|
"tokens": {
|
|
"total_generated": 41472,
|
|
"content_tokens": 41472,
|
|
"reasoning_tokens": 0,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 182.43,
|
|
"concurrent_content_tps": 182.43,
|
|
"requests_per_second": 0.36,
|
|
"actual_wall_time": 227.333,
|
|
"efficiency_percent": 61.88
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 2,
|
|
"avg_batch_size": 40.5,
|
|
"avg_batch_throughput": 181.97,
|
|
"min_batch_throughput": 162.11,
|
|
"max_batch_throughput": 201.84
|
|
}
|
|
}
|
|
]
|
|
} |