{ "timestamp": "2026-03-11T11:10:08.245541", "model_name": "QuantTrio/Qwen3.5-35B-A3B-AWQ", "results": [ { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 1, "num_batches": 2, "total_requests": 2, "actual_input_tokens": 1140 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 2, "failed_requests": 0 }, "latency": { "mean": 9.155, "std": 5.968, "min": 3.187, "max": 15.123, "p50": 9.155, "p95": 14.526, "p99": 15.003, "ci_95_lower": 0.884, "ci_95_upper": 17.426 }, "ttft": { "mean": 9.155, "std": 5.968, "p50": 9.155, "p90": 13.929 }, "tokens": { "total_generated": 1024, "content_tokens": 1024, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 55.62, "concurrent_content_tps": 55.62, "requests_per_second": 0.11, "actual_wall_time": 18.412, "efficiency_percent": 57.18 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 1.0, "avg_batch_throughput": 97.26, "min_batch_throughput": 33.86, "max_batch_throughput": 160.67 } }, { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 8, "num_batches": 2, "total_requests": 16, "actual_input_tokens": 1003 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 16, "failed_requests": 0 }, "latency": { "mean": 8.081, "std": 2.287, "min": 5.772, "max": 10.373, "p50": 8.085, "p95": 10.372, "p99": 10.373, "ci_95_lower": 6.961, "ci_95_upper": 9.202 }, "ttft": { "mean": 8.081, "std": 2.287, "p50": 8.085, "p90": 10.37 }, "tokens": { "total_generated": 8192, "content_tokens": 8192, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 503.04, "concurrent_content_tps": 503.04, "requests_per_second": 0.98, "actual_wall_time": 16.285, "efficiency_percent": 91.31 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 8.0, "avg_batch_throughput": 549.93, "min_batch_throughput": 394.83, "max_batch_throughput": 705.03 } }, { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 32, "num_batches": 2, "total_requests": 64, "actual_input_tokens": 1028 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 64, "failed_requests": 0 }, "latency": { "mean": 8.686, "std": 0.017, "min": 8.636, "max": 8.732, "p50": 8.688, "p95": 8.71, "p99": 8.721, "ci_95_lower": 8.682, "ci_95_upper": 8.691 }, "ttft": { "mean": 8.595, "std": 0.727, "p50": 8.687, "p90": 8.707 }, "tokens": { "total_generated": 32768, "content_tokens": 32768, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1865.45, "concurrent_content_tps": 1865.45, "requests_per_second": 3.64, "actual_wall_time": 17.566, "efficiency_percent": 98.9 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 32.0, "avg_batch_throughput": 1876.54, "min_batch_throughput": 1870.97, "max_batch_throughput": 1882.11 } }, { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 64, "num_batches": 2, "total_requests": 128, "actual_input_tokens": 1028 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 128, "failed_requests": 0 }, "latency": { "mean": 12.207, "std": 0.04, "min": 12.108, "max": 12.283, "p50": 12.211, "p95": 12.263, "p99": 12.273, "ci_95_lower": 12.2, "ci_95_upper": 12.214 }, "ttft": { "mean": 12.044, "std": 1.066, "p50": 12.205, "p90": 12.257 }, "tokens": { "total_generated": 65536, "content_tokens": 65536, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 2654.48, "concurrent_content_tps": 2654.48, "requests_per_second": 5.18, "actual_wall_time": 24.689, "efficiency_percent": 98.89 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 64.0, "avg_batch_throughput": 2665.65, "min_batch_throughput": 2658.45, "max_batch_throughput": 2672.85 } }, { "config": { "input_tokens": 10000, "output_tokens": 512, "batch_size": 1, "num_batches": 2, "total_requests": 2, "actual_input_tokens": 8871 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 2, "failed_requests": 0 }, "latency": { "mean": 3.533, "std": 0.026, "min": 3.507, "max": 3.559, "p50": 3.533, "p95": 3.557, "p99": 3.559, "ci_95_lower": 3.497, "ci_95_upper": 3.569 }, "ttft": { "mean": 3.533, "std": 0.026, "p50": 3.533, "p90": 3.554 }, "tokens": { "total_generated": 1024, "content_tokens": 1024, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 142.85, "concurrent_content_tps": 142.85, "requests_per_second": 0.28, "actual_wall_time": 7.168, "efficiency_percent": 98.57 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 1.0, "avg_batch_throughput": 144.92, "min_batch_throughput": 143.85, "max_batch_throughput": 145.99 } }, { "config": { "input_tokens": 10000, "output_tokens": 512, "batch_size": 8, "num_batches": 2, "total_requests": 16, "actual_input_tokens": 8895 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 16, "failed_requests": 0 }, "latency": { "mean": 7.325, "std": 0.144, "min": 7.142, "max": 7.493, "p50": 7.333, "p95": 7.489, "p99": 7.492, "ci_95_lower": 7.254, "ci_95_upper": 7.395 }, "ttft": { "mean": 7.325, "std": 0.144, "p50": 7.333, "p90": 7.487 }, "tokens": { "total_generated": 8192, "content_tokens": 8192, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 550.76, "concurrent_content_tps": 550.76, "requests_per_second": 1.08, "actual_wall_time": 14.874, "efficiency_percent": 98.45 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 8.0, "avg_batch_throughput": 554.82, "min_batch_throughput": 543.43, "max_batch_throughput": 566.21 } }, { "config": { "input_tokens": 10000, "output_tokens": 512, "batch_size": 32, "num_batches": 2, "total_requests": 64, "actual_input_tokens": 8842 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 64, "failed_requests": 0 }, "latency": { "mean": 16.085, "std": 2.082, "min": 13.822, "max": 18.383, "p50": 16.109, "p95": 18.273, "p99": 18.329, "ci_95_lower": 15.575, "ci_95_upper": 16.595 }, "ttft": { "mean": 15.996, "std": 2.114, "p50": 14.22, "p90": 18.248 }, "tokens": { "total_generated": 32768, "content_tokens": 32768, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 995.46, "concurrent_content_tps": 995.46, "requests_per_second": 1.94, "actual_wall_time": 32.917, "efficiency_percent": 96.09 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 32.0, "avg_batch_throughput": 1015.38, "min_batch_throughput": 885.0, "max_batch_throughput": 1145.76 } }, { "config": { "input_tokens": 10000, "output_tokens": 512, "batch_size": 64, "num_batches": 2, "total_requests": 128, "actual_input_tokens": 8842 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 128, "failed_requests": 0 }, "latency": { "mean": 14.781, "std": 0.143, "min": 14.277, "max": 15.099, "p50": 14.781, "p95": 15.032, "p99": 15.096, "ci_95_lower": 14.756, "ci_95_upper": 14.806 }, "ttft": { "mean": 14.781, "std": 0.143, "p50": 14.781, "p90": 14.972 }, "tokens": { "total_generated": 65536, "content_tokens": 65536, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 2166.53, "concurrent_content_tps": 2166.53, "requests_per_second": 4.23, "actual_wall_time": 30.249, "efficiency_percent": 97.72 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 64.0, "avg_batch_throughput": 2174.01, "min_batch_throughput": 2164.24, "max_batch_throughput": 2183.78 } }, { "config": { "input_tokens": 50000, "output_tokens": 512, "batch_size": 1, "num_batches": 2, "total_requests": 2, "actual_input_tokens": 42229 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 2, "failed_requests": 0 }, "latency": { "mean": 6.101, "std": 0.019, "min": 6.082, "max": 6.12, "p50": 6.101, "p95": 6.118, "p99": 6.12, "ci_95_lower": 6.074, "ci_95_upper": 6.128 }, "ttft": { "mean": 6.101, "std": 0.019, "p50": 6.101, "p90": 6.117 }, "tokens": { "total_generated": 1024, "content_tokens": 1024, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 83.22, "concurrent_content_tps": 83.22, "requests_per_second": 0.16, "actual_wall_time": 12.305, "efficiency_percent": 99.16 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 1.0, "avg_batch_throughput": 83.92, "min_batch_throughput": 83.66, "max_batch_throughput": 84.19 } }, { "config": { "input_tokens": 50000, "output_tokens": 512, "batch_size": 8, "num_batches": 2, "total_requests": 16, "actual_input_tokens": 42048 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 16, "failed_requests": 0 }, "latency": { "mean": 22.685, "std": 2.474, "min": 20.003, "max": 25.463, "p50": 22.588, "p95": 25.387, "p99": 25.448, "ci_95_lower": 21.473, "ci_95_upper": 23.897 }, "ttft": { "mean": 22.685, "std": 2.474, "p50": 22.588, "p90": 25.295 }, "tokens": { "total_generated": 8192, "content_tokens": 8192, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 177.76, "concurrent_content_tps": 177.76, "requests_per_second": 0.35, "actual_wall_time": 46.085, "efficiency_percent": 97.28 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 8.0, "avg_batch_throughput": 180.32, "min_batch_throughput": 160.6, "max_batch_throughput": 200.04 } }, { "config": { "input_tokens": 50000, "output_tokens": 512, "batch_size": 32, "num_batches": 2, "total_requests": 64, "actual_input_tokens": 41752 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 64, "failed_requests": 0 }, "latency": { "mean": 70.626, "std": 18.722, "min": 48.439, "max": 90.756, "p50": 70.358, "p95": 90.447, "p99": 90.677, "ci_95_lower": 66.039, "ci_95_upper": 75.213 }, "ttft": { "mean": 70.626, "std": 18.722, "p50": 70.358, "p90": 90.064 }, "tokens": { "total_generated": 32768, "content_tokens": 32768, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 225.4, "concurrent_content_tps": 225.4, "requests_per_second": 0.44, "actual_wall_time": 145.377, "efficiency_percent": 90.31 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 32.0, "avg_batch_throughput": 241.37, "min_batch_throughput": 179.6, "max_batch_throughput": 303.14 } }, { "config": { "input_tokens": 50000, "output_tokens": 512, "batch_size": 64, "num_batches": 2, "total_requests": 128, "actual_input_tokens": 41810 }, "success_metrics": { "success_rate": 63.28, "successful_requests": 81, "failed_requests": 47 }, "latency": { "mean": 111.228, "std": 2.973, "min": 106.149, "max": 115.385, "p50": 112.37, "p95": 114.998, "p99": 115.289, "ci_95_lower": 110.581, "ci_95_upper": 111.876 }, "ttft": { "mean": 111.228, "std": 2.973, "p50": 112.37, "p90": 114.818 }, "tokens": { "total_generated": 41472, "content_tokens": 41472, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 182.43, "concurrent_content_tps": 182.43, "requests_per_second": 0.36, "actual_wall_time": 227.333, "efficiency_percent": 61.88 }, "batch_metrics": { "num_batches": 2, "avg_batch_size": 40.5, "avg_batch_throughput": 181.97, "min_batch_throughput": 162.11, "max_batch_throughput": 201.84 } } ] }