{ "timestamp": "2026-01-22T14:46:31.276437", "model_name": "openai/gpt-oss-120b", "results": [ { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 1, "num_batches": 1, "total_requests": 1, "actual_input_tokens": 1268 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 1, "failed_requests": 0 }, "latency": { "mean": 3.25, "std": 0.0, "min": 3.25, "max": 3.25, "p50": 3.25, "p95": 3.25, "p99": 3.25, "ci_95_lower": 3.25, "ci_95_upper": 3.25 }, "ttft": { "mean": 1.228, "std": 0.0, "p50": 1.228, "p90": 1.228 }, "tokens": { "total_generated": 512, "content_tokens": 512, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 157.56, "concurrent_content_tps": 157.56, "requests_per_second": 0.31, "actual_wall_time": 3.25, "efficiency_percent": 100.0 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 1.0, "avg_batch_throughput": 157.56, "min_batch_throughput": 157.56, "max_batch_throughput": 157.56 } }, { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 8, "num_batches": 1, "total_requests": 8, "actual_input_tokens": 1268 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 8, "failed_requests": 0 }, "latency": { "mean": 6.513, "std": 0.018, "min": 6.467, "max": 6.524, "p50": 6.52, "p95": 6.523, "p99": 6.524, "ci_95_lower": 6.501, "ci_95_upper": 6.525 }, "ttft": { "mean": 3.362, "std": 1.481, "p50": 2.592, "p90": 5.281 }, "tokens": { "total_generated": 4096, "content_tokens": 4096, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 627.74, "concurrent_content_tps": 627.74, "requests_per_second": 1.23, "actual_wall_time": 6.525, "efficiency_percent": 99.81 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 8.0, "avg_batch_throughput": 627.74, "min_batch_throughput": 627.74, "max_batch_throughput": 627.74 } }, { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 16, "num_batches": 1, "total_requests": 16, "actual_input_tokens": 1268 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 16, "failed_requests": 0 }, "latency": { "mean": 8.828, "std": 0.013, "min": 8.798, "max": 8.844, "p50": 8.825, "p95": 8.842, "p99": 8.844, "ci_95_lower": 8.821, "ci_95_upper": 8.834 }, "ttft": { "mean": 3.498, "std": 1.405, "p50": 2.913, "p90": 6.162 }, "tokens": { "total_generated": 8192, "content_tokens": 8192, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 925.82, "concurrent_content_tps": 925.82, "requests_per_second": 1.81, "actual_wall_time": 8.848, "efficiency_percent": 99.77 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 16.0, "avg_batch_throughput": 925.82, "min_batch_throughput": 925.82, "max_batch_throughput": 925.82 } }, { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 24, "num_batches": 1, "total_requests": 24, "actual_input_tokens": 1268 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 24, "failed_requests": 0 }, "latency": { "mean": 10.38, "std": 0.031, "min": 10.299, "max": 10.414, "p50": 10.393, "p95": 10.404, "p99": 10.412, "ci_95_lower": 10.367, "ci_95_upper": 10.392 }, "ttft": { "mean": 4.489, "std": 2.111, "p50": 3.683, "p90": 8.168 }, "tokens": { "total_generated": 12288, "content_tokens": 12288, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1179.3, "concurrent_content_tps": 1179.3, "requests_per_second": 2.3, "actual_wall_time": 10.42, "efficiency_percent": 99.62 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 24.0, "avg_batch_throughput": 1179.3, "min_batch_throughput": 1179.3, "max_batch_throughput": 1179.3 } }, { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 32, "num_batches": 1, "total_requests": 32, "actual_input_tokens": 1268 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 32, "failed_requests": 0 }, "latency": { "mean": 11.608, "std": 0.043, "min": 11.464, "max": 11.642, "p50": 11.632, "p95": 11.64, "p99": 11.641, "ci_95_lower": 11.593, "ci_95_upper": 11.623 }, "ttft": { "mean": 4.908, "std": 2.125, "p50": 4.134, "p90": 8.842 }, "tokens": { "total_generated": 16384, "content_tokens": 16384, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1405.72, "concurrent_content_tps": 1405.72, "requests_per_second": 2.75, "actual_wall_time": 11.655, "efficiency_percent": 99.6 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 32.0, "avg_batch_throughput": 1405.72, "min_batch_throughput": 1405.72, "max_batch_throughput": 1405.72 } }, { "config": { "input_tokens": 1000, "output_tokens": 512, "batch_size": 64, "num_batches": 1, "total_requests": 64, "actual_input_tokens": 1268 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 64, "failed_requests": 0 }, "latency": { "mean": 15.496, "std": 0.096, "min": 15.255, "max": 15.789, "p50": 15.516, "p95": 15.597, "p99": 15.67, "ci_95_lower": 15.473, "ci_95_upper": 15.52 }, "ttft": { "mean": 5.896, "std": 2.086, "p50": 5.354, "p90": 8.428 }, "tokens": { "total_generated": 32768, "content_tokens": 32768, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 2072.97, "concurrent_content_tps": 2072.97, "requests_per_second": 4.05, "actual_wall_time": 15.807, "efficiency_percent": 98.03 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 64.0, "avg_batch_throughput": 2072.97, "min_batch_throughput": 2072.97, "max_batch_throughput": 2072.97 } }, { "config": { "input_tokens": 2500, "output_tokens": 512, "batch_size": 1, "num_batches": 1, "total_requests": 1, "actual_input_tokens": 3053 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 1, "failed_requests": 0 }, "latency": { "mean": 3.44, "std": 0.0, "min": 3.44, "max": 3.44, "p50": 3.44, "p95": 3.44, "p99": 3.44, "ci_95_lower": 3.44, "ci_95_upper": 3.44 }, "ttft": { "mean": 1.375, "std": 0.0, "p50": 1.375, "p90": 1.375 }, "tokens": { "total_generated": 512, "content_tokens": 512, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 148.84, "concurrent_content_tps": 148.84, "requests_per_second": 0.29, "actual_wall_time": 3.44, "efficiency_percent": 100.0 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 1.0, "avg_batch_throughput": 148.84, "min_batch_throughput": 148.84, "max_batch_throughput": 148.84 } }, { "config": { "input_tokens": 2500, "output_tokens": 512, "batch_size": 8, "num_batches": 1, "total_requests": 8, "actual_input_tokens": 3053 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 8, "failed_requests": 0 }, "latency": { "mean": 6.921, "std": 0.006, "min": 6.905, "max": 6.927, "p50": 6.922, "p95": 6.927, "p99": 6.927, "ci_95_lower": 6.916, "ci_95_upper": 6.925 }, "ttft": { "mean": 2.777, "std": 0.805, "p50": 2.474, "p90": 3.941 }, "tokens": { "total_generated": 4096, "content_tokens": 4096, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 591.17, "concurrent_content_tps": 591.17, "requests_per_second": 1.15, "actual_wall_time": 6.929, "efficiency_percent": 99.89 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 8.0, "avg_batch_throughput": 591.17, "min_batch_throughput": 591.17, "max_batch_throughput": 591.17 } }, { "config": { "input_tokens": 2500, "output_tokens": 512, "batch_size": 16, "num_batches": 1, "total_requests": 16, "actual_input_tokens": 3053 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 16, "failed_requests": 0 }, "latency": { "mean": 9.419, "std": 0.01, "min": 9.404, "max": 9.431, "p50": 9.425, "p95": 9.43, "p99": 9.431, "ci_95_lower": 9.414, "ci_95_upper": 9.424 }, "ttft": { "mean": 3.433, "std": 0.733, "p50": 3.253, "p90": 4.342 }, "tokens": { "total_generated": 8192, "content_tokens": 8192, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 868.43, "concurrent_content_tps": 868.43, "requests_per_second": 1.7, "actual_wall_time": 9.433, "efficiency_percent": 99.85 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 16.0, "avg_batch_throughput": 868.43, "min_batch_throughput": 868.43, "max_batch_throughput": 868.43 } }, { "config": { "input_tokens": 2500, "output_tokens": 512, "batch_size": 24, "num_batches": 1, "total_requests": 24, "actual_input_tokens": 3053 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 24, "failed_requests": 0 }, "latency": { "mean": 11.23, "std": 0.054, "min": 10.977, "max": 11.251, "p50": 11.245, "p95": 11.25, "p99": 11.25, "ci_95_lower": 11.208, "ci_95_upper": 11.252 }, "ttft": { "mean": 3.744, "std": 1.427, "p50": 3.333, "p90": 4.496 }, "tokens": { "total_generated": 12288, "content_tokens": 12288, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1091.4, "concurrent_content_tps": 1091.4, "requests_per_second": 2.13, "actual_wall_time": 11.259, "efficiency_percent": 99.74 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 24.0, "avg_batch_throughput": 1091.4, "min_batch_throughput": 1091.4, "max_batch_throughput": 1091.4 } }, { "config": { "input_tokens": 2500, "output_tokens": 512, "batch_size": 32, "num_batches": 1, "total_requests": 32, "actual_input_tokens": 3053 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 32, "failed_requests": 0 }, "latency": { "mean": 12.53, "std": 0.038, "min": 12.424, "max": 12.571, "p50": 12.546, "p95": 12.568, "p99": 12.57, "ci_95_lower": 12.517, "ci_95_upper": 12.544 }, "ttft": { "mean": 4.884, "std": 1.795, "p50": 4.274, "p90": 6.106 }, "tokens": { "total_generated": 16384, "content_tokens": 16384, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1302.77, "concurrent_content_tps": 1302.77, "requests_per_second": 2.54, "actual_wall_time": 12.576, "efficiency_percent": 99.63 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 32.0, "avg_batch_throughput": 1302.77, "min_batch_throughput": 1302.77, "max_batch_throughput": 1302.77 } }, { "config": { "input_tokens": 2500, "output_tokens": 512, "batch_size": 64, "num_batches": 1, "total_requests": 64, "actual_input_tokens": 3053 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 64, "failed_requests": 0 }, "latency": { "mean": 16.78, "std": 0.07, "min": 16.507, "max": 16.953, "p50": 16.778, "p95": 16.934, "p99": 16.95, "ci_95_lower": 16.763, "ci_95_upper": 16.797 }, "ttft": { "mean": 6.451, "std": 2.606, "p50": 5.536, "p90": 10.157 }, "tokens": { "total_generated": 32768, "content_tokens": 32768, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1931.74, "concurrent_content_tps": 1931.74, "requests_per_second": 3.77, "actual_wall_time": 16.963, "efficiency_percent": 98.92 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 64.0, "avg_batch_throughput": 1931.74, "min_batch_throughput": 1931.74, "max_batch_throughput": 1931.74 } }, { "config": { "input_tokens": 5000, "output_tokens": 512, "batch_size": 1, "num_batches": 1, "total_requests": 1, "actual_input_tokens": 6024 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 1, "failed_requests": 0 }, "latency": { "mean": 3.725, "std": 0.0, "min": 3.725, "max": 3.725, "p50": 3.725, "p95": 3.725, "p99": 3.725, "ci_95_lower": 3.725, "ci_95_upper": 3.725 }, "ttft": { "mean": 1.855, "std": 0.0, "p50": 1.855, "p90": 1.855 }, "tokens": { "total_generated": 512, "content_tokens": 512, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 137.46, "concurrent_content_tps": 137.46, "requests_per_second": 0.27, "actual_wall_time": 3.725, "efficiency_percent": 100.0 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 1.0, "avg_batch_throughput": 137.46, "min_batch_throughput": 137.46, "max_batch_throughput": 137.46 } }, { "config": { "input_tokens": 5000, "output_tokens": 512, "batch_size": 8, "num_batches": 1, "total_requests": 8, "actual_input_tokens": 6024 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 8, "failed_requests": 0 }, "latency": { "mean": 7.418, "std": 0.058, "min": 7.269, "max": 7.448, "p50": 7.444, "p95": 7.447, "p99": 7.448, "ci_95_lower": 7.378, "ci_95_upper": 7.458 }, "ttft": { "mean": 3.301, "std": 1.58, "p50": 2.914, "p90": 4.562 }, "tokens": { "total_generated": 4096, "content_tokens": 4096, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 549.89, "concurrent_content_tps": 549.89, "requests_per_second": 1.07, "actual_wall_time": 7.449, "efficiency_percent": 99.58 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 8.0, "avg_batch_throughput": 549.89, "min_batch_throughput": 549.89, "max_batch_throughput": 549.89 } }, { "config": { "input_tokens": 5000, "output_tokens": 512, "batch_size": 16, "num_batches": 1, "total_requests": 16, "actual_input_tokens": 6024 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 16, "failed_requests": 0 }, "latency": { "mean": 9.992, "std": 0.024, "min": 9.937, "max": 10.019, "p50": 10.001, "p95": 10.016, "p99": 10.019, "ci_95_lower": 9.98, "ci_95_upper": 10.003 }, "ttft": { "mean": 3.948, "std": 1.636, "p50": 3.491, "p90": 5.599 }, "tokens": { "total_generated": 8192, "content_tokens": 8192, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 817.4, "concurrent_content_tps": 817.4, "requests_per_second": 1.6, "actual_wall_time": 10.022, "efficiency_percent": 99.7 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 16.0, "avg_batch_throughput": 817.4, "min_batch_throughput": 817.4, "max_batch_throughput": 817.4 } }, { "config": { "input_tokens": 5000, "output_tokens": 512, "batch_size": 24, "num_batches": 1, "total_requests": 24, "actual_input_tokens": 6024 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 24, "failed_requests": 0 }, "latency": { "mean": 12.189, "std": 0.038, "min": 12.013, "max": 12.21, "p50": 12.197, "p95": 12.209, "p99": 12.21, "ci_95_lower": 12.174, "ci_95_upper": 12.204 }, "ttft": { "mean": 4.238, "std": 1.059, "p50": 3.938, "p90": 5.769 }, "tokens": { "total_generated": 12288, "content_tokens": 12288, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1005.93, "concurrent_content_tps": 1005.93, "requests_per_second": 1.96, "actual_wall_time": 12.216, "efficiency_percent": 99.78 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 24.0, "avg_batch_throughput": 1005.93, "min_batch_throughput": 1005.93, "max_batch_throughput": 1005.93 } }, { "config": { "input_tokens": 5000, "output_tokens": 512, "batch_size": 32, "num_batches": 1, "total_requests": 32, "actual_input_tokens": 6024 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 32, "failed_requests": 0 }, "latency": { "mean": 13.535, "std": 0.07, "min": 13.146, "max": 13.563, "p50": 13.546, "p95": 13.56, "p99": 13.563, "ci_95_lower": 13.511, "ci_95_upper": 13.559 }, "ttft": { "mean": 4.996, "std": 1.647, "p50": 4.524, "p90": 6.854 }, "tokens": { "total_generated": 16384, "content_tokens": 16384, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1207.9, "concurrent_content_tps": 1207.9, "requests_per_second": 2.36, "actual_wall_time": 13.564, "efficiency_percent": 99.78 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 32.0, "avg_batch_throughput": 1207.9, "min_batch_throughput": 1207.9, "max_batch_throughput": 1207.9 } }, { "config": { "input_tokens": 5000, "output_tokens": 512, "batch_size": 64, "num_batches": 1, "total_requests": 64, "actual_input_tokens": 6024 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 64, "failed_requests": 0 }, "latency": { "mean": 18.236, "std": 0.071, "min": 17.676, "max": 18.258, "p50": 18.245, "p95": 18.257, "p99": 18.258, "ci_95_lower": 18.218, "ci_95_upper": 18.253 }, "ttft": { "mean": 6.521, "std": 2.744, "p50": 5.802, "p90": 8.623 }, "tokens": { "total_generated": 32768, "content_tokens": 32768, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1793.73, "concurrent_content_tps": 1793.73, "requests_per_second": 3.5, "actual_wall_time": 18.268, "efficiency_percent": 99.82 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 64.0, "avg_batch_throughput": 1793.73, "min_batch_throughput": 1793.73, "max_batch_throughput": 1793.73 } }, { "config": { "input_tokens": 9000, "output_tokens": 512, "batch_size": 1, "num_batches": 1, "total_requests": 1, "actual_input_tokens": 10777 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 1, "failed_requests": 0 }, "latency": { "mean": 4.17, "std": 0.0, "min": 4.17, "max": 4.17, "p50": 4.17, "p95": 4.17, "p99": 4.17, "ci_95_lower": 4.17, "ci_95_upper": 4.17 }, "ttft": { "mean": 1.79, "std": 0.0, "p50": 1.79, "p90": 1.79 }, "tokens": { "total_generated": 512, "content_tokens": 512, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 122.79, "concurrent_content_tps": 122.79, "requests_per_second": 0.24, "actual_wall_time": 4.17, "efficiency_percent": 100.0 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 1.0, "avg_batch_throughput": 122.79, "min_batch_throughput": 122.79, "max_batch_throughput": 122.79 } }, { "config": { "input_tokens": 9000, "output_tokens": 512, "batch_size": 8, "num_batches": 1, "total_requests": 8, "actual_input_tokens": 10777 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 8, "failed_requests": 0 }, "latency": { "mean": 7.837, "std": 0.011, "min": 7.808, "max": 7.846, "p50": 7.84, "p95": 7.845, "p99": 7.846, "ci_95_lower": 7.829, "ci_95_upper": 7.845 }, "ttft": { "mean": 2.73, "std": 0.413, "p50": 2.727, "p90": 3.176 }, "tokens": { "total_generated": 4096, "content_tokens": 4096, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 521.94, "concurrent_content_tps": 521.94, "requests_per_second": 1.02, "actual_wall_time": 7.848, "efficiency_percent": 99.86 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 8.0, "avg_batch_throughput": 521.94, "min_batch_throughput": 521.94, "max_batch_throughput": 521.94 } }, { "config": { "input_tokens": 9000, "output_tokens": 512, "batch_size": 16, "num_batches": 1, "total_requests": 16, "actual_input_tokens": 10777 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 16, "failed_requests": 0 }, "latency": { "mean": 10.825, "std": 0.051, "min": 10.645, "max": 10.858, "p50": 10.843, "p95": 10.856, "p99": 10.858, "ci_95_lower": 10.8, "ci_95_upper": 10.85 }, "ttft": { "mean": 3.809, "std": 0.481, "p50": 3.923, "p90": 4.335 }, "tokens": { "total_generated": 8192, "content_tokens": 8192, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 754.44, "concurrent_content_tps": 754.44, "requests_per_second": 1.47, "actual_wall_time": 10.858, "efficiency_percent": 99.69 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 16.0, "avg_batch_throughput": 754.44, "min_batch_throughput": 754.44, "max_batch_throughput": 754.44 } }, { "config": { "input_tokens": 9000, "output_tokens": 512, "batch_size": 24, "num_batches": 1, "total_requests": 24, "actual_input_tokens": 10777 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 24, "failed_requests": 0 }, "latency": { "mean": 13.617, "std": 0.082, "min": 13.31, "max": 13.728, "p50": 13.61, "p95": 13.726, "p99": 13.727, "ci_95_lower": 13.585, "ci_95_upper": 13.65 }, "ttft": { "mean": 5.393, "std": 2.261, "p50": 4.893, "p90": 8.595 }, "tokens": { "total_generated": 12288, "content_tokens": 12288, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 895.03, "concurrent_content_tps": 895.03, "requests_per_second": 1.75, "actual_wall_time": 13.729, "efficiency_percent": 99.18 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 24.0, "avg_batch_throughput": 895.03, "min_batch_throughput": 895.03, "max_batch_throughput": 895.03 } }, { "config": { "input_tokens": 9000, "output_tokens": 512, "batch_size": 32, "num_batches": 1, "total_requests": 32, "actual_input_tokens": 10777 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 32, "failed_requests": 0 }, "latency": { "mean": 15.071, "std": 0.058, "min": 14.788, "max": 15.128, "p50": 15.075, "p95": 15.121, "p99": 15.127, "ci_95_lower": 15.051, "ci_95_upper": 15.091 }, "ttft": { "mean": 6.012, "std": 2.11, "p50": 5.568, "p90": 9.381 }, "tokens": { "total_generated": 16384, "content_tokens": 16384, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1082.91, "concurrent_content_tps": 1082.91, "requests_per_second": 2.12, "actual_wall_time": 15.13, "efficiency_percent": 99.61 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 32.0, "avg_batch_throughput": 1082.91, "min_batch_throughput": 1082.91, "max_batch_throughput": 1082.91 } }, { "config": { "input_tokens": 9000, "output_tokens": 512, "batch_size": 64, "num_batches": 1, "total_requests": 64, "actual_input_tokens": 10777 }, "success_metrics": { "success_rate": 100.0, "successful_requests": 64, "failed_requests": 0 }, "latency": { "mean": 20.583, "std": 0.133, "min": 19.9, "max": 20.765, "p50": 20.581, "p95": 20.731, "p99": 20.762, "ci_95_lower": 20.55, "ci_95_upper": 20.615 }, "ttft": { "mean": 7.616, "std": 2.21, "p50": 7.112, "p90": 9.38 }, "tokens": { "total_generated": 32768, "content_tokens": 32768, "reasoning_tokens": 0, "avg_per_request": 512.0 }, "throughput": { "concurrent_total_tps": 1575.94, "concurrent_content_tps": 1575.94, "requests_per_second": 3.08, "actual_wall_time": 20.793, "efficiency_percent": 98.99 }, "batch_metrics": { "num_batches": 1, "avg_batch_size": 64.0, "avg_batch_throughput": 1575.94, "min_batch_throughput": 1575.94, "max_batch_throughput": 1575.94 } } ] }