58 lines
1.4 KiB
JSON
58 lines
1.4 KiB
JSON
{
|
|
"timestamp": "2026-03-12T09:40:09.623487",
|
|
"model_name": "openai-gpt-oss-120b",
|
|
"results": [
|
|
{
|
|
"config": {
|
|
"input_tokens": 50000,
|
|
"output_tokens": 512,
|
|
"batch_size": 64,
|
|
"num_batches": 1,
|
|
"total_requests": 64,
|
|
"actual_input_tokens": 40613
|
|
},
|
|
"success_metrics": {
|
|
"success_rate": 26.56,
|
|
"successful_requests": 17,
|
|
"failed_requests": 47
|
|
},
|
|
"latency": {
|
|
"mean": 101.447,
|
|
"std": 0.463,
|
|
"min": 100.168,
|
|
"max": 102.338,
|
|
"p50": 101.438,
|
|
"p95": 102.129,
|
|
"p99": 102.296,
|
|
"ci_95_lower": 101.227,
|
|
"ci_95_upper": 101.668
|
|
},
|
|
"ttft": {
|
|
"mean": 82.918,
|
|
"std": 6.278,
|
|
"p50": 82.795,
|
|
"p90": 86.47
|
|
},
|
|
"tokens": {
|
|
"total_generated": 8704,
|
|
"content_tokens": 6245,
|
|
"reasoning_tokens": 2459,
|
|
"avg_per_request": 512.0
|
|
},
|
|
"throughput": {
|
|
"concurrent_total_tps": 84.04,
|
|
"concurrent_content_tps": 60.3,
|
|
"requests_per_second": 0.16,
|
|
"actual_wall_time": 103.567,
|
|
"efficiency_percent": 26.02
|
|
},
|
|
"batch_metrics": {
|
|
"num_batches": 1,
|
|
"avg_batch_size": 17.0,
|
|
"avg_batch_throughput": 84.04,
|
|
"min_batch_throughput": 84.04,
|
|
"max_batch_throughput": 84.04
|
|
}
|
|
}
|
|
]
|
|
} |