mindef-overdracht/llm-throughput-tests-mindef-metadateren/results/results_openai-gpt-oss-120b-2x/benchmark_results.json
2026-06-02 11:46:24 +02:00

58 lines
1.4 KiB
JSON

{
"timestamp": "2026-03-25T17:31:40.541181",
"model_name": "openai-gpt-oss-120b-2x",
"results": [
{
"config": {
"input_tokens": 50000,
"output_tokens": 1024,
"batch_size": 64,
"num_batches": 1,
"total_requests": 64,
"actual_input_tokens": 40443
},
"success_metrics": {
"success_rate": 50.0,
"successful_requests": 32,
"failed_requests": 32
},
"latency": {
"mean": 105.486,
"std": 0.648,
"min": 104.136,
"max": 106.488,
"p50": 105.577,
"p95": 106.447,
"p99": 106.484,
"ci_95_lower": 105.262,
"ci_95_upper": 105.711
},
"ttft": {
"mean": 105.486,
"std": 0.648,
"p50": 105.577,
"p90": 106.345
},
"tokens": {
"total_generated": 32768,
"content_tokens": 32768,
"reasoning_tokens": 0,
"avg_per_request": 1024.0
},
"throughput": {
"concurrent_total_tps": 307.59,
"concurrent_content_tps": 307.59,
"requests_per_second": 0.3,
"actual_wall_time": 106.533,
"efficiency_percent": 49.51
},
"batch_metrics": {
"num_batches": 1,
"avg_batch_size": 32.0,
"avg_batch_throughput": 307.59,
"min_batch_throughput": 307.59,
"max_batch_throughput": 307.59
}
}
]
}