# Repository-viewer header (not part of the configuration itself):
# mindef-overdracht/llm-throughput-tests-mindef-metadateren/benchmark_config.yaml
# 2026-06-02 11:46:20 +02:00
#
# 70 lines
# 2.1 KiB
# YAML

endpoint:
  # Endpoint under test. Keep exactly one url / api_key / model_name triple
  # uncommented: duplicate keys are invalid YAML and most parsers silently
  # keep only the last occurrence.

  # internal litellm ubiops
  # url: https://46e73bba-0ed9-4853-b2b0-d4509aaab06b.services.external.0a71m37v.ubiops.io/v1
  # api_key:
  # model_name: openai-gpt-oss-120b-max-16

  # url: https://46e73bba-0ed9-4853-b2b0-d4509aaab06b.services.external.0a71m37v.ubiops.io/v1
  # api_key:
  # model_name: openai-gpt-oss-120b

  # Currently active target.
  url: https://46e73bba-0ed9-4853-b2b0-d4509aaab06b.services.external.0a71m37v.ubiops.io/v1
  # Deliberately empty in version control (explicit null instead of a bare
  # `api_key:`). Fill in locally; never commit a real credential.
  # NOTE(review): how the benchmark script treats a null key is not visible
  # here - confirm before running.
  api_key: null
  model_name: openai-gpt-oss-120b-2x

  # url: https://b60dd657-9ce2-4ba0-ad45-754b5be29238.services.external.0a71m37v.ubiops.io/v1
  # api_key:
  # model_name: openai/gpt-oss-120b

  # staging litellm
  # url: https://f1dfa3fc-3314-4d49-be06-98bfd3d1f5fd.services.staging.ubiops.dev/v1
  # api_key:
  # model_name: llama-1b

  # staging vllm
  # url: https://dde9ea35-6a02-4242-a3f3-5a7e7e29e7a7.services.staging.ubiops.dev/v1
  # api_key:
  # model_name: meta-llama/Llama-3.2-1B-Instruct
benchmark:
  # Workload definition: which prompt sizes and batch sizes are exercised.

  # Input token counts to test
  input_tokens: [50000]

  # Batch sizes to test (number of simultaneous requests per batch).
  # Each batch sends N requests at the exact same time.
  batch_sizes: [64]

  # NOTE(review): presumably the number of batches sent per batch size;
  # confirm against the benchmark script.
  num_batches: 1

  # Maximum output tokens per request
  output_tokens: 1024

  # Optional: path to a conversation dataset JSON file.
  # Generate it with: python create_test_dataset.py
  # Set to null to fall back to synthetic prompts.
  dataset: test_conversations.json

  # Optional: custom text to use as input for all requests.
  # The same text is used for every request (input_tokens is ignored).
  # Priority: text > dataset > generated prompts
  # Example: "Analyze this document about machine learning..."
  text: null
runtime:
  # Execution settings for the benchmark run (timeouts, pacing, logging,
  # readiness polling).

  # Timeout for each request (seconds)
  request_timeout: 1800

  # Delay between benchmark runs (seconds)
  delay_between_runs: 5

  # Enable detailed I/O logging (input prompts + outputs).
  # NOTE(review): with long prompts this can produce large logs and may
  # capture sensitive text - confirm this is wanted for the run.
  log_io: true

  # Wait for model initialization before starting
  wait_for_ready: true

  # Maximum initialization check attempts
  max_init_retries: 10

  # Delay between initialization checks (seconds).
  # NOTE(review): together with max_init_retries this presumably bounds the
  # readiness wait to about max_init_retries * init_retry_delay seconds -
  # confirm against the benchmark script.
  init_retry_delay: 30