mindef-overdracht/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-tool-calling-max-12.yaml
2026-06-02 11:46:29 +02:00

33 lines
815 B
YAML

# Identity of this deployment version.
version_name: "v-gpt-120b-tool-calling-max-12"
version_description: ""

# Labels attached to the version. Values stay quoted so that "true" remains
# a string instead of being retyped as a boolean.
# NOTE(review): nesting reconstructed from a flattened file; the three keys
# below are assumed to be the label entries. Confirm against the deployed
# version.
version_labels:
  import: "0a5d8365-be73-4ab7-9933-2fb93468a8de"
  model-names: "openai/gpt-oss-120b"
  openai-compatible: "true"
# Runtime environment and instance type the version runs on.
environment: "python3-12"
instance_type: "16gb_8vcpu_rtxpro"
# Canonical lowercase boolean (was `False`).
static_ip: false

# Scaling: minimum equals maximum, so the instance count is fixed at one.
minimum_instances: 1
maximum_instances: 1
maximum_idle_time: 10
scaling_strategy: "default"
instance_processes: 20

# Request handling.
request_retention_mode: "full"
# Presumably seconds (2419200 s = 28 days). TODO confirm the unit.
request_retention_time: 2419200
maximum_queue_size: 100000
# Health probe settings for the served endpoint.
# NOTE(review): nesting reconstructed from a flattened file; `path` through
# `failure_threshold` are assumed to belong to `health_check`. Confirm
# against the deployment version schema.
health_check:
  path: "/health"
  port: 8000
  timeout: 3
  interval: 5
  failure_threshold: 3

# Explicit empty list: no ports are declared for this version.
ports: []
# Environment variables defined for this version. Each list item is one
# name/value pair; values are quoted so numeric-looking settings such as
# "0.90" and "125000" stay strings and keep their exact spelling.
version_environment_variables:
  - name: "VLLM_USE_V1"
    value: "1"
  - name: "MODEL_NAME"
    value: "openai/gpt-oss-120b"
  - name: "GPU_MEMORY_UTILIZATION"
    value: "0.90"
  - name: "MAX_MODEL_LEN"
    value: "125000"