---
# Deployment version configuration for serving the model named in
# MODEL_NAME below.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML. The nesting below was reconstructed from key order
# and naming; confirm it against the consuming tool's schema.

version_name: "v-gpt-120b-tool-calling"
version_description: ""

# Free-form labels attached to this version.
# NOTE(review): the three kebab-case keys are assumed to be labels because
# they directly follow `version_labels:` and differ in naming style from the
# snake_case settings that follow; verify.
version_labels:
  import: "0a5d8365-be73-4ab7-9933-2fb93468a8de"
  model-names: "openai/gpt-oss-120b"
  # Quoted on purpose: this is the string "true", not a boolean.
  openai-compatible: "true"

# Runtime environment and hardware.
environment: "python3-12"
instance_type: "16gb_8vcpu_rtxpro"
static_ip: false

# Instance counts: minimum and maximum are equal, so the count is pinned at 1.
minimum_instances: 1
maximum_instances: 1
maximum_idle_time: 10
scaling_strategy: "default"
instance_processes: 20

# Request retention and queueing.
request_retention_mode: "full"
# 2419200 = 28 * 86400, i.e. 28 days if the unit is seconds (TODO confirm unit).
request_retention_time: 2419200
maximum_queue_size: 100000

# Health probe settings.
# NOTE(review): units of `timeout` and `interval` are not stated here; check
# the consumer's documentation before changing them.
health_check:
  path: "/health"
  port: 8000
  timeout: 3
  interval: 5
  failure_threshold: 3

# Explicit empty list (a bare `ports:` would parse as null instead).
ports: []

# Environment variables for the deployment, given as name/value pairs.
# All values are quoted so they stay strings rather than being parsed as
# numbers.
version_environment_variables:
  - name: "VLLM_USE_V1"
    value: "1"
  - name: "MODEL_NAME"
    value: "openai/gpt-oss-120b"
  - name: "GPU_MEMORY_UTILIZATION"
    value: "0.90"
  - name: "MAX_MODEL_LEN"
    value: "125000"