# Viewer metadata captured with this file: YAML, 33 lines, 808 B.
---
# Deployment version configuration: identity, labels, instance sizing,
# scaling limits, request retention, health check, and the environment
# variables handed to the runtime.
#
# NOTE(review): this file was recovered from a copy whose indentation was
# lost; the nesting below is reconstructed from key naming and should be
# confirmed against the consuming platform's schema.

version_name: "v-gpt-120b-tool-calling"
version_description: ""

# Free-form labels attached to this version. Values are quoted so they stay
# strings (e.g. "true" is a label value here, not a boolean).
# NOTE(review): the three kebab-case keys are assumed to be the label
# entries — confirm.
version_labels:
  import: "0a5d8365-be73-4ab7-9933-2fb93468a8de"
  model-names: "openai/gpt-oss-120b"
  openai-compatible: "true"

environment: "python3-12"
instance_type: "16gb_8vcpu_rtxpro"
static_ip: false

# Minimum and maximum are both 1, so the instance count is pinned at one.
minimum_instances: 1
maximum_instances: 1
# NOTE(review): unit not shown in this file — presumably seconds; confirm.
maximum_idle_time: 10

request_retention_mode: "full"
# 2419200 = 28 * 24 * 3600, i.e. 28 days if the unit is seconds — confirm.
request_retention_time: 2419200
maximum_queue_size: 100000
scaling_strategy: "default"
instance_processes: 20

# NOTE(review): the five keys below are assumed to belong to health_check;
# timeout/interval units are not shown here — confirm.
health_check:
  path: "/health"
  port: 8000
  timeout: 3
  interval: 5
  failure_threshold: 3

# Explicit empty list (a bare `ports:` would parse as null instead).
ports: []

# Environment variables as name/value pairs. Values are quoted so numeric
# looking settings ("1", "0.90", "125000") are kept as strings exactly as
# written, including the trailing zero in "0.90".
version_environment_variables:
  - name: "VLLM_USE_V1"
    value: "1"
  - name: "MODEL_NAME"
    value: "openai/gpt-oss-120b"
  - name: "GPU_MEMORY_UTILIZATION"
    value: "0.90"
  - name: "MAX_MODEL_LEN"
    value: "125000"