Import part 17

This commit is contained in:
kvanbezouw 2026-06-02 11:46:30 +02:00
parent bf0fe7eb5a
commit 901a8c8407
13 changed files with 134 additions and 0 deletions

View File

@ -0,0 +1,33 @@
# Version "v-gpt-120b-v3": serving configuration for the openai/gpt-oss-120b
# model (named in both the model-names label and the MODEL_NAME variable).
version_name: "v-gpt-120b-v3"
version_description: ""
version_labels:
  import: "0a5d8365-be73-4ab7-9933-2fb93468a8de"
  # NOTE(review): same value as the MODEL_NAME variable below; presumably the
  # two must be kept in sync - confirm.
  model-names: "openai/gpt-oss-120b"
  # Quoted on purpose: this label value is the text "true", not a boolean.
  openai-compatible: "true"
environment: "python3-12"
instance_type: "16gb_8vcpu_rtxpro"
static_ip: false
# Minimum equals maximum, so there is no room to scale between them.
minimum_instances: 1
maximum_instances: 1
maximum_idle_time: 10
request_retention_mode: "full"
# 2419200 = 28 * 24 * 3600; presumably seconds (28 days) - confirm the unit.
request_retention_time: 2419200
maximum_queue_size: 100000
scaling_strategy: "default"
instance_processes: 20
health_check:
  path: "/health"
  # NOTE(review): presumably the port the model server listens on - confirm.
  port: 8000
  timeout: 3
  interval: 5
  failure_threshold: 3
ports: []
# Every value is quoted so it stays a string instead of being parsed as a
# number ("1", "0.90", "125000").
version_environment_variables:
  - name: "VLLM_USE_V1"
    value: "1"
  - name: "MODEL_NAME"
    value: "openai/gpt-oss-120b"
  - name: "GPU_MEMORY_UTILIZATION"
    value: "0.90"
  - name: "MAX_MODEL_LEN"
    value: "125000"

View File

@ -0,0 +1,10 @@
# Deployment "bge-m3": plain-text input and output, no labels and no
# deployment-level environment variables. New requests default to version "v3".
deployment_name: "bge-m3"
deployment_description: ""
deployment_labels: {}
default_version: "v3"
supports_request_format: true
# With "plain" input/output the field lists are empty.
input_type: "plain"
input_fields: []
output_type: "plain"
output_fields: []
deployment_environment_variables: []

View File

@ -0,0 +1,22 @@
# Version "v3": single-process configuration on a
# "8gb_2vcpu_rtxpro_1mig" instance type.
version_name: "v3"
version_description: ""
version_labels: {}
environment: "python3-13"
instance_type: "8gb_2vcpu_rtxpro_1mig"
static_ip: false
# Minimum equals maximum, so there is no room to scale between them.
minimum_instances: 1
maximum_instances: 1
maximum_idle_time: 300
# Only "metadata" is retained here (other versions in this export use "full").
request_retention_mode: "metadata"
# 2419200 = 28 * 24 * 3600; presumably seconds (28 days) - confirm the unit.
request_retention_time: 2419200
maximum_queue_size: 100000
scaling_strategy: "default"
instance_processes: 1
health_check:
  path: "/health"
  # NOTE(review): presumably the port the served process listens on - confirm.
  port: 8000
  timeout: 3
  interval: 5
  failure_threshold: 3
ports: []
version_environment_variables: []

View File

@ -0,0 +1,15 @@
# Deployment "llm-proxy": plain-text input and output, labelled as an
# llm-proxy. New requests default to version "v11".
deployment_name: "llm-proxy"
deployment_description: ""
deployment_labels:
  type: "llm-proxy"
  created-by: "ubiops"
default_version: "v11"
supports_request_format: true
input_type: "plain"
input_fields: []
output_type: "plain"
output_fields: []
deployment_environment_variables:
  # The value is empty and the variable is flagged secret.
  # NOTE(review): presumably the token is stripped on export and has to be
  # set again after import - confirm.
  - name: "UBIOPS_API_TOKEN"
    value: ""
    secret: true

View File

@ -0,0 +1,16 @@
# Version "v11": one process per instance on a "4096mb" instance type,
# between one and two instances. No health check and no version-level
# environment variables are defined.
version_name: "v11"
version_description: "Support for gpt oss + gpt oss x2 + gpt oss x3 + gemma. No cooldowns"
version_labels: {}
environment: "python3-12"
instance_type: "4096mb"
static_ip: false
minimum_instances: 1
maximum_instances: 2
maximum_idle_time: 300
request_retention_mode: "full"
# 2419200 = 28 * 24 * 3600; presumably seconds (28 days) - confirm the unit.
request_retention_time: 2419200
maximum_queue_size: 100000
scaling_strategy: "default"
instance_processes: 1
ports: []
version_environment_variables: []

View File

@ -0,0 +1,11 @@
# Deployment "proxy-gpt-oss-batch-3x": plain-text input and output, labelled
# as an llm-proxy. New requests default to version "v1".
deployment_name: "proxy-gpt-oss-batch-3x"
deployment_description: ""
deployment_labels:
  type: "llm-proxy"
default_version: "v1"
supports_request_format: true
input_type: "plain"
input_fields: []
output_type: "plain"
output_fields: []
deployment_environment_variables: []

View File

@ -0,0 +1,24 @@
# Version "v1": 18 processes per instance on a "4096mb" instance type,
# between one and three instances, labelled as an llm-proxy.
version_name: "v1"
# NOTE(review): the description says "2", but UBIOPS_DEPLOYMENT_VERSIONS
# below lists three deployment/version pairs - confirm which is intended.
# "\u0027" is the double-quoted escape for an apostrophe.
version_description: "Requests 2 GPT Oss\u0027es"
version_labels:
  type: "llm-proxy"
environment: "python3-12"
instance_type: "4096mb"
static_ip: false
minimum_instances: 1
maximum_instances: 3
maximum_idle_time: 300
request_retention_mode: "full"
# 2419200 = 28 * 24 * 3600; presumably seconds (28 days) - confirm the unit.
request_retention_time: 2419200
maximum_queue_size: 100000
scaling_strategy: "default"
instance_processes: 18
ports: []
version_environment_variables:
  # The value is empty and the variable is flagged secret.
  # NOTE(review): presumably the token is stripped on export and has to be
  # set again after import - confirm.
  - name: "UBIOPS_API_TOKEN"
    value: ""
    secret: true
  # A JSON array, stored as a string, of [deployment, version] pairs. All
  # three entries name the "gpt-oss-120b" deployment.
  - name: "UBIOPS_DEPLOYMENT_VERSIONS"
    value: "[[\"gpt-oss-120b\",\"v-gpt-120b-tool-calling-max-12\"],[\"gpt-oss-120b\",\"v-gpt-120b-tool-calling\"],[\"gpt-oss-120b\",\"v-gpt-120b-v3\"]]"
  # Quoted so the value stays the string "1".
  # NOTE(review): unit is not stated here - presumably seconds; confirm.
  - name: "POLL_INTERVAL"
    value: "1"

View File

@ -0,0 +1,3 @@
# Presumably the manifest of the export: the format version plus the time
# the export was created.
format_spec: v8.0
metadata:
  # Left unquoted so the loaded type does not change: YAML 1.1 loaders read
  # this ISO 8601 value as a timestamp rather than a string.
  export_date: 2026-06-02T07:52:13.765643+00:00