Import part 17
This commit is contained in:
parent
bf0fe7eb5a
commit
901a8c8407
Binary file not shown.
@ -0,0 +1,33 @@
|
|||||||
|
version_name: "v-gpt-120b-v3"
|
||||||
|
version_description: ""
|
||||||
|
version_labels:
|
||||||
|
import: "0a5d8365-be73-4ab7-9933-2fb93468a8de"
|
||||||
|
model-names: "openai/gpt-oss-120b"
|
||||||
|
openai-compatible: "true"
|
||||||
|
environment: "python3-12"
|
||||||
|
instance_type: "16gb_8vcpu_rtxpro"
|
||||||
|
static_ip: False
|
||||||
|
minimum_instances: 1
|
||||||
|
maximum_instances: 1
|
||||||
|
maximum_idle_time: 10
|
||||||
|
request_retention_mode: "full"
|
||||||
|
request_retention_time: 2419200
|
||||||
|
maximum_queue_size: 100000
|
||||||
|
scaling_strategy: "default"
|
||||||
|
instance_processes: 20
|
||||||
|
health_check:
|
||||||
|
path: "/health"
|
||||||
|
port: 8000
|
||||||
|
timeout: 3
|
||||||
|
interval: 5
|
||||||
|
failure_threshold: 3
|
||||||
|
ports: []
|
||||||
|
version_environment_variables:
|
||||||
|
- name: "VLLM_USE_V1"
|
||||||
|
value: "1"
|
||||||
|
- name: "MODEL_NAME"
|
||||||
|
value: "openai/gpt-oss-120b"
|
||||||
|
- name: "GPU_MEMORY_UTILIZATION"
|
||||||
|
value: "0.90"
|
||||||
|
- name: "MAX_MODEL_LEN"
|
||||||
|
value: "125000"
|
||||||
Binary file not shown.
@ -0,0 +1,10 @@
|
|||||||
|
deployment_name: "bge-m3"
|
||||||
|
deployment_description: ""
|
||||||
|
deployment_labels: {}
|
||||||
|
default_version: "v3"
|
||||||
|
supports_request_format: True
|
||||||
|
input_type: "plain"
|
||||||
|
input_fields: []
|
||||||
|
output_type: "plain"
|
||||||
|
output_fields: []
|
||||||
|
deployment_environment_variables: []
|
||||||
@ -0,0 +1,22 @@
|
|||||||
|
version_name: "v3"
|
||||||
|
version_description: ""
|
||||||
|
version_labels: {}
|
||||||
|
environment: "python3-13"
|
||||||
|
instance_type: "8gb_2vcpu_rtxpro_1mig"
|
||||||
|
static_ip: False
|
||||||
|
minimum_instances: 1
|
||||||
|
maximum_instances: 1
|
||||||
|
maximum_idle_time: 300
|
||||||
|
request_retention_mode: "metadata"
|
||||||
|
request_retention_time: 2419200
|
||||||
|
maximum_queue_size: 100000
|
||||||
|
scaling_strategy: "default"
|
||||||
|
instance_processes: 1
|
||||||
|
health_check:
|
||||||
|
path: "/health"
|
||||||
|
port: 8000
|
||||||
|
timeout: 3
|
||||||
|
interval: 5
|
||||||
|
failure_threshold: 3
|
||||||
|
ports: []
|
||||||
|
version_environment_variables: []
|
||||||
Binary file not shown.
@ -0,0 +1,15 @@
|
|||||||
|
deployment_name: "llm-proxy"
|
||||||
|
deployment_description: ""
|
||||||
|
deployment_labels:
|
||||||
|
type: "llm-proxy"
|
||||||
|
created-by: "ubiops"
|
||||||
|
default_version: "v11"
|
||||||
|
supports_request_format: True
|
||||||
|
input_type: "plain"
|
||||||
|
input_fields: []
|
||||||
|
output_type: "plain"
|
||||||
|
output_fields: []
|
||||||
|
deployment_environment_variables:
|
||||||
|
- name: "UBIOPS_API_TOKEN"
|
||||||
|
value: ""
|
||||||
|
secret: True
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
version_name: "v11"
|
||||||
|
version_description: "Support for gpt oss + gpt oss x2 + gpt oss x3 + gemma. No cooldowns"
|
||||||
|
version_labels: {}
|
||||||
|
environment: "python3-12"
|
||||||
|
instance_type: "4096mb"
|
||||||
|
static_ip: False
|
||||||
|
minimum_instances: 1
|
||||||
|
maximum_instances: 2
|
||||||
|
maximum_idle_time: 300
|
||||||
|
request_retention_mode: "full"
|
||||||
|
request_retention_time: 2419200
|
||||||
|
maximum_queue_size: 100000
|
||||||
|
scaling_strategy: "default"
|
||||||
|
instance_processes: 1
|
||||||
|
ports: []
|
||||||
|
version_environment_variables: []
|
||||||
Binary file not shown.
@ -0,0 +1,11 @@
|
|||||||
|
deployment_name: "proxy-gpt-oss-batch-3x"
|
||||||
|
deployment_description: ""
|
||||||
|
deployment_labels:
|
||||||
|
type: "llm-proxy"
|
||||||
|
default_version: "v1"
|
||||||
|
supports_request_format: True
|
||||||
|
input_type: "plain"
|
||||||
|
input_fields: []
|
||||||
|
output_type: "plain"
|
||||||
|
output_fields: []
|
||||||
|
deployment_environment_variables: []
|
||||||
@ -0,0 +1,24 @@
|
|||||||
|
version_name: "v1"
|
||||||
|
version_description: "Requests 2 GPT Oss\u0027es"
|
||||||
|
version_labels:
|
||||||
|
type: "llm-proxy"
|
||||||
|
environment: "python3-12"
|
||||||
|
instance_type: "4096mb"
|
||||||
|
static_ip: False
|
||||||
|
minimum_instances: 1
|
||||||
|
maximum_instances: 3
|
||||||
|
maximum_idle_time: 300
|
||||||
|
request_retention_mode: "full"
|
||||||
|
request_retention_time: 2419200
|
||||||
|
maximum_queue_size: 100000
|
||||||
|
scaling_strategy: "default"
|
||||||
|
instance_processes: 18
|
||||||
|
ports: []
|
||||||
|
version_environment_variables:
|
||||||
|
- name: "UBIOPS_API_TOKEN"
|
||||||
|
value: ""
|
||||||
|
secret: True
|
||||||
|
- name: "UBIOPS_DEPLOYMENT_VERSIONS"
|
||||||
|
value: "[[\"gpt-oss-120b\",\"v-gpt-120b-tool-calling-max-12\"],[\"gpt-oss-120b\",\"v-gpt-120b-tool-calling\"],[\"gpt-oss-120b\",\"v-gpt-120b-v3\"]]"
|
||||||
|
- name: "POLL_INTERVAL"
|
||||||
|
value: "1"
|
||||||
Binary file not shown.
3
ubiops-deployments/info.yaml
Normal file
3
ubiops-deployments/info.yaml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
format_spec: v8.0
|
||||||
|
metadata:
|
||||||
|
export_date: 2026-06-02T07:52:13.765643+00:00
|
||||||
Loading…
Reference in New Issue
Block a user