Import part 17
This commit is contained in:
parent
bf0fe7eb5a
commit
901a8c8407
Binary file not shown.
@ -0,0 +1,33 @@
|
||||
version_name: "v-gpt-120b-v3"
|
||||
version_description: ""
|
||||
version_labels:
|
||||
import: "0a5d8365-be73-4ab7-9933-2fb93468a8de"
|
||||
model-names: "openai/gpt-oss-120b"
|
||||
openai-compatible: "true"
|
||||
environment: "python3-12"
|
||||
instance_type: "16gb_8vcpu_rtxpro"
|
||||
static_ip: False
|
||||
minimum_instances: 1
|
||||
maximum_instances: 1
|
||||
maximum_idle_time: 10
|
||||
request_retention_mode: "full"
|
||||
request_retention_time: 2419200
|
||||
maximum_queue_size: 100000
|
||||
scaling_strategy: "default"
|
||||
instance_processes: 20
|
||||
health_check:
|
||||
path: "/health"
|
||||
port: 8000
|
||||
timeout: 3
|
||||
interval: 5
|
||||
failure_threshold: 3
|
||||
ports: []
|
||||
version_environment_variables:
|
||||
- name: "VLLM_USE_V1"
|
||||
value: "1"
|
||||
- name: "MODEL_NAME"
|
||||
value: "openai/gpt-oss-120b"
|
||||
- name: "GPU_MEMORY_UTILIZATION"
|
||||
value: "0.90"
|
||||
- name: "MAX_MODEL_LEN"
|
||||
value: "125000"
|
||||
Binary file not shown.
@ -0,0 +1,10 @@
|
||||
deployment_name: "bge-m3"
|
||||
deployment_description: ""
|
||||
deployment_labels: {}
|
||||
default_version: "v3"
|
||||
supports_request_format: True
|
||||
input_type: "plain"
|
||||
input_fields: []
|
||||
output_type: "plain"
|
||||
output_fields: []
|
||||
deployment_environment_variables: []
|
||||
@ -0,0 +1,22 @@
|
||||
version_name: "v3"
|
||||
version_description: ""
|
||||
version_labels: {}
|
||||
environment: "python3-13"
|
||||
instance_type: "8gb_2vcpu_rtxpro_1mig"
|
||||
static_ip: False
|
||||
minimum_instances: 1
|
||||
maximum_instances: 1
|
||||
maximum_idle_time: 300
|
||||
request_retention_mode: "metadata"
|
||||
request_retention_time: 2419200
|
||||
maximum_queue_size: 100000
|
||||
scaling_strategy: "default"
|
||||
instance_processes: 1
|
||||
health_check:
|
||||
path: "/health"
|
||||
port: 8000
|
||||
timeout: 3
|
||||
interval: 5
|
||||
failure_threshold: 3
|
||||
ports: []
|
||||
version_environment_variables: []
|
||||
Binary file not shown.
@ -0,0 +1,15 @@
|
||||
deployment_name: "llm-proxy"
|
||||
deployment_description: ""
|
||||
deployment_labels:
|
||||
type: "llm-proxy"
|
||||
created-by: "ubiops"
|
||||
default_version: "v11"
|
||||
supports_request_format: True
|
||||
input_type: "plain"
|
||||
input_fields: []
|
||||
output_type: "plain"
|
||||
output_fields: []
|
||||
deployment_environment_variables:
|
||||
- name: "UBIOPS_API_TOKEN"
|
||||
value: ""
|
||||
secret: True
|
||||
@ -0,0 +1,16 @@
|
||||
version_name: "v11"
|
||||
version_description: "Support for gpt oss + gpt oss x2 + gpt oss x3 + gemma. No cooldowns"
|
||||
version_labels: {}
|
||||
environment: "python3-12"
|
||||
instance_type: "4096mb"
|
||||
static_ip: False
|
||||
minimum_instances: 1
|
||||
maximum_instances: 2
|
||||
maximum_idle_time: 300
|
||||
request_retention_mode: "full"
|
||||
request_retention_time: 2419200
|
||||
maximum_queue_size: 100000
|
||||
scaling_strategy: "default"
|
||||
instance_processes: 1
|
||||
ports: []
|
||||
version_environment_variables: []
|
||||
Binary file not shown.
@ -0,0 +1,11 @@
|
||||
deployment_name: "proxy-gpt-oss-batch-3x"
|
||||
deployment_description: ""
|
||||
deployment_labels:
|
||||
type: "llm-proxy"
|
||||
default_version: "v1"
|
||||
supports_request_format: True
|
||||
input_type: "plain"
|
||||
input_fields: []
|
||||
output_type: "plain"
|
||||
output_fields: []
|
||||
deployment_environment_variables: []
|
||||
@ -0,0 +1,24 @@
|
||||
version_name: "v1"
|
||||
version_description: "Requests 2 GPT Oss\u0027es"
|
||||
version_labels:
|
||||
type: "llm-proxy"
|
||||
environment: "python3-12"
|
||||
instance_type: "4096mb"
|
||||
static_ip: False
|
||||
minimum_instances: 1
|
||||
maximum_instances: 3
|
||||
maximum_idle_time: 300
|
||||
request_retention_mode: "full"
|
||||
request_retention_time: 2419200
|
||||
maximum_queue_size: 100000
|
||||
scaling_strategy: "default"
|
||||
instance_processes: 18
|
||||
ports: []
|
||||
version_environment_variables:
|
||||
- name: "UBIOPS_API_TOKEN"
|
||||
value: ""
|
||||
secret: True
|
||||
- name: "UBIOPS_DEPLOYMENT_VERSIONS"
|
||||
value: "[[\"gpt-oss-120b\",\"v-gpt-120b-tool-calling-max-12\"],[\"gpt-oss-120b\",\"v-gpt-120b-tool-calling\"],[\"gpt-oss-120b\",\"v-gpt-120b-v3\"]]"
|
||||
- name: "POLL_INTERVAL"
|
||||
value: "1"
|
||||
Binary file not shown.
3
ubiops-deployments/info.yaml
Normal file
3
ubiops-deployments/info.yaml
Normal file
@ -0,0 +1,3 @@
|
||||
format_spec: v8.0
|
||||
metadata:
|
||||
export_date: 2026-06-02T07:52:13.765643+00:00
|
||||
Loading…
Reference in New Issue
Block a user