diff --git a/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-tool-calling.zip b/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-tool-calling.zip new file mode 100644 index 0000000..a8a4f8c Binary files /dev/null and b/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-tool-calling.zip differ diff --git a/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-v3.yaml b/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-v3.yaml new file mode 100644 index 0000000..7dc9ece --- /dev/null +++ b/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-v3.yaml @@ -0,0 +1,33 @@ +version_name: "v-gpt-120b-v3" +version_description: "" +version_labels: + import: "0a5d8365-be73-4ab7-9933-2fb93468a8de" + model-names: "openai/gpt-oss-120b" + openai-compatible: "true" +environment: "python3-12" +instance_type: "16gb_8vcpu_rtxpro" +static_ip: False +minimum_instances: 1 +maximum_instances: 1 +maximum_idle_time: 10 +request_retention_mode: "full" +request_retention_time: 2419200 +maximum_queue_size: 100000 +scaling_strategy: "default" +instance_processes: 20 +health_check: + path: "/health" + port: 8000 + timeout: 3 + interval: 5 + failure_threshold: 3 +ports: [] +version_environment_variables: + - name: "VLLM_USE_V1" + value: "1" + - name: "MODEL_NAME" + value: "openai/gpt-oss-120b" + - name: "GPU_MEMORY_UTILIZATION" + value: "0.90" + - name: "MAX_MODEL_LEN" + value: "125000" \ No newline at end of file diff --git a/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-v3.zip b/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-v3.zip new file mode 100644 index 0000000..77a4d33 Binary files /dev/null and b/ubiops-deployments/deployments/deployment-gpt-oss-chat/versions/deployment_gpt-oss-120b_version_v-gpt-120b-v3.zip differ diff --git a/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/deployment_bge-m3.yaml b/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/deployment_bge-m3.yaml new file mode 100644 index 0000000..99d438f --- /dev/null +++ b/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/deployment_bge-m3.yaml @@ -0,0 +1,10 @@ +deployment_name: "bge-m3" +deployment_description: "" +deployment_labels: {} +default_version: "v3" +supports_request_format: True +input_type: "plain" +input_fields: [] +output_type: "plain" +output_fields: [] +deployment_environment_variables: [] \ No newline at end of file diff --git a/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/versions/deployment_bge-m3_version_v3.yaml b/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/versions/deployment_bge-m3_version_v3.yaml new file mode 100644 index 0000000..908011f --- /dev/null +++ b/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/versions/deployment_bge-m3_version_v3.yaml @@ -0,0 +1,22 @@ +version_name: "v3" +version_description: "" +version_labels: {} +environment: "python3-13" +instance_type: "8gb_2vcpu_rtxpro_1mig" +static_ip: False +minimum_instances: 1 +maximum_instances: 1 +maximum_idle_time: 300 +request_retention_mode: "metadata" +request_retention_time: 2419200 +maximum_queue_size: 100000 +scaling_strategy: "default" +instance_processes: 1 +health_check: + path: "/health" + port: 8000 + timeout: 3 + interval: 5 + failure_threshold: 3 +ports: [] +version_environment_variables: [] \ No newline at end of file diff --git a/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/versions/deployment_bge-m3_version_v3.zip b/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/versions/deployment_bge-m3_version_v3.zip new file mode 100644 index 0000000..a8b304f Binary files /dev/null and b/ubiops-deployments/deployments/deployments-embedder/deployment_bge-m3/versions/deployment_bge-m3_version_v3.zip differ diff --git a/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/deployment_llm-proxy.yaml b/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/deployment_llm-proxy.yaml new file mode 100644 index 0000000..4b1ac22 --- /dev/null +++ b/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/deployment_llm-proxy.yaml @@ -0,0 +1,15 @@ +deployment_name: "llm-proxy" +deployment_description: "" +deployment_labels: + type: "llm-proxy" + created-by: "ubiops" +default_version: "v11" +supports_request_format: True +input_type: "plain" +input_fields: [] +output_type: "plain" +output_fields: [] +deployment_environment_variables: + - name: "UBIOPS_API_TOKEN" + value: "" + secret: True \ No newline at end of file diff --git a/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/versions/deployment_llm-proxy_version_v11.yaml b/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/versions/deployment_llm-proxy_version_v11.yaml new file mode 100644 index 0000000..cc6ab47 --- /dev/null +++ b/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/versions/deployment_llm-proxy_version_v11.yaml @@ -0,0 +1,16 @@ +version_name: "v11" +version_description: "Support for gpt oss + gpt oss x2 + gpt oss x3 + gemma. No cooldowns" +version_labels: {} +environment: "python3-12" +instance_type: "4096mb" +static_ip: False +minimum_instances: 1 +maximum_instances: 2 +maximum_idle_time: 300 +request_retention_mode: "full" +request_retention_time: 2419200 +maximum_queue_size: 100000 +scaling_strategy: "default" +instance_processes: 1 +ports: [] +version_environment_variables: [] \ No newline at end of file diff --git a/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/versions/deployment_llm-proxy_version_v11.zip b/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/versions/deployment_llm-proxy_version_v11.zip new file mode 100644 index 0000000..9ea9860 Binary files /dev/null and b/ubiops-deployments/deployments/deployments-proxies/deployment_llm-proxy/versions/deployment_llm-proxy_version_v11.zip differ diff --git a/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/deployment_proxy-gpt-oss-batch-3x.yaml b/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/deployment_proxy-gpt-oss-batch-3x.yaml new file mode 100644 index 0000000..3d450c9 --- /dev/null +++ b/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/deployment_proxy-gpt-oss-batch-3x.yaml @@ -0,0 +1,11 @@ +deployment_name: "proxy-gpt-oss-batch-3x" +deployment_description: "" +deployment_labels: + type: "llm-proxy" +default_version: "v1" +supports_request_format: True +input_type: "plain" +input_fields: [] +output_type: "plain" +output_fields: [] +deployment_environment_variables: [] \ No newline at end of file diff --git a/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/versions/deployment_proxy-gpt-oss-batch-3x_version_v1.yaml b/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/versions/deployment_proxy-gpt-oss-batch-3x_version_v1.yaml new file mode 100644 index 0000000..3826f6c --- /dev/null +++ b/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/versions/deployment_proxy-gpt-oss-batch-3x_version_v1.yaml @@ -0,0 +1,24 @@ +version_name: "v1" +version_description: "Requests 2 GPT Oss\u0027es" +version_labels: + type: "llm-proxy" +environment: "python3-12" +instance_type: "4096mb" +static_ip: False +minimum_instances: 1 +maximum_instances: 3 +maximum_idle_time: 300 +request_retention_mode: "full" +request_retention_time: 2419200 +maximum_queue_size: 100000 +scaling_strategy: "default" +instance_processes: 18 +ports: [] +version_environment_variables: + - name: "UBIOPS_API_TOKEN" + value: "" + secret: True + - name: "UBIOPS_DEPLOYMENT_VERSIONS" + value: "[[\"gpt-oss-120b\",\"v-gpt-120b-tool-calling-max-12\"],[\"gpt-oss-120b\",\"v-gpt-120b-tool-calling\"],[\"gpt-oss-120b\",\"v-gpt-120b-v3\"]]" + - name: "POLL_INTERVAL" + value: "1" \ No newline at end of file diff --git a/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/versions/deployment_proxy-gpt-oss-batch-3x_version_v1.zip b/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/versions/deployment_proxy-gpt-oss-batch-3x_version_v1.zip new file mode 100644 index 0000000..e884980 Binary files /dev/null and b/ubiops-deployments/deployments/deployments-proxies/deployment_proxy-gpt-oss-batch-3x/versions/deployment_proxy-gpt-oss-batch-3x_version_v1.zip differ diff --git a/ubiops-deployments/info.yaml b/ubiops-deployments/info.yaml new file mode 100644 index 0000000..16c46da --- /dev/null +++ b/ubiops-deployments/info.yaml @@ -0,0 +1,3 @@ +format_spec: v8.0 +metadata: + export_date: 2026-06-02T07:52:13.765643+00:00 \ No newline at end of file