mindef-overdracht/grafana/vllm-metrics/dashboard.json
2026-06-02 11:46:19 +02:00

2618 lines
82 KiB
JSON

{
"apiVersion": "dashboard.grafana.app/v2",
"kind": "Dashboard",
"metadata": {
"name": "vllm-perf",
"namespace": "default",
"uid": "AaGqVjAfCOd8D9DSWglyypyK6YhbvBOrEUJn5zrryYIX",
"resourceVersion": "1775139425991994",
"generation": 12,
"creationTimestamp": "2026-02-24T11:40:30Z",
"labels": {
"grafana.app/deprecatedInternalID": "4994"
},
"annotations": {
"grafana.app/createdBy": "user:ffc8gwlm9q2gwb",
"grafana.app/message": "Restored from version 4",
"grafana.app/saved-from-ui": "Grafana v12.4.2 (ebade4c739)",
"grafana.app/updatedBy": "user:ffc8gwlm9q2gwb",
"grafana.app/updatedTimestamp": "2026-04-02T14:17:05Z",
"grafana.app/folder": ""
}
},
"spec": {
"annotations": [
{
"kind": "AnnotationQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "grafana",
"version": "v0",
"datasource": {
"name": "-- Grafana --"
},
"spec": {}
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"builtIn": true,
"legacyOptions": {
"type": "dashboard"
}
}
}
],
"cursorSync": "Off",
"editable": true,
"elements": {
"panel-10": {
"kind": "Panel",
"spec": {
"id": 10,
"title": "Time to First Token (TTFT)",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.50, rate(vllm:time_to_first_token_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P50"
}
},
"refId": "A",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.95, rate(vllm:time_to_first_token_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P95"
}
},
"refId": "B",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.99, rate(vllm:time_to_first_token_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P99"
}
},
"refId": "C",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-11": {
"kind": "Panel",
"spec": {
"id": 11,
"title": "Inter-Token Latency (TPOT)",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.50, rate(vllm:inter_token_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P50"
}
},
"refId": "A",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.95, rate(vllm:inter_token_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P95"
}
},
"refId": "B",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.99, rate(vllm:inter_token_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P99"
}
},
"refId": "C",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-12": {
"kind": "Panel",
"spec": {
"id": 12,
"title": "End-to-End Request Latency",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.50, rate(vllm:e2e_request_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P50"
}
},
"refId": "A",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.95, rate(vllm:e2e_request_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P95"
}
},
"refId": "B",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.99, rate(vllm:e2e_request_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "P99"
}
},
"refId": "C",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-13": {
"kind": "Panel",
"spec": {
"id": 13,
"title": "Request Processing Times (P95)",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.95, rate(vllm:request_queue_time_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "Queue Time P95"
}
},
"refId": "A",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.95, rate(vllm:request_prefill_time_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "Prefill Time P95"
}
},
"refId": "B",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.95, rate(vllm:request_decode_time_seconds_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "Decode Time P95"
}
},
"refId": "C",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-15": {
"kind": "Panel",
"spec": {
"id": 15,
"title": "Token Throughput",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "rate(vllm:prompt_tokens_total{deployments=\"$deployments\"}[5m])",
"legendFormat": "Prompt Tokens/sec"
}
},
"refId": "A",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "rate(vllm:generation_tokens_total{deployments=\"$deployments\"}[5m])",
"legendFormat": "Generation Tokens/sec"
}
},
"refId": "B",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "ops",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-16": {
"kind": "Panel",
"spec": {
"id": 16,
"title": "Request Token Lengths",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.50, rate(vllm:request_prompt_tokens_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "Prompt Length P50"
}
},
"refId": "A",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.95, rate(vllm:request_prompt_tokens_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "Prompt Length P95"
}
},
"refId": "B",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.50, rate(vllm:request_generation_tokens_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "Generation Length P50"
}
},
"refId": "C",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "histogram_quantile(0.95, rate(vllm:request_generation_tokens_bucket{deployments=\"$deployments\"}[5m]))",
"legendFormat": "Generation Length P95"
}
},
"refId": "D",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-18": {
"kind": "Panel",
"spec": {
"id": 18,
"title": "Prefix Cache Activity",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "rate(vllm:prefix_cache_queries_total{deployments=\"$deployments\"}[5m])",
"legendFormat": "Queries"
}
},
"refId": "A",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "rate(vllm:prefix_cache_hits_total{deployments=\"$deployments\"}[5m])",
"legendFormat": "Hits"
}
},
"refId": "B",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.3.3",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "ops",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-19": {
"kind": "Panel",
"spec": {
"id": 19,
"title": "Prefix Cache Hit Rate",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "rate(vllm:prefix_cache_hits_total{deployments=\"$deployments\"}[5m]) / rate(vllm:prefix_cache_queries_total{deployments=\"$deployments\"}[5m])"
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "gauge",
"version": "12.3.3",
"spec": {
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"fieldConfig": {
"defaults": {
"unit": "percentunit",
"min": 0,
"max": 1,
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "red"
},
{
"value": 0.5,
"color": "yellow"
},
{
"value": 0.8,
"color": "green"
}
]
},
"color": {
"mode": "thresholds"
}
},
"overrides": []
}
}
}
}
},
"panel-2": {
"kind": "Panel",
"spec": {
"id": 2,
"title": "Requests Running",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "vllm:num_requests_running{deployments=\"$deployments\"}"
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "stat",
"version": "12.4.0",
"spec": {
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
},
{
"value": 5,
"color": "yellow"
},
{
"value": 20,
"color": "red"
}
]
},
"color": {
"mode": "thresholds"
}
},
"overrides": []
}
}
}
}
},
"panel-21": {
"kind": "Panel",
"spec": {
"id": 21,
"title": "Requests Per Minute (RPM)",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"editorMode": "code",
"expr": "sum(rate(vllm:request_success_total{deployments=\"$deployments\"}[1m])) * 60",
"legendFormat": "RPM",
"range": true
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "reqpm",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Requests/min",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-22": {
"kind": "Panel",
"spec": {
"id": 22,
"title": "Input Tokens Per Minute (ITPM)",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "rate(vllm:prompt_tokens_total{deployments=\"$deployments\"}[1m]) * 60",
"legendFormat": "ITPM"
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Input Tokens/min",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-23": {
"kind": "Panel",
"spec": {
"id": 23,
"title": "Output Tokens Per Minute (OTPM)",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "rate(vllm:generation_tokens_total{deployments=\"$deployments\"}[1m]) * 60",
"legendFormat": "OTPM"
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max",
"last"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Output Tokens/min",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-3": {
"kind": "Panel",
"spec": {
"id": 3,
"title": "Requests Waiting",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "vllm:num_requests_waiting{deployments=\"$deployments\"}"
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "stat",
"version": "12.4.0",
"spec": {
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
},
{
"value": 10,
"color": "yellow"
},
{
"value": 50,
"color": "red"
}
]
},
"color": {
"mode": "thresholds"
}
},
"overrides": []
}
}
}
}
},
"panel-4": {
"kind": "Panel",
"spec": {
"id": 4,
"title": "KV Cache Usage",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "vllm:kv_cache_usage_perc{deployments=\"$deployments\"}"
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "stat",
"version": "12.4.0",
"spec": {
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"fieldConfig": {
"defaults": {
"unit": "percentunit",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "thresholds"
}
},
"overrides": []
}
}
}
}
},
"panel-5": {
"kind": "Panel",
"spec": {
"id": 5,
"title": "Request Rate",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"editorMode": "code",
"expr": "rate(vllm:request_success_total{deployments=\"$deployments\"}[$__rate_interval])",
"range": true
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"fieldConfig": {
"defaults": {
"unit": "reqps",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-6": {
"kind": "Panel",
"spec": {
"id": 6,
"title": "Tokens Generated/sec",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "rate(vllm:generation_tokens_total{deployments=\"$deployments\"}[5m])"
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "stat",
"version": "12.4.0",
"spec": {
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"fieldConfig": {
"defaults": {
"unit": "ops",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "thresholds"
}
},
"overrides": []
}
}
}
}
},
"panel-7": {
"kind": "Panel",
"spec": {
"id": 7,
"title": "Request States Over Time",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "vllm:num_requests_running{deployments=\"$deployments\"}",
"legendFormat": "Running"
}
},
"refId": "A",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "vllm:num_requests_waiting{deployments=\"$deployments\"}",
"legendFormat": "Waiting"
}
},
"refId": "B",
"hidden": false
}
},
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"expr": "vllm:num_requests_swapped{deployments=\"$deployments\"}",
"legendFormat": "Swapped"
}
},
"refId": "C",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
},
"panel-8": {
"kind": "Panel",
"spec": {
"id": 8,
"title": "KV Cache Usage Over Time",
"description": "",
"links": [],
"data": {
"kind": "QueryGroup",
"spec": {
"queries": [
{
"kind": "PanelQuery",
"spec": {
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"editorMode": "code",
"expr": "vllm:kv_cache_usage_perc{deployments=\"$deployments\"}",
"legendFormat": "KV Cache Usage",
"range": true
}
},
"refId": "A",
"hidden": false
}
}
],
"transformations": [],
"queryOptions": {
"maxDataPoints": 11000,
"interval": "1m"
}
}
},
"vizConfig": {
"kind": "VizConfig",
"group": "timeseries",
"version": "12.4.0",
"spec": {
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"fieldConfig": {
"defaults": {
"unit": "percentunit",
"thresholds": {
"mode": "absolute",
"steps": [
{
"value": 0,
"color": "green"
}
]
},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
}
},
"overrides": []
}
}
}
}
}
},
"layout": {
"kind": "RowsLayout",
"spec": {
"rows": [
{
"kind": "RowsLayoutRow",
"spec": {
"title": "Request Stats",
"collapse": false,
"layout": {
"kind": "GridLayout",
"spec": {
"items": [
{
"kind": "GridLayoutItem",
"spec": {
"x": 0,
"y": 0,
"width": 4,
"height": 4,
"element": {
"kind": "ElementReference",
"name": "panel-2"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 4,
"y": 0,
"width": 4,
"height": 4,
"element": {
"kind": "ElementReference",
"name": "panel-3"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 8,
"y": 0,
"width": 4,
"height": 4,
"element": {
"kind": "ElementReference",
"name": "panel-4"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 12,
"y": 0,
"width": 6,
"height": 4,
"element": {
"kind": "ElementReference",
"name": "panel-5"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 18,
"y": 0,
"width": 6,
"height": 4,
"element": {
"kind": "ElementReference",
"name": "panel-6"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 0,
"y": 4,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-7"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 12,
"y": 4,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-8"
}
}
}
]
}
}
}
},
{
"kind": "RowsLayoutRow",
"spec": {
"title": "Per-Minute Metrics (RPM/ITPM/OTPM)",
"collapse": false,
"layout": {
"kind": "GridLayout",
"spec": {
"items": [
{
"kind": "GridLayoutItem",
"spec": {
"x": 0,
"y": 0,
"width": 8,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-21"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 8,
"y": 0,
"width": 8,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-22"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 16,
"y": 0,
"width": 8,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-23"
}
}
}
]
}
}
}
},
{
"kind": "RowsLayoutRow",
"spec": {
"title": "Latency Metrics",
"collapse": false,
"layout": {
"kind": "GridLayout",
"spec": {
"items": [
{
"kind": "GridLayoutItem",
"spec": {
"x": 0,
"y": 0,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-10"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 12,
"y": 0,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-11"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 0,
"y": 8,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-12"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 12,
"y": 8,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-13"
}
}
}
]
}
}
}
},
{
"kind": "RowsLayoutRow",
"spec": {
"title": "Token Metrics",
"collapse": false,
"layout": {
"kind": "GridLayout",
"spec": {
"items": [
{
"kind": "GridLayoutItem",
"spec": {
"x": 0,
"y": 0,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-15"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 12,
"y": 0,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-16"
}
}
}
]
}
}
}
},
{
"kind": "RowsLayoutRow",
"spec": {
"title": "Prefix Cache",
"collapse": false,
"layout": {
"kind": "GridLayout",
"spec": {
"items": [
{
"kind": "GridLayoutItem",
"spec": {
"x": 0,
"y": 0,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-18"
}
}
},
{
"kind": "GridLayoutItem",
"spec": {
"x": 12,
"y": 0,
"width": 12,
"height": 8,
"element": {
"kind": "ElementReference",
"name": "panel-19"
}
}
}
]
}
}
}
}
]
}
},
"links": [],
"liveNow": false,
"preload": false,
"tags": [
"vllm",
"llm",
"inference"
],
"timeSettings": {
"timezone": "browser",
"from": "now-1h",
"to": "now",
"autoRefresh": "5s",
"autoRefreshIntervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"hideTimepicker": false,
"fiscalYearStartMonth": 0
},
"title": "vLLM Performance Dashboard",
"variables": [
{
"kind": "DatasourceVariable",
"spec": {
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"refresh": "onDashboardLoad",
"regex": "",
"current": {
"text": "default",
"value": "default"
},
"options": [],
"multi": false,
"includeAll": false,
"label": "Data Source",
"hide": "dontHide",
"skipUrlSync": false,
"allowCustomValue": true
}
},
{
"kind": "QueryVariable",
"spec": {
"name": "deployments",
"current": {
"text": "mistral-medium",
"value": "mistral-medium"
},
"label": "Deployment",
"hide": "dontHide",
"refresh": "onDashboardLoad",
"skipUrlSync": false,
"query": {
"kind": "DataQuery",
"group": "prometheus",
"version": "v0",
"datasource": {
"name": "${DS_PROMETHEUS}"
},
"spec": {
"query": "label_values(vllm:num_requests_running, deployments)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
}
},
"regex": "",
"regexApplyTo": "value",
"sort": "disabled",
"definition": "label_values(vllm:num_requests_running, deployments)",
"options": [],
"multi": false,
"includeAll": false,
"allowCustomValue": true
}
}
]
}
}