{ "apiVersion": "dashboard.grafana.app/v2", "kind": "Dashboard", "metadata": { "name": "vllm-perf", "namespace": "default", "uid": "AaGqVjAfCOd8D9DSWglyypyK6YhbvBOrEUJn5zrryYIX", "resourceVersion": "1775139425991994", "generation": 12, "creationTimestamp": "2026-02-24T11:40:30Z", "labels": { "grafana.app/deprecatedInternalID": "4994" }, "annotations": { "grafana.app/createdBy": "user:ffc8gwlm9q2gwb", "grafana.app/message": "Restored from version 4", "grafana.app/saved-from-ui": "Grafana v12.4.2 (ebade4c739)", "grafana.app/updatedBy": "user:ffc8gwlm9q2gwb", "grafana.app/updatedTimestamp": "2026-04-02T14:17:05Z", "grafana.app/folder": "" } }, "spec": { "annotations": [ { "kind": "AnnotationQuery", "spec": { "query": { "kind": "DataQuery", "group": "grafana", "version": "v0", "datasource": { "name": "-- Grafana --" }, "spec": {} }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "builtIn": true, "legacyOptions": { "type": "dashboard" } } } ], "cursorSync": "Off", "editable": true, "elements": { "panel-10": { "kind": "Panel", "spec": { "id": 10, "title": "Time to First Token (TTFT)", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.50, rate(vllm:time_to_first_token_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P50" } }, "refId": "A", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.95, rate(vllm:time_to_first_token_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P95" } }, "refId": "B", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.99, rate(vllm:time_to_first_token_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P99" } }, "refId": "C", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "s", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-11": { "kind": "Panel", "spec": { "id": 11, "title": "Inter-Token Latency (TPOT)", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.50, rate(vllm:inter_token_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P50" } }, "refId": "A", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.95, rate(vllm:inter_token_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P95" } }, "refId": "B", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.99, rate(vllm:inter_token_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P99" } }, "refId": "C", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "s", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-12": { "kind": "Panel", "spec": { "id": 12, "title": "End-to-End Request Latency", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.50, rate(vllm:e2e_request_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P50" } }, "refId": "A", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.95, rate(vllm:e2e_request_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P95" } }, "refId": "B", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.99, rate(vllm:e2e_request_latency_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "P99" } }, "refId": "C", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "s", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-13": { "kind": "Panel", "spec": { "id": 13, "title": "Request Processing Times (P95)", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.95, rate(vllm:request_queue_time_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "Queue Time P95" } }, "refId": "A", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.95, rate(vllm:request_prefill_time_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "Prefill Time P95" } }, "refId": "B", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.95, rate(vllm:request_decode_time_seconds_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "Decode Time P95" } }, "refId": "C", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "s", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-15": { "kind": "Panel", "spec": { "id": 15, "title": "Token Throughput", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "rate(vllm:prompt_tokens_total{deployments=\"$deployments\"}[5m])", "legendFormat": "Prompt Tokens/sec" } }, "refId": "A", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "rate(vllm:generation_tokens_total{deployments=\"$deployments\"}[5m])", "legendFormat": "Generation Tokens/sec" } }, "refId": "B", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "ops", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-16": { "kind": "Panel", "spec": { "id": 16, "title": "Request Token Lengths", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.50, rate(vllm:request_prompt_tokens_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "Prompt Length P50" } }, "refId": "A", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.95, rate(vllm:request_prompt_tokens_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "Prompt Length P95" } }, "refId": "B", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.50, rate(vllm:request_generation_tokens_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "Generation Length P50" } }, "refId": "C", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "histogram_quantile(0.95, rate(vllm:request_generation_tokens_bucket{deployments=\"$deployments\"}[5m]))", "legendFormat": "Generation Length P95" } }, "refId": "D", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "short", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-18": { "kind": "Panel", "spec": { "id": 18, "title": "Prefix Cache Activity", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "rate(vllm:prefix_cache_queries_total{deployments=\"$deployments\"}[5m])", "legendFormat": "Queries" } }, "refId": "A", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "rate(vllm:prefix_cache_hits_total{deployments=\"$deployments\"}[5m])", "legendFormat": "Hits" } }, "refId": "B", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.3.3", "spec": { "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "ops", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-19": { "kind": "Panel", "spec": { "id": 19, "title": "Prefix Cache Hit Rate", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "rate(vllm:prefix_cache_hits_total{deployments=\"$deployments\"}[5m]) / rate(vllm:prefix_cache_queries_total{deployments=\"$deployments\"}[5m])" } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "gauge", "version": "12.3.3", "spec": { "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" }, "fieldConfig": { "defaults": { "unit": "percentunit", "min": 0, "max": 1, "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "red" }, { "value": 0.5, "color": "yellow" }, { "value": 0.8, "color": "green" } ] }, "color": { "mode": "thresholds" } }, "overrides": [] } } } } }, "panel-2": { "kind": "Panel", "spec": { "id": 2, "title": "Requests Running", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "vllm:num_requests_running{deployments=\"$deployments\"}" } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "stat", "version": "12.4.0", "spec": { "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "fieldConfig": { "defaults": { "unit": "short", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" }, { "value": 5, "color": "yellow" }, { "value": 20, "color": "red" } ] }, "color": { "mode": "thresholds" } }, "overrides": [] } } } } }, "panel-21": { "kind": "Panel", "spec": { "id": 21, "title": "Requests Per Minute (RPM)", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "editorMode": "code", "expr": "sum(rate(vllm:request_success_total{deployments=\"$deployments\"}[1m])) * 60", "legendFormat": "RPM", "range": true } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "reqpm", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Requests/min", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-22": { "kind": "Panel", "spec": { "id": 22, "title": "Input Tokens Per Minute (ITPM)", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "rate(vllm:prompt_tokens_total{deployments=\"$deployments\"}[1m]) * 60", "legendFormat": "ITPM" } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "short", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Input Tokens/min", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-23": { "kind": "Panel", "spec": { "id": 23, "title": "Output Tokens Per Minute (OTPM)", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "rate(vllm:generation_tokens_total{deployments=\"$deployments\"}[1m]) * 60", "legendFormat": "OTPM" } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "short", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Output Tokens/min", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-3": { "kind": "Panel", "spec": { "id": 3, "title": "Requests Waiting", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "vllm:num_requests_waiting{deployments=\"$deployments\"}" } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "stat", "version": "12.4.0", "spec": { "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "fieldConfig": { "defaults": { "unit": "short", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" }, { "value": 10, "color": "yellow" }, { "value": 50, "color": "red" } ] }, "color": { "mode": "thresholds" } }, "overrides": [] } } } } }, "panel-4": { "kind": "Panel", "spec": { "id": 4, "title": "KV Cache Usage", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "vllm:kv_cache_usage_perc{deployments=\"$deployments\"}" } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "stat", "version": "12.4.0", "spec": { "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "fieldConfig": { "defaults": { "unit": "percentunit", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "thresholds" } }, "overrides": [] } } } } }, "panel-5": { "kind": "Panel", "spec": { "id": 5, "title": "Request Rate", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "editorMode": "code", "expr": "rate(vllm:request_success_total{deployments=\"$deployments\"}[$__rate_interval])", "range": true } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "fieldConfig": { "defaults": { "unit": "reqps", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-6": { "kind": "Panel", "spec": { "id": 6, "title": "Tokens Generated/sec", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "rate(vllm:generation_tokens_total{deployments=\"$deployments\"}[5m])" } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "stat", "version": "12.4.0", "spec": { "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "fieldConfig": { "defaults": { "unit": "ops", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "thresholds" } }, "overrides": [] } } } } }, "panel-7": { "kind": "Panel", "spec": { "id": 7, "title": "Request States Over Time", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "vllm:num_requests_running{deployments=\"$deployments\"}", "legendFormat": "Running" } }, "refId": "A", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "vllm:num_requests_waiting{deployments=\"$deployments\"}", "legendFormat": "Waiting" } }, "refId": "B", "hidden": false } }, { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "expr": "vllm:num_requests_swapped{deployments=\"$deployments\"}", "legendFormat": "Swapped" } }, "refId": "C", "hidden": false } } ], "transformations": [], "queryOptions": {} } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "short", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } }, "panel-8": { "kind": "Panel", "spec": { "id": 8, "title": "KV Cache Usage Over Time", "description": "", "links": [], "data": { "kind": "QueryGroup", "spec": { "queries": [ { "kind": "PanelQuery", "spec": { "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "editorMode": "code", "expr": "vllm:kv_cache_usage_perc{deployments=\"$deployments\"}", "legendFormat": "KV Cache Usage", "range": true } }, "refId": "A", "hidden": false } } ], "transformations": [], "queryOptions": { "maxDataPoints": 11000, "interval": "1m" } } }, "vizConfig": { "kind": "VizConfig", "group": "timeseries", "version": "12.4.0", "spec": { "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "sortBy": "Name", "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "fieldConfig": { "defaults": { "unit": "percentunit", "thresholds": { "mode": "absolute", "steps": [ { "value": 0, "color": "green" } ] }, "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } } }, "overrides": [] } } } } } }, "layout": { "kind": "RowsLayout", "spec": { "rows": [ { "kind": "RowsLayoutRow", "spec": { "title": "Request Stats", "collapse": false, "layout": { "kind": "GridLayout", "spec": { "items": [ { "kind": "GridLayoutItem", "spec": { "x": 0, "y": 0, "width": 4, "height": 4, "element": { "kind": "ElementReference", "name": "panel-2" } } }, { "kind": "GridLayoutItem", "spec": { "x": 4, "y": 0, "width": 4, "height": 4, "element": { "kind": "ElementReference", "name": "panel-3" } } }, { "kind": "GridLayoutItem", "spec": { "x": 8, "y": 0, "width": 4, "height": 4, "element": { "kind": "ElementReference", "name": "panel-4" } } }, { "kind": "GridLayoutItem", "spec": { "x": 12, "y": 0, "width": 6, "height": 4, "element": { "kind": "ElementReference", "name": "panel-5" } } }, { "kind": "GridLayoutItem", "spec": { "x": 18, "y": 0, "width": 6, "height": 4, "element": { "kind": "ElementReference", "name": "panel-6" } } }, { "kind": "GridLayoutItem", "spec": { "x": 0, "y": 4, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-7" } } }, { "kind": "GridLayoutItem", "spec": { "x": 12, "y": 4, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-8" } } } ] } } } }, { "kind": "RowsLayoutRow", "spec": { "title": "Per-Minute Metrics (RPM/ITPM/OTPM)", "collapse": false, "layout": { "kind": "GridLayout", "spec": { "items": [ { "kind": "GridLayoutItem", "spec": { "x": 0, "y": 0, "width": 8, "height": 8, "element": { "kind": "ElementReference", "name": "panel-21" } } }, { "kind": "GridLayoutItem", "spec": { "x": 8, "y": 0, "width": 8, "height": 8, "element": { "kind": "ElementReference", "name": "panel-22" } } }, { "kind": "GridLayoutItem", "spec": { "x": 16, "y": 0, "width": 8, "height": 8, "element": { "kind": "ElementReference", "name": "panel-23" } } } ] } } } }, { "kind": "RowsLayoutRow", "spec": { "title": "Latency Metrics", "collapse": false, "layout": { "kind": "GridLayout", "spec": { "items": [ { "kind": "GridLayoutItem", "spec": { "x": 0, "y": 0, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-10" } } }, { "kind": "GridLayoutItem", "spec": { "x": 12, "y": 0, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-11" } } }, { "kind": "GridLayoutItem", "spec": { "x": 0, "y": 8, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-12" } } }, { "kind": "GridLayoutItem", "spec": { "x": 12, "y": 8, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-13" } } } ] } } } }, { "kind": "RowsLayoutRow", "spec": { "title": "Token Metrics", "collapse": false, "layout": { "kind": "GridLayout", "spec": { "items": [ { "kind": "GridLayoutItem", "spec": { "x": 0, "y": 0, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-15" } } }, { "kind": "GridLayoutItem", "spec": { "x": 12, "y": 0, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-16" } } } ] } } } }, { "kind": "RowsLayoutRow", "spec": { "title": "Prefix Cache", "collapse": false, "layout": { "kind": "GridLayout", "spec": { "items": [ { "kind": "GridLayoutItem", "spec": { "x": 0, "y": 0, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-18" } } }, { "kind": "GridLayoutItem", "spec": { "x": 12, "y": 0, "width": 12, "height": 8, "element": { "kind": "ElementReference", "name": "panel-19" } } } ] } } } } ] } }, "links": [], "liveNow": false, "preload": false, "tags": [ "vllm", "llm", "inference" ], "timeSettings": { "timezone": "browser", "from": "now-1h", "to": "now", "autoRefresh": "5s", "autoRefreshIntervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "hideTimepicker": false, "fiscalYearStartMonth": 0 }, "title": "vLLM Performance Dashboard", "variables": [ { "kind": "DatasourceVariable", "spec": { "name": "DS_PROMETHEUS", "pluginId": "prometheus", "refresh": "onDashboardLoad", "regex": "", "current": { "text": "default", "value": "default" }, "options": [], "multi": false, "includeAll": false, "label": "Data Source", "hide": "dontHide", "skipUrlSync": false, "allowCustomValue": true } }, { "kind": "QueryVariable", "spec": { "name": "deployments", "current": { "text": "mistral-medium", "value": "mistral-medium" }, "label": "Deployment", "hide": "dontHide", "refresh": "onDashboardLoad", "skipUrlSync": false, "query": { "kind": "DataQuery", "group": "prometheus", "version": "v0", "datasource": { "name": "${DS_PROMETHEUS}" }, "spec": { "query": "label_values(vllm:num_requests_running, deployments)", "refId": "PrometheusVariableQueryEditor-VariableQuery" } }, "regex": "", "regexApplyTo": "value", "sort": "disabled", "definition": "label_values(vllm:num_requests_running, deployments)", "options": [], "multi": false, "includeAll": false, "allowCustomValue": true } } ] } }