Skip to content

Commit

Permalink
Extend the VSHN Managed OpenShift queries to tolerate changes to unused labels
Browse files Browse the repository at this point in the history

We extend the queries with `max by (relevant_label_set)` where necessary
to ensure that changes to unused labels (e.g. `cluster_name`) don't
break the billing queries.

Note that the list of labels which we keep for `node_cpu_info` must
contain `core` and `cpu` for the vCPU-Hour billed clusters. The same
list must contain `core` but must not contain `cpu` for the Core-Hour
billed clusters.
  • Loading branch information
simu committed Oct 31, 2024
1 parent 7017b62 commit 2a81b8b
Show file tree
Hide file tree
Showing 5 changed files with 818 additions and 349 deletions.
42 changes: 28 additions & 14 deletions main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -507,21 +507,26 @@ parameters:
max_over_time(
# Sum the vCPUs by cluster
sum by(cluster_id, role) (
# Get the node vCPUs
node_cpu_info
# Get the node vCPUs (do not remove `cpu` and `core` from the
# `by` clause, otherwise the vCPU amount is incorrect)
max by (cluster_id, instance, cpu, core) (node_cpu_info)
# Limit to worker nodes only
* on (cluster_id, instance) group_left(role) (
# node_cpu_info and kube_node_role use different labels to identify the node.
label_join(kube_node_role{role="%(role)s"}, "instance", "", "node")
max by (role, instance, cluster_id) (
label_join(kube_node_role{role="%(role)s"}, "instance", "", "node")
)
)
)[59m:1m]
)
# Pull in the APPUiO managed info labels
* on(cluster_id) group_left(sales_order) appuio_managed_info{
* on(cluster_id) group_left(sales_order) max by (cluster_id, sales_order) (
appuio_managed_info{
cloud_provider=~"%(cloud_provider)s",
distribution=~"%(distribution)s",
vshn_service_level="%(vshn_service_level)s",
}
}
)
, "flavor_display", "%(flavor_display)s", "", "")
appuio_managed_core:
Expand All @@ -547,21 +552,26 @@ parameters:
max_over_time(
# Sum the cores by cluster and role
sum by(cluster_id, role) (
# Get the node cores (without hyperthreads)
max without (cpu) (node_cpu_info)
# Get the node cores (without hyperthreads, otherwise we'd
# include `cpu` in the `by`)
max by (cluster_id, instance, core) (node_cpu_info)
# Limit to worker nodes only
* on (cluster_id, instance) group_left(role) (
# node_cpu_info and kube_node_role use different labels to identify the node.
label_join(kube_node_role{role="worker"}, "instance", "", "node")
max by (cluster_id, instance, role) (
label_join(kube_node_role{role="worker"}, "instance", "", "node")
)
)
)[59m:1m]
)
# Pull in the APPUiO managed info labels
* on(cluster_id) group_left(sales_order) appuio_managed_info{
* on(cluster_id) group_left(sales_order) max by (cluster_id, sales_order) (
appuio_managed_info{
cloud_provider=~"%(cloud_provider)s",
distribution=~"%(distribution)s",
vshn_service_level="%(vshn_service_level)s",
}
}
)
legacy_appuio_rke_cluster:
enabled: true
Expand Down Expand Up @@ -1052,18 +1062,22 @@ parameters:
# Sum the vCPUs by cluster
sum by(cluster_id) (
# Get the node vCPUs
node_cpu_info
max by (cluster_id, instance, core, cpu) (node_cpu_info)
# Limit to worker nodes only
* on (cluster_id, instance) group_left(role) (
# node_cpu_info and kube_node_role use different labels to identify the node.
label_join(kube_node_role{role=~"app|storage"}, "instance", "", "node")
max by (cluster_id, instance, role) (
label_join(kube_node_role{role=~"app|storage"}, "instance", "", "node")
)
)
)[59m:1m]
)
# Pull in the APPUiO managed info labels
* on(cluster_id) group_left(sales_order) appuio_managed_info{
* on(cluster_id) group_left(sales_order) max by (cluster_id, sales_order) (
appuio_managed_info{
vshn_service_level=~"%(vshn_service_level)s",
cilium_addons=~".*%(cilium_addon)s.*"
}
}
)
, "cilium_addon", "%(cilium_addon)s", "", "")
, "addon_display", "%(addon_display)s", "", "")
81 changes: 72 additions & 9 deletions querytests/appuio_managed_core.jsonnet
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
local c = import 'promtest.libsonnet'; // provided by promtest-jsonnet
local c = import 'promtest.libsonnet'; // provided by promtest-jsonnet

local config = std.extVar("main.yml");
local queryPattern = config.parameters.appuio_reporting_aldebaran.rules.appuio_managed_core.query_pattern ;
local config = std.extVar('main.yml');
local queryPattern = config.parameters.appuio_reporting_aldebaran.rules.appuio_managed_core.query_pattern;

local appParams = {
cloud_provider: "baremetal",
distribution: "oke",
vshn_service_level: "best_effort",
cloud_provider: 'baremetal',
distribution: 'oke',
vshn_service_level: 'best_effort',
};

local commonLabels = {
Expand Down Expand Up @@ -56,9 +56,59 @@ local baseSeries = {
appuioInfoLabel: c.series('appuio_managed_info', infoLabels, '1x120'),
};

local displayNameChange = {
appNodeRoleLabel: c.series('kube_node_role', commonLabels {
node: 'app-test',
role: 'worker',
cluster_name: 'foo',
}, '1x60 _x60') + c.series('kube_node_role', commonLabels {
node: 'app-test',
role: 'worker',
cluster_name: 'Foo',
}, '_x60 1x60'),

appNodeCPUInfoLabel0: c.series('node_cpu_info', commonLabels {
instance: 'app-test',
cpu: '1',
core: '0',
cluster_name: 'foo',
}, '1x60 _x60') + c.series('node_cpu_info', commonLabels {
instance: 'app-test',
cpu: '1',
core: '0',
cluster_name: 'Foo',
}, '_x60 1x60'),
appNodeCPUInfoLabel2: c.series('node_cpu_info', commonLabels {
instance: 'app-test',
cpu: '2',
core: '0',
cluster_name: 'foo',
}, '1x60 _x60') + c.series('node_cpu_info', commonLabels {
instance: 'app-test',
cpu: '2',
core: '0',
cluster_name: 'Foo',
}, '_x60 1x60'),
appNodeCPUInfoLabel1: c.series('node_cpu_info', commonLabels {
instance: 'app-test',
cpu: '1',
core: '1',
cluster_name: 'foo',
}, '1x60 _x60') + c.series('node_cpu_info', commonLabels {
instance: 'app-test',
cpu: '1',
core: '1',
cluster_name: 'Foo',
}, '_x60 1x60'),

appuioInfoLabel:
c.series('appuio_managed_info', infoLabels { cluster_name: 'foo' }, '1x60 _x60') +
c.series('appuio_managed_info', infoLabels { cluster_name: 'Foo' }, '_x60 1x60'),
};

local baseCalculatedLabels = {
cluster_id: "c-managed-openshift",
sales_order: "SO123123",
cluster_id: 'c-managed-openshift',
sales_order: 'SO123123',
};

{
Expand All @@ -76,10 +126,23 @@ local baseCalculatedLabels = {
},
]
),
c.test(
'two app CPUs with display name change',
baseSeries + displayNameChange,
queryPattern % appParams,
[
{
labels: c.formatLabels(baseCalculatedLabels {
role: 'worker',
}),
value: 2,
},
]
),
c.test(
'no openshift',
baseSeries {
appuioInfoLabel: c.series('appuio_managed_info', infoLabels {distribution: 'openshift4'}, '1x120')
appuioInfoLabel: c.series('appuio_managed_info', infoLabels { distribution: 'openshift4' }, '1x120'),
},
queryPattern % appParams,
[
Expand Down
79 changes: 63 additions & 16 deletions querytests/appuio_managed_vcpu.jsonnet
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
local c = import 'promtest.libsonnet'; // provided by promtest-jsonnet
local c = import 'promtest.libsonnet'; // provided by promtest-jsonnet

local config = std.extVar("main.yml");
local queryPattern = config.parameters.appuio_reporting_aldebaran.rules.appuio_managed_vcpu.query_pattern ;
local config = std.extVar('main.yml');
local queryPattern = config.parameters.appuio_reporting_aldebaran.rules.appuio_managed_vcpu.query_pattern;

local appParams = {
cloud_provider: "cloudscale",
vshn_service_level: "best_effort",
distribution: "openshift4",
role: "app",
flavor_display: "OpenShift Container Platform"
cloud_provider: 'cloudscale',
vshn_service_level: 'best_effort',
distribution: 'openshift4',
role: 'app',
flavor_display: 'OpenShift Container Platform',
};
local storageParams = {
cloud_provider: "cloudscale",
vshn_service_level: "best_effort",
distribution: "openshift4",
role: "storage",
flavor_display: "OpenShift Container Platform"
cloud_provider: 'cloudscale',
vshn_service_level: 'best_effort',
distribution: 'openshift4',
role: 'storage',
flavor_display: 'OpenShift Container Platform',
};

local commonLabels = {
Expand Down Expand Up @@ -58,10 +58,44 @@ local baseSeries = {
appuioInfoLabel: c.series('appuio_managed_info', infoLabels, '1x120'),
};

local displayNameChange = {
appNodeRoleLabel: c.series('kube_node_role', commonLabels {
node: 'app-test',
role: 'app',
cluster_name: 'foo',
}, '1x60 _x60') + c.series('kube_node_role', commonLabels {
node: 'app-test',
role: 'app',
cluster_name: 'Foo',
}, '_x60 1x60'),
appNodeCPUInfoLabel0: c.series('node_cpu_info', commonLabels {
instance: 'app-test',
core: '0',
cluster_name: 'foo',
}, '1x60 _x60') + c.series('node_cpu_info', commonLabels {
instance: 'app-test',
core: '0',
cluster_name: 'Foo',
}, '_x60 1x60'),
appNodeCPUInfoLabel1: c.series('node_cpu_info', commonLabels {
instance: 'app-test',
core: '1',
cluster_name: 'foo',
}, '1x60 _x60') + c.series('node_cpu_info', commonLabels {
instance: 'app-test',
core: '1',
cluster_name: 'Foo',
}, '_x60 1x60'),

appuioInfoLabel:
c.series('appuio_managed_info', infoLabels { cluster_name: 'foo' }, '1x60 _x60') +
c.series('appuio_managed_info', infoLabels { cluster_name: 'Foo' }, '_x60 1x60'),
};

local baseCalculatedLabels = {
cluster_id: "c-managed-openshift",
sales_order: "SO123123",
flavor_display: "OpenShift Container Platform",
cluster_id: 'c-managed-openshift',
sales_order: 'SO123123',
flavor_display: 'OpenShift Container Platform',
};

{
Expand Down Expand Up @@ -92,5 +126,18 @@ local baseCalculatedLabels = {
},
]
),
c.test(
'and two app CPUs with a display name change',
baseSeries + displayNameChange,
queryPattern % appParams,
[
{
labels: c.formatLabels(baseCalculatedLabels {
role: 'app',
}),
value: 2,
},
]
),
],
}
Loading

0 comments on commit 2a81b8b

Please sign in to comment.