Skip to content

Commit

Permalink
operator: Enable multi-home support
Browse files Browse the repository at this point in the history
- Add variable options to pass a map of host IPs to secondary IPs
- Add variable to attach network definition annotations to pods
- Add playbook for creating networkAttachmentDefinitions

Signed-off-by: Aaron Wilson <[email protected]>
  • Loading branch information
aaronnw committed Dec 29, 2023
1 parent 9b11945 commit ddd270e
Show file tree
Hide file tree
Showing 19 changed files with 167 additions and 3 deletions.
7 changes: 7 additions & 0 deletions operator/api/v1beta1/aistore_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ type AIStoreSpec struct {
InitImage string `json:"initImage"` // init image for nodes
HostpathPrefix string `json:"hostpathPrefix"`
ConfigToUpdate *ConfigToUpdate `json:"configToUpdate,omitempty"`
// Map of primary hosts to additional hosts for multi-home
// Syntax: node_1_host=node_1_host,node_1_secondary node_2_host=node_2_host,node_2_secondary
// +optional
HostnameMap *string `json:"hostnameMap,omitempty"`
// Comma-separated list of names of additional network attachment definitions to attach to each pod
// +optional
NetAttachment *string `json:"networkAttachment,omitempty"`

ProxySpec DaemonSpec `json:"proxySpec"` // spec for proxy
TargetSpec TargetSpec `json:"targetSpec"` // spec for target
Expand Down
5 changes: 5 additions & 0 deletions operator/api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions operator/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/NVIDIA/aistore v1.3.21
github.com/go-logr/logr v1.2.4
github.com/json-iterator/go v1.1.12
github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0
github.com/onsi/ginkgo v1.16.5
github.com/onsi/gomega v1.28.0
k8s.io/api v0.28.3
Expand Down
2 changes: 2 additions & 0 deletions operator/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 h1:VzM3TYHDgqPkettiP6I6q2jOeQFL4nrJM+UcAc4f6Fs=
github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0/go.mod h1:nqCI7aelBJU61wiBeeZWJ6oi4bJy5nrjkM6lWIMA4j0=
github.com/karrick/godirwalk v1.17.0 h1:b4kY7nqDdioR/6qnbHQyDvmA17u5G1cZ6J+CZXwSWoI=
github.com/karrick/godirwalk v1.17.0/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
Expand Down
1 change: 1 addition & 0 deletions operator/pkg/resources/cmn/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func NewGlobalCM(ais *aisv1.AIStore, toUpdate *aiscmn.ConfigToSet) (*corev1.Conf
"ais.json": conf,
"ais_liveness.sh": livenessSh,
"ais_readiness.sh": readinessSh,
"hostname_map": *ais.Spec.HostnameMap,
},
}, nil
}
4 changes: 4 additions & 0 deletions operator/pkg/resources/cmn/res.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ func NewInitVolumeMounts(antiAffinityDisabled *bool) []corev1.VolumeMount {
Name: "config-template-mount",
MountPath: "/var/ais_config_template",
},
{
Name: "config-global",
MountPath: "/var/global_config",
},
{
Name: "env-mount",
MountPath: "/var/ais_env",
Expand Down
19 changes: 19 additions & 0 deletions operator/pkg/resources/cmn/statefulset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Package cmn provides utilities for common AIS cluster resources
/*
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
*/
package cmn

import (
aisv1 "github.com/ais-operator/api/v1beta1"
nadv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
)

// ParseAnnotations builds the pod annotations derived from the AIStore spec.
//
// When Spec.NetAttachment is set, the returned map holds a single entry keyed
// by nadv1.NetworkAttachmentAnnot whose value is the configured string.
// When it is not set, nil is returned.
func ParseAnnotations(ais *aisv1.AIStore) map[string]string {
	netAttachment := ais.Spec.NetAttachment
	if netAttachment == nil {
		return nil
	}

	annotations := make(map[string]string, 1)
	annotations[nadv1.NetworkAttachmentAnnot] = *netAttachment
	return annotations
}
18 changes: 18 additions & 0 deletions operator/pkg/resources/proxy/scripts.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,24 @@ pod_dns="${MY_POD}.${MY_SERVICE}.${K8S_NS}.svc.${cluster_domain}"
export AIS_INTRA_HOSTNAME=${pod_dns}
export AIS_DATA_HOSTNAME=${pod_dns}
# Lookup the hostnames in the hostname config map (allows for multiple host ips)
hostname_map="/var/global_config/hostname_map"
if [ -f "$hostname_map" ]; then
read -ra pairs <<< "$(cat "$hostname_map")"
for pair in "${pairs[@]}"; do
IFS='=' read -ra parts <<< "$pair"
key="${parts[0]}"
value="${parts[1]}"
if [ "$key" = "$AIS_PUBLIC_HOSTNAME" ]; then
echo "Setting AIS_PUBLIC_HOSTNAME to value from configMap: ${value}"
export AIS_PUBLIC_HOSTNAME="$value"
break
fi
done
fi
local_conf_template="/var/ais_config_template/ais_local.json"
local_conf_file="/var/ais_config/ais_local.json"
envsubst < ${local_conf_template} > ${local_conf_file}
Expand Down
4 changes: 3 additions & 1 deletion operator/pkg/resources/proxy/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func DefaultPrimaryNSName(ais *aisv1.AIStore) types.NamespacedName {
func NewProxyStatefulSet(ais *aisv1.AIStore, size int32) *apiv1.StatefulSet {
ls := PodLabels(ais)
proxySpec := proxyPodSpec(ais)

return &apiv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: statefulSetName(ais),
Expand All @@ -62,7 +63,8 @@ func NewProxyStatefulSet(ais *aisv1.AIStore, size int32) *apiv1.StatefulSet {
Replicas: &size,
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: ls,
Labels: ls,
Annotations: cmn.ParseAnnotations(ais),
},
Spec: proxySpec,
},
Expand Down
19 changes: 19 additions & 0 deletions operator/pkg/resources/target/scripts.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,25 @@ cluster_domain=${AIS_K8S_CLUSTER_DOMAIN:-"cluster.local"}
pod_dns="${MY_POD}.${MY_SERVICE}.${K8S_NS}.svc.${cluster_domain}"
export AIS_INTRA_HOSTNAME=${pod_dns}
export AIS_DATA_HOSTNAME=${pod_dns}
# Lookup the hostnames in the hostname config map (allows for multiple host ips)
hostname_map="/var/global_config/hostname_map"
if [ -f "$hostname_map" ]; then
read -ra pairs <<< "$(cat "$hostname_map")"
for pair in "${pairs[@]}"; do
IFS='=' read -ra parts <<< "$pair"
key="${parts[0]}"
value="${parts[1]}"
if [ "$key" = "$AIS_PUBLIC_HOSTNAME" ]; then
echo "Setting AIS_PUBLIC_HOSTNAME to value from configMap: ${value}"
export AIS_PUBLIC_HOSTNAME="$value"
break
fi
done
fi
local_conf_template="/var/ais_config_template/ais_local.json"
local_conf_file="/var/ais_config/ais_local.json"
envsubst < ${local_conf_template} > ${local_conf_file}
Expand Down
3 changes: 2 additions & 1 deletion operator/pkg/resources/target/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ func NewTargetSS(ais *aisv1.AIStore) *apiv1.StatefulSet {
VolumeClaimTemplates: targetVC(ais),
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: ls,
Labels: ls,
Annotations: cmn.ParseAnnotations(ais),
},
Spec: corev1.PodSpec{
InitContainers: []corev1.Container{
Expand Down
1 change: 1 addition & 0 deletions playbooks/ais_deploy_cluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
vars_files:
- "vars/ais_mpaths.yml"
- "vars/https_config.yml"
- "vars/multihome.yml"

pre_tasks:
- name: check mountpath list
Expand Down
16 changes: 16 additions & 0 deletions playbooks/create_network_definition.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
# Creates a NetworkAttachmentDefinition by running the
# `create_network_definition` role on the controller host.
#
# Required variables:
#   cluster             - used by the role as the target namespace
#   network_attachment  - name of the NetworkAttachmentDefinition
#   network_interface   - host interface the definition is attached to
# `network_attachment` and `network_interface` default to "" in
# vars/multihome.yml, so both are validated for emptiness below.
- hosts: "controller"
  gather_facts: false
  vars_files:
    - "vars/multihome.yml"
  pre_tasks:
    - name: Check cluster variable
      fail:
        msg: "No cluster specified!"
      when: cluster is undefined
    - name: Check network_attachment variable
      fail:
        msg: "`network_attachment` name must be provided!"
      when: network_attachment is undefined or network_attachment | length == 0
    # The role hands network_interface to the script as INTERFACE, which ends
    # up as the macvlan "master"; fail early instead of applying a definition
    # with an empty master interface.
    - name: Check network_interface variable
      fail:
        msg: "`network_interface` name must be provided!"
      when: network_interface is undefined or network_interface | length == 0
  roles:
    - create_network_definition
8 changes: 8 additions & 0 deletions playbooks/roles/ais_deploy_cluster/templates/ais.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,12 @@ spec:
# For local testing with `minikube` run `minikube tunnel` in background for emulation. ref: https://minikube.sigs.k8s.io/docs/commands/tunnel/
enableExternalLB: false

{% if hostname_map != "" %}
hostnameMap: "{{ hostname_map }}"
{% endif %}

{% if network_attachment != "" %}
networkAttachment: "{{ network_attachment }}"
{% endif %}

clusterDomain: "cluster.local"
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
# Deploy AIS operator
#

release_version=${RELEASE:-v0.94}
release_version=${RELEASE:-v0.96}

kubectl delete -f https://github.com/NVIDIA/ais-k8s/releases/download/${release_version}/ais-operator.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Renders nad.template.yaml (located next to this script) with envsubst and
# applies the result with kubectl.
#
# Required environment variables:
#   NAME       - exported as NAD_NAME (metadata.name in the template)
#   NAMESPACE  - exported as NAD_NAMESPACE (metadata.namespace in the template)
#   INTERFACE  - exported as NAD_IFACE (the "master" interface in the template)
#
# Exits non-zero if a required variable is missing or empty, or if rendering
# or `kubectl apply` fails, so callers can detect the failure.
set -euo pipefail

export NAD_NAME="${NAME:?NAME must be set}"
export NAD_NAMESPACE="${NAMESPACE:?NAMESPACE must be set}"
export NAD_IFACE="${INTERFACE:?INTERFACE must be set}"
source_dir=$(dirname "${BASH_SOURCE[0]}")

# Render into a private temp file instead of a fixed /tmp path, and always
# remove it on exit, including when envsubst or kubectl fails.
rendered=$(mktemp)
trap 'rm -f "${rendered}"' EXIT

# Restrict substitution to the NAD_* placeholders so any other `$` sequence in
# the template is left untouched.
envsubst '${NAD_NAME} ${NAD_NAMESPACE} ${NAD_IFACE}' < "${source_dir}/nad.template.yaml" > "${rendered}"
kubectl apply -f "${rendered}"
22 changes: 22 additions & 0 deletions playbooks/roles/create_network_definition/files/nad.template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Template for a NetworkAttachmentDefinition.
# $NAD_NAME, $NAD_NAMESPACE and $NAD_IFACE are placeholders replaced by
# envsubst in create-network-definition.sh before the result is applied.
# The CNI config below uses the macvlan plugin in bridge mode on $NAD_IFACE
# with host-local IPAM.
# NOTE(review): subnet, address range and gateway are hard-coded here;
# presumably they must be adapted to the target secondary network - confirm.
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
name: $NAD_NAME
namespace: $NAD_NAMESPACE
spec:
config: '{
"cniVersion": "0.3.0",
"type": "macvlan",
"master": "$NAD_IFACE",
"mode": "bridge",
"ipam": {
"type": "host-local",
"subnet": "192.168.1.0/24",
"rangeStart": "192.168.1.200",
"rangeEnd": "192.168.1.216",
"routes": [
{ "dst": "0.0.0.0/0" }
],
"gateway": "192.168.1.1"
}
}'
16 changes: 16 additions & 0 deletions playbooks/roles/create_network_definition/tasks/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Tasks for the create_network_definition role: ensure the namespace exists,
# copy the helper script and its template to /tmp, then run the script.

# `|| true` keeps the task green when the namespace already exists;
# changed_when reports a change only when kubectl actually created it.
- name: Create namespace if it does not exist
  shell: "kubectl create ns {{ cluster }} || true"
  register: namespaceout
  changed_when: "'created' in namespaceout.stdout"

# The script is executed from /tmp right after being copied, so it must not be
# world-writable; 0755 keeps it executable without letting other users edit it.
- name: Copy network attachment definition files
  copy:
    src: "{{ item }}"
    dest: "/tmp"
    mode: "0755"
  loop:
    - "create-network-definition.sh"
    - "nad.template.yaml"

# Values are passed through the task environment rather than interpolated into
# the command line, so whitespace or shell metacharacters in them cannot alter
# the command that is run.
- name: Apply network attachment definition
  shell: "/tmp/create-network-definition.sh"
  environment:
    NAMESPACE: "{{ cluster }}"
    NAME: "{{ network_attachment }}"
    INTERFACE: "{{ network_interface }}"
13 changes: 13 additions & 0 deletions playbooks/vars/multihome.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Map primary host address to list of all addresses available for each host in the format:
# primary1=primary1,secondary1 primary2=primary2,secondary2
# Example:
# hostname_map: "10.51.248.22=10.51.248.22,10.51.248.55 10.51.248.25=10.51.248.25,10.51.248.52 10.51.248.28=10.51.248.28,10.51.248.61"
# TODO: Pull this from ansible inventory
hostname_map: ""

# Name of the Network Attachment Definition to be referenced as an annotation by the AIS stateful set
# See the multus example: https://github.com/k8snetworkplumbingwg/multus-cni/blob/master/docs/quickstart.md#storing-a-configuration-as-a-custom-resource
network_attachment: ""

# Name of the interface for which to create a network attachment definition
network_interface: ""

0 comments on commit ddd270e

Please sign in to comment.