From 0555034b39b53975108af95802a71dd4c769bd23 Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Thu, 25 Apr 2024 13:09:53 +0200 Subject: [PATCH 01/11] Add nomad join template --- artifacts/lrms/nomad_ai4eosc_fe_install.yml | 9 +- templates/nomad_join_ai4eosc.yaml | 364 ++++++++++++++++++++ 2 files changed, 372 insertions(+), 1 deletion(-) create mode 100644 templates/nomad_join_ai4eosc.yaml diff --git a/artifacts/lrms/nomad_ai4eosc_fe_install.yml b/artifacts/lrms/nomad_ai4eosc_fe_install.yml index b32da8c..1c318ab 100644 --- a/artifacts/lrms/nomad_ai4eosc_fe_install.yml +++ b/artifacts/lrms/nomad_ai4eosc_fe_install.yml @@ -10,7 +10,14 @@ - name: Convert server_list to list set_fact: server_list: [server_list] - when: server_list is string + when: + - consul_server_join is not defined or consul_server_join == '' + - server_list is string + + - name: Use consul_server_join to join the consul cluster + set_fact: + server_list: [consul_server_join] + when: consul_server_join is defined and consul_server_join != '' roles: - role: 'grycap.consul' consul_server: true diff --git a/templates/nomad_join_ai4eosc.yaml b/templates/nomad_join_ai4eosc.yaml new file mode 100644 index 0000000..d5a3670 --- /dev/null +++ b/templates/nomad_join_ai4eosc.yaml @@ -0,0 +1,364 @@ +tosca_definitions_version: tosca_simple_yaml_1_0 + +imports: + - grycap_custom_types: https://raw.githubusercontent.com/grycap/tosca/main/custom_types.yaml + +description: Deploy a Consul + Nomad Virtual Cluster joining a previously existing one (AI4EOSC). + +metadata: + template_name: Nomad Join + template_version: "1.0.0" + display_name: Deploy and Join a Consul + Nomad Virtual Cluster + icon: images/nomad.png + tabs: + Server Features: fe_.* + WNs Features: wn_.* + GPU WNs Features: wn_gpu_.* + Pub WNs Features: wn_pub_.* + Nomad Data: + - launch_traefik + - consul_.* + - nomad_.* + +topology_template: + inputs: + + fe_num: + type: integer + description: Number of Nomad Servers in the cluster. 
+ default: 1 + required: yes + constraints: + - valid_values: [ 1, 3, 5 ] + fe_cpus: + type: integer + description: Number of CPUs for the front-end node + default: 2 + required: yes + constraints: + - valid_values: [ 2, 4, 8, 16, 32, 64 ] + fe_mem: + type: scalar-unit.size + description: Amount of Memory for the front-end node + default: 4 GB + required: yes + constraints: + - valid_values: [ 4 GB, 8 GB, 16 GB, 32 GB, 64 GB, 128 GB, 256 GB, 512 GB ] + + wn_num: + type: integer + description: Number of Nomad Clients in the cluster (without public IP) + default: 1 + required: yes + wn_cpus: + type: integer + description: Number of CPUs for the WNs (without public IP) + default: 2 + required: yes + constraints: + - valid_values: [ 2, 4, 8, 16, 32, 64 ] + wn_mem: + type: scalar-unit.size + description: Amount of Memory for the WNs (without public IP) + default: 4 GB + required: yes + constraints: + - valid_values: [ 4 GB, 8 GB, 16 GB, 32 GB, 64 GB, 128 GB, 256 GB, 512 GB ] + + wn_pub_num: + type: integer + description: Number of Nomad Clients in the cluster (with public IP) + default: 1 + required: yes + wn_pub_cpus: + type: integer + description: Number of CPUs for the WNs (with public IP) + default: 2 + required: yes + constraints: + - valid_values: [ 2, 4, 8, 16, 32, 64 ] + wn_pub_mem: + type: scalar-unit.size + description: Amount of Memory for the WNs (with public IP) + default: 4 GB + required: yes + constraints: + - valid_values: [ 4 GB, 8 GB, 16 GB, 32 GB, 64 GB, 128 GB, 256 GB, 512 GB ] + + wn_gpu_num: + type: integer + description: Number of Nomad Clients in the cluster (with GPU) + default: 0 + required: yes + wn_gpu_cpus: + type: integer + description: Number of CPUs for the GPU WNs + default: 2 + required: yes + constraints: + - valid_values: [ 2, 4, 8, 16, 32, 64 ] + wn_gpu_mem: + type: scalar-unit.size + description: Amount of Memory for the GPU WNs + default: 4 GB + required: yes + constraints: + - valid_values: [ 4 GB, 8 GB, 16 GB, 32 GB, 64 GB, 128 
GB, 256 GB, 512 GB ] + wn_gpu_num_gpus: + type: integer + description: Number of GPUs to assign to this VM + default: 1 + constraints: + - valid_values: [ 1, 2, 3, 4 ] + wn_gpu_vendor: + type: string + description: GPU Vendor + default: '' + constraints: + - valid_values: [ '', 'NVIDIA', 'AMD' ] + wn_gpu_model: + type: string + description: GPU Model + default: '' + + + launch_traefik: + type: boolean + description: Launch Traefik job as reverse proxy + default: false + constraints: + - valid_values: [ false, true ] + consul_version: + type: string + description: Consul version to install + default: 1.17.1 + required: yes + nomad_version: + type: string + description: Nomad version to install + default: 1.7.3 + required: yes + + + consul_cert_url: + type: string + description: URL to download the Consul certificates and tokens + default: '' + nomad_cert_url: + type: string + description: URL to download the Nomad certificates + default: '' + consul_server_join: + type: string + description: IP address of the Consul server to join + default: '' + + node_templates: + + lrms_front_end: + type: tosca.nodes.indigo.LRMS.FrontEnd.Nomad + capabilities: + endpoint: + properties: + ports: + port_4646: + protocol: tcp + source: 4646 + port_8501: + protocol: tcp + source: 8501 + port_80: + protocol: tcp + source: 80 + port_443: + protocol: tcp + source: 443 + artifacts: + nomad_role: + file: grycap.nomad,ai4eosc + type: tosca.artifacts.AnsibleGalaxy.role + consul_role: + file: grycap.consul,ai4eosc + type: tosca.artifacts.AnsibleGalaxy.role + requirements: + - host: front + interfaces: + Standard: + configure: + implementation: https://raw.githubusercontent.com/grycap/tosca/main/artifacts/lrms/nomad_ai4eosc_fe_install.yml + inputs: + nomad_server_list: { get_attribute: [ front, private_address ] } + nomad_launch_traefik: { get_input: launch_traefik } + nomad_version: { get_input: nomad_version } + consul_version: { get_input: consul_version } + consul_certs_url: { get_input: 
consul_cert_url } + nomad_certs_url: { get_input: nomad_cert_url } + consul_server_join: { get_input: consul_server_join } + + front: + type: tosca.nodes.indigo.Compute + capabilities: + scalable: + properties: + count: { get_input: fe_num } + endpoint: + properties: + dns_name: server#N# + network_name: PUBLIC + host: + properties: + num_cpus: { get_input: fe_cpus } + mem_size: { get_input: fe_mem } + os: + properties: + type: linux + + wn_node: + type: tosca.nodes.indigo.LRMS.WorkerNode.Nomad + properties: + front_end_ip: { get_attribute: [ front, private_address, 0 ] } + requirements: + - host: wn + artifacts: + docker_role: + file: grycap.docker + type: tosca.artifacts.AnsibleGalaxy.role + nomad_role: + file: grycap.nomad,ai4eosc + type: tosca.artifacts.AnsibleGalaxy.role + consul_role: + file: grycap.consul,ai4eosc + type: tosca.artifacts.AnsibleGalaxy.role + interfaces: + Standard: + configure: + implementation: https://raw.githubusercontent.com/grycap/tosca/main/artifacts/lrms/nomad_ai4eosc_wn_install.yml + inputs: + nomad_server_list: { get_attribute: [ front, private_address ] } + nomad_version: { get_input: nomad_version } + consul_version: { get_input: consul_version } + consul_certs_url: { get_input: consul_cert_url } + nomad_certs_url: { get_input: nomad_cert_url } + consul_server_join: { get_input: consul_server_join } + + wn: + type: tosca.nodes.indigo.Compute + capabilities: + scalable: + properties: + count: { get_input: wn_num } + host: + properties: + num_cpus: { get_input: wn_cpus } + mem_size: { get_input: wn_mem } + os: + properties: + type: linux + + wn_pub_node: + type: tosca.nodes.indigo.LRMS.WorkerNode.Nomad + properties: + front_end_ip: { get_attribute: [ front, private_address, 0 ] } + requirements: + - host: wn_pub + artifacts: + docker_role: + file: grycap.docker + type: tosca.artifacts.AnsibleGalaxy.role + nomad_role: + file: grycap.nomad,ai4eosc + type: tosca.artifacts.AnsibleGalaxy.role + consul_role: + file: grycap.consul,ai4eosc + 
type: tosca.artifacts.AnsibleGalaxy.role + interfaces: + Standard: + configure: + implementation: https://raw.githubusercontent.com/grycap/tosca/main/artifacts/lrms/nomad_ai4eosc_wn_install.yml + inputs: + nomad_server_list: { get_attribute: [ front, private_address ] } + nomad_version: { get_input: nomad_version } + consul_version: { get_input: consul_version } + + wn_pub: + type: tosca.nodes.indigo.Compute + capabilities: + endpoint: + properties: + network_name: PUBLIC + ports: + port_80: + protocol: tcp + source: 80 + port_443: + protocol: tcp + source: 443 + scalable: + properties: + count: { get_input: wn_pub_num } + host: + properties: + num_cpus: { get_input: wn_pub_cpus } + mem_size: { get_input: wn_pub_mem } + os: + properties: + type: linux + + wn_gpu_node: + type: tosca.nodes.indigo.LRMS.WorkerNode.Nomad + properties: + front_end_ip: { get_attribute: [ front, private_address, 0 ] } + requirements: + - host: wn_gpu + artifacts: + docker_role: + file: grycap.docker + type: tosca.artifacts.AnsibleGalaxy.role + nomad_role: + file: grycap.nomad,ai4eosc + type: tosca.artifacts.AnsibleGalaxy.role + consul_role: + file: grycap.consul,ai4eosc + type: tosca.artifacts.AnsibleGalaxy.role + interfaces: + Standard: + configure: + implementation: https://raw.githubusercontent.com/grycap/tosca/main/artifacts/lrms/nomad_ai4eosc_wn_install.yml + inputs: + nomad_server_list: { get_attribute: [ front, private_address ] } + nomad_version: { get_input: nomad_version } + consul_version: { get_input: consul_version } + nomad_nvidia_support: true + + wn_gpu: + type: tosca.nodes.indigo.Compute + capabilities: + scalable: + properties: + count: { get_input: wn_gpu_num } + host: + properties: + num_cpus: { get_input: wn_gpu_cpus } + mem_size: { get_input: wn_gpu_mem } + num_gpus: { get_input: wn_gpu_num_gpus } + gpu_vendor: { get_input: wn_gpu_vendor } + gpu_model: { get_input: wn_gpu_model } + os: + properties: + type: linux + + outputs: + nomad_ui: + value: { concat: [ 
'https://', get_attribute: [ front, public_address, 0 ], ':4646' ] } + consul_ui: + value: { concat: [ 'https://', get_attribute: [ front, public_address, 0 ], ':8501' ] } + nomad_token: + value: { get_attribute: [ front, ansible_output, lrms_front_end_front_conf_front, tasks, 'grycap.nomad : nomad_secret_id', output ] } + consul_token: + value: { get_attribute: [ front, ansible_output, lrms_front_end_front_conf_front, tasks, 'grycap.consul : consul_secret_id', output ] } + cluster_ip: + value: { get_attribute: [ front, public_address, 0 ] } + cluster_creds: + value: { get_attribute: [ front, endpoint, credential, 0 ] } + traefik_endpoint: + value: { concat: [ 'https://', get_attribute: [ wn_pub, public_address, 0 ] ] } From 0523304f28837e968595bb06f4cdd94fa144af1d Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Thu, 25 Apr 2024 13:13:50 +0200 Subject: [PATCH 02/11] Fix style --- artifacts/lrms/nomad_ai4eosc_fe_install.yml | 4 ++-- templates/nomad_join_ai4eosc.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/artifacts/lrms/nomad_ai4eosc_fe_install.yml b/artifacts/lrms/nomad_ai4eosc_fe_install.yml index 1c318ab..8fe2841 100644 --- a/artifacts/lrms/nomad_ai4eosc_fe_install.yml +++ b/artifacts/lrms/nomad_ai4eosc_fe_install.yml @@ -11,8 +11,8 @@ set_fact: server_list: [server_list] when: - - consul_server_join is not defined or consul_server_join == '' - - server_list is string + - consul_server_join is not defined or consul_server_join == '' + - server_list is string - name: Use consul_server_join to join the consul cluster set_fact: diff --git a/templates/nomad_join_ai4eosc.yaml b/templates/nomad_join_ai4eosc.yaml index d5a3670..e7a5dc4 100644 --- a/templates/nomad_join_ai4eosc.yaml +++ b/templates/nomad_join_ai4eosc.yaml @@ -143,7 +143,7 @@ topology_template: consul_cert_url: type: string description: URL to download the Consul certificates and tokens - default: '' + default: '' nomad_cert_url: type: string description: URL to download 
the Nomad certificates From a764ace0da27e051b3a61dec513d57c32e678a5d Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Thu, 25 Apr 2024 13:18:50 +0200 Subject: [PATCH 03/11] Fix style --- artifacts/lrms/helm_chart.yml | 2 +- templates/nomad_join_ai4eosc.yaml | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/artifacts/lrms/helm_chart.yml b/artifacts/lrms/helm_chart.yml index 575bd97..429b272 100644 --- a/artifacts/lrms/helm_chart.yml +++ b/artifacts/lrms/helm_chart.yml @@ -48,7 +48,7 @@ - name: Create params fact set_fact: - helm_params: "{{ item.key }}={{ item.value }},{{ helm_params }}" + helm_params: "{{ item.key }}='{{ item.value }}',{{ helm_params }}" with_dict: "{{ helm_values }}" - name: Create values file diff --git a/templates/nomad_join_ai4eosc.yaml b/templates/nomad_join_ai4eosc.yaml index e7a5dc4..0ca7975 100644 --- a/templates/nomad_join_ai4eosc.yaml +++ b/templates/nomad_join_ai4eosc.yaml @@ -17,8 +17,11 @@ metadata: Pub WNs Features: wn_pub_.* Nomad Data: - launch_traefik - - consul_.* - - nomad_.* + - consul_version + - nomad_version + - consul_cert_url + - nomad_cert_url + - consul_server_join topology_template: inputs: From 0d21c20c18527d2f61849e2b50c9058fec72fbea Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Thu, 25 Apr 2024 13:20:28 +0200 Subject: [PATCH 04/11] Fix style --- templates/nomad_join_ai4eosc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/nomad_join_ai4eosc.yaml b/templates/nomad_join_ai4eosc.yaml index 0ca7975..9948843 100644 --- a/templates/nomad_join_ai4eosc.yaml +++ b/templates/nomad_join_ai4eosc.yaml @@ -19,7 +19,7 @@ metadata: - launch_traefik - consul_version - nomad_version - - consul_cert_url + - consul_cert_url - nomad_cert_url - consul_server_join From 0595526345aa58e896c55d60c98cf3917695465f Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Thu, 25 Apr 2024 13:21:44 +0200 Subject: [PATCH 05/11] Fix style --- artifacts/lrms/nomad_ai4eosc_fe_install.yml | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifacts/lrms/nomad_ai4eosc_fe_install.yml b/artifacts/lrms/nomad_ai4eosc_fe_install.yml index 8fe2841..ed55e30 100644 --- a/artifacts/lrms/nomad_ai4eosc_fe_install.yml +++ b/artifacts/lrms/nomad_ai4eosc_fe_install.yml @@ -10,7 +10,7 @@ - name: Convert server_list to list set_fact: server_list: [server_list] - when: + when: - consul_server_join is not defined or consul_server_join == '' - server_list is string From 8feca429e43400425ec8e464b9016a1709b5863a Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Thu, 25 Apr 2024 14:01:03 +0200 Subject: [PATCH 06/11] Fix nomad join --- artifacts/lrms/nomad_ai4eosc_fe_install.yml | 6 ++++-- artifacts/lrms/nomad_ai4eosc_wn_install.yml | 14 ++++++++++++-- templates/nomad_join_ai4eosc.yaml | 6 ++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/artifacts/lrms/nomad_ai4eosc_fe_install.yml b/artifacts/lrms/nomad_ai4eosc_fe_install.yml index ed55e30..604d2f0 100644 --- a/artifacts/lrms/nomad_ai4eosc_fe_install.yml +++ b/artifacts/lrms/nomad_ai4eosc_fe_install.yml @@ -9,14 +9,16 @@ pre_tasks: - name: Convert server_list to list set_fact: - server_list: [server_list] + server_list: + - "{{ server_list }}" when: - consul_server_join is not defined or consul_server_join == '' - server_list is string - name: Use consul_server_join to join the consul cluster set_fact: - server_list: [consul_server_join] + server_list: + - "{{ consul_server_join }}" when: consul_server_join is defined and consul_server_join != '' roles: - role: 'grycap.consul' diff --git a/artifacts/lrms/nomad_ai4eosc_wn_install.yml b/artifacts/lrms/nomad_ai4eosc_wn_install.yml index 52b45c9..b206df7 100644 --- a/artifacts/lrms/nomad_ai4eosc_wn_install.yml +++ b/artifacts/lrms/nomad_ai4eosc_wn_install.yml @@ -8,8 +8,18 @@ pre_tasks: - name: Convert server_list to list set_fact: - server_list: [server_list] - when: server_list is string + server_list: + - "{{ server_list }}" + when: + - 
consul_server_join is not defined or consul_server_join == '' + - server_list is string + + - name: Use consul_server_join to join the consul cluster + set_fact: + server_list: + - "{{ consul_server_join }}" + when: consul_server_join is defined and consul_server_join != '' + - name: Set nomad plugins var set_fact: nomad_plugins_var: diff --git a/templates/nomad_join_ai4eosc.yaml b/templates/nomad_join_ai4eosc.yaml index 9948843..05567d6 100644 --- a/templates/nomad_join_ai4eosc.yaml +++ b/templates/nomad_join_ai4eosc.yaml @@ -282,6 +282,9 @@ topology_template: nomad_server_list: { get_attribute: [ front, private_address ] } nomad_version: { get_input: nomad_version } consul_version: { get_input: consul_version } + consul_certs_url: { get_input: consul_cert_url } + nomad_certs_url: { get_input: nomad_cert_url } + consul_server_join: { get_input: consul_server_join } wn_pub: type: tosca.nodes.indigo.Compute @@ -332,6 +335,9 @@ topology_template: nomad_version: { get_input: nomad_version } consul_version: { get_input: consul_version } nomad_nvidia_support: true + consul_certs_url: { get_input: consul_cert_url } + nomad_certs_url: { get_input: nomad_cert_url } + consul_server_join: { get_input: consul_server_join } wn_gpu: type: tosca.nodes.indigo.Compute From 8e1922ff0d863955f763a78cca6ee6efddb23670 Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Tue, 7 May 2024 13:02:28 +0200 Subject: [PATCH 07/11] Add support for longhorn csi --- artifacts/lrms/kube_fe_install.yml | 7 ++++++- artifacts/lrms/kube_wn_install.yml | 2 ++ templates/k8s_new_wn_type.yaml | 1 + templates/kubernetes.yaml | 15 ++++++++++++--- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/artifacts/lrms/kube_fe_install.yml b/artifacts/lrms/kube_fe_install.yml index ac89b1b..fed00f6 100644 --- a/artifacts/lrms/kube_fe_install.yml +++ b/artifacts/lrms/kube_fe_install.yml @@ -6,6 +6,7 @@ export_hosts: "*.localdomain" export_line: "(fsid=0,rw,async,no_root_squash,no_subtree_check,insecure)" 
nfs_path: "{{ kube_nfs_path | default('/pv') }}" + kube_csi_driver: "{{ csi_driver | default('NFS') }}" pre_tasks: - name: Create dir for the NFS PV @@ -22,11 +23,15 @@ set_fact: export_hosts: "{{ ansible_default_ipv4.network }}/{{ ansible_default_ipv4.netmask }}" when: IM_NODE_CLOUD_TYPE is defined and IM_NODE_CLOUD_TYPE == 'EC2' - + - name: Set to install Longhorn CSI driver + set_fact: + kube_install_longhorn: true + when: kube_csi_driver == 'Longhorn' roles: - role: grycap.nfs nfs_mode: 'front' nfs_exports: [{path: '{{ nfs_path }}', export: '{{ export_hosts }}{{ export_line }}'}] + when: kube_csi_driver == 'NFS' - role: grycap.kubernetes kube_server: '{{ kube_front_end_ip }}' kube_api_server: '{{ kube_front_end_ip }}' diff --git a/artifacts/lrms/kube_wn_install.yml b/artifacts/lrms/kube_wn_install.yml index ceecfdc..e34ae95 100644 --- a/artifacts/lrms/kube_wn_install.yml +++ b/artifacts/lrms/kube_wn_install.yml @@ -3,9 +3,11 @@ connection: local vars: vnode_prefix: vnode- + kube_csi_driver: "{{ csi_driver | default('NFS') }}" roles: - role: grycap.nfs nfs_mode: 'wn' + when: kube_csi_driver == 'NFS' - role: 'grycap.kubernetes' kube_type_of_node: 'wn' kube_server: '{{ kube_front_end_ip }}' diff --git a/templates/k8s_new_wn_type.yaml b/templates/k8s_new_wn_type.yaml index fa4c81d..4ebdeee 100644 --- a/templates/k8s_new_wn_type.yaml +++ b/templates/k8s_new_wn_type.yaml @@ -82,6 +82,7 @@ topology_template: version: { get_input: kube_version } nvidia_support: { get_input: wng_kube_nvidia_support } cri_runtime: { get_input: kube_cri_runtime } + csi_driver: { get_input: kube_csi_driver } requirements: - host: wng diff --git a/templates/kubernetes.yaml b/templates/kubernetes.yaml index 9e1088d..8db4a9d 100644 --- a/templates/kubernetes.yaml +++ b/templates/kubernetes.yaml @@ -164,11 +164,18 @@ topology_template: description: DNS name of the public interface of the FE node to generate the certificate default: "" - allowed_cidr: + kube_allowed_cidr: type: string 
description: Allowed remote CIDR to extenal access default: "0.0.0.0/0" + kube_csi_driver: + type: string + description: Name of the CSI driver to install + default: NFS + constraints: + - valid_values: [ NFS, Longhorn ] + node_templates: lrms_front_end: @@ -180,11 +187,11 @@ topology_template: http_port: protocol: tcp source: 80 - remote_cidr: { get_input: allowed_cidr } + remote_cidr: { get_input: kube_allowed_cidr } https_port: protocol: tcp source: 443 - remote_cidr: { get_input: allowed_cidr } + remote_cidr: { get_input: kube_allowed_cidr } properties: admin_username: kubeuser install_nfs_client: true @@ -196,6 +203,7 @@ topology_template: cert_user_email: { get_input: kube_cert_user_email } public_dns_name: { get_input: kube_public_dns_name} cri_runtime: { get_input: kube_cri_runtime } + csi_driver: { get_input: kube_csi_driver } requirements: - host: front @@ -254,6 +262,7 @@ topology_template: gpu_vendor: { get_input: wn_gpu_vendor } gpu_model: { get_input: wn_gpu_model } instance_type: { get_input: wn_instance_type } + csi_driver: { get_input: kube_csi_driver } os: properties: distribution: ubuntu From f475b2828775c1da138b2a413da827ce13a3059f Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Tue, 7 May 2024 13:08:17 +0200 Subject: [PATCH 08/11] Add support for longhorn csi --- artifacts/lrms/kube_fe_install.yml | 6 +++--- artifacts/lrms/kube_wn_install.yml | 4 ++-- custom_types.yaml | 18 ++++++++++++++++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/artifacts/lrms/kube_fe_install.yml b/artifacts/lrms/kube_fe_install.yml index fed00f6..2a4e625 100644 --- a/artifacts/lrms/kube_fe_install.yml +++ b/artifacts/lrms/kube_fe_install.yml @@ -6,7 +6,7 @@ export_hosts: "*.localdomain" export_line: "(fsid=0,rw,async,no_root_squash,no_subtree_check,insecure)" nfs_path: "{{ kube_nfs_path | default('/pv') }}" - kube_csi_driver: "{{ csi_driver | default('NFS') }}" + csi_driver: "{{ kube_csi_driver | default('NFS') }}" pre_tasks: - name: Create dir 
for the NFS PV @@ -26,12 +26,12 @@ - name: Set to install Longhorn CSI driver set_fact: kube_install_longhorn: true - when: kube_csi_driver == 'Longhorn' + when: csi_driver == 'Longhorn' roles: - role: grycap.nfs nfs_mode: 'front' nfs_exports: [{path: '{{ nfs_path }}', export: '{{ export_hosts }}{{ export_line }}'}] - when: kube_csi_driver == 'NFS' + when: csi_driver == 'NFS' - role: grycap.kubernetes kube_server: '{{ kube_front_end_ip }}' kube_api_server: '{{ kube_front_end_ip }}' diff --git a/artifacts/lrms/kube_wn_install.yml b/artifacts/lrms/kube_wn_install.yml index e34ae95..5516c69 100644 --- a/artifacts/lrms/kube_wn_install.yml +++ b/artifacts/lrms/kube_wn_install.yml @@ -3,11 +3,11 @@ connection: local vars: vnode_prefix: vnode- - kube_csi_driver: "{{ csi_driver | default('NFS') }}" + csi_driver: "{{ kube_csi_driver | default('NFS') }}" roles: - role: grycap.nfs nfs_mode: 'wn' - when: kube_csi_driver == 'NFS' + when: csi_driver == 'NFS' - role: 'grycap.kubernetes' kube_type_of_node: 'wn' kube_server: '{{ kube_front_end_ip }}' diff --git a/custom_types.yaml b/custom_types.yaml index 1959d24..26e7856 100644 --- a/custom_types.yaml +++ b/custom_types.yaml @@ -584,7 +584,13 @@ node_types: type: string default: "docker" constraints: - - valid_values: [ docker, containerd ] + - valid_values: [ docker, containerd, crio ] + csi_driver: + required: no + type: string + default: "NFS" + constraints: + - valid_values: [ NFS, Longhorn ] artifacts: kube_role: file: grycap.kubernetes @@ -601,6 +607,7 @@ node_types: kube_version: { get_property: [ SELF, version ] } kube_nvidia_support: { get_property: [ SELF, nvidia_support ] } kube_cri_runtime: { get_property: [ SELF, cri_runtime ] } + kube_csi_driver: { get_property: [ SELF, csi_driver ] } tosca.nodes.indigo.LRMS.FrontEnd.Kubernetes: derived_from: tosca.nodes.indigo.LRMS.FrontEnd @@ -686,7 +693,13 @@ node_types: type: string default: "docker" constraints: - - valid_values: [ docker, containerd ] + - valid_values: [ 
docker, containerd, crio ] + csi_driver: + required: no + type: string + default: "NFS" + constraints: + - valid_values: [ NFS, Longhorn ] artifacts: kube_role: file: grycap.kubernetes @@ -728,6 +741,7 @@ node_types: kube_cri_runtime: { get_property: [ SELF, cri_runtime ] } kube_install_yunikorn: { get_property: [ SELF, install_yunikorn ] } kube_deploy_dashboard: { get_property: [ SELF, install_dashboard ] } + kube_csi_driver: { get_property: [ SELF, csi_driver ] } tosca.nodes.indigo.LRMS.WorkerNode.Slurm: derived_from: tosca.nodes.indigo.LRMS.WorkerNode From 93c475229b92e62742bc4c275e6b05ae53ed2d1f Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Tue, 7 May 2024 13:10:57 +0200 Subject: [PATCH 09/11] Set devel for import --- templates/kubernetes.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/kubernetes.yaml b/templates/kubernetes.yaml index 8db4a9d..58aded6 100644 --- a/templates/kubernetes.yaml +++ b/templates/kubernetes.yaml @@ -1,7 +1,7 @@ tosca_definitions_version: tosca_simple_yaml_1_0 imports: - - grycap_custom_types: https://raw.githubusercontent.com/grycap/tosca/main/custom_types.yaml + - grycap_custom_types: https://raw.githubusercontent.com/grycap/devel/main/custom_types.yaml description: Deploy a Kubernetes Virtual Cluster. 
From 997f962745396eb2bf0bfd913e9f4a5a6c1660fc Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Tue, 7 May 2024 13:13:08 +0200 Subject: [PATCH 10/11] Set devel for import --- templates/kubernetes.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/kubernetes.yaml b/templates/kubernetes.yaml index 58aded6..cf0dd0f 100644 --- a/templates/kubernetes.yaml +++ b/templates/kubernetes.yaml @@ -1,7 +1,7 @@ tosca_definitions_version: tosca_simple_yaml_1_0 imports: - - grycap_custom_types: https://raw.githubusercontent.com/grycap/devel/main/custom_types.yaml + - grycap_custom_types: https://raw.githubusercontent.com/grycap/tosca/devel/custom_types.yaml description: Deploy a Kubernetes Virtual Cluster. From 075dddd5260465f753c05dd67a567b4985f3eae9 Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Tue, 7 May 2024 13:15:30 +0200 Subject: [PATCH 11/11] Fix error --- templates/kubernetes.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/kubernetes.yaml b/templates/kubernetes.yaml index cf0dd0f..cca4bfe 100644 --- a/templates/kubernetes.yaml +++ b/templates/kubernetes.yaml @@ -244,6 +244,7 @@ topology_template: version: { get_input: kube_version } nvidia_support: { get_input: wn_kube_nvidia_support } cri_runtime: { get_input: kube_cri_runtime } + csi_driver: { get_input: kube_csi_driver } requirements: - host: wn @@ -262,7 +263,6 @@ topology_template: gpu_vendor: { get_input: wn_gpu_vendor } gpu_model: { get_input: wn_gpu_model } instance_type: { get_input: wn_instance_type } - csi_driver: { get_input: kube_csi_driver } os: properties: distribution: ubuntu