Skip to content

Commit

Permalink
Merge branch 'main' into ofed
Browse files Browse the repository at this point in the history
  • Loading branch information
sjpb committed Apr 4, 2024
2 parents 84485ed + 3c2512d commit 3223846
Show file tree
Hide file tree
Showing 14 changed files with 40 additions and 36 deletions.
9 changes: 6 additions & 3 deletions ansible/bootstrap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,17 @@

- hosts: cluster
gather_facts: false
become: yes
tasks:
- name: Prevent ssh hanging if shared home is unavailable
lineinfile:
path: /etc/profile
search_string: HOSTNAME=$(/usr/bin/hostnamectl --transient 2>/dev/null) || \
state: absent
become: yes
- name: Remove RHEL cockpit
dnf:
name: cockpit-ws
state: "{{ appliances_cockpit_state }}"
- name: Add system user groups
ansible.builtin.group: "{{ item.group }}"
loop: "{{ appliances_local_users }}"
Expand All @@ -55,17 +59,16 @@
become_method: "sudo"
# Need to change working directory otherwise we try to switch back to non-existent directory.
become_flags: '-i'
become: true
- name: Add system users
ansible.builtin.user: "{{ item.user }}"
loop: "{{ appliances_local_users }}"
when: item.enable | default(true) | bool
become_method: "sudo"
# Need to change working directory otherwise we try to switch back to non-existent directory.
become_flags: '-i'
become: true
- name: Reset ssh connection to allow user changes to affect ansible_user
meta: reset_connection
become: no

- hosts: systemd
become: yes
Expand Down
7 changes: 6 additions & 1 deletion ansible/cleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,14 @@
path: /etc/NetworkManager/conf.d/99-cloud-init.conf
state: absent

- name: Get remote environment for ansible_user
setup:
gather_subset: env
become: no

- name: Delete any injected ssh config for ansible_user
file:
path: "/home/{{ ansible_user }}/.ssh/"
path: "{{ ansible_env.HOME }}/.ssh/"
state: absent

- name: Run cloud-init cleanup
Expand Down
3 changes: 2 additions & 1 deletion ansible/roles/basic_users/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ Role Variables
`basic_users_users`: Required. A list of mappings defining information for each user. In general, mapping keys/values are passed through as parameters to [ansible.builtin.user](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/user_module.html) and default values are as given there. However:
- `create_home`, `generate_ssh_key` and `ssh_key_comment` are set automatically and should not be overridden.
- `uid` should be set, so that the UID/GID is consistent across the cluster (which Slurm requires).
- `shell` may be set if required, but will be overriden with `/sbin/nologin` on `control` nodes to prevent user login.
- `shell` if *not* set will be `/sbin/nologin` on the `control` node and the default shell on other nodes. Explicitly setting this defines the shell for all nodes.
- An additional key `public_key` may optionally be specified to define a key to log into the cluster.
- An additional key `sudo` may optionally be specified giving a string (possibly multiline) defining sudo rules to be templated.
- Any other keys may be present for other purposes (i.e. not used by this role).

Dependencies
Expand Down
10 changes: 10 additions & 0 deletions ansible/roles/basic_users/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,13 @@
- item.ssh_public_key is defined
- basic_users_manage_homedir
run_once: true

# Template per-user sudo rules into drop-in files under /etc/sudoers.d/.
# Each file is validated with visudo before install: a syntactically-invalid
# rule in a sudoers.d file would otherwise break sudo for ALL users on the host.
- name: Write sudo rules
  blockinfile:
    path: /etc/sudoers.d/80-{{ item.name }}-user
    block: "{{ item.sudo }}"
    create: true
    # sudoers drop-ins must not be world/group-writable or sudo ignores them;
    # 0440 matches the convention used by distribution-shipped sudoers files.
    mode: "0440"
    # Refuse to install a rules file that visudo cannot parse.
    validate: 'visudo -cf %s'
  loop: "{{ basic_users_users }}"
  loop_control:
    label: "{{ item.name }}"  # avoid logging full user dicts (may contain password hashes)
  when: "'sudo' in item"
7 changes: 0 additions & 7 deletions ansible/roles/cluster_infra/defaults/main.yml

This file was deleted.

18 changes: 0 additions & 18 deletions ansible/roles/cluster_infra/templates/resources.tf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -298,18 +298,12 @@ resource "openstack_compute_instance_v2" "login" {
user_data = <<-EOF
#cloud-config
ssh_authorized_keys:
{%- if cluster_user_ssh_public_key is defined %}
- {{ cluster_user_ssh_public_key }}
{%- endif %}
{%- if cluster_deploy_ssh_public_key is defined %}
- {{ cluster_deploy_ssh_public_key }}
{%- endif %}
{%- if cluster_ssh_private_key_file is not defined %}
- "${openstack_compute_keypair_v2.cluster_keypair.public_key}"
{%- endif %}
{%- for ssh_key in cluster_deploy_ssh_keys_extra %}
- {{ ssh_key }}
{%- endfor %}
EOF
}

Expand Down Expand Up @@ -365,18 +359,12 @@ resource "openstack_compute_instance_v2" "control" {
user_data = <<-EOF
#cloud-config
ssh_authorized_keys:
{%- if cluster_user_ssh_public_key is defined %}
- {{ cluster_user_ssh_public_key }}
{%- endif %}
{%- if cluster_deploy_ssh_public_key is defined %}
- {{ cluster_deploy_ssh_public_key }}
{%- endif %}
{%- if cluster_ssh_private_key_file is not defined %}
- "${openstack_compute_keypair_v2.cluster_keypair.public_key}"
{%- endif %}
{%- for ssh_key in cluster_deploy_ssh_keys_extra %}
- {{ ssh_key }}
{%- endfor %}
bootcmd:
%{for volume in [openstack_blockstorage_volume_v3.state, {% if not cluster_home_manila_share | bool %} openstack_blockstorage_volume_v3.home {% endif %}]}
- BLKDEV=$(readlink -f $(ls /dev/disk/by-id/*${substr(volume.id, 0, 20)}* | head -n1 )); blkid -o value -s TYPE $BLKDEV || mke2fs -t ext4 -L ${lower(split(" ", volume.description)[0])} $BLKDEV
Expand Down Expand Up @@ -426,18 +414,12 @@ resource "openstack_compute_instance_v2" "{{ partition.name }}" {
user_data = <<-EOF
#cloud-config
ssh_authorized_keys:
{%- if cluster_user_ssh_public_key is defined %}
- {{ cluster_user_ssh_public_key }}
{%- endif %}
{%- if cluster_deploy_ssh_public_key is defined %}
- {{ cluster_deploy_ssh_public_key }}
{%- endif %}
{%- if cluster_ssh_private_key_file is not defined %}
- "${openstack_compute_keypair_v2.cluster_keypair.public_key}"
{%- endif %}
{%- for ssh_key in cluster_deploy_ssh_keys_extra %}
- {{ ssh_key }}
{%- endfor %}
EOF
}

Expand Down
3 changes: 1 addition & 2 deletions ansible/slurm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
blockinfile:
path: /etc/security/access.conf
block: |
+:wheel:ALL
+:{{ ansible_user }}:ALL
+:adm:ALL
-:ALL:ALL
# vagrant uses (deprecated) ansible_ssh_user
6 changes: 6 additions & 0 deletions environments/.caas/inventory/group_vars/all/basic_users.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@ basic_users_users:
password: "{{ vault_azimuth_user_password | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}"
uid: 1005
public_key: "{{ cluster_user_ssh_public_key }}"
shell: /bin/bash
append: true
groups:
- adm
- systemd-journal
sudo: azimuth ALL=(ALL) NOPASSWD:ALL
4 changes: 4 additions & 0 deletions environments/.caas/inventory/group_vars/all/hpctests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@ hpctests_pingpong_plot: false
# In Azimuth, the Ansible controller is an ephemeral pod, so all that matters is that
# this is a location that is writable by the container user
hpctests_outdir: "{{ playbook_dir }}/.tmp/hpctests"

# hpctests run by default in Azimuth but not trying to stress-test the nodes
# just check compiler, mpi etc works
hpctests_hpl_mem_frac: 0.05 # 5% node memory
2 changes: 1 addition & 1 deletion environments/.caas/inventory/group_vars/all/nfs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ caas_nfs_home:
- comment: Export /exports/home from Slurm control node as /home
nfs_enable:
server: "{{ inventory_hostname in groups['control'] }}"
clients: "{{ inventory_hostname in groups['cluster'] and inventory_hostname not in groups['control'] }}"
clients: "{{ inventory_hostname in groups['cluster'] }}"
nfs_export: "/exports/home" # assumes skeleton TF is being used
nfs_client_mnt_point: "/home"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ usage_template: |-
compute* up 60-00:00:0 {{ "%3s" | format(cluster.parameter_values.compute_count) }} idle {{ cluster.name }}-compute-[0-{{ cluster.parameter_values.compute_count - 1 }}]
```
The `rocky` user can be accessed the same way and has passwordless `sudo` enabled.
The `azimuth` user can ssh between nodes and has passwordless sudo.
SSH access can be granted to additional users by placing their SSH public key in `~azimuth/.ssh/authorized_keys`.
Expand Down
2 changes: 1 addition & 1 deletion environments/.caas/ui-meta/slurm-infra-manila-home.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ usage_template: |-
compute* up 60-00:00:0 {{ "%3s" | format(cluster.parameter_values.compute_count) }} idle {{ cluster.name }}-compute-[0-{{ cluster.parameter_values.compute_count - 1 }}]
```
The `rocky` user can be accessed the same way and has passwordless `sudo` enabled.
The `azimuth` user can ssh between nodes and has passwordless sudo.
SSH access can be granted to additional users by placing their SSH public key in `~azimuth/.ssh/authorized_keys`.
Expand Down
2 changes: 1 addition & 1 deletion environments/.caas/ui-meta/slurm-infra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ usage_template: |-
compute* up 60-00:00:0 {{ "%3s" | format(cluster.parameter_values.compute_count) }} idle {{ cluster.name }}-compute-[0-{{ cluster.parameter_values.compute_count - 1 }}]
```
The `rocky` user can be accessed the same way and has passwordless `sudo` enabled.
The `azimuth` user can ssh between nodes and has passwordless sudo.
SSH access can be granted to additional users by placing their SSH public key in `~azimuth/.ssh/authorized_keys`.
Expand Down
1 change: 1 addition & 0 deletions environments/common/inventory/group_vars/all/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ansible_user: rocky
appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}"
appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}"
appliances_environment_name: "{{ appliances_environment_root | basename | regex_replace('\\W+', '') }}" # [a-zA-Z0-9_] only
appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in genericcloud images; appliance defaults to removing it
#appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform

# Address(ip/dns) for internal communication between services. This is
Expand Down

0 comments on commit 3223846

Please sign in to comment.