diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index 11ddbff11..8ea2cd54c 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -39,13 +39,17 @@ - hosts: cluster gather_facts: false + become: yes tasks: - name: Prevent ssh hanging if shared home is unavailable lineinfile: path: /etc/profile search_string: HOSTNAME=$(/usr/bin/hostnamectl --transient 2>/dev/null) || \ state: absent - become: yes + - name: Remove RHEL cockpit + dnf: + name: cockpit-ws + state: "{{ appliances_cockpit_state }}" - name: Add system user groups ansible.builtin.group: "{{ item.group }}" loop: "{{ appliances_local_users }}" @@ -55,7 +59,6 @@ become_method: "sudo" # Need to change working directory otherwise we try to switch back to non-existent directory. become_flags: '-i' - become: true - name: Add system users ansible.builtin.user: "{{ item.user }}" loop: "{{ appliances_local_users }}" @@ -63,9 +66,9 @@ become_method: "sudo" # Need to change working directory otherwise we try to switch back to non-existent directory. become_flags: '-i' - become: true - name: Reset ssh connection to allow user changes to affect ansible_user meta: reset_connection + become: no - hosts: systemd become: yes diff --git a/ansible/cleanup.yml b/ansible/cleanup.yml index c428e9072..fc3391a23 100644 --- a/ansible/cleanup.yml +++ b/ansible/cleanup.yml @@ -23,9 +23,14 @@ path: /etc/NetworkManager/conf.d/99-cloud-init.conf state: absent +- name: Get remote environment for ansible_user + setup: + gather_subset: env + become: no + - name: Delete any injected ssh config for ansible_user file: - path: "/home/{{ ansible_user }}/.ssh/" + path: "{{ ansible_env.HOME }}/.ssh/" state: absent - name: Run cloud-init cleanup diff --git a/ansible/roles/basic_users/README.md b/ansible/roles/basic_users/README.md index 0d3c750f2..4d6c5485c 100644 --- a/ansible/roles/basic_users/README.md +++ b/ansible/roles/basic_users/README.md @@ -19,8 +19,9 @@ Role Variables `basic_users_users`: Required. 
A list of mappings defining information for each user. In general, mapping keys/values are passed through as parameters to [ansible.builtin.user](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/user_module.html) and default values are as given there. However: - `create_home`, `generate_ssh_key` and `ssh_key_comment` are set automatically and should not be overriden. - `uid` should be set, so that the UID/GID is consistent across the cluster (which Slurm requires). -- `shell` may be set if required, but will be overriden with `/sbin/nologin` on `control` nodes to prevent user login. +- `shell` if *not* set will be `/sbin/nologin` on the `control` node and the default shell on other nodes. Explicitly setting this defines the shell for all nodes. - An additional key `public_key` may optionally be specified to define a key to log into the cluster. +- An additional key `sudo` may optionally be specified giving a string (possibly multiline) defining sudo rules to be templated. - Any other keys may present for other purposes (i.e. not used by this role). 
Dependencies diff --git a/ansible/roles/basic_users/tasks/main.yml b/ansible/roles/basic_users/tasks/main.yml index f680e25ac..d2d3d0d4a 100644 --- a/ansible/roles/basic_users/tasks/main.yml +++ b/ansible/roles/basic_users/tasks/main.yml @@ -42,3 +42,13 @@ - item.ssh_public_key is defined - basic_users_manage_homedir run_once: true + +- name: Write sudo rules + blockinfile: + path: /etc/sudoers.d/80-{{ item.name}}-user + block: "{{ item.sudo }}" + create: true + loop: "{{ basic_users_users }}" + loop_control: + label: "{{ item.name }}" + when: "'sudo' in item" diff --git a/ansible/roles/cluster_infra/defaults/main.yml b/ansible/roles/cluster_infra/defaults/main.yml deleted file mode 100644 index ef8ea609b..000000000 --- a/ansible/roles/cluster_infra/defaults/main.yml +++ /dev/null @@ -1,7 +0,0 @@ -cluster_deploy_ssh_keys_extra: [] - -# List of hw_scsi_models that result in block devices presenting as /dev/sdX -# rather than /dev/vdX -scsi_models: - # Ceph [https://docs.ceph.com/en/quincy/rbd/rbd-openstack/#image-properties] - - virtio-scsi diff --git a/ansible/roles/cluster_infra/templates/resources.tf.j2 b/ansible/roles/cluster_infra/templates/resources.tf.j2 index 68f5cbfb6..3de64f12c 100644 --- a/ansible/roles/cluster_infra/templates/resources.tf.j2 +++ b/ansible/roles/cluster_infra/templates/resources.tf.j2 @@ -298,18 +298,12 @@ resource "openstack_compute_instance_v2" "login" { user_data = <<-EOF #cloud-config ssh_authorized_keys: - {%- if cluster_user_ssh_public_key is defined %} - - {{ cluster_user_ssh_public_key }} - {%- endif %} {%- if cluster_deploy_ssh_public_key is defined %} - {{ cluster_deploy_ssh_public_key }} {%- endif %} {%- if cluster_ssh_private_key_file is not defined %} - "${openstack_compute_keypair_v2.cluster_keypair.public_key}" {%- endif %} - {%- for ssh_key in cluster_deploy_ssh_keys_extra %} - - {{ ssh_key }} - {%- endfor %} EOF } @@ -365,18 +359,12 @@ resource "openstack_compute_instance_v2" "control" { user_data = <<-EOF #cloud-config 
ssh_authorized_keys: - {%- if cluster_user_ssh_public_key is defined %} - - {{ cluster_user_ssh_public_key }} - {%- endif %} {%- if cluster_deploy_ssh_public_key is defined %} - {{ cluster_deploy_ssh_public_key }} {%- endif %} {%- if cluster_ssh_private_key_file is not defined %} - "${openstack_compute_keypair_v2.cluster_keypair.public_key}" {%- endif %} - {%- for ssh_key in cluster_deploy_ssh_keys_extra %} - - {{ ssh_key }} - {%- endfor %} bootcmd: %{for volume in [openstack_blockstorage_volume_v3.state, {% if not cluster_home_manila_share | bool %} openstack_blockstorage_volume_v3.home {% endif %}]} - BLKDEV=$(readlink -f $(ls /dev/disk/by-id/*${substr(volume.id, 0, 20)}* | head -n1 )); blkid -o value -s TYPE $BLKDEV || mke2fs -t ext4 -L ${lower(split(" ", volume.description)[0])} $BLKDEV @@ -426,18 +414,12 @@ resource "openstack_compute_instance_v2" "{{ partition.name }}" { user_data = <<-EOF #cloud-config ssh_authorized_keys: - {%- if cluster_user_ssh_public_key is defined %} - - {{ cluster_user_ssh_public_key }} - {%- endif %} {%- if cluster_deploy_ssh_public_key is defined %} - {{ cluster_deploy_ssh_public_key }} {%- endif %} {%- if cluster_ssh_private_key_file is not defined %} - "${openstack_compute_keypair_v2.cluster_keypair.public_key}" {%- endif %} - {%- for ssh_key in cluster_deploy_ssh_keys_extra %} - - {{ ssh_key }} - {%- endfor %} EOF } diff --git a/ansible/slurm.yml b/ansible/slurm.yml index 080c74dcb..0b7397242 100644 --- a/ansible/slurm.yml +++ b/ansible/slurm.yml @@ -59,7 +59,6 @@ blockinfile: path: /etc/security/access.conf block: | - +:wheel:ALL - +:{{ ansible_user }}:ALL + +:adm:ALL -:ALL:ALL # vagrant uses (deprecated) ansible_ssh_user diff --git a/environments/.caas/inventory/group_vars/all/basic_users.yml b/environments/.caas/inventory/group_vars/all/basic_users.yml index 6105df821..2823a4862 100644 --- a/environments/.caas/inventory/group_vars/all/basic_users.yml +++ b/environments/.caas/inventory/group_vars/all/basic_users.yml @@ -4,3 
+4,9 @@ basic_users_users: password: "{{ vault_azimuth_user_password | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}" uid: 1005 public_key: "{{ cluster_user_ssh_public_key }}" + shell: /bin/bash + append: true + groups: + - adm + - systemd-journal + sudo: azimuth ALL=(ALL) NOPASSWD:ALL diff --git a/environments/.caas/inventory/group_vars/all/hpctests.yml b/environments/.caas/inventory/group_vars/all/hpctests.yml index a31437be3..192c90c5a 100644 --- a/environments/.caas/inventory/group_vars/all/hpctests.yml +++ b/environments/.caas/inventory/group_vars/all/hpctests.yml @@ -4,3 +4,7 @@ hpctests_pingpong_plot: false # In Azimuth, the Ansible controller is an ephemeral pod, so all that matters is that # this is a location that is writable by the container user hpctests_outdir: "{{ playbook_dir }}/.tmp/hpctests" + +# hpctests run by default in Azimuth but not trying to stress-test the nodes +# just check compiler, mpi etc works +hpctests_hpl_mem_frac: 0.05 # 5% node memory diff --git a/environments/.caas/inventory/group_vars/all/nfs.yml b/environments/.caas/inventory/group_vars/all/nfs.yml index cb4067979..14fff6295 100644 --- a/environments/.caas/inventory/group_vars/all/nfs.yml +++ b/environments/.caas/inventory/group_vars/all/nfs.yml @@ -13,7 +13,7 @@ caas_nfs_home: - comment: Export /exports/home from Slurm control node as /home nfs_enable: server: "{{ inventory_hostname in groups['control'] }}" - clients: "{{ inventory_hostname in groups['cluster'] and inventory_hostname not in groups['control'] }}" + clients: "{{ inventory_hostname in groups['cluster'] }}" nfs_export: "/exports/home" # assumes skeleton TF is being used nfs_client_mnt_point: "/home" diff --git a/environments/.caas/ui-meta/slurm-infra-fast-volume-type.yml b/environments/.caas/ui-meta/slurm-infra-fast-volume-type.yml index 3ca5477d3..ab10eff20 100644 --- a/environments/.caas/ui-meta/slurm-infra-fast-volume-type.yml +++ 
b/environments/.caas/ui-meta/slurm-infra-fast-volume-type.yml @@ -125,7 +125,7 @@ usage_template: |- compute* up 60-00:00:0 {{ "%3s" | format(cluster.parameter_values.compute_count) }} idle {{ cluster.name }}-compute-[0-{{ cluster.parameter_values.compute_count - 1 }}] ``` - The `rocky` user can be accessed the same way and has passwordless `sudo` enabled. + The `azimuth` user can ssh between nodes and has passwordless sudo. SSH access can be granted to additional users by placing their SSH public key in `~azimuth/.ssh/authorized_keys`. diff --git a/environments/.caas/ui-meta/slurm-infra-manila-home.yml b/environments/.caas/ui-meta/slurm-infra-manila-home.yml index 7371e9f7d..4a01bb6fa 100644 --- a/environments/.caas/ui-meta/slurm-infra-manila-home.yml +++ b/environments/.caas/ui-meta/slurm-infra-manila-home.yml @@ -115,7 +115,7 @@ usage_template: |- compute* up 60-00:00:0 {{ "%3s" | format(cluster.parameter_values.compute_count) }} idle {{ cluster.name }}-compute-[0-{{ cluster.parameter_values.compute_count - 1 }}] ``` - The `rocky` user can be accessed the same way and has passwordless `sudo` enabled. + The `azimuth` user can ssh between nodes and has passwordless sudo. SSH access can be granted to additional users by placing their SSH public key in `~azimuth/.ssh/authorized_keys`. diff --git a/environments/.caas/ui-meta/slurm-infra.yml b/environments/.caas/ui-meta/slurm-infra.yml index 822f59c41..36b89281d 100644 --- a/environments/.caas/ui-meta/slurm-infra.yml +++ b/environments/.caas/ui-meta/slurm-infra.yml @@ -112,7 +112,7 @@ usage_template: |- compute* up 60-00:00:0 {{ "%3s" | format(cluster.parameter_values.compute_count) }} idle {{ cluster.name }}-compute-[0-{{ cluster.parameter_values.compute_count - 1 }}] ``` - The `rocky` user can be accessed the same way and has passwordless `sudo` enabled. + The `azimuth` user can ssh between nodes and has passwordless sudo. 
SSH access can be granted to additional users by placing their SSH public key in `~azimuth/.ssh/authorized_keys`. diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 91db4dc3a..15340820f 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -4,6 +4,7 @@ ansible_user: rocky appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" appliances_environment_name: "{{ appliances_environment_root | basename | regex_replace('\\W+', '') }}" # [a-zA-Z0-9_] only +appliances_cockpit_state: absent # RHEL cockpit installed but not enabled in genericcloud images; appliance defaults to removing it #appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform # Address(ip/dns) for internal communication between services. This is