Skip to content

Commit

Permalink
Fix repeated login node deployments
Browse files Browse the repository at this point in the history
Replace OHPC Lmod with system Lmod
Add opt mount and tests
  • Loading branch information
tom91136 committed Sep 22, 2024
1 parent ff7649f commit ce87925
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 7 deletions.
2 changes: 1 addition & 1 deletion playbook-svc-idm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
ipaadmin_password: "{{ipa_password}}"
defaultgroup: "{{idm_default_group}}"
defaultshell: /bin/bash
homedirectory: /nfs/home
homedirectory: /home

- name: Trim
ansible.builtin.shell: fstrim -av
Expand Down
10 changes: 10 additions & 0 deletions playbook-svc-login.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
- include_tasks: tasks/backup_or_restore_host_keys.yml
- include_tasks: tasks/setup_unattended_security_updates.yml

# Installs the packages Slurm expects on this host via dnf. Lmod is listed here
# because the openhpc role further down is configured with
# openhpc_module_system_install: false, i.e. it does not install a module
# system itself.
- name: Setup missing Slurm dependencies
ansible.builtin.dnf:
name: ["/bin/mailx", "Lmod"]

- name: Setup motd
ansible.builtin.copy:
content: "{{login_message_of_the_day}}"
Expand All @@ -30,6 +34,11 @@
state: enabled
permanent: true

# Deletes /etc/ipa/ca.crt before the roles section runs.
# NOTE(review): presumably a certificate left over from an earlier enrolment
# breaks the ipaclient role when the login node is deployed again - confirm.
- name: Remove stale CA from past IPA joins
ansible.builtin.file:
state: absent
path: /etc/ipa/ca.crt

roles:
- role: linux-system-roles.postfix
postfix_conf: { relayhost: "{{postfix_smtp_relay}}" }
Expand All @@ -47,6 +56,7 @@
openhpc_slurm_service_enabled: true
openhpc_login_only_nodes: "{{inventory_hostname}}"
openhpc_cluster_name: "{{mgmt_cluster_name}}"
openhpc_module_system_install: false

- role: freeipa.ansible_freeipa.ipaclient
state: present
Expand Down
8 changes: 5 additions & 3 deletions playbook-svc-mgmt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

pre_tasks:
- include_tasks: tasks/setup_backup_dir.yml
- include_tasks: tasks/backup_or_restore_host_keys.yml
- include_tasks: tasks/setup_unattended_security_updates.yml

- name: Setup missing Slurm dependencies
ansible.builtin.dnf:
name: ["/bin/mailx"]
name: ["/bin/mailx", "Lmod"]

roles:
- role: linux-system-roles.postfix
Expand Down Expand Up @@ -50,6 +51,7 @@
openhpc_packages: []
openhpc_slurm_partitions:
- name: dummy # empty partition causes slurm to stall, we'll delete this partition later to make slurm start
openhpc_module_system_install: false

- role: freeipa.ansible_freeipa.ipaclient
state: present
Expand Down Expand Up @@ -280,15 +282,15 @@
ipaadmin_password: "{{ipa_password}}"
name: auto.rds1
location: default
mount: "/nfs"
mount: "/-"
state: present

- name: Create rds1 mount keys
freeipa.ansible_freeipa.ipaautomountkey:
ipaadmin_password: "{{ipa_password}}"
location: default
mapname: auto.rds1
key: "{{item}}" # points to /nfs/{{item}}
key: "/{{item}}"
info: "mgmt.{{domain}}:/mnt/rds1/{{item}}"
state: present
loop: "{{mgmt_exported_directories}}"
Expand Down
2 changes: 1 addition & 1 deletion staging.rb
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def self.write_inventory(pve_ip:, storage_pool:, extra_hosts:, host_common_hash:
mgmt_compute_dhcp_start: '10.10.10.150',
mgmt_compute_dhcp_end: '10.10.10.254',
mgmt_webhook_port: '808',
mgmt_exported_directories: %w[home shared],
mgmt_exported_directories: %w[home shared opt],
mgmt_cluster_name: 'staging'
}
login_node_vars = {
Expand Down
15 changes: 13 additions & 2 deletions tasks/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,21 @@
ansible.builtin.shell: pwd
register: dir

- ansible.builtin.fail:
- name: Assert home dir path is correct
ansible.builtin.fail:
msg: "Bad home dir path"
when:
- dir.rc != 0 or dir.stdout.strip() != "/nfs/home/" + ansible_user
- dir.rc != 0 or dir.stdout.strip() != "/home/" + ansible_user

# Capture the output of `df` so the following task can check that the home
# export is listed in it.
- name: Get df
  ansible.builtin.shell: df
  register: df

# Fail when `df` did not succeed or its output does not mention the home
# export served by mgmt.<domain>.
# `when` is already evaluated as a Jinja expression, so the expected source
# string is built with the `~` concatenation operator rather than by nesting
# {{ }} delimiters inside the condition (Ansible warns about the latter).
- name: Assert mountpoint is correct
  ansible.builtin.fail:
    msg: "Missing mount in df"
  when:
    - df.rc != 0 or ('mgmt.' ~ domain ~ ':/mnt/rds1/home') not in df.stdout

- name: Get srun node hostnames
ansible.builtin.shell: srun --nodelist={{node}} hostname
Expand Down

0 comments on commit ce87925

Please sign in to comment.