diff --git a/roles/common/files/sysstat-default b/roles/common/files/sysstat-default new file mode 100644 index 00000000..1b029ba6 --- /dev/null +++ b/roles/common/files/sysstat-default @@ -0,0 +1,9 @@ +# +# Default settings for /etc/init.d/sysstat, /etc/cron.d/sysstat +# and /etc/cron.daily/sysstat files +# + +# Should sadc collect system activity information? Valid values +# are "true" and "false". Please do not put other values, they +# will be overwritten by debconf! +ENABLED="true" diff --git a/roles/common/files/sysstat.conf b/roles/common/files/sysstat.conf new file mode 100644 index 00000000..e983b0f1 --- /dev/null +++ b/roles/common/files/sysstat.conf @@ -0,0 +1,31 @@ +# sysstat configuration file. See sysstat(5) manual page. + +# How long to keep log files (in days). +# Used by sa2(8) script +# If value is greater than 28, then log files are kept in +# multiple directories, one for each month. +HISTORY=28 + +# Compress (using xz, gzip or bzip2) sa and sar files older than (in days): +COMPRESSAFTER=7 + +# Parameters for the system activity data collector (see sadc(8) manual page) +# which are used for the generation of log files. +# By default contains the `-S DISK' option responsible for generating disk +# statistics. Use `-S XALL' to collect all available statistics. +SADC_OPTIONS="-S XALL" + +# Directory where sa and sar files are saved. +SA_DIR=/var/log/sysstat + +# Compression program to use. +ZIP="xz" + +# By default sa2 script generates yesterday's summary, since the cron job +# usually runs right after midnight. If you want sa2 to generate the summary +# of the same day (for example when cron job runs at 23:53) set this variable. +#YESTERDAY=no + +# By default sa2 script generates report files (the so-called sarDD files). +# Set this variable to false to disable reports generation. 
+#REPORTS=false diff --git a/roles/common/handlers/main.yml b/roles/common/handlers/main.yml index 31a1b93e..289c808a 100644 --- a/roles/common/handlers/main.yml +++ b/roles/common/handlers/main.yml @@ -27,6 +27,8 @@ - include: sysstat.yml +- include: sysctl.yml + - name: restart systemd-hostnamed systemd: daemon_reload: yes diff --git a/roles/common/handlers/sysctl.yml b/roles/common/handlers/sysctl.yml new file mode 100644 index 00000000..3ac92fe4 --- /dev/null +++ b/roles/common/handlers/sysctl.yml @@ -0,0 +1,3 @@ +# Apply only the snippet deployed by this role instead of re-processing every tmpfiles.d config on the host +- name: systemd-tmpfiles create + command: systemd-tmpfiles --create /etc/tmpfiles.d/thp.conf diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml index eaf5fb00..b5c306d0 100644 --- a/roles/common/tasks/main.yml +++ b/roles/common/tasks/main.yml @@ -670,14 +670,6 @@ notify: - generate locales -# necessary to run a lot of containers, each which systemd launching several inotify -- name: increase fs.inotify.max_user_instances on host - sysctl: - name: fs.inotify.max_user_instances - value: 1024 - sysctl_file: /etc/sysctl.d/ansible.conf - when: "not 'vm' in group_names" - # configure lxfs so that VMs get their own load-average - name: create systemd override directory for lxcfs file: @@ -708,5 +700,11 @@ - include: munin-node.yml +- include: sysstat.yml + when: "'proxmox' in group_names" + +- include: sysctl.yml + when: "'proxmox' in group_names" + - include: ntp.yml when: "not 'vm' in group_names" diff --git a/roles/common/tasks/sysctl.yml b/roles/common/tasks/sysctl.yml new file mode 100644 index 00000000..3786573b --- /dev/null +++ b/roles/common/tasks/sysctl.yml @@ -0,0 +1,69 @@ +# necessary to run a lot of containers, each with systemd launching several inotify instances +- name: increase fs.inotify.max_user_instances on host + sysctl: + name: fs.inotify.max_user_instances + value: 1024 + sysctl_file: /etc/sysctl.d/ansible.conf + +- name: Reduce swappiness to 1 + sysctl: + name: vm.swappiness + value: 1 + sysctl_file: /etc/sysctl.d/ansible.conf + +# 
https://forum.proxmox.com/threads/increase-performance-with-sched_autogroup_enabled-0.41729/ +# https://www.postgresql.org/message-id/50E4AAB1.9040902@optionshouse.com +# +# * sched_migration_cost +# +# The migration cost is the total time the scheduler will consider a +# migrated process "cache hot" and thus less likely to be re-migrated. By +# default, this is 0.5ms (500000 ns), and as the size of the process table +# increases, eventually causes the scheduler to break down. On our +# systems, after a smooth degradation with increasing connection count, +# system CPU spiked from 20 to 70% sustained and TPS was cut by 5-10x once +# we crossed some invisible connection count threshold. For us, that was a +# pgbench with 900 or more clients. +# +# The migration cost should be increased, almost universally on server +# systems with many processes. This means systems like PostgreSQL or +# Apache would benefit from having higher migration costs. We've had good +# luck with a setting of 5ms (5000000 ns) instead. +# +# When the breakdown occurs, system CPU (as obtained from sar) increases +# from 20% on a heavy pgbench (scale 3500 on a 72GB system) to over 70%, +# and %nice/%user is cut by half or more. A higher migration cost +# essentially eliminates this artificial throttle. +# +# +# * sched_autogroup_enabled +# +# This is a relatively new patch which Linus lauded back in late 2010. It +# basically groups tasks by TTY so perceived responsiveness is improved. +# But on server systems, large daemons like PostgreSQL are going to be +# launched from the same pseudo-TTY, and be effectively choked out of CPU +# cycles in favor of less important tasks. +# +# The default setting is 1 (enabled) on some platforms. By setting this to +# 0 (disabled), we saw an outright 30% performance boost on the same +# pgbench test. A fully cached scale 3500 database on a 72GB system went +# from 67k TPS to 82k TPS with 900 client connections. 
+ +# We use the systemd-tmpfiles mechanism to write to pseudo-filesystems +# https://sleeplessbeastie.eu/2022/11/18/how-to-create-persistent-sysfs-configuration-using-systemd/ +# https://wiki.archlinux.org/title/Systemd#systemd-tmpfiles_-_temporary_files +- name: Disable Transparent Huge Pages and apply scheduler changes on recent kernels + template: + src: 'systemd-tmpfiles.conf.j2' + dest: '/etc/tmpfiles.d/thp.conf' + notify: + - systemd-tmpfiles create + +# The memory is not allocated/reserved up front. The kernel will try to allocate the huge pages when it can; if it cannot, too bad. +# This works well at boot. Once the server is running and memory is used for cache or is fragmented, +# it has a harder time finding contiguous blocks. +- name: Allow 2MB huge pages up to 60% of the RAM + sysctl: + name: vm.nr_overcommit_hugepages + value: "{{ ( ansible_memtotal_mb * 0.6 / 2)|int }}" + sysctl_file: /etc/sysctl.d/ansible.conf diff --git a/roles/common/tasks/sysstat.yml b/roles/common/tasks/sysstat.yml new file mode 100644 index 00000000..3ca5cafe --- /dev/null +++ b/roles/common/tasks/sysstat.yml @@ -0,0 +1,20 @@ +# Pass the whole package list to apt so everything is installed in a single transaction +- name: install packages for sysstat and atop + apt: + name: + - sysstat + - xz-utils + - atop + update_cache: true + when: ansible_distribution == 'Debian' or ansible_distribution == 'Ubuntu' + +- name: Enable sysstat + copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + owner: root + mode: "{{ item.mode }}" + loop: + - { src: 'sysstat-default', dest: '/etc/default/sysstat', mode: '0644' } + - { src: 'sysstat.conf', dest: '/etc/sysstat/sysstat', mode: '0644' } + notify: restart sysstat diff --git a/roles/common/templates/systemd-tmpfiles.conf.j2 b/roles/common/templates/systemd-tmpfiles.conf.j2 new file mode 100644 index 00000000..fd199f2b --- /dev/null +++ b/roles/common/templates/systemd-tmpfiles.conf.j2 @@ -0,0 +1,8 @@ +# Disable Transparent Huge Pages +w /sys/kernel/mm/transparent_hugepage/enabled - - - - never +w 
/sys/kernel/mm/transparent_hugepage/defrag - - - - never +# Scheduler tuning: "w" lines only write when the target file exists, so unavailable knobs are skipped +w /proc/sys/kernel/sched_autogroup_enabled - - - - 0 +# The migration cost knob lives in debugfs on kernels 5.13 and newer, and under /proc/sys on older ones +w /sys/kernel/debug/sched/migration_cost_ns - - - - 5000000 +w /proc/sys/kernel/sched_migration_cost_ns - - - - 5000000