osm-fr · jocelynj · Oct 8, 2023 · Jul 6, 2020 · Oct 6, 2023
diff --git a/roles/common/files/sysstat-default b/roles/common/files/sysstat-default
@@ -0,0 +1,9 @@
+#
+# Default settings for /etc/init.d/sysstat, /etc/cron.d/sysstat
+# and /etc/cron.daily/sysstat files
+#
+
+# Should sadc collect system activity information? Valid values
+# are "true" and "false". Please do not put other values, they
+# will be overwritten by debconf!
+ENABLED="true"
diff --git a/roles/common/files/sysstat.conf b/roles/common/files/sysstat.conf
@@ -0,0 +1,31 @@
+# sysstat configuration file. See sysstat(5) manual page.
+
+# How long to keep log files (in days).
+# Used by sa2(8) script
+# If value is greater than 28, then log files are kept in
+# multiple directories, one for each month.
+HISTORY=28
+
+# Compress (using xz, gzip or bzip2) sa and sar files older than (in days):
+COMPRESSAFTER=7
+
+# Parameters for the system activity data collector (see sadc(8) manual page)
+# which are used for the generation of log files.
+# By default contains the `-S DISK' option responsible for generating disk
+# statistics. Use `-S XALL' to collect all available statistics.
+SADC_OPTIONS="-S XALL"
+
+# Directory where sa and sar files are saved.
+SA_DIR=/var/log/sysstat
+
+# Compression program to use.
+ZIP="xz"
+
+# By default sa2 script generates yesterday's summary, since the cron job
+# usually runs right after midnight. If you want sa2 to generate the summary
+# of the same day (for example when cron job runs at 23:53) set this variable.
+#YESTERDAY=no
+
+# By default sa2 script generates reports files (the so called sarDD files).
+# Set this variable to false to disable reports generation.
+#REPORTS=false
diff --git a/roles/common/handlers/main.yml b/roles/common/handlers/main.yml
@@ -27,6 +27,8 @@
 
 - include: sysstat.yml
 
+- include: sysctl.yml  # handler file that defines "systemd-tmpfiles create"
+
 - name: restart systemd-hostnamed
   systemd:
     daemon_reload: yes

diff --git a/roles/common/handlers/sysctl.yml b/roles/common/handlers/sysctl.yml
@@ -0,0 +1,2 @@
+- name: systemd-tmpfiles create
+  command: systemd-tmpfiles --create /etc/tmpfiles.d/thp.conf  # apply only the rules file this role installs, not every tmpfiles.d config on the host
diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
@@ -670,14 +670,6 @@
   notify:
     - generate locales
 
-# necessary to run a lot of containers, each which systemd launching several inotify
-- name: increase fs.inotify.max_user_instances on host
-  sysctl:
-    name: fs.inotify.max_user_instances
-    value: 1024
-    sysctl_file: /etc/sysctl.d/ansible.conf
-  when: "not 'vm' in group_names"
-
 # configure lxfs so that VMs get their own load-average
 - name: create systemd override directory for lxcfs
   file:
@@ -708,5 +700,11 @@
 
 - include: munin-node.yml
 
+- include: sysstat.yml  # installs sysstat/atop and their config; limited to the 'proxmox' inventory group
+  when: "'proxmox' in group_names"
+
+- include: sysctl.yml  # kernel tunables and tmpfiles rules; limited to the 'proxmox' inventory group
+  when: "'proxmox' in group_names"
+
 - include: ntp.yml
   when: "not 'vm' in group_names"
diff --git a/roles/common/tasks/sysctl.yml b/roles/common/tasks/sysctl.yml
@@ -0,0 +1,69 @@
+# Needed to run many containers: the systemd inside each one opens several inotify instances.
+- name: increase fs.inotify.max_user_instances on host
+  sysctl:
+    sysctl_file: '/etc/sysctl.d/ansible.conf'
+    name: 'fs.inotify.max_user_instances'
+    value: '1024'
+
+- name: Reduce swappiness to 1
+  sysctl:
+    sysctl_file: '/etc/sysctl.d/ansible.conf'  # same drop-in file as the other sysctl tasks here
+    name: 'vm.swappiness'
+    value: '1'
+
+# https://forum.proxmox.com/threads/increase-performance-with-sched_autogroup_enabled-0.41729/
+# https://www.postgresql.org/message-id/50E4AAB1.9040902@optionshouse.com ("Two Necessary Kernel Tweaks for Linux Systems")
+#
+# * sched_migration_cost
+#
+# The migration cost is the total time the scheduler will consider a
+# migrated process "cache hot" and thus less likely to be re-migrated. By
+# default, this is 0.5ms (500000 ns), and as the size of the process table
+# increases, eventually causes the scheduler to break down. On our
+# systems, after a smooth degradation with increasing connection count,
+# system CPU spiked from 20 to 70% sustained and TPS was cut by 5-10x once
+# we crossed some invisible connection count threshold. For us, that was a
+# pgbench with 900 or more clients.
+#
+# The migration cost should be increased, almost universally on server
+# systems with many processes. This means systems like PostgreSQL or
+# Apache would benefit from having higher migration costs. We've had good
+# luck with a setting of 5ms (5000000 ns) instead.
+#
+# When the breakdown occurs, system CPU (as obtained from sar) increases
+# from 20% on a heavy pgbench (scale 3500 on a 72GB system) to over 70%,
+# and %nice/%user is cut by half or more. A higher migration cost
+# essentially eliminates this artificial throttle.
+#
+#
+# * sched_autogroup_enabled
+#
+# This is a relatively new patch which Linus lauded back in late 2010. It
+# basically groups tasks by TTY so perceived responsiveness is improved.
+# But on server systems, large daemons like PostgreSQL are going to be
+# launched from the same pseudo-TTY, and be effectively choked out of CPU
+# cycles in favor of less important tasks.
+#
+# The default setting is 1 (enabled) on some platforms. By setting this to
+# 0 (disabled), we saw an outright 30% performance boost on the same
+# pgbench test. A fully cached scale 3500 database on a 72GB system went
+# from 67k TPS to 82k TPS with 900 client connections.
+
+# We use systemd-tmpfiles mechanism to write in pseudo filesystem
+# https://sleeplessbeastie.eu/2022/11/18/how-to-create-persistent-sysfs-configuration-using-systemd/
+# https://wiki.archlinux.org/title/Systemd#systemd-tmpfiles_-_temporary_files
+- name: Disable Transparent Huge Pages and apply scheduler changes on recent kernels
+  # Renders the tmpfiles.d rules; the notified handler then runs systemd-tmpfiles to apply them.
+  notify: systemd-tmpfiles create
+  template:
+    dest: '/etc/tmpfiles.d/thp.conf'
+    src: 'systemd-tmpfiles.conf.j2'
+
+# The memory is not allocated/reserved up front: the kernel tries to allocate huge pages when it can and gives up otherwise.
+# This works well at boot; once the server is running and memory is used for cache or fragmented, contiguous blocks are harder to find.
+# Page count below = 60% of total RAM (in MB) divided by the 2 MB huge page size.
+- name: Allow 2MB huge pages up to 60% of the RAM
+  sysctl:
+    sysctl_file: '/etc/sysctl.d/ansible.conf'
+    name: 'vm.nr_overcommit_hugepages'
+    value: '{{ (ansible_memtotal_mb * 0.6 / 2) | int }}'
diff --git a/roles/common/tasks/sysstat.yml b/roles/common/tasks/sysstat.yml
@@ -0,0 +1,18 @@
+# Install everything in a single apt call: a per-item loop would run apt (and
+# refresh the package cache) once per package.
+- name: install packages for sysstat and atop
+  apt:
+    name: [sysstat, xz-utils, atop]
+    update_cache: true
+  when: ansible_distribution in ['Debian', 'Ubuntu']
+
+- name: Enable sysstat
+  notify: restart sysstat  # presumably defined in the included sysstat handlers file; verify
+  loop:  # each entry: destination on the host, source under this role's files/
+    - { dest: /etc/default/sysstat, src: sysstat-default }
+    - { dest: /etc/sysstat/sysstat, src: sysstat.conf }
+  copy:
+    dest: '{{ item.dest }}'
+    src: '{{ item.src }}'
+    owner: root
+    mode: '0644'
diff --git a/roles/common/templates/systemd-tmpfiles.conf.j2 b/roles/common/templates/systemd-tmpfiles.conf.j2
@@ -0,0 +1,4 @@
+w /sys/kernel/mm/transparent_hugepage/enabled - - - - never
+w /sys/kernel/mm/transparent_hugepage/defrag - - - - never
+w /proc/sys/kernel/sched_autogroup_enabled - - - - 0
+w /sys/kernel/debug/sched/migration_cost_ns - - - - 5000000
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		- name: systemd-tmpfiles create
		command: systemd-tmpfiles --create