Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Zviratko committed Jun 30, 2015
1 parent 5242067 commit 8db53b3
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 0 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# pincpus
Script for automatically pinning Ceph OSD daemons to NUMA nodes via cpuset cgroups.

Usage:

1) Copy prz-pincpus.conf to /etc/ and edit it to match your CPU and NUMA layout.
2) Run pincpus :-)
120 changes: 120 additions & 0 deletions pincpus
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/bin/bash
#
# pincpus: spreads running ceph-osd processes over a set of cpuset cgroups
# described in the configuration file and then adjusts the libvirt cpuset.
#
# This first section loads the configuration and refuses to continue when
# KSM is running with merge_across_nodes enabled.

CFGFILE=/etc/prz-pincpus.conf

if [ ! -f "$CFGFILE" ]; then
  echo "ERROR: Missing configuration file ($CFGFILE)" >&2
  exit 1
fi

# The configuration file is plain shell; sourcing it defines the cgroup_*
# variables used by the rest of the script.
. "$CFGFILE"

# Read the two KSM switches. Either file may be absent (KSM not available,
# or a kernel without merge_across_nodes); a missing or empty file is
# treated as 0 instead of breaking the numeric test.
ksm_run=0
ksm_merge_across_nodes=0
if [ -r /sys/kernel/mm/ksm/run ]; then
  read -r ksm_run < /sys/kernel/mm/ksm/run || true
fi
if [ -r /sys/kernel/mm/ksm/merge_across_nodes ]; then
  read -r ksm_merge_across_nodes < /sys/kernel/mm/ksm/merge_across_nodes || true
fi

# NOTE(review): presumably cross-node page merging would undo the NUMA
# placement this script sets up - confirm with the original author.
if [ "${ksm_run:-0}" = "1" ] && [ "${ksm_merge_across_nodes:-0}" = "1" ]; then
  echo "ERROR: KSM w/ merge_across_nodes enabled! Exiting..." >&2
  exit 1
fi

# --- OSD cgroup setup -------------------------------------------------------
# Creates (or updates) one cpuset cgroup per configured OSD group and resets
# the per-group OSD counters in count_osds[].
#
# Reads:  cgroup_osd_count, cgroup_osd_cpu[], cgroup_osd_numanode[] (config)
# Writes: osds, max_osds_per_cgroup, count_osds[], cgroup_id

# Without a positive group count the division below would fail.
if ! [[ "$cgroup_osd_count" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: cgroup_osd_count must be a positive integer (got '$cgroup_osd_count')" >&2
  exit 1
fi

# Count with pidof, the same tool the migration loop iterates over, so the
# per-group limit is computed from exactly the set of processes handled there.
osds=$(pidof ceph-osd | wc -w)
osds=$((osds))  # normalise any whitespace padding printed by wc

# Ceiling of osds / cgroup_osd_count (evaluates to 1 when no OSD is running).
max_osds_per_cgroup=$(( (osds - 1) / cgroup_osd_count + 1 ))

cgroup_id=0
while [ "$cgroup_id" -lt "$cgroup_osd_count" ]; do
  cgroup_dir=/cgroup/cpuset/osd-group-$cgroup_id

  if [ -z "${cgroup_osd_cpu[$cgroup_id]}" ] || [ -z "${cgroup_osd_numanode[$cgroup_id]}" ]; then
    echo "ERROR: cgroup_osd_cpu[$cgroup_id] or cgroup_osd_numanode[$cgroup_id] is not configured" >&2
    exit 1
  fi

  echo "- creating or updating cgroup #$cgroup_id"

  # Plain mkdir (no -p) on purpose: if the cpuset hierarchy is not mounted
  # at /cgroup/cpuset this must fail rather than create ordinary directories.
  if [ ! -d "$cgroup_dir" ] && ! mkdir "$cgroup_dir"; then
    echo "ERROR: cannot create cgroup directory $cgroup_dir" >&2
    exit 1
  fi

  # Absolute paths instead of 'cd': a failed cd would otherwise redirect
  # these writes into whatever directory the script was started from.
  echo "${cgroup_osd_cpu[$cgroup_id]}" > "$cgroup_dir/cpuset.cpus"
  echo "${cgroup_osd_numanode[$cgroup_id]}" > "$cgroup_dir/cpuset.mems"
  echo 1 > "$cgroup_dir/cpuset.memory_migrate"
  echo 1 > "$cgroup_dir/cpuset.mem_hardwall"
  echo 2 > "$cgroup_dir/cpuset.sched_relax_domain_level"

  count_osds[$cgroup_id]=0  # initialize for the counting step

  cgroup_id=$((cgroup_id + 1))
done

# --- OSD counting and migration ---------------------------------------------
# Pass 1 counts how many ceph-osd processes sit in each osd-group-N cpuset
# and collects the pids that have to move. Pass 2 moves each collected pid
# (and all of its threads) into the group with the lowest count.
#
# Reads:  osds, max_osds_per_cgroup, cgroup_osd_count, count_osds[]
# Writes: migrate_pids[], count_osds[], /cgroup/cpuset/osd-group-N/tasks

migrate_pids=()

echo "- number of osds: $osds"
echo "- maximum osds per cgroup: $max_osds_per_cgroup"

if [ "$osds" -gt 0 ]; then
  echo "- counting # of OSD in cgroups"
  for pid in $(pidof ceph-osd); do

    # The process may have exited since pidof ran; nothing to pin then.
    [ -r "/proc/$pid/cpuset" ] || continue

    # get cpuset for this OSD
    cpuset=$(cat "/proc/$pid/cpuset" 2>/dev/null)

    # get group# from cpuset (9999 = no cgroup assigned). Only an exact
    # "/osd-group-<digits>" path counts; anything else (including nested
    # paths) is treated as unassigned so the arithmetic below stays numeric.
    if [[ "$cpuset" =~ ^/osd-group-([0-9]+)$ ]]; then
      group=$((10#${BASH_REMATCH[1]}))  # 10# avoids octal parsing of "08"
    else
      group=9999
    fi

    count_osds[$group]=$(( ${count_osds[$group]:-0} + 1 ))

    # we want to migrate any OSD which matches any of these conditions:
    #
    # a) current cgroup is overutilized
    # b) ceph-osd is not assigned to any cgroup
    # c) current cgroup index is outside the configured range
    #    (e.g. left over from a larger cgroup_osd_count)

    if [ "${count_osds[$group]}" -gt "$max_osds_per_cgroup" ] \
      || [ "$group" -eq 9999 ] \
      || [ "$group" -ge "$cgroup_osd_count" ]; then
      echo "- pid $pid scheduled for migration (currently in group '$group')"
      migrate_pids+=("$pid")
    fi

  done

  echo "- going to migrate them (if needed)"

  for pid in "${migrate_pids[@]}"; do

    # find the least utilized cgroup (first one wins on a tie)
    cgroup_id=0
    lu_count=99999
    while [ "$cgroup_id" -lt "$cgroup_osd_count" ]; do
      if [ "${count_osds[$cgroup_id]:-0}" -lt "$lu_count" ]; then
        lu_count=${count_osds[$cgroup_id]:-0}
        lu_cgroup_id=$cgroup_id
      fi
      cgroup_id=$((cgroup_id + 1))
    done

    # migrate to the least utilized group
    tasks_file=/cgroup/cpuset/osd-group-$lu_cgroup_id/tasks
    echo " - migrating $pid to the least utilized cgroup #$lu_cgroup_id (qty $lu_count)"
    echo "$pid" > "$tasks_file"

    # Move every thread, then do it a second time: a thread may have been
    # created while the first pass was running.
    for pass in 1 2; do
      for task_dir in "/proc/$pid/task"/*; do
        [ -e "$task_dir" ] || continue  # glob did not match (process gone)
        echo "${task_dir##*/}" > "$tasks_file"
      done
    done

    # increase group utilization count
    count_osds[$lu_cgroup_id]=$(( ${count_osds[$lu_cgroup_id]:-0} + 1 ))

  done

else
  echo "No Ceph OSD processes running...."
fi

echo "- finished OSDs"

# --- libvirt cpuset ---------------------------------------------------------
# Makes sure /cgroup/cpuset/libvirt and every cgroup below it use the CPU
# list from cgroup_libvirt_cpus. Nothing is written when the top-level
# libvirt cpuset already matches.
libvirt_root=/cgroup/cpuset/libvirt

if [ -z "$cgroup_libvirt_cpus" ]; then
  echo "- cgroup_libvirt_cpus is not set, leaving libvirt cpuset untouched" >&2
elif [ ! -d "$libvirt_root" ]; then
  echo "- $libvirt_root does not exist, skipping libvirt cpuset" >&2
elif grep -Fxq -- "$cgroup_libvirt_cpus" "$libvirt_root/cpuset.cpus"; then
  # Whole-line, fixed-string comparison of the current value.
  echo "- libvirt cpuset is ok"
else
  echo "- going to set libvirt cpuset..."

  # need to set cpuset
  # start with children & finish with root:
  # 'find' lists a directory before its contents, 'tac' reverses that order
  find "$libvirt_root" -type d | tac \
    | while IFS= read -r dir; do
        echo "$cgroup_libvirt_cpus" > "$dir/cpuset.cpus"
      done
  echo '- done'
fi
15 changes: 15 additions & 0 deletions prz-pincpus.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Configuration for pincpus. This file is sourced by the script as bash.

# Commented-out settings; pincpus does not read any of these names.
#cpu_count=2
#cpus_osd[0]=0-39
#cpus_osd[1]=0-39
#cpus_vms=0-39

# Number of OSD cpuset cgroups (osd-group-0 .. osd-group-<count-1>).
cgroup_osd_count=2

# Per-group value written to cpuset.mems, indexed by group number.
cgroup_osd_numanode=(
  "0"
  "1"
)

# Per-group value written to cpuset.cpus, indexed by group number.
cgroup_osd_cpu=(
  "0-3,20-23"
  "10-13,30-33"
)

# CPU list for the libvirt cgroups; compared against and written to
# /cgroup/cpuset/libvirt/cpuset.cpus (and the cgroups below it).
cgroup_libvirt_cpus="4-9,14-19,24-29,34-39"

0 comments on commit 8db53b3

Please sign in to comment.