From 78d81c9cba5c7edb77bea108390b3843a8763b49 Mon Sep 17 00:00:00 2001
From: myfluxi
Date: Wed, 11 Nov 2015 20:33:49 +0100
Subject: [PATCH] cpu_input_boost: Remove sync_threshold to recover from
 regression

Load-based syncs successfully avoid boosting of lightweight threads.
Unfortunately, CPUs with high frequencies over sync_threshold will be
unnaturally throttled, which becomes apparent in hackbench with its
high number of thread migrations. None of the newer targets sets
sync_threshold, either.

Pre patch:

root@hammerhead:/ # perf stat --repeat 10 hackbench 10
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.923
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 1.106
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.934
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.917
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.765
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.807
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.930
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.937
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.858
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.808

 Performance counter stats for 'hackbench 10 ' (10 runs):

      3575.129450 task-clock                #    3.498 CPUs utilized            ( +-  3.95% )
            42637 context-switches          #    0.012 M/sec                    ( +- 12.54% )
             6152 CPU-migrations            #    0.002 M/sec                    ( +-  9.98% )
            37874 page-faults               #    0.011 M/sec                    ( +-  0.02% )
       6208354174 cycles                    #    1.737 GHz  <--- should not happen!  ( +-  1.78% ) [90.87%]
                0 stalled-cycles-frontend   #    0.00% frontend cycles idle     ( +-  0.00% ) [88.02%]
                0 stalled-cycles-backend    #    0.00% backend cycles idle      ( +-  0.00% ) [87.13%]
       1948037598 instructions              #    0.31  insns per cycle          ( +-  1.53% ) [89.32%]
        203196136 branches                  #   56.836 M/sec                    ( +-  1.46% ) [90.70%]
          5345440 branch-misses             #    2.63% of all branches          ( +-  4.62% ) [85.52%]

      1.022038466 seconds time elapsed                                          ( +-  4.09% )

Post patch:

root@hammerhead:/ # perf stat --repeat 10 hackbench 10
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.735
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.815
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.754
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.721
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.770
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.767
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.762
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.689
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.748
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.679

 Performance counter stats for 'hackbench 10 ' (10 runs):

      2838.930889 task-clock                #    3.343 CPUs utilized            ( +-  1.81% )
            22301 context-switches          #    0.008 M/sec                    ( +- 11.18% )
             3641 CPU-migrations            #    0.001 M/sec                    ( +- 11.13% )
            37950 page-faults               #    0.013 M/sec                    ( +-  0.09% )
       5714558403 cycles                    #    2.013 GHz                      ( +-  1.59% ) [91.42%]
                0 stalled-cycles-frontend   #    0.00% frontend cycles idle     ( +-  0.00% ) [89.07%]
                0 stalled-cycles-backend    #    0.00% backend cycles idle      ( +-  0.00% ) [89.74%]
       1868219180 instructions              #    0.33  insns per cycle          ( +-  0.82% ) [90.90%]
        193711678 branches                  #   68.234 M/sec                    ( +-  1.44% ) [91.41%]
          4927373 branch-misses             #    2.54% of all branches          ( +-  3.57% ) [87.20%]

      0.849242812 seconds time elapsed                                          ( +-  1.58% )

Change-Id: I8744cc1f96fefa81149ded1c2dc54ff4d3b76665
---
 drivers/cpufreq/cpu_input_boost.c | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/drivers/cpufreq/cpu_input_boost.c b/drivers/cpufreq/cpu_input_boost.c
index f35ac2c11cb5..df200803a0ae 100644
--- a/drivers/cpufreq/cpu_input_boost.c
+++ b/drivers/cpufreq/cpu_input_boost.c
@@ -65,7 +65,6 @@ static unsigned int migration_boost_ms;
 static unsigned int migration_load_threshold;
 static unsigned int ib_nr_cpus_boosted;
 static unsigned int ib_nr_cpus_to_boost;
-static unsigned int sync_threshold;
 
 /* Boost function for input boost (only for CPU0) */
 static void boost_cpu0(unsigned int duration_ms)
@@ -342,9 +341,6 @@ static int boost_mig_sync_thread(void *data)
 			continue;
 		}
 
-		if (sync_threshold)
-			req_freq = min(sync_threshold, req_freq);
-
 		cancel_delayed_work_sync(&b->mig_boost_rem);
 
 		b->migration_freq = req_freq;
@@ -598,20 +594,6 @@ static ssize_t migration_load_threshold_write(struct device *dev,
 	return size;
 }
 
-static ssize_t sync_threshold_write(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t size)
-{
-	unsigned int data;
-	int ret = sscanf(buf, "%u", &data);
-
-	if (ret != 1)
-		return -EINVAL;
-
-	sync_threshold = data;
-
-	return size;
-}
-
 static ssize_t enabled_read(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -649,12 +631,6 @@ static ssize_t migration_load_threshold_read(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%u\n", migration_load_threshold);
 }
 
-static ssize_t sync_threshold_read(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%u\n", sync_threshold);
-}
-
 static DEVICE_ATTR(enabled, 0644, enabled_read, enabled_write);
 static DEVICE_ATTR(ib_freqs, 0644,
@@ -667,8 +643,6 @@ static DEVICE_ATTR(migration_boost_ms, 0644,
 	migration_boost_ms_read, migration_boost_ms_write);
 static DEVICE_ATTR(migration_load_threshold, 0644,
 	migration_load_threshold_read, migration_load_threshold_write);
-static DEVICE_ATTR(sync_threshold, 0644,
-	sync_threshold_read, sync_threshold_write);
 
 static struct attribute *cpu_ib_attr[] = {
 	&dev_attr_enabled.attr,
@@ -677,7 +651,6 @@ static struct attribute *cpu_ib_attr[] = {
 	&dev_attr_load_based_syncs.attr,
 	&dev_attr_migration_boost_ms.attr,
 	&dev_attr_migration_load_threshold.attr,
-	&dev_attr_sync_threshold.attr,
 	NULL
 };
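
For context, the removed lines in boost_mig_sync_thread() are the source of the
throttling described above: whenever sync_threshold was non-zero, the frequency
requested for a migration sync boost was clamped down to it. Below is a minimal
standalone sketch of that clamp; the min() macro and the kHz figures are
illustrative stand-ins for the kernel's min() helper and whatever frequency the
source CPU actually requests, not the driver's real cpufreq plumbing.

#include <stdio.h>

/* Standalone stand-in for the kernel's min() helper. */
#define min(a, b) ((a) < (b) ? (a) : (b))

/* 0 means "no cap"; a non-zero value caps migration sync boosts (kHz). */
static unsigned int sync_threshold = 1728000;	/* illustrative value */

/* Sketch of the removed clamp: cut the requested sync frequency down to
 * sync_threshold before the destination CPU is boosted. */
static unsigned int clamp_sync_freq(unsigned int req_freq)
{
	if (sync_threshold)
		req_freq = min(sync_threshold, req_freq);
	return req_freq;
}

int main(void)
{
	/* Hypothetical request well above the cap. */
	unsigned int req = 2265600;

	printf("requested %u kHz -> boosted to %u kHz\n",
	       req, clamp_sync_freq(req));
	return 0;
}

With the clamp gone, the destination CPU is boosted to the full requested
frequency, which lines up with the higher average clock in the post-patch perf
run (2.013 GHz vs. 1.737 GHz) and the shorter hackbench times.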