Skip to content

Commit

Permalink
cpu_input_boost: Remove sync_threshold to recover from regression
Browse files Browse the repository at this point in the history
Load-based syncs successfully avoid boosting of lightweight threads.
Unfortunately, CPUs with high frequencies over sync_threshold will
be unnaturally throttled which becomes apparent in hackbench with
its high number of thread migrations. None of the newer targets set
sync_threshold, either.

Pre patch:
root@hammerhead:/ # perf stat --repeat 10 hackbench 10
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.923
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 1.106
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.934
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.917
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.765
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.807
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.930
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.937
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.858
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.808

 Performance counter stats for 'hackbench 10  ' (10 runs):

   3575.129450 task-clock#3.498 CPUs utilized
 ( +-  3.95% )
 42637 context-switches  #0.012 M/sec
 ( +- 12.54% )
  6152 CPU-migrations#0.002 M/sec
 ( +-  9.98% )
 37874 page-faults   #0.011 M/sec
 ( +-  0.02% )
6208354174 cycles#1.737 GHz <--- should not happen!
 ( +-  1.78% ) [90.87%]
 0 stalled-cycles-frontend   #0.00% frontend cycles idle
 ( +-  0.00% ) [88.02%]
 0 stalled-cycles-backend#0.00% backend  cycles idle
 ( +-  0.00% ) [87.13%]
1948037598 instructions  #0.31  insns per cycle
 ( +-  1.53% ) [89.32%]
 203196136 branches  #   56.836 M/sec
 ( +-  1.46% ) [90.70%]
   5345440 branch-misses           #    2.63% of all branches
 ( +-  4.62% ) [85.52%]

   1.022038466 seconds time elapsed
 ( +-  4.09% )

Post patch:
root@hammerhead:/ # perf stat --repeat 10 hackbench 10
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.735
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.815
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.754
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.721
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.770
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.767
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.762
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.689
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.748
Running in process mode with 10 groups using 40 file descriptors each (== 400 tasks)
Each sender will pass 100 messages of 100 bytes
Time: 0.679

 Performance counter stats for 'hackbench 10  ' (10 runs):

   2838.930889 task-clock#3.343 CPUs utilized
 ( +-  1.81% )
 22301 context-switches  #0.008 M/sec
 ( +- 11.18% )
  3641 CPU-migrations#0.001 M/sec
 ( +- 11.13% )
 37950 page-faults   #0.013 M/sec
 ( +-  0.09% )
5714558403 cycles#2.013 GHz
 ( +-  1.59% ) [91.42%]
 0 stalled-cycles-frontend   #0.00% frontend cycles idle
 ( +-  0.00% ) [89.07%]
 0 stalled-cycles-backend#0.00% backend  cycles idle
 ( +-  0.00% ) [89.74%]
1868219180 instructions  #0.33  insns per cycle
 ( +-  0.82% ) [90.90%]
 193711678 branches  #   68.234 M/sec
 ( +-  1.44% ) [91.41%]
   4927373 branch-misses           #    2.54% of all branches
 ( +-  3.57% ) [87.20%]

   0.849242812 seconds time elapsed
 ( +-  1.58% )

Change-Id: I8744cc1f96fefa81149ded1c2dc54ff4d3b76665
  • Loading branch information
myfluxi authored and nikhil18 committed Dec 15, 2015
1 parent 4f6e168 commit 78d81c9
Showing 1 changed file with 0 additions and 27 deletions.
27 changes: 0 additions & 27 deletions drivers/cpufreq/cpu_input_boost.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ static unsigned int migration_boost_ms;
static unsigned int migration_load_threshold;
static unsigned int ib_nr_cpus_boosted;
static unsigned int ib_nr_cpus_to_boost;
static unsigned int sync_threshold;

/* Boost function for input boost (only for CPU0) */
static void boost_cpu0(unsigned int duration_ms)
Expand Down Expand Up @@ -342,9 +341,6 @@ static int boost_mig_sync_thread(void *data)
continue;
}

if (sync_threshold)
req_freq = min(sync_threshold, req_freq);

cancel_delayed_work_sync(&b->mig_boost_rem);

b->migration_freq = req_freq;
Expand Down Expand Up @@ -598,20 +594,6 @@ static ssize_t migration_load_threshold_write(struct device *dev,
return size;
}

static ssize_t sync_threshold_write(struct device *dev,
struct device_attribute *attr, const char *buf, size_t size)
{
unsigned int data;
int ret = sscanf(buf, "%u", &data);

if (ret != 1)
return -EINVAL;

sync_threshold = data;

return size;
}

static ssize_t enabled_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
Expand Down Expand Up @@ -649,12 +631,6 @@ static ssize_t migration_load_threshold_read(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%u\n", migration_load_threshold);
}

static ssize_t sync_threshold_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u\n", sync_threshold);
}

static DEVICE_ATTR(enabled, 0644,
enabled_read, enabled_write);
static DEVICE_ATTR(ib_freqs, 0644,
Expand All @@ -667,8 +643,6 @@ static DEVICE_ATTR(migration_boost_ms, 0644,
migration_boost_ms_read, migration_boost_ms_write);
static DEVICE_ATTR(migration_load_threshold, 0644,
migration_load_threshold_read, migration_load_threshold_write);
static DEVICE_ATTR(sync_threshold, 0644,
sync_threshold_read, sync_threshold_write);

static struct attribute *cpu_ib_attr[] = {
&dev_attr_enabled.attr,
Expand All @@ -677,7 +651,6 @@ static struct attribute *cpu_ib_attr[] = {
&dev_attr_load_based_syncs.attr,
&dev_attr_migration_boost_ms.attr,
&dev_attr_migration_load_threshold.attr,
&dev_attr_sync_threshold.attr,
NULL
};

Expand Down

0 comments on commit 78d81c9

Please sign in to comment.