Skip to content

Commit

Permalink
rcu: Correctly handle sparse possible cpus
Browse files Browse the repository at this point in the history
In many cases in the RCU tree code, we iterate over the set of cpus for
a leaf node described by rcu_node::grplo and rcu_node::grphi, checking
per-cpu data for each cpu in this range. However, if the set of possible
cpus is sparse, some cpus described in this range are not possible, and
thus no per-cpu region will have been allocated (or initialised) for
them by the generic percpu code.

Erroneous accesses to a per-cpu area for these !possible cpus may fault
or may hit other data depending on the addressed generated when the
erroneous per cpu offset is applied. In practice, both cases have been
observed on arm64 hardware (the former being silent, but detectable with
additional patches).

To avoid issues resulting from this, we must iterate over the set of
*possible* cpus for a given leaf node. This patch add a new helper,
for_each_leaf_node_possible_cpu, to enable this. As iteration is often
intertwined with rcu_node local bitmask manipulation, a new
leaf_node_cpu_bit helper is added to make this simpler and more
consistent. The RCU tree code is made to use both of these where
appropriate.

Without this patch, running reboot at a shell can result in an oops
like:

[ 3369.075979] Unable to handle kernel paging request at virtual address ffffff8008b21b4c
[ 3369.083881] pgd = ffffffc3ecdda000
[ 3369.087270] [ffffff8008b21b4c] *pgd=00000083eca48003, *pud=00000083eca48003, *pmd=0000000000000000
[ 3369.096222] Internal error: Oops: 96000007 [#1] PREEMPT SMP
[ 3369.101781] Modules linked in:
[ 3369.104825] CPU: 2 PID: 1817 Comm: NetworkManager Tainted: G        W       4.6.0+ #3
[ 3369.121239] task: ffffffc0fa13e000 ti: ffffffc3eb940000 task.ti: ffffffc3eb940000
[ 3369.128708] PC is at sync_rcu_exp_select_cpus+0x188/0x510
[ 3369.134094] LR is at sync_rcu_exp_select_cpus+0x104/0x510
[ 3369.139479] pc : [<ffffff80081109a8>] lr : [<ffffff8008110924>] pstate: 200001c5
[ 3369.146860] sp : ffffffc3eb9435a0
[ 3369.150162] x29: ffffffc3eb9435a0 x28: ffffff8008be4f88
[ 3369.155465] x27: ffffff8008b66c80 x26: ffffffc3eceb2600
[ 3369.160767] x25: 0000000000000001 x24: ffffff8008be4f88
[ 3369.166070] x23: ffffff8008b51c3c x22: ffffff8008b66c80
[ 3369.171371] x21: 0000000000000001 x20: ffffff8008b21b40
[ 3369.176673] x19: ffffff8008b66c80 x18: 0000000000000000
[ 3369.181975] x17: 0000007fa951a010 x16: ffffff80086a30f0
[ 3369.187278] x15: 0000007fa9505590 x14: 0000000000000000
[ 3369.192580] x13: ffffff8008b51000 x12: ffffffc3eb940000
[ 3369.197882] x11: 0000000000000006 x10: ffffff8008b51b78
[ 3369.203184] x9 : 0000000000000001 x8 : ffffff8008be4000
[ 3369.208486] x7 : ffffff8008b21b40 x6 : 0000000000001003
[ 3369.213788] x5 : 0000000000000000 x4 : ffffff8008b27280
[ 3369.219090] x3 : ffffff8008b21b4c x2 : 0000000000000001
[ 3369.224406] x1 : 0000000000000001 x0 : 0000000000000140
...
[ 3369.972257] [<ffffff80081109a8>] sync_rcu_exp_select_cpus+0x188/0x510
[ 3369.978685] [<ffffff80081128b4>] synchronize_rcu_expedited+0x64/0xa8
[ 3369.985026] [<ffffff80086b987c>] synchronize_net+0x24/0x30
[ 3369.990499] [<ffffff80086ddb54>] dev_deactivate_many+0x28c/0x298
[ 3369.996493] [<ffffff80086b6bb8>] __dev_close_many+0x60/0xd0
[ 3370.002052] [<ffffff80086b6d48>] __dev_close+0x28/0x40
[ 3370.007178] [<ffffff80086bf62c>] __dev_change_flags+0x8c/0x158
[ 3370.012999] [<ffffff80086bf718>] dev_change_flags+0x20/0x60
[ 3370.018558] [<ffffff80086cf7f0>] do_setlink+0x288/0x918
[ 3370.023771] [<ffffff80086d0798>] rtnl_newlink+0x398/0x6a8
[ 3370.029158] [<ffffff80086cee84>] rtnetlink_rcv_msg+0xe4/0x220
[ 3370.034891] [<ffffff80086e274c>] netlink_rcv_skb+0xc4/0xf8
[ 3370.040364] [<ffffff80086ced8c>] rtnetlink_rcv+0x2c/0x40
[ 3370.045663] [<ffffff80086e1fe8>] netlink_unicast+0x160/0x238
[ 3370.051309] [<ffffff80086e24b8>] netlink_sendmsg+0x2f0/0x358
[ 3370.056956] [<ffffff80086a0070>] sock_sendmsg+0x18/0x30
[ 3370.062168] [<ffffff80086a21cc>] ___sys_sendmsg+0x26c/0x280
[ 3370.067728] [<ffffff80086a30ac>] __sys_sendmsg+0x44/0x88
[ 3370.073027] [<ffffff80086a3100>] SyS_sendmsg+0x10/0x20
[ 3370.078153] [<ffffff8008085e70>] el0_svc_naked+0x24/0x28

Signed-off-by: Mark Rutland <[email protected]>
Reported-by: Dennis Chen <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Josh Triplett <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Steve Capper <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: [email protected]
Signed-off-by: Paul E. McKenney <[email protected]>
  • Loading branch information
Mark Rutland authored and paulmck committed Jun 6, 2016
1 parent 06f6445 commit 9cd5dc5
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 23 deletions.
21 changes: 9 additions & 12 deletions kernel/rcu/tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1287,9 +1287,9 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
if (rnp->qsmask & (1UL << cpu))
dump_cpu_task(rnp->grplo + cpu);
for_each_leaf_node_possible_cpu(rnp, cpu)
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
dump_cpu_task(cpu);
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
Expand Down Expand Up @@ -1361,10 +1361,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
ndetected += rcu_print_task_stall(rnp);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
if (rnp->qsmask & (1UL << cpu)) {
print_cpu_stall_info(rsp,
rnp->grplo + cpu);
for_each_leaf_node_possible_cpu(rnp, cpu)
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
print_cpu_stall_info(rsp, cpu);
ndetected++;
}
}
Expand Down Expand Up @@ -2885,7 +2884,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
unsigned long *maxj),
bool *isidle, unsigned long *maxj)
{
unsigned long bit;
int cpu;
unsigned long flags;
unsigned long mask;
Expand Down Expand Up @@ -2920,9 +2918,8 @@ static void force_qs_rnp(struct rcu_state *rsp,
continue;
}
}
cpu = rnp->grplo;
bit = 1;
for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
for_each_leaf_node_possible_cpu(rnp, cpu) {
unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
if ((rnp->qsmask & bit) != 0) {
if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
mask |= bit;
Expand Down Expand Up @@ -3751,7 +3748,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)

/* Set up local state, ensuring consistent view of global state. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
Expand Down
15 changes: 15 additions & 0 deletions kernel/rcu/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,13 @@ struct rcu_node {
wait_queue_head_t exp_wq[4];
} ____cacheline_internodealigned_in_smp;

/*
* Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
* are indexed relative to this interval rather than the global CPU ID space.
* This generates the bit for a CPU in node-local masks.
*/
#define leaf_node_cpu_bit(rnp, cpu) (1UL << ((cpu) - (rnp)->grplo))

/*
* Do a full breadth-first scan of the rcu_node structures for the
* specified rcu_state structure.
Expand Down Expand Up @@ -280,6 +287,14 @@ struct rcu_node {
for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)

/*
* Iterate over all possible CPUs in a leaf RCU node.
*/
#define for_each_leaf_node_possible_cpu(rnp, cpu) \
for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
cpu <= rnp->grphi; \
cpu = cpumask_next((cpu), cpu_possible_mask))

/*
* Union to allow "aggregate OR" operation on the need for a quiescent
* state by the normal and expedited grace periods.
Expand Down
16 changes: 7 additions & 9 deletions kernel/rcu/tree_exp.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,6 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
{
int cpu;
unsigned long flags;
unsigned long mask;
unsigned long mask_ofl_test;
unsigned long mask_ofl_ipi;
int ret;
Expand All @@ -356,7 +355,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,

/* Each pass checks a CPU for identity, offline, and idle. */
mask_ofl_test = 0;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
for_each_leaf_node_possible_cpu(rnp, cpu) {
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);

Expand All @@ -376,8 +375,8 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

/* IPI the remaining CPUs for expedited quiescent state. */
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
for_each_leaf_node_possible_cpu(rnp, cpu) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
if (!(mask_ofl_ipi & mask))
continue;
retry_ipi:
Expand Down Expand Up @@ -440,10 +439,10 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
ndetected = 0;
rcu_for_each_leaf_node(rsp, rnp) {
ndetected += rcu_print_task_exp_stall(rnp);
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
for_each_leaf_node_possible_cpu(rnp, cpu) {
struct rcu_data *rdp;

mask = leaf_node_cpu_bit(rnp, cpu);
if (!(rnp->expmask & mask))
continue;
ndetected++;
Expand All @@ -453,7 +452,6 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
"o."[!!(rdp->grpmask & rnp->expmaskinit)],
"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
}
mask <<= 1;
}
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
jiffies - jiffies_start, rsp->expedited_sequence,
Expand All @@ -473,8 +471,8 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
pr_cont("\n");
}
rcu_for_each_leaf_node(rsp, rnp) {
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
for_each_leaf_node_possible_cpu(rnp, cpu) {
mask = leaf_node_cpu_bit(rnp, cpu);
if (!(rnp->expmask & mask))
continue;
dump_cpu_task(cpu);
Expand Down
5 changes: 3 additions & 2 deletions kernel/rcu/tree_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1164,8 +1164,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
return;
if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
return;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
if ((mask & 0x1) && cpu != outgoingcpu)
for_each_leaf_node_possible_cpu(rnp, cpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu)
cpumask_set_cpu(cpu, cm);
if (cpumask_weight(cm) == 0)
cpumask_setall(cm);
Expand Down

0 comments on commit 9cd5dc5

Please sign in to comment.