Skip to content

Commit

Permalink
rcu: Add per-CPU rcuc task dumps to RCU CPU stall warnings
Browse files Browse the repository at this point in the history
When the rcutree.use_softirq kernel boot parameter is set to zero, all
RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads.
If these kthreads are being starved, quiescent states will not be
reported, which in turn means that the grace period will not end, which
can in turn trigger RCU CPU stall warnings.  This commit therefore dumps
stack traces of stalled CPUs' rcuc kthreads, which can help identify
what is preventing those kthreads from running.

Suggested-by: Ammar Faizi <[email protected]>
Reviewed-by: Ammar Faizi <[email protected]>
Signed-off-by: Zqiang <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
  • Loading branch information
qiangzh3 authored and KenHV committed Apr 9, 2022
1 parent de96e55 commit e74615a
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 0 deletions.
3 changes: 3 additions & 0 deletions kernel/rcu/tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2852,10 +2852,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
{
unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
int spincnt;

trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
for (spincnt = 0; spincnt < 10; spincnt++) {
WRITE_ONCE(*j, jiffies);
local_bh_disable();
*statusp = RCU_KTHREAD_RUNNING;
local_irq_disable();
Expand All @@ -2876,6 +2878,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
schedule_timeout_idle(2);
trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
*statusp = RCU_KTHREAD_WAITING;
WRITE_ONCE(*j, jiffies);
}

static struct smp_hotplug_thread rcu_cpu_thread_spec = {
Expand Down
1 change: 1 addition & 0 deletions kernel/rcu/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ struct rcu_data {
/* rcuc per-CPU kthread or NULL. */
unsigned int rcu_cpu_kthread_status;
char rcu_cpu_has_work;
unsigned long rcuc_activity;

/* 7) Diagnostic data, including RCU CPU stall warnings. */
unsigned int softirq_snap; /* Snapshot of softirq activity. */
Expand Down
3 changes: 3 additions & 0 deletions kernel/rcu/tree_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -996,12 +996,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
*/
static void rcu_cpu_kthread_setup(unsigned int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
#ifdef CONFIG_RCU_BOOST
	/* With boosting configured, run the current task as SCHED_FIFO at kthread_prio. */
	struct sched_param sp = { .sched_priority = kthread_prio };

	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
#endif /* #ifdef CONFIG_RCU_BOOST */

	/* Seed this CPU's rcuc activity timestamp with the current jiffies value. */
	WRITE_ONCE(rdp->rcuc_activity, jiffies);
}

#ifdef CONFIG_RCU_BOOST
Expand Down
35 changes: 35 additions & 0 deletions kernel/rcu/tree_stall.h
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
return j > 2 * HZ;
}

/*
 * Report whether more than 2 * HZ jiffies have passed since the
 * rcuc_activity timestamp in @rdp was last written.
 *
 * If @jp is non-NULL, the number of jiffies elapsed since that timestamp
 * is stored through it, whether or not the threshold was exceeded.
 */
static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
{
	unsigned long elapsed = jiffies - READ_ONCE(rdp->rcuc_activity);
	bool starving = elapsed > 2 * HZ;

	if (jp)
		*jp = elapsed;

	return starving;
}

/*
* Print out diagnostic information for the specified stalled CPU.
*
Expand Down Expand Up @@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
falsepositive ? " (false positive?)" : "");
}

/*
 * Dump diagnostic state for the rcuc kthread recorded in @rdp when that
 * kthread appears to be starved.
 *
 * Nothing is printed if @rdp has no rcuc kthread, if the CPU that kthread
 * is on is offline or idle, or if rcu_is_rcuc_kthread_starving() does not
 * report starvation.  Otherwise the starvation duration and the kthread's
 * task state are printed, followed by a backtrace of the kthread's CPU.
 */
static void rcuc_kthread_dump(struct rcu_data *rdp)
{
	int cpu;
	unsigned long j;
	struct task_struct *rcuc;

	rcuc = rdp->rcu_cpu_kthread_task;
	if (!rcuc)
		return;

	cpu = task_cpu(rcuc);
	if (cpu_is_offline(cpu) || idle_cpu(cpu))
		return;

	if (!rcu_is_rcuc_kthread_starving(rdp, &j))
		return;

	/* j is an unsigned long, so it must be printed with %lu, not %ld. */
	pr_err("%s kthread starved for %lu jiffies\n", rcuc->comm, j);
	sched_show_task(rcuc);
	/* Fall back to dump_cpu_task() when the single-CPU backtrace request returns false. */
	if (!trigger_single_cpu_backtrace(cpu))
		dump_cpu_task(cpu);
}

/* Complain about starvation of grace-period kthread. */
static void rcu_check_gp_kthread_starvation(void)
{
Expand Down Expand Up @@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
rcu_check_gp_kthread_expired_fqs_timer();
rcu_check_gp_kthread_starvation();

if (!use_softirq)
rcuc_kthread_dump(rdp);

rcu_dump_cpu_stacks();

raw_spin_lock_irqsave_rcu_node(rnp, flags);
Expand Down

0 comments on commit e74615a

Please sign in to comment.