scx: Dump debug info after an abort
When a scx scheduler gets aborted, it's difficult to tell what the system
was doing after the fact as normal operation is restored by reverting to the
default scheduler. Let's capture runqueue and runnable task states in a
debug dump buffer to aid debugging.

Signed-off-by: Tejun Heo <[email protected]>
htejun committed Jan 23, 2024
1 parent 47ae206 · commit 986e8e7
Showing 3 changed files with 113 additions and 1 deletion.
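
The change follows a common crash-dump pattern: the buffer is allocated while the system is healthy, and the failure path only formats into it, never allocates. A minimal, runnable userspace sketch of that pattern (all names and sizes here are illustrative, not kernel APIs):

/*
 * Sketch: preallocate a fixed-size dump buffer at startup, fill it with
 * formatted state on the failure path. Illustrative names only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DUMP_LEN 32768

static char *dump_buf;	/* preallocated up front, like ei->dump */

static int dump_init(void)
{
	dump_buf = calloc(1, DUMP_LEN);
	return dump_buf ? 0 : -1;
}

/* Called on abort: appends state; performs no allocation. */
static void dump_state(const char *reason)
{
	size_t used = strlen(dump_buf);

	snprintf(dump_buf + used, DUMP_LEN - used,
		 "abort: %s\nnr_tasks=%d\n", reason, 42);
}

int main(void)
{
	if (dump_init())
		return 1;
	dump_state("example failure");
	fputs(dump_buf, stdout);
	free(dump_buf);
	return 0;
}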
include/linux/sched/ext.h (3 additions, 0 deletions)

@@ -81,6 +81,9 @@ struct scx_exit_info {

 	/* informational message */
 	char *msg;
+
+	/* debug dump */
+	char *dump;
 };
 
 /* sched_ext_ops.flags */
kernel/sched/build_policy.c (1 addition, 0 deletions)

@@ -25,6 +25,7 @@
 #include <linux/livepatch.h>
 #include <linux/pm.h>
 #include <linux/psi.h>
+#include <linux/seq_buf.h>
 #include <linux/seqlock_api.h>
 #include <linux/slab.h>
 #include <linux/suspend.h>
kernel/sched/ext.c (109 additions, 1 deletion)

@@ -21,6 +21,7 @@ enum scx_internal_consts {

 	SCX_EXIT_BT_LEN = 64,
 	SCX_EXIT_MSG_LEN = 1024,
+	SCX_EXIT_DUMP_LEN = 32768,
 };
 
 enum scx_ops_enable_state {

@@ -3220,6 +3221,7 @@ static void scx_ops_bypass(bool bypass)

 static void free_exit_info(struct scx_exit_info *ei)
 {
+	kfree(ei->dump);
 	kfree(ei->msg);
 	kfree(ei->bt);
 	kfree(ei);

@@ -3235,8 +3237,9 @@ static struct scx_exit_info *alloc_exit_info(void)

 	ei->bt = kcalloc(sizeof(ei->bt[0]), SCX_EXIT_BT_LEN, GFP_KERNEL);
 	ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL);
+	ei->dump = kzalloc(SCX_EXIT_DUMP_LEN, GFP_KERNEL);
 
-	if (!ei->bt || !ei->msg) {
+	if (!ei->bt || !ei->msg || !ei->dump) {
 		free_exit_info(ei);
 		return NULL;
 	}
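
A side note on the error handling above: free_exit_info() serves as the single failure path because kfree(NULL) is a no-op, so a partially allocated scx_exit_info needs no special casing. A compilable userspace analogue of the same idiom (free() carries the same guarantee; struct, names, and sizes are illustrative):

/*
 * Sketch: allocate several members, and on any failure fall back to the
 * one free path, relying on free(NULL) being a no-op.
 */
#include <stdlib.h>

struct exit_info {
	unsigned long *bt;
	char *msg;
	char *dump;
};

static void free_exit_info(struct exit_info *ei)
{
	free(ei->dump);	/* free(NULL) is a no-op, so partial init is fine */
	free(ei->msg);
	free(ei->bt);
	free(ei);
}

static struct exit_info *alloc_exit_info(void)
{
	struct exit_info *ei = calloc(1, sizeof(*ei));

	if (!ei)
		return NULL;

	ei->bt = calloc(64, sizeof(ei->bt[0]));
	ei->msg = calloc(1, 1024);
	ei->dump = calloc(1, 32768);

	if (!ei->bt || !ei->msg || !ei->dump) {
		free_exit_info(ei);	/* frees whatever did succeed */
		return NULL;
	}
	return ei;
}

int main(void)
{
	struct exit_info *ei = alloc_exit_info();

	if (ei)
		free_exit_info(ei);
	return 0;
}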

@@ -3437,8 +3440,106 @@ static void scx_ops_disable(enum scx_exit_kind kind)
 	schedule_scx_ops_disable_work();
 }
 
+static void scx_dump_task(struct seq_buf *s, struct task_struct *p, char marker,
+			  unsigned long now)
+{
+	static unsigned long bt[SCX_EXIT_BT_LEN];
+	char dsq_id_buf[19] = "(n/a)";
+	unsigned long ops_state = atomic_long_read(&p->scx.ops_state);
+	unsigned int bt_len;
+	size_t avail, used;
+	char *buf;
+
+	if (p->scx.dsq)
+		scnprintf(dsq_id_buf, sizeof(dsq_id_buf), "0x%llx",
+			  (unsigned long long)p->scx.dsq->id);
+
+	seq_buf_printf(s, "\n %c%c %-16s: pid=%d state/flags=%u/0x%x dsq_flags=0x%x\n",
+		       marker, task_state_to_char(p), p->comm, p->pid,
+		       scx_get_task_state(p),
+		       p->scx.flags & ~SCX_TASK_STATE_MASK,
+		       p->scx.dsq_flags);
+	seq_buf_printf(s, "%*sops_state/qseq=%lu/%lu run_at=%+ldms\n", 22, "",
+		       ops_state & SCX_OPSS_STATE_MASK,
+		       ops_state >> SCX_OPSS_QSEQ_SHIFT,
+		       jiffies_delta_msecs(p->scx.runnable_at, now));
+	seq_buf_printf(s, "%*sdsq_id=%s sticky/holding_cpu=%d/%d\n", 22, "",
+		       dsq_id_buf, p->scx.sticky_cpu, p->scx.holding_cpu);
+
+	bt_len = stack_trace_save_tsk(p, bt, SCX_EXIT_BT_LEN, 1);
+
+	avail = seq_buf_get_buf(s, &buf);
+	used = stack_trace_snprint(buf, avail, bt, bt_len, 3);
+	seq_buf_commit(s, used < avail ? used : -1);
+}
+
+static void scx_dump_state(struct scx_exit_info *ei)
+{
+	const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n";
+	unsigned long now = jiffies;
+	struct seq_buf s;
+	size_t avail, used;
+	char *buf;
+	int cpu;
+
+	seq_buf_init(&s, ei->dump, SCX_EXIT_DUMP_LEN - sizeof(trunc_marker));
+
+	seq_buf_printf(&s, "%s[%d] triggered exit kind %d:\n %s (%s)\n\n",
+		       current->comm, current->pid, ei->kind, ei->reason, ei->msg);
+	seq_buf_printf(&s, "Backtrace:\n");
+	avail = seq_buf_get_buf(&s, &buf);
+	used = stack_trace_snprint(buf, avail, ei->bt, ei->bt_len, 1);
+	seq_buf_commit(&s, used < avail ? used : -1);
+
+	seq_buf_printf(&s, "\nRunqueue states\n");
+	seq_buf_printf(&s, "---------------\n");
+
+	for_each_possible_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+		struct rq_flags rf;
+		struct task_struct *p;
+
+		rq_lock(rq, &rf);
+
+		if (list_empty(&rq->scx.runnable_list) &&
+		    rq->curr->sched_class == &idle_sched_class)
+			goto next;
+
+		seq_buf_printf(&s, "\nCPU %-4d: nr_run=%u flags=0x%x cpu_rel=%d ops_qseq=%lu pnt_seq=%lu\n",
+			       cpu, rq->scx.nr_running, rq->scx.flags,
+			       rq->scx.cpu_released, rq->scx.ops_qseq,
+			       rq->scx.pnt_seq);
+		seq_buf_printf(&s, " curr=%s[%d] class=%ps\n",
+			       rq->curr->comm, rq->curr->pid,
+			       rq->curr->sched_class);
+		if (!cpumask_empty(rq->scx.cpus_to_kick))
+			seq_buf_printf(&s, " cpus_to_kick : %*pb\n",
+				       cpumask_pr_args(rq->scx.cpus_to_kick));
+		if (!cpumask_empty(rq->scx.cpus_to_preempt))
+			seq_buf_printf(&s, " cpus_to_preempt: %*pb\n",
+				       cpumask_pr_args(rq->scx.cpus_to_preempt));
+		if (!cpumask_empty(rq->scx.cpus_to_wait))
+			seq_buf_printf(&s, " cpus_to_wait : %*pb\n",
+				       cpumask_pr_args(rq->scx.cpus_to_wait));
+
+		if (rq->curr->sched_class == &ext_sched_class)
+			scx_dump_task(&s, rq->curr, '*', now);
+
+		list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node)
+			scx_dump_task(&s, p, ' ', now);
+next:
+		rq_unlock(rq, &rf);
+	}
+
+	if (seq_buf_has_overflowed(&s)) {
+		used = strlen(seq_buf_str(&s));
+		memcpy(ei->dump + used, trunc_marker, sizeof(trunc_marker));
+	}
+}
+
 static void scx_ops_error_irq_workfn(struct irq_work *irq_work)
 {
+	scx_dump_state(scx_exit_info);
 	schedule_scx_ops_disable_work();
 }

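Note also that scx_dump_state() initializes the seq_buf to SCX_EXIT_DUMP_LEN - sizeof(trunc_marker), reserving a tail so the truncation marker always fits when the dump overflows. A runnable userspace sketch of the same trick (buffer size and strings are illustrative):

/*
 * Sketch: format into less than the full buffer so that, on overflow,
 * a truncation marker can still be appended in the reserved tail.
 */
#include <stdio.h>
#include <string.h>

#define DUMP_LEN 64

int main(void)
{
	static const char trunc_marker[] = "\n~~~~ TRUNCATED ~~~~\n";
	char dump[DUMP_LEN];
	size_t limit = DUMP_LEN - sizeof(trunc_marker);	/* reserve the tail */
	int want;

	want = snprintf(dump, limit,
			"CPU 0: nr_run=3 flags=0x0\nCPU 1: nr_run=1 flags=0x0\n");
	if (want >= (int)limit)	/* output was truncated: append the marker */
		memcpy(dump + strlen(dump), trunc_marker, sizeof(trunc_marker));

	fputs(dump, stdout);
	return 0;
}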
@@ -3460,6 +3561,13 @@ __printf(2, 3) void scx_ops_error_kind(enum scx_exit_kind kind,
 	vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args);
 	va_end(args);
 
+	/*
+	 * Set ei->kind and ->reason for scx_dump_state(). They'll be set again
+	 * in scx_ops_disable_workfn().
+	 */
+	ei->kind = kind;
+	ei->reason = scx_exit_reason(ei->kind);
+
 	irq_work_queue(&scx_ops_error_irq_work);
 }
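
The error path can fire from essentially any context, including paths that may already hold scheduler locks, so scx_ops_error_kind() only records kind/reason and queues an irq_work; the dump, which takes each CPU's rq lock, then runs from irq_work context. A sketch of that deferral shape using the kernel's irq_work API (do_dump() and report_error() are stand-ins, and the stated rationale is my reading of the code, not the commit's own wording):

/*
 * Sketch: record minimal state at the error site, defer the heavy dump
 * to irq_work, where no scheduler locks are already held.
 */
#include <linux/irq_work.h>

static void do_dump(void)
{
	/* stand-in for scx_dump_state(scx_exit_info) */
}

static void error_irq_workfn(struct irq_work *irq_work)
{
	do_dump();	/* runs later, from irq_work context */
}
static DEFINE_IRQ_WORK(error_irq_work, error_irq_workfn);

static void report_error(void)
{
	/* record what happened first; the buffers already exist */
	irq_work_queue(&error_irq_work);
}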

