Skip to content

Commit

Permalink
BACKPORT: psi: Fix uaf issue when psi trigger is destroyed while being polled
Browse files Browse the repository at this point in the history

commit a06247c6804f1a7c86a2e5398a4c1f1db1471848 upstream.

With write operation on psi files replacing old trigger with a new one,
the lifetime of its waitqueue is totally arbitrary. Overwriting an
existing trigger causes its waitqueue to be freed and pending poll()
will stumble on trigger->event_wait which was destroyed.
Fix this by disallowing redefinition of an existing psi trigger. If a
write operation is used on a file descriptor that already has a psi
trigger, the operation will fail with an EBUSY error.
Also remove the psi_disabled check from psi_trigger_destroy(): the flag
can be flipped after the trigger is created, and returning early in that
case would skip the cleanup and leak the trigger's memory.

Fixes: 0e94682b73bf ("psi: introduce psi monitor")
Reported-by: [email protected]
Suggested-by: Linus Torvalds <[email protected]>
Analyzed-by: Eric Biggers <[email protected]>
Signed-off-by: Suren Baghdasaryan <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Eric Biggers <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: [email protected]
Link: https://lore.kernel.org/r/[email protected]
[surenb: backported to 5.10 kernel]
CC: [email protected] # 5.10
Signed-off-by: Suren Baghdasaryan <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>

Conflicts:
        include/linux/psi.h
        kernel/cgroup/cgroup.c
        kernel/sched/psi.c

1. Resolved trivial merge conflicts.

Bug: 233410456
Signed-off-by: Suren Baghdasaryan <[email protected]>
Change-Id: I7143fef51b874c2df8d792808b6a9b666eec2c7b
  • Loading branch information
surenbaghdasaryan committed Jun 13, 2022
1 parent 37bc600 commit 7688024
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 44 deletions.
2 changes: 1 addition & 1 deletion include/linux/psi.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to);

struct psi_trigger *psi_trigger_create(struct psi_group *group,
char *buf, size_t nbytes, enum psi_res res);
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
void psi_trigger_destroy(struct psi_trigger *t);

__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
poll_table *wait);
Expand Down
3 changes: 0 additions & 3 deletions include/linux/psi_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,6 @@ struct psi_trigger {
* events to one per window
*/
u64 last_event_time;

/* Refcounting to prevent premature destruction */
struct kref refcount;
};

struct psi_group {
Expand Down
11 changes: 8 additions & 3 deletions kernel/cgroup/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -3530,14 +3530,19 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
cgroup_get(cgrp);
cgroup_kn_unlock(of->kn);

/* Allow only one trigger per file descriptor */
if (of->priv) {
cgroup_put(cgrp);
return -EBUSY;
}

new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
if (IS_ERR(new)) {
cgroup_put(cgrp);
return PTR_ERR(new);
}

psi_trigger_replace(&of->priv, new);

smp_store_release(&of->priv, new);
cgroup_put(cgrp);

return nbytes;
Expand Down Expand Up @@ -3572,7 +3577,7 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,

static void cgroup_pressure_release(struct kernfs_open_file *of)
{
psi_trigger_replace(&of->priv, NULL);
psi_trigger_destroy(of->priv);
}

bool cgroup_psi_enabled(void)
Expand Down
66 changes: 29 additions & 37 deletions kernel/sched/psi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->event = 0;
t->last_event_time = 0;
init_waitqueue_head(&t->event_wait);
kref_init(&t->refcount);

mutex_lock(&group->trigger_lock);

Expand Down Expand Up @@ -1083,15 +1082,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
return t;
}

static void psi_trigger_destroy(struct kref *ref)
void psi_trigger_destroy(struct psi_trigger *t)
{
struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
struct psi_group *group = t->group;
struct psi_group *group;
struct kthread_worker *kworker_to_destroy = NULL;

if (static_branch_likely(&psi_disabled))
/*
* We do not check psi_disabled since it might have been disabled after
* the trigger got created.
*/
if (!t)
return;

group = t->group;
/*
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
* from under a polling process.
Expand Down Expand Up @@ -1126,9 +1129,9 @@ static void psi_trigger_destroy(struct kref *ref)
mutex_unlock(&group->trigger_lock);

/*
* Wait for both *trigger_ptr from psi_trigger_replace and
* poll_kworker RCUs to complete their read-side critical sections
* before destroying the trigger and optionally the poll_kworker
* Wait for psi_schedule_poll_work RCU to complete its read-side
* critical section before destroying the trigger and optionally the
* poll_task.
*/
synchronize_rcu();
/*
Expand All @@ -1150,18 +1153,6 @@ static void psi_trigger_destroy(struct kref *ref)
kfree(t);
}

void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
{
struct psi_trigger *old = *trigger_ptr;

if (static_branch_likely(&psi_disabled))
return;

rcu_assign_pointer(*trigger_ptr, new);
if (old)
kref_put(&old->refcount, psi_trigger_destroy);
}

__poll_t psi_trigger_poll(void **trigger_ptr,
struct file *file, poll_table *wait)
{
Expand All @@ -1171,24 +1162,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
if (static_branch_likely(&psi_disabled))
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;

rcu_read_lock();

t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
if (!t) {
rcu_read_unlock();
t = smp_load_acquire(trigger_ptr);
if (!t)
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
}
kref_get(&t->refcount);

rcu_read_unlock();

poll_wait(file, &t->event_wait, wait);

if (cmpxchg(&t->event, 1, 0) == 1)
ret |= EPOLLPRI;

kref_put(&t->refcount, psi_trigger_destroy);

return ret;
}

Expand All @@ -1212,14 +1194,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,

buf[buf_size - 1] = '\0';

new = psi_trigger_create(&psi_system, buf, nbytes, res);
if (IS_ERR(new))
return PTR_ERR(new);

seq = file->private_data;

/* Take seq->lock to protect seq->private from concurrent writes */
mutex_lock(&seq->lock);
psi_trigger_replace(&seq->private, new);

/* Allow only one trigger per file descriptor */
if (seq->private) {
mutex_unlock(&seq->lock);
return -EBUSY;
}

new = psi_trigger_create(&psi_system, buf, nbytes, res);
if (IS_ERR(new)) {
mutex_unlock(&seq->lock);
return PTR_ERR(new);
}

smp_store_release(&seq->private, new);
mutex_unlock(&seq->lock);

return nbytes;
Expand Down Expand Up @@ -1254,7 +1246,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;

psi_trigger_replace(&seq->private, NULL);
psi_trigger_destroy(seq->private);
return single_release(inode, file);
}

Expand Down

0 comments on commit 7688024

Please sign in to comment.