From 3373954dbefe193824db6695b1fdf4c83cae5427 Mon Sep 17 00:00:00 2001
From: Brooks Davis
Date: Fri, 3 Feb 2023 00:29:02 +0000
Subject: [PATCH] maintain a list of processes sharing each vmspace

Use the list to stop/start all threads and to scan all processes for
hoarders.  This has the side effect of removing a lock-order reversal
between the allproc lock and the proc lock in the stop/start code.  It
is also considerably more efficient, since most vmspaces are not
shared.
---
 sys/kern/init_main.c         |  3 ++
 sys/kern/kern_cheri_revoke.c | 10 ++----
 sys/kern/kern_proc.c         | 54 ++++++++++++++++---------------
 sys/sys/proc.h               |  2 ++
 sys/vm/vm_glue.c             |  1 +
 sys/vm/vm_map.c              | 64 ++++++++++++++++++++++++++++++++++++--
 sys/vm/vm_map.h              |  8 +++++
 7 files changed, 107 insertions(+), 35 deletions(-)

diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index cc18676383d0..a62344bfe5a3 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -627,7 +627,10 @@ proc0_init(void *dummy __unused)
 
 	/* Allocate a prototype map so we have something to fork. */
 	p->p_vmspace = &vmspace0;
+	mtx_init(&vmspace0.vm_mtx, "vmspace", NULL, MTX_DEF);
 	refcount_init(&vmspace0.vm_refcnt, 1);
+	LIST_INIT(&vmspace0.vm_proclist);
+	LIST_INSERT_HEAD(&vmspace0.vm_proclist, p, p_vm_proclist);
 	pmap_pinit0(vmspace_pmap(&vmspace0));
 
 	/*
diff --git a/sys/kern/kern_cheri_revoke.c b/sys/kern/kern_cheri_revoke.c
index 05b64ab38537..aab79611925f 100644
--- a/sys/kern/kern_cheri_revoke.c
+++ b/sys/kern/kern_cheri_revoke.c
@@ -461,9 +461,8 @@ kern_cheri_revoke(struct thread *td, int flags,
 	PROC_UNLOCK(td->td_proc);
 
 	/* Per-thread kernel hoarders */
-	FOREACH_PROC_IN_SYSTEM(proc) {
-		if (proc->p_vmspace != td->td_proc->p_vmspace)
-			continue;
+	LIST_FOREACH(proc, &td->td_proc->p_vmspace->vm_proclist,
+	    p_vm_proclist) {
 		FOREACH_THREAD_IN_PROC (proc, ptd) {
 			cheri_revoke_td_frame(ptd, &vmcrc);
 			sigaltstack_cheri_revoke(ptd, &vmcrc);
@@ -472,11 +471,8 @@ kern_cheri_revoke(struct thread *td, int flags,
 	}
 
 	/* Per-process kernel hoarders */
-	FOREACH_PROC_IN_SYSTEM(proc) {
-		if (proc->p_vmspace != td->td_proc->p_vmspace)
-			continue;
+	LIST_FOREACH(proc, &td->td_proc->p_vmspace->vm_proclist, p_vm_proclist)
 		cheri_revoke_hoarders(proc, &vmcrc);
-	}
 
 	switch(myst) {
 	default:
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 58da44ab7b3e..ad830b0f2f87 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -3862,6 +3862,7 @@ void
 stop_vmspace_proc(void)
 {
 	struct proc *cp, *p;
+	struct vmspace *vm;
 	int r, gen;
 	bool restart, seen_stopped, seen_exiting, stopped_some;
 
@@ -3871,23 +3872,23 @@ stop_vmspace_proc(void)
 	 */
 	cp = curproc;
-allproc_loop:
-	sx_xlock(&allproc_lock);
+	vm = cp->p_vmspace;
+vmspace_loop:
+	VMSPACE_LOCK(vm);
+	/*
+	 * XXX: allproc_gen should work, but should vmspaces have a generation?
+	 */
 	gen = allproc_gen;
 	seen_exiting = seen_stopped = stopped_some = restart = false;
-	LIST_REMOVE(cp, p_list);
-	LIST_INSERT_HEAD(&allproc, cp, p_list);
+	LIST_REMOVE(cp, p_vm_proclist);
+	LIST_INSERT_HEAD(&vm->vm_proclist, cp, p_vm_proclist);
 	for (;;) {
-		p = LIST_NEXT(cp, p_list);
+		p = LIST_NEXT(cp, p_vm_proclist);
 		if (p == NULL)
 			break;
-		LIST_REMOVE(cp, p_list);
-		LIST_INSERT_AFTER(p, cp, p_list);
+		LIST_REMOVE(cp, p_vm_proclist);
+		LIST_INSERT_AFTER(p, cp, p_vm_proclist);
 		PROC_LOCK(p);
-		if (p->p_vmspace != cp->p_vmspace) {
-			PROC_UNLOCK(p);
-			continue;
-		}
 		if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) {
 			PROC_UNLOCK(p);
 			continue;
 		}
@@ -3909,9 +3910,8 @@ stop_vmspace_proc(void)
 			PROC_UNLOCK(p);
 			continue;
 		}
-		sx_xunlock(&allproc_lock);
+		VMSPACE_UNLOCK(vm);
 		_PHOLD(p);
-		printf("%s: suspending pid %d\n", __func__, p->p_pid);
 		r = thread_single(p, SINGLE_VMSPACE);
 		if (r != 0)
 			restart = true;
@@ -3919,15 +3919,15 @@ stop_vmspace_proc(void)
 			stopped_some = true;
 		_PRELE(p);
 		PROC_UNLOCK(p);
-		sx_xlock(&allproc_lock);
+		VMSPACE_LOCK(vm);
 	}
 
 	/* Catch forked children we did not see in iteration. */
 	if (gen != allproc_gen)
 		restart = true;
-	sx_xunlock(&allproc_lock);
+	VMSPACE_UNLOCK(vm);
 	if (restart || stopped_some || seen_exiting || seen_stopped) {
 		kern_yield(PRI_USER);
-		goto allproc_loop;
+		goto vmspace_loop;
 	}
 }
 
@@ -3935,31 +3935,33 @@ void
 resume_vmspace_proc(void)
 {
 	struct proc *cp, *p;
+	struct vmspace *vm;
 
 	cp = curproc;
-	sx_xlock(&allproc_lock);
+	vm = cp->p_vmspace;
+
+	VMSPACE_LOCK(vm);
 again:
-	LIST_REMOVE(cp, p_list);
-	LIST_INSERT_HEAD(&allproc, cp, p_list);
+	LIST_REMOVE(cp, p_vm_proclist);
+	LIST_INSERT_HEAD(&vm->vm_proclist, cp, p_vm_proclist);
 	for (;;) {
-		p = LIST_NEXT(cp, p_list);
+		p = LIST_NEXT(cp, p_vm_proclist);
 		if (p == NULL)
 			break;
-		LIST_REMOVE(cp, p_list);
-		LIST_INSERT_AFTER(p, cp, p_list);
+		LIST_REMOVE(cp, p_vm_proclist);
+		LIST_INSERT_AFTER(p, cp, p_vm_proclist);
 		PROC_LOCK(p);
 		if (p->p_vmspace != cp->p_vmspace) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		if ((p->p_flag & P_TOTAL_STOP) != 0) {
-			sx_xunlock(&allproc_lock);
+			VMSPACE_UNLOCK(vm);
 			_PHOLD(p);
-			printf("%s: pid %d\n", __func__, p->p_pid);
 			thread_single_end(p, SINGLE_VMSPACE);
 			_PRELE(p);
 			PROC_UNLOCK(p);
-			sx_xlock(&allproc_lock);
+			VMSPACE_LOCK(vm);
 		} else {
 			PROC_UNLOCK(p);
 		}
@@ -3970,7 +3972,7 @@ resume_vmspace_proc(void)
 		if ((p->p_flag & P_TOTAL_STOP) != 0)
 			goto again;
 	}
 
-	sx_xunlock(&allproc_lock);
+	VMSPACE_UNLOCK(vm);
 	/* See "XXX-BD: Prevent multiple callers on a vmspace" above */
 }
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 2e3e2a777371..ac968463904b 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -682,6 +682,8 @@ struct proc {
 					   (if I am reaper). */
 	LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of
 					   the same reaper. */
+	LIST_ENTRY(proc) p_vm_proclist; /* (b) List of processes sharing
+					   p_vmspace */
 	struct mtx p_mtx;		/* (n) Lock for this struct. */
 	struct mtx p_statmtx;		/* Lock for the stats */
 	struct mtx p_itimmtx;		/* Lock for the virt/prof timers */
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index a110bbf483a5..d061c9171021 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -585,6 +585,7 @@ vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
 		if (p1->p_vmspace->vm_shm)
 			shmfork(p1, p2);
 	}
+	vmspace_insert_proc(p2->p_vmspace, p2);
 
 	/*
 	 * cpu_fork will copy and update the pcb, set up the kernel stack,
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index a1936e668cff..31c833f3cf0b 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -387,6 +387,7 @@ vmspace_zinit(void *mem, int size, int flags)
 	vm_map_t map;
 
 	vm = (struct vmspace *)mem;
+	mtx_init(&vm->vm_mtx, "vmspace", NULL, MTX_DEF);
 	map = &vm->vm_map;
 
 	memset(map, 0, sizeof(*map));
@@ -435,6 +436,7 @@ vmspace_alloc(vm_pointer_t min, vm_pointer_t max, pmap_pinit_t pinit)
 	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
 	_vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max);
 	refcount_init(&vm->vm_refcnt, 1);
+	LIST_INIT(&vm->vm_proclist);
 	vm->vm_shm = NULL;
 	vm->vm_swrss = 0;
 	vm->vm_tsize = 0;
@@ -473,6 +475,8 @@ vmspace_dofree(struct vmspace *vm)
 	 */
 	shmexit(vm);
 
+	KASSERT(LIST_EMPTY(&vm->vm_proclist), ("vm_proclist isn't empty"));
+
 	/*
 	 * Lock the map, to wait out all other references to it.
 	 * Delete all of the mappings and pages they hold, then call
@@ -505,10 +509,51 @@ vmspace_exitfree(struct proc *p)
 	vm = p->p_vmspace;
 	p->p_vmspace = NULL;
 	PROC_VMSPACE_UNLOCK(p);
+	vmspace_remove_proc(vm, p);
 	KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
 	vmspace_free(vm);
 }
 
+void
+vmspace_insert_proc(struct vmspace *vm, struct proc *p)
+{
+#ifdef INVARIANTS
+	struct proc *proc;
+#endif
+
+	VMSPACE_LOCK(vm);
+#ifdef INVARIANTS
+	LIST_FOREACH(proc, &vm->vm_proclist, p_vm_proclist)
+		KASSERT(proc != p, ("proc %d is already in vm_proclist",
+		    p->p_pid));
+#endif
+	LIST_INSERT_HEAD(&vm->vm_proclist, p, p_vm_proclist);
+	VMSPACE_UNLOCK(vm);
+}
+
+void
+vmspace_remove_proc(struct vmspace *vm, struct proc *p)
+{
+#ifdef INVARIANTS
+	struct proc *proc;
+	bool found = false;
+#endif
+
+	VMSPACE_LOCK(vm);
+#ifdef INVARIANTS
+	LIST_FOREACH(proc, &vm->vm_proclist, p_vm_proclist) {
+		if (proc == p) {
+			found = true;
+			break;
+		}
+	}
+	KASSERT(found, ("proc %d not in vm_proclist", p->p_pid));
+#endif
+	if (!LIST_EMPTY(&vm->vm_proclist))
+		LIST_REMOVE(p, p_vm_proclist);
+	VMSPACE_UNLOCK(vm);
+}
+
 void
 vmspace_exit(struct thread *td)
 {
@@ -529,9 +574,11 @@
 	refcount_acquire(&vmspace0.vm_refcnt);
 	if (!(released = refcount_release_if_last(&vm->vm_refcnt))) {
 		if (p->p_vmspace != &vmspace0) {
+			vmspace_remove_proc(p->p_vmspace, p);
 			PROC_VMSPACE_LOCK(p);
 			p->p_vmspace = &vmspace0;
 			PROC_VMSPACE_UNLOCK(p);
+			vmspace_insert_proc(p->p_vmspace, p);
 			pmap_activate(td);
 		}
 		released = refcount_release(&vm->vm_refcnt);
@@ -542,15 +589,19 @@
 	 * back first if necessary.
 	 */
 	if (p->p_vmspace != vm) {
+		vmspace_remove_proc(p->p_vmspace, p);
 		PROC_VMSPACE_LOCK(p);
 		p->p_vmspace = vm;
 		PROC_VMSPACE_UNLOCK(p);
+		vmspace_insert_proc(p->p_vmspace, p);
 		pmap_activate(td);
 	}
 	pmap_remove_pages(vmspace_pmap(vm));
+	vmspace_remove_proc(p->p_vmspace, p);
 	PROC_VMSPACE_LOCK(p);
 	p->p_vmspace = &vmspace0;
 	PROC_VMSPACE_UNLOCK(p);
+	vmspace_insert_proc(p->p_vmspace, p);
 	pmap_activate(td);
 	vmspace_dofree(vm);
 }
@@ -601,21 +652,26 @@ void
 vmspace_switch_aio(struct vmspace *newvm)
 {
 	struct vmspace *oldvm;
+	struct proc *p;
 
 	/* XXX: Need some way to assert that this is an aio daemon. */
 
 	KASSERT(refcount_load(&newvm->vm_refcnt) > 0,
 	    ("vmspace_switch_aio: newvm unreferenced"));
 
-	oldvm = curproc->p_vmspace;
+	p = curproc;
+
+	oldvm = p->p_vmspace;
 	if (oldvm == newvm)
 		return;
 
 	/*
 	 * Point to the new address space and refer to it.
 	 */
-	curproc->p_vmspace = newvm;
+	vmspace_remove_proc(oldvm, p);
+	p->p_vmspace = newvm;
 	refcount_acquire(&newvm->vm_refcnt);
+	vmspace_insert_proc(newvm, p);
 
 	/* Activate the new mapping. */
 	pmap_activate(curthread);
@@ -5722,9 +5778,11 @@ vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
 	 * run it down.  Even though there is little or no chance of blocking
 	 * here, it is a good idea to keep this form for future mods.
 	 */
+	vmspace_remove_proc(p->p_vmspace, p);
 	PROC_VMSPACE_LOCK(p);
 	p->p_vmspace = newvmspace;
 	PROC_VMSPACE_UNLOCK(p);
+	vmspace_insert_proc(p->p_vmspace, p);
 	if (p == curthread->td_proc)
 		pmap_activate(curthread);
 	curthread->td_pflags |= TDP_EXECVMSPC;
@@ -5756,9 +5814,11 @@ vmspace_unshare(struct proc *p)
 		vmspace_free(newvmspace);
 		return (ENOMEM);
 	}
+	vmspace_remove_proc(oldvmspace, p);
 	PROC_VMSPACE_LOCK(p);
 	p->p_vmspace = newvmspace;
 	PROC_VMSPACE_UNLOCK(p);
+	vmspace_insert_proc(newvmspace, p);
 	if (p == curthread->td_proc)
 		pmap_activate(curthread);
 	vmspace_free(oldvmspace);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 6cab5d55023e..0208042abde4 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -369,6 +369,9 @@ struct vmspace {
 	vm_offset_t vm_maxsaddr;	/* user VA at max stack growth */
 	vm_offset_t vm_stacktop; /* top of the stack, may not be page-aligned */
 	uintcap_t vm_shp_base;		/* shared page pointer */
+
+	struct mtx vm_mtx;		/* protects vm_proclist */
+	LIST_HEAD(, proc) vm_proclist;	/* processes sharing this vmspace */
 	u_int vm_refcnt;	/* number of references */
 	/*
 	 * Keep the PMAP last, so that CPU-specific variations of that
@@ -378,6 +381,9 @@ struct vmspace {
 	struct pmap vm_pmap;	/* private physical map */
 };
 
+#define	VMSPACE_LOCK(vm)	mtx_lock(&(vm)->vm_mtx)
+#define	VMSPACE_UNLOCK(vm)	mtx_unlock(&(vm)->vm_mtx)
+
 #ifdef _KERNEL
 static __inline pmap_t
 vmspace_pmap(struct vmspace *vmspace)
@@ -431,6 +437,8 @@ bool vm_map_range_valid_KBI(vm_map_t map, vm_offset_t start, vm_offset_t end);
 	_vm_map_lock_downgrade(map, LOCK_FILE, LOCK_LINE)
 
 long vmspace_resident_count(struct vmspace *vmspace);
+void vmspace_insert_proc(struct vmspace *vm, struct proc *p);
+void vmspace_remove_proc(struct vmspace *vm, struct proc *p);
 #endif	/* _KERNEL */
 
 /*
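
Reviewer note (not part of the patch): the iteration pattern the new
list enables looks like the sketch below.  The helper and its callback
are hypothetical names used for illustration only.  A general consumer
should hold vm_mtx across the walk, as vmspace_insert_proc() and
vmspace_remove_proc() do above; kern_cheri_revoke() itself can iterate
unlocked only because stop_vmspace_proc() has already single-threaded
every process sharing the vmspace.

/*
 * Illustrative sketch: visit every process sharing a vmspace.  The
 * callback runs with vm_mtx (a MTX_DEF mutex) held, so it must not
 * sleep or try to reacquire vm_mtx.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <vm/vm.h>
#include <vm/vm_map.h>

static void
vmspace_foreach_proc(struct vmspace *vm,
    void (*cb)(struct proc *, void *), void *arg)
{
	struct proc *p;

	/* vm_mtx keeps vm_proclist stable while we iterate. */
	VMSPACE_LOCK(vm);
	LIST_FOREACH(p, &vm->vm_proclist, p_vm_proclist)
		cb(p, arg);
	VMSPACE_UNLOCK(vm);
}

Because most vmspaces are private to a single process, this walk is
normally a single iteration, which is where the efficiency win over
FOREACH_PROC_IN_SYSTEM() under the allproc lock comes from.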