Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

several caprevoke optimizations #2195

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions sys/vm/swap_pager.c
Original file line number Diff line number Diff line change
Expand Up @@ -1101,6 +1101,16 @@
modpi = pindex % SWAP_META_PAGES;
memcpy(&dstsb->t[modpi], &sb->t[srcmodpi], sizeof(sb->t[srcmodpi]));
}

static bool
swp_pager_cheri_has_tags(struct swblk *sb, int p)
{
for (int i = 0; i < TAG_WORDS_PER_PAGE; i++) {
if (sb->t[p][i] != 0)
return (true);
}
return (false);
}
#endif

static bool
Expand Down Expand Up @@ -3341,6 +3351,45 @@
object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start;
VM_OBJECT_WUNLOCK(object);
}

#ifdef CHERI_CAPREVOKE
/*
* Return the smallest pindex greater than or equal to "pindex" for which at
* least one of the following applies:
* 1) there is a resident page
* 2) there is a swap block with at least one capability tag.
* During a revocation scan, there is no need to page in until that point.
*/
vm_pindex_t
swap_pager_cheri_revoke_next(vm_object_t object, vm_pindex_t pindex)
{
vm_page_t m_next;
struct swblk *sb;
vm_pindex_t respindex, swpindex;

VM_OBJECT_ASSERT_LOCKED(object);

m_next = vm_page_find_least(object, pindex);
respindex = m_next != NULL ? m_next->pindex : object->size;

for (swpindex = rounddown2(pindex, SWAP_META_PAGES);;

Check failure on line 3375 in sys/vm/swap_pager.c

View workflow job for this annotation

GitHub Actions / Style Checker

superfluous trailing semicolon
swpindex = sb->p + SWAP_META_PAGES) {
sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
swpindex);
if (sb == NULL || sb->p >= respindex)
break;
for (int i = 0; i < SWAP_META_PAGES; i++) {
if (sb->p + i >= pindex && sb->p + i < respindex &&
sb->d[i] != SWAPBLK_NONE &&
swp_pager_cheri_has_tags(sb, i))
return (sb->p + i);
}
}

return (respindex);
}
#endif

// CHERI CHANGES START
// {
// "updated": 20230509,
Expand Down
6 changes: 6 additions & 0 deletions sys/vm/swap_pager.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,5 +85,11 @@ u_long swap_pager_swapped_pages(vm_object_t object);
void swapoff_all(void);
bool swap_pager_init_object(vm_object_t object, void *handle,
struct ucred *cred, vm_ooffset_t size, vm_ooffset_t offset);

#ifdef CHERI_CAPREVOKE
vm_pindex_t swap_pager_cheri_revoke_next(vm_object_t object,
vm_pindex_t pindex);
#endif

#endif /* _KERNEL */
#endif /* _VM_SWAP_PAGER_H_ */
115 changes: 87 additions & 28 deletions sys/vm/vm_cheri_revoke.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/swap_pager.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
Expand All @@ -66,7 +67,8 @@

static void vm_cheri_revoke_pass_pre(vm_map_t);
static void vm_cheri_revoke_pass_post(vm_map_t);
static int vm_cheri_revoke_pass_locked(const struct vm_cheri_revoke_cookie *);
static int vm_cheri_revoke_pass_locked(struct vmspace *,
const struct vm_cheri_revoke_cookie *);

static SYSCTL_NODE(_vm_stats, OID_AUTO, cheri_revoke,
CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
Expand All @@ -82,6 +84,21 @@
&cheri_revoke_restarts,
"Number of VM map re-lookups");

static COUNTER_U64_DEFINE_EARLY(cheri_skip_prot_no_readcap);
SYSCTL_COUNTER_U64(_vm_stats_cheri_revoke, OID_AUTO, skip_prot_no_readcap, CTLFLAG_RD,

Check warning on line 88 in sys/vm/vm_cheri_revoke.c

View workflow job for this annotation

GitHub Actions / Style Checker

line over 80 characters
&cheri_skip_prot_no_readcap,
"Virtual pages skipped in map entries without readcap permission");

static COUNTER_U64_DEFINE_EARLY(cheri_skip_obj_no_hascap);
SYSCTL_COUNTER_U64(_vm_stats_cheri_revoke, OID_AUTO, skip_obj_no_hascap, CTLFLAG_RD,

Check warning on line 93 in sys/vm/vm_cheri_revoke.c

View workflow job for this annotation

GitHub Actions / Style Checker

line over 80 characters
&cheri_skip_obj_no_hascap,
"Virtual pages skipped in VM objects with no capabilities");

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reason to prefer this over the caprevoke stats infrastructure? (Though admittedly I'd not be sad to see the latter get ripped out.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly that I wanted to have a global view of the counter value, and sysctl is more convenient for that. The existing stats infrastructure collects per-process stats and I believe is limited to printing them when the process exits. The right direction might be a hybrid scheme wherein we maintain per-process (really, per-vmspace) and global counters, and use the former to update the latter after each scan.

static COUNTER_U64_DEFINE_EARLY(cheri_last_ref_early_finish);
SYSCTL_COUNTER_U64(_vm_stats_cheri_revoke, OID_AUTO, last_ref_early_finish, CTLFLAG_RD,

Check warning on line 98 in sys/vm/vm_cheri_revoke.c

View workflow job for this annotation

GitHub Actions / Style Checker

line over 80 characters
&cheri_last_ref_early_finish,
"Scans finished early because the target exited");

/***************************** KERNEL THREADS ***************************/

static MALLOC_DEFINE(M_REVOKE, "cheri_revoke", "cheri_revoke temporary data");
Expand Down Expand Up @@ -171,6 +188,7 @@
mtx_unlock(&async_revoke_mtx);

vmspace_switch_aio(arc->vm);
vmspace_free(arc->vm);

/*
* Advance the async state machine.
Expand All @@ -181,15 +199,14 @@
/*
* Do the actual revocation pass.
*/
error = vm_cheri_revoke_pass_locked(&arc->cookie);
error = vm_cheri_revoke_pass_locked(arc->vm, &arc->cookie);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks a bit weird to use this after it was seemingly free'd above. It might be clearer to the reader with a comment and a local vm temporary like so:

     vmspace_switch_aio(&arc->vm);
     vm = arc->vm;

     /*
      * Drop arc's reference on the vmspace.  The scanner kproc holds a reference via
      * its p_vmspace pointer while the scanning the address space.  Dropping the
      * reference here permits the scan to return early if target process exits early
      * leaving the scanner kproc's reference as the only reference.
      */
     vmspace_free(arc->vm);


/*
* A revocation pass is done. Advance the state machine again
* so that the application can see the result.
*/
vm_cheri_revoke_pass_async_post(map, error);

vmspace_free(arc->vm);
free(arc, M_REVOKE);
}

Expand Down Expand Up @@ -512,7 +529,6 @@
vm_cheri_revoke_skip_fault(vm_object_t object)
{
return (cheri_revoke_avoid_faults && (object->flags & OBJ_SWAP) != 0 &&
pctrie_is_empty(&object->un_pager.swp.swp_blks) &&
(object->backing_object == NULL ||
(object->backing_object->flags & OBJ_HASCAP) == 0));
}
Expand Down Expand Up @@ -638,26 +654,24 @@
(void)vm_page_grab_valid(&m, obj, ipi, VM_ALLOC_NOZERO);

if (m == NULL) {
/* Can we avoid calling vm_fault() when the page is not resident? */
/*
* Can we avoid calling vm_fault() for non-resident pages, or
* for paged-out pages that do not contain capabilities?
*/
if (vm_cheri_revoke_skip_fault(obj)) {
/* Look forward in the object's collection of pages */
vm_page_t obj_next_pg = vm_page_find_least(obj, ipi);

vm_offset_t lastoff =
entry->end - entry->start + entry->offset;
vm_pindex_t nextpindex;
vm_offset_t lastoff;

if ((obj_next_pg == NULL) ||
(obj_next_pg->pindex >= OFF_TO_IDX(lastoff))) {
lastoff = entry->end - entry->start + entry->offset;
nextpindex = swap_pager_cheri_revoke_next(obj, ipi);
if (nextpindex >= OFF_TO_IDX(lastoff)) {
CHERI_REVOKE_STATS_INC(crst, pages_skip_fast,
(entry->end - addr) >> PAGE_SHIFT);
*ooff = lastoff;
} else {
KASSERT(obj_next_pg->object == obj,
("Fast find page in bad object?"));

*ooff = IDX_TO_OFF(obj_next_pg->pindex);
CHERI_REVOKE_STATS_INC(crst, pages_skip_fast,
obj_next_pg->pindex - ipi);
nextpindex - ipi);
*ooff = IDX_TO_OFF(nextpindex);
}
return (VM_CHERI_REVOKE_AT_OK);
}
Expand Down Expand Up @@ -899,14 +913,19 @@
* The map must be read-locked on entry and will be read-locked on exit, but
* the lock may be dropped internally. The map must, therefore, also be
* held across invocation.
*
* The containing vmspace may be referenced by the "vm" parameter to signal that
* the revocation sweep is asynchronous and should terminate if the caller ends
* up holding the final vmspace reference.
*/
static int
vm_cheri_revoke_map_entry(const struct vm_cheri_revoke_cookie *crc,
vm_map_entry_t entry, vm_offset_t *addr)
struct vmspace *vm, vm_map_entry_t entry, vm_offset_t *addr)
{
vm_offset_t ooffset;
vm_object_t obj;
enum vm_cro_at res;
int flags;

KASSERT(!(entry->eflags & MAP_ENTRY_IS_SUB_MAP),
("cheri_revoke SUB_MAP"));
Expand All @@ -917,9 +936,33 @@
if (!obj)
goto fini;

/* Skip entire mappings that do not permit capability reads */
if ((entry->max_protection & VM_PROT_READ_CAP) == 0)
/*
* Skip mappings outright if we know they can't bear capabilities.
* Specifically, if one of the following applies:
* 1. OBJ_NOCAP is set (which implies it is set in backing objects), as
* is the case for the quarantine bitmap,
* 2. the mapping cannot acquire PROT_READ_CAP permission for the rest
* of its existence,
* 3. OBJ_HASCAP is unset, meaning that the object and its backing
* object cannot bear capabilities,
* we can safely jump to the next map entry.
*
* The object flags are not toggled after initialization, so it is safe
* to check them without the object lock.
*/
flags = atomic_load_int(&obj->flags);
if ((flags & OBJ_NOCAP) != 0)
goto fini;
if ((entry->max_protection & VM_PROT_READ_CAP) == 0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The commit message should probably say "have PROT_READ_CAP clear"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, but the commit log is not very clear. I meant that the existing check (max_prot & PROT_READ_CAP) == 0 does not exclude, e.g., mappings of executable ELF file segments, so without this change we end up scanning those pages unnecessarily.

counter_u64_add(cheri_skip_prot_no_readcap,
atop(entry->end - *addr));
goto fini;
}
if ((flags & OBJ_HASCAP) == 0) {
counter_u64_add(cheri_skip_obj_no_hascap,
atop(entry->end - *addr));
goto fini;
}

VM_OBJECT_WLOCK(obj);
while (*addr < entry->end) {
Expand All @@ -928,6 +971,13 @@
vm_offset_t oaddr = *addr;
#endif

/* Has the target process already exited? */
if (vm != NULL && refcount_load(&vm->vm_refcnt) == 1) {
VM_OBJECT_WUNLOCK(obj);
counter_u64_add(cheri_last_ref_early_finish, 1);
return (KERN_NOT_RECEIVER);
}

/* Find ourselves in this object */
ooffset = *addr - entry->start + entry->offset;

Expand Down Expand Up @@ -1000,7 +1050,8 @@
* returning.
*/
static int
vm_cheri_revoke_pass_locked(const struct vm_cheri_revoke_cookie *crc)
vm_cheri_revoke_pass_locked(struct vmspace *vm,
const struct vm_cheri_revoke_cookie *crc)
{
int res = KERN_SUCCESS;
const vm_map_t map = crc->map;
Expand All @@ -1023,13 +1074,21 @@
* XXX Somewhere around here we should be resetting
* MPROT_QUARANTINE'd map entries to be usable again, yes?
*/
res = vm_cheri_revoke_map_entry(crc, entry, &addr);
res = vm_cheri_revoke_map_entry(crc, vm, entry, &addr);

/*
* We might be bailing out because a page fault failed for
* catastrophic reasons (or polite ones like ptrace()).
*/
if (res != KERN_SUCCESS) {
switch (res) {
case KERN_SUCCESS:
break;
case KERN_NOT_RECEIVER:
/* The vmspace is orphaned, there is nothing to do. */
res = KERN_SUCCESS;
goto out;
default:
/*
* We might be bailing out because a page fault failed
* for catastrophic reasons (or polite ones like
* ptrace()).
*/
printf("CHERI revoke bail va=%lx res=%d\n", addr, res);
goto out;
}
Expand Down Expand Up @@ -1073,10 +1132,10 @@
int res;

vm_cheri_revoke_pass_pre(crc->map);
res = vm_cheri_revoke_pass_locked(crc);
res = vm_cheri_revoke_pass_locked(NULL, crc);
vm_cheri_revoke_pass_post(crc->map);

return (res);

Check warning on line 1138 in sys/vm/vm_cheri_revoke.c

View workflow job for this annotation

GitHub Actions / Style Checker

Missing Signed-off-by: line
}

void
Expand Down
Loading