Skip to content

Commit

Permalink
tcp: Add sndbuf accounting
Browse files Browse the repository at this point in the history
Add per-socket send buffer limits. Also significantly improve segment
sending and fix some latent TCP/IP bugs while we're at it. This patch
doesn't yet introduce this functionality for UDP or UNIX sockets.

Signed-off-by: Pedro Falcato <[email protected]>
  • Loading branch information
heatd committed Jan 25, 2025
1 parent c6912bf commit c539e0e
Show file tree
Hide file tree
Showing 15 changed files with 459 additions and 119 deletions.
4 changes: 2 additions & 2 deletions kernel/drivers/net/e1000/e1000.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ struct page_frag_res

/* TODO: Put this in an actual header */

extern "C" struct page_frag_res page_frag_alloc(struct page_frag_alloc_info *inf, size_t size)
extern "C" struct page_frag_res page_frag_alloc2(struct page_frag_alloc_info *inf, size_t size)
{
assert(size <= PAGE_SIZE);

Expand Down Expand Up @@ -352,7 +352,7 @@ int e1000_init_rx(struct e1000_device *dev)

for (unsigned int i = 0; i < number_rx_desc; i++)
{
struct page_frag_res res = page_frag_alloc(&alloc_info, rx_buffer_size);
struct page_frag_res res = page_frag_alloc2(&alloc_info, rx_buffer_size);
/* How can this even happen? Keep this here though, as a sanity check */
if (!res.page)
panic("OOM allocating rx buffers");
Expand Down
4 changes: 2 additions & 2 deletions kernel/drivers/virtio/network/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ struct page_frag_res
size_t off;
};

extern "C" struct page_frag_res page_frag_alloc(struct page_frag_alloc_info *inf, size_t size);
extern "C" struct page_frag_res page_frag_alloc2(struct page_frag_alloc_info *inf, size_t size);

namespace virtio
{
Expand Down Expand Up @@ -149,7 +149,7 @@ bool network_vdev::setup_rx()

for (unsigned int i = 0; i < qsize; i++)
{
auto [page, off] = page_frag_alloc(&alloc_info, rx_buf_size);
auto [page, off] = page_frag_alloc2(&alloc_info, rx_buf_size);

virtio_allocation_info info;

Expand Down
10 changes: 9 additions & 1 deletion kernel/include/onyx/atomic.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024 Pedro Falcato
* Copyright (c) 2024 - 2025 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the GPLv2 License
* check LICENSE at the root directory for more information
*
Expand All @@ -26,4 +26,12 @@
__atomic_compare_exchange_n(ptr, &__old, new, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); \
__old; \
})

/*
 * cmpxchg_relaxed - strong compare-and-exchange with relaxed memory ordering.
 *
 * Atomically compares *ptr against 'old' and, if they match, stores 'new'.
 * Both the success and the failure memory orders are __ATOMIC_RELAXED, so this
 * provides atomicity only and no ordering against surrounding accesses.
 *
 * Evaluates to the value that was observed in *ptr: this equals 'old' when the
 * exchange happened; otherwise the builtin writes the current contents of *ptr
 * into __old, and that is what is yielded. Callers detect success by comparing
 * the result against the 'old' value they passed in.
 */
#define cmpxchg_relaxed(ptr, old, new) \
({ \
__auto_type __old = (old); \
__atomic_compare_exchange_n(ptr, &__old, new, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); \
__old; \
})

#endif
65 changes: 62 additions & 3 deletions kernel/include/onyx/net/socket.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018 - 2022 Pedro Falcato
* Copyright (c) 2018 - 2025 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the GPLv2 License
* check LICENSE at the root directory for more information
*
Expand All @@ -17,6 +17,7 @@
#include <onyx/iovec_iter.h>
#include <onyx/net/netif.h>
#include <onyx/object.h>
#include <onyx/page_frag.h>
#include <onyx/refcount.h>
#include <onyx/semaphore.h>
#include <onyx/vector.h>
Expand Down Expand Up @@ -56,6 +57,7 @@ struct socket_ops
void (*close)(struct socket *);
void (*handle_backlog)(struct socket *);
short (*poll)(struct socket *, void *poll_file, short events);
void (*write_space)(struct socket *);
};

struct socket : public refcountable
Expand All @@ -79,11 +81,13 @@ struct socket : public refcountable
bool broadcast_allowed : 1;
bool proto_needs_work : 1 {0};
bool dead : 1 {0};
bool sndbuf_locked : 1 {0};

struct list_head socket_backlog;

unsigned int rx_max_buf;
unsigned int tx_max_buf;
unsigned int sk_sndbuf;
unsigned sk_send_queued;
int backlog;
unsigned int shutdown_state;

Expand All @@ -92,17 +96,23 @@ struct socket : public refcountable

const struct socket_ops *sock_ops;

/* Socket page frag info - used for allocating wmem */
struct page_frag_info sock_pfi;

/* Define a default constructor here */
socket()
: type{}, proto{}, domain{}, flags{}, sock_err{}, socket_lock{}, bound{}, connected{},
reuse_addr{false}, rx_max_buf{DEFAULT_RX_MAX_BUF}, tx_max_buf{DEFAULT_TX_MAX_BUF},
reuse_addr{false}, rx_max_buf{DEFAULT_RX_MAX_BUF}, sk_sndbuf{DEFAULT_TX_MAX_BUF},
shutdown_state{}, rcv_timeout{0}, snd_timeout{0}, sock_ops{}
{
INIT_LIST_HEAD(&socket_backlog);
pfi_init(&sock_pfi);
sk_send_queued = 0;
}

virtual ~socket()
{
pfi_destroy(&sock_pfi);
}

short poll(void *poll_file, short events);
Expand Down Expand Up @@ -326,6 +336,55 @@ int sock_default_getpeername(struct socket *sock, struct sockaddr *addr, socklen
int sock_default_shutdown(struct socket *sock, int how);
void sock_default_close(struct socket *sock);
short sock_default_poll(struct socket *sock, void *poll_file, short events);

/**
 * @brief Check if a socket still has room in its send buffer.
 *
 * @param sock Socket to check
 * @return true if the number of queued send bytes is strictly below sk_sndbuf
 */
static inline bool sock_may_write(struct socket *sock)
{
    const unsigned int queued = READ_ONCE(sock->sk_send_queued);
    const unsigned int limit = READ_ONCE(sock->sk_sndbuf);

    return queued < limit;
}

/**
 * @brief Get the amount of send buffer space left on a socket.
 *
 * The difference is computed in unsigned arithmetic and then converted to int,
 * so it wraps if more bytes are queued than sk_sndbuf currently allows.
 *
 * @param sock Socket to query
 * @return sk_sndbuf - sk_send_queued
 */
static inline int sock_write_space(struct socket *sock)
{
    const unsigned int limit = READ_ONCE(sock->sk_sndbuf);
    const unsigned int queued = READ_ONCE(sock->sk_send_queued);

    return limit - queued;
}

/**
 * @brief Try to charge bytes against a socket's send buffer.
 *
 * Atomically adds 'bytes' to sk_send_queued, as long as the result does not
 * exceed sk_sndbuf. The update is a lockless compare-and-exchange loop with
 * relaxed ordering; it only guarantees that the counter itself is consistent.
 *
 * @param sock Socket to charge
 * @param bytes Number of bytes to charge
 * @return true if the bytes were charged, false if they would not fit
 */
static inline bool sock_charge_snd_bytes(struct socket *sock, unsigned int bytes)
{
    unsigned int queued = READ_ONCE(sock->sk_send_queued);

    for (;;)
    {
        /* Re-read the limit on every attempt, as it may change under us */
        const unsigned int limit = READ_ONCE(sock->sk_sndbuf);
        const unsigned int expected = queued;

        /* Phrased as a subtraction so that queued + bytes can never wrap around and sneak
         * under the limit. */
        if (bytes > limit || expected > limit - bytes)
            return false;

        queued = cmpxchg_relaxed(&sock->sk_send_queued, expected, expected + bytes);
        if (queued == expected)
            return true;
        /* Lost a race with another charge/discharge; retry with the value we observed */
    }
}

/**
 * @brief Charge a packetbuf's total length against a socket's send buffer.
 *
 * @param sock Socket to charge
 * @param pbf Packetbuf whose total_len is charged
 * @return Result of sock_charge_snd_bytes() for pbf->total_len
 */
static inline bool sock_charge_pbf(struct socket *sock, struct packetbuf *pbf)
{
    const unsigned int len = pbf->total_len;

    return sock_charge_snd_bytes(sock, len);
}

/**
 * @brief Give back bytes previously charged against a socket's send buffer.
 *
 * Atomically subtracts 'bytes' from sk_send_queued and then notifies the
 * protocol through sock_ops->write_space, if the protocol provides one.
 *
 * Discharging more than is queued is an accounting bug: it triggers a WARN_ON
 * and the counter is clamped to zero instead of being left wrapped around
 * (which would make the socket look permanently full).
 *
 * @param sock Socket to discharge
 * @param bytes Number of bytes to give back
 */
static inline void sock_discharge_snd_bytes(struct socket *sock, unsigned int bytes)
{
    unsigned int queued = READ_ONCE(sock->sk_send_queued);

    for (;;)
    {
        const unsigned int expected = queued;
        const bool underflow = bytes > expected;

        WARN_ON(underflow);

        queued = cmpxchg_relaxed(&sock->sk_send_queued, expected,
                                 underflow ? 0 : expected - bytes);
        if (queued == expected)
            break;
        /* Lost a race with another charge/discharge; retry with the value we observed */
    }

    /* write_space is optional: not every protocol's socket_ops fills it in */
    if (sock->sock_ops && sock->sock_ops->write_space)
        sock->sock_ops->write_space(sock);
}

/**
 * @brief Give back a packetbuf's total length to a socket's send buffer.
 *
 * @param sock Socket to discharge
 * @param pbf Packetbuf whose total_len is given back
 */
static inline void sock_discharge_pbf(struct socket *sock, struct packetbuf *pbf)
{
    const unsigned int len = pbf->total_len;

    sock_discharge_snd_bytes(sock, len);
}

__END_CDECLS

#endif
3 changes: 2 additions & 1 deletion kernel/include/onyx/net/tcp.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020 - 2024 Pedro Falcato
* Copyright (c) 2020 - 2025 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the GPLv2 License
* check LICENSE at the root directory for more information
*
Expand Down Expand Up @@ -207,6 +207,7 @@ struct tcp_socket : public inet_socket
unsigned int nr_sacks;
int mss_for_ack;

struct list_head accept_queue;
struct list_head conn_queue;
int connqueue_len;
};
Expand Down
11 changes: 10 additions & 1 deletion kernel/include/onyx/packetbuf.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020 - 2024 Pedro Falcato
* Copyright (c) 2020 - 2025 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the GPLv2 License
* check LICENSE at the root directory for more information
*
Expand Down Expand Up @@ -97,10 +97,12 @@ struct packetbuf
uint16_t gso_size;

uint8_t gso_flags;
u8 nr_vecs;

unsigned int needs_csum : 1;
unsigned int zero_copy : 1;
int domain;
unsigned int total_len;

/* The next bytes are always available for protocols. */
#define PACKETBUF_PROTO_SPACE 64
Expand All @@ -119,6 +121,9 @@ struct packetbuf
struct tcp_packetbuf_info tpi;
};

struct socket *sock;
void (*dtor)(struct packetbuf *pbf);

#ifdef __cplusplus
/**
* @brief Construct a new default packetbuf object.
Expand All @@ -130,6 +135,9 @@ struct packetbuf
header_length{}, gso_size{}, gso_flags{}, needs_csum{0}, zero_copy{0}, domain{0}
{
route = {};
sock = NULL;
dtor = NULL;
nr_vecs = 0;
}

/**
Expand Down Expand Up @@ -439,6 +447,7 @@ static inline void pbf_put_ref(struct packetbuf *pbf)
typedef unsigned int gfp_t;

struct packetbuf *pbf_alloc(gfp_t gfp);
struct packetbuf *pbf_alloc_sk(gfp_t gfp, struct socket *sock, unsigned int len);

__END_CDECLS

Expand Down
43 changes: 43 additions & 0 deletions kernel/include/onyx/page_frag.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) 2025 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the GPLv2 License
* check LICENSE at the root directory for more information
*
* SPDX-License-Identifier: GPL-2.0-only
*/
#ifndef _ONYX_PAGE_FRAG_H
#define _ONYX_PAGE_FRAG_H

#include <onyx/compiler.h>
#include <onyx/page.h>

__BEGIN_CDECLS

/* Allocator state for carving fragments out of a page; see page_frag_alloc(). */
struct page_frag_info
{
/* Page fragments are currently being carved from, or NULL if there is none */
struct page *page;
/* Byte offset into the page at which the next fragment will start */
unsigned int offset;
/* Total size, in bytes, of the current backing allocation */
unsigned int len;
};

/* A single fragment handed out by page_frag_alloc(). */
struct page_frag
{
/* Page backing this fragment; page_frag_alloc() takes a reference on it per fragment */
struct page *page;
/* Byte offset of the fragment inside the page */
unsigned int offset;
/* Length of the fragment, in bytes */
unsigned int len;
};

/**
 * @brief Initialize a page_frag_info to the empty state (no page, zero offset and length).
 *
 * @param pfi Page frag info to initialize
 */
static inline void pfi_init(struct page_frag_info *pfi)
{
    pfi->offset = 0;
    pfi->len = 0;
    pfi->page = NULL;
}

int page_frag_alloc(struct page_frag_info *pfi, unsigned int len, gfp_t gfp,
struct page_frag *frag);

void pfi_destroy(struct page_frag_info *pfi);

__END_CDECLS

#endif
3 changes: 2 additions & 1 deletion kernel/kernel/mm/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mm-y:= bootmem.o page.o pagealloc.o vm_object.o vm.o vmalloc.o reclaim.o anon.o mincore.o page_lru.o swap.o rmap.o slab_cache_pool.o madvise.o
mm-y:= bootmem.o page.o pagealloc.o vm_object.o vm.o vmalloc.o reclaim.o anon.o \
mincore.o page_lru.o swap.o rmap.o slab_cache_pool.o madvise.o page_frag.o
mm-$(CONFIG_KUNIT)+= vm_tests.o
mm-$(CONFIG_X86)+= memory.o
mm-$(CONFIG_RISCV)+= memory.o
Expand Down
64 changes: 64 additions & 0 deletions kernel/kernel/mm/page_frag.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2025 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the GPLv2 License
* check LICENSE at the root directory for more information
*
* SPDX-License-Identifier: GPL-2.0-only
*/
#include <stdio.h>

#include <onyx/page_frag.h>

/**
 * @brief Replace the page backing a page_frag_info with a freshly allocated one.
 *
 * Drops the reference on the current page (if any) and allocates a new one
 * large enough for 'len' bytes. Only order-0 allocations are supported.
 *
 * @param pfi Page frag info to refill
 * @param len Size of the fragment that triggered the refill, in bytes
 * @param gfp Allocation flags passed to alloc_pages()
 * @return 0 on success, -ENOMEM if len needs more than one page or the allocation failed
 */
static int page_frag_refill(struct page_frag_info *pfi, unsigned int len, gfp_t gfp)
{
    const unsigned int ord = pages2order(vm_size_to_pages(len));
    struct page *fresh;

    if (WARN_ON_ONCE(ord > 0))
    {
        /* TODO: We're missing GFP_COMP support, and without it the refcounting gets all screwed
         * up. So reject order > 0 allocations. */
        pr_warn("%s: Asked for %u bytes, which we can't deliver\n", __func__, len);
        return -ENOMEM;
    }

    /* Let go of the old page before replacing it */
    if (pfi->page)
        page_unref(pfi->page);

    fresh = alloc_pages(ord, gfp);
    pfi->page = fresh;
    if (!fresh)
        return -ENOMEM;

    pfi->len = 1UL << (ord + PAGE_SHIFT);
    pfi->offset = 0;
    return 0;
}

/**
 * @brief Allocate a fragment of 'len' bytes from a page_frag_info.
 *
 * Fragments are carved sequentially out of the page cached in pfi. If there is
 * no cached page, or not enough room left in it, a new page is allocated first.
 * Each returned fragment holds its own reference on the backing page.
 *
 * @param pfi Page frag info to allocate from
 * @param len Length of the fragment, in bytes
 * @param gfp Allocation flags, used if a new page needs to be allocated
 * @param frag Filled with the resulting fragment on success
 * @return 0 on success, -ENOMEM if a new page was needed and could not be obtained
 */
int page_frag_alloc(struct page_frag_info *pfi, unsigned int len, gfp_t gfp, struct page_frag *frag)
{
    /* Check if we don't have a page already, or if we don't have enough space for the frag */
    if (!pfi->page || pfi->len - pfi->offset < len)
    {
        if (page_frag_refill(pfi, len, gfp) < 0)
            return -ENOMEM;
    }

    /* The frag gets its own reference, separate from the one pfi holds */
    page_ref(pfi->page);
    frag->page = pfi->page;
    frag->len = len;
    frag->offset = pfi->offset;
    pfi->offset += len;

    /* Compare against the page's capacity (pfi->len), not the request size: comparing with len
     * is true for every first allocation out of a fresh page, which would throw the page away
     * each time and defeat fragment sharing. */
    if (pfi->offset == pfi->len)
    {
        /* Release our ref if someone ate the whole thing. */
        page_unref(pfi->page);
        pfi->page = NULL;
    }

    return 0;
}

/**
 * @brief Tear down a page_frag_info, dropping its reference on the cached page (if any).
 *
 * @param pfi Page frag info to destroy
 */
void pfi_destroy(struct page_frag_info *pfi)
{
    struct page *held = pfi->page;

    if (!held)
        return;

    page_unref(held);
}
4 changes: 2 additions & 2 deletions kernel/kernel/net/ipv4/ipv4.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 - 2024 Pedro Falcato
* Copyright (c) 2016 - 2025 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the GPLv2 License
* check LICENSE at the root directory for more information
*
Expand Down Expand Up @@ -431,7 +431,7 @@ int handle_packet(netif *nif, packetbuf *buf)
buf->data += iphdr_len;

/* Adjust tail to point at the end of the ipv4 packet */
buf->tail = (unsigned char *) header + ntohs(header->total_len);
buf->tail = cul::min(buf->end, (unsigned char *) header + ntohs(header->total_len));

inet_route route;
route.dst_addr.in4.s_addr = header->dest_ip;
Expand Down
5 changes: 3 additions & 2 deletions kernel/kernel/net/ipv6/ipv6.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020 - 2024 Pedro Falcato
* Copyright (c) 2020 - 2025 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the GPLv2 License
* check LICENSE at the root directory for more information
*
Expand Down Expand Up @@ -433,7 +433,8 @@ int handle_packet(netif *nif, packetbuf *buf)
buf->data += iphdr_len;

/* Adjust tail to point at the end of the ipv6 packet */
buf->tail = (unsigned char *) header + iphdr_len + ntohs(header->payload_length);
buf->tail =
cul::min(buf->end, (unsigned char *) header + iphdr_len + ntohs(header->payload_length));

inet_route route;
route.dst_addr.in6 = header->dst_addr;
Expand Down
Loading

0 comments on commit c539e0e

Please sign in to comment.