From 7d0c039f99c10374aea98376d59530892a04d26e Mon Sep 17 00:00:00 2001
From: Sam Kumar
Date: Fri, 5 Aug 2016 20:59:00 -0700
Subject: [PATCH 1/3] Port of FreeBSD TCP implementation for gnrc, with
 TinyOS-style event scheduler and condition variable implementation

The original commits are:

Added FreeBSD TCP code and very basic integration into GNRC (doesn't compile yet)
Add dummy method stubs and made modifications so that the code compiles; still doesn't link
Fix some linker problems
Fixed module definitions so that the code compiles
Implement receipt of packets from lower layer
Modify FreeBSD code to send packets using pktsnips
Use consistent errno.h file
Some more progress
Implemented TinyOS-style scheduler (compiles, but not tested yet)
Stub the remaining functions
Debug task scheduler
Start working on socket allocator
Implement remaining functions for socket allocator
Define external API in header file for TCP
Delete unnecessary file and add some (coarse) synchronization to the TCP module
Check the checksum for received segments
Reorganize the code a bit, and fix bugs
Fix bugs in TCP segment reception
Fix bugs interfering with SYN-ACK retransmission
Start implementing conn API for TCP
Implement condition variable
More work on conn API
More work on conn interface
Add memmgr-based zone allocator for TCP conn API
Use union-based conn tcp structure
Implemented all of conn except for send
Preliminary implementation of send capability in conn API
Fix memory leak
Tie conn API logic into posix_sockets
Fix bugs, and accept on ACK instead of SYN-ACK
Got send/recv somewhat working
Fix some bugs
Update to new APIs after rebasing
Fix race condition in initialization of TCP packet loop
Prevent radio from going to sleep after send
Fix some resource bugs in TCP Conn API implementation
Fix bug in RIOT that prevents socket 0 from working properly
Fix more bugs
Implemented sock bindings for TCP in posix_sockets.c
    Also fixed some other miscellaneous errors that came up along the way.
    Compiles and links, but is not yet tested.
Fix bugs found during testing
Modify duty-cycling code to allow for Router - Router TCP connections
    Also fix many bugs in TCP and elsewhere
Implement link-layer retransmission and software CSMA logic
Commit remaining code and make CSMA/retries configurable in periph_conf.h
Fix issue in software CSMA implementation
Add support for REthos for border router
Allow broadcast packets to be sent (without queueing)
Fix bug in handling of broadcast
Fix memory leaks in duty-cycled router
Use autoconf bootstrapping
Make autoconf_onehop more flexible
Fix concurrency bugs in link retry/software CSMA implementation
Fix some bugs in FreeBSD TCP
    These bugs were discovered when testing interoperability with Linux
Fix major memory management problem in design of sock layer for tcp_freebsd
Fix concurrency issue in sock_tcp_freebsd zone allocator
Change starting MSS for TCP
Fix bug in gnrc_netdev2_duty_router.c
Allow for implicit bind in FreeBSD TCP
Fix minor bug in FreeBSD TCP sock layer
Increase queue_size
Fix bug in link-layer so framebuffer is not overwritten while receiving
Fix race condition in _xtimer_set_absolute
Adjust MSS constants for Hamilton experiments
Bugfixes and optimizations found during experiments
Some progress on duty-cycling
    Appears to be quite stable
Commit additional modifications needed for duty-cycling
Further improvements to duty-cycling (and good TCP throughput!)
Make duty_router work again --- Makefile.dep | 24 + boards/hamilton/include/periph_conf.h | 47 + boards/samr21-xpro/Makefile.include | 57 +- boards/samr21-xpro/board.c | 9 + boards/samr21-xpro/dist/debug.sh | 18 + boards/samr21-xpro/dist/flash.sh | 25 + boards/samr21-xpro/dist/reset.sh | 18 + boards/samr21-xpro/include/board.h | 11 + core/condition.c | 80 + core/include/condition.h | 92 + core/include/thread.h | 5 +- cpu/cortexm_common/include/cpu_conf_common.h | 2 +- cpu/sam0_common/periph/cpuid.c | 14 +- dist/tools/rethos/rethos | Bin 0 -> 28712 bytes drivers/at86rf2xx/at86rf2xx.c | 27 +- drivers/at86rf2xx/at86rf2xx_getset.c | 15 +- drivers/at86rf2xx/at86rf2xx_netdev.c | 7 + drivers/include/rethos.h | 280 ++ drivers/rethos/Makefile | 1 + drivers/rethos/rethos.c | 611 +++ makefiles/pseudomodules.inc.mk | 2 + sys/auto_init/auto_init.c | 15 +- sys/auto_init/netif/auto_init_ethos.c | 2 +- sys/include/net/conn.h | 4 + sys/include/net/gnrc/conn.h | 5 + sys/include/net/gnrc/ipv6/autoconf_onehop.h | 48 + sys/include/net/gnrc/netdev.h | 31 + sys/include/net/gnrc/nettype.h | 5 +- sys/include/net/gnrc/tcp_freebsd.h | 100 + sys/include/net/netopt.h | 5 + sys/include/net/sock/tcp_freebsd.h | 177 + sys/include/net/tcp_freebsd.h | 70 + sys/include/task_sched.h | 67 + sys/luid/luid.c | 26 +- sys/net/gnrc/Makefile | 16 + .../dutymac/gnrc_netdev2_duty_leaf.c | 723 +++ .../dutymac/gnrc_netdev2_duty_router.c | 640 +++ sys/net/gnrc/link_layer/dutymac/send.h | 14 + .../gnrc/link_layer/dutymac/send_with_csma.c | 84 + .../link_layer/dutymac/send_with_retries.c | 61 + .../netdev/gnrc_netdev_ieee802154.c | 89 +- sys/net/gnrc/netreg/gnrc_netreg.c | 6 +- .../ipv6/autoconf_onehop/Makefile | 3 + .../ipv6/autoconf_onehop/gnrc_ipv6_autoconf.c | 117 + sys/net/gnrc/network_layer/ipv6/gnrc_ipv6.c | 7 + .../gnrc/pktbuf_static/gnrc_pktbuf_static.c | 3 + sys/net/gnrc/sock/include/sock_types.h | 63 + sys/net/gnrc/sock/tcp_freebsd/Makefile | 5 + .../sock/tcp_freebsd/gnrc_sock_tcp_freebsd.c | 671 +++ sys/net/gnrc/sock/tcp_freebsd/zone/Makefile | 3 + .../zone/gnrc_sock_tcp_freebsd_zalloc.h | 50 + sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.c | 253 ++ sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.h | 96 + .../gnrc/transport_layer/tcp_freebsd/Makefile | 7 + .../transport_layer/tcp_freebsd/blip/Makefile | 3 + .../transport_layer/tcp_freebsd/blip/iovec.c | 80 + .../transport_layer/tcp_freebsd/blip/iovec.h | 19 + .../tcp_freebsd/bsdtcp/Makefile | 5 + .../tcp_freebsd/bsdtcp/_types.h | 120 + .../transport_layer/tcp_freebsd/bsdtcp/cc.h | 188 + .../tcp_freebsd/bsdtcp/cc/Makefile | 3 + .../tcp_freebsd/bsdtcp/cc/cc_module.h | 72 + .../tcp_freebsd/bsdtcp/cc/cc_newreno.c | 259 ++ .../tcp_freebsd/bsdtcp/icmp_var.h | 102 + .../transport_layer/tcp_freebsd/bsdtcp/ip.h | 232 + .../transport_layer/tcp_freebsd/bsdtcp/ip6.h | 401 ++ .../tcp_freebsd/bsdtcp/sys/queue.h | 753 ++++ .../transport_layer/tcp_freebsd/bsdtcp/tcp.h | 261 ++ .../tcp_freebsd/bsdtcp/tcp_const.h | 76 + .../tcp_freebsd/bsdtcp/tcp_fsm.h | 114 + .../tcp_freebsd/bsdtcp/tcp_input.c | 3923 +++++++++++++++++ .../tcp_freebsd/bsdtcp/tcp_output.c | 1860 ++++++++ .../tcp_freebsd/bsdtcp/tcp_reass.c | 318 ++ .../tcp_freebsd/bsdtcp/tcp_sack.c | 733 +++ .../tcp_freebsd/bsdtcp/tcp_seq.h | 101 + .../tcp_freebsd/bsdtcp/tcp_subr.c | 933 ++++ .../tcp_freebsd/bsdtcp/tcp_timer.c | 722 +++ .../tcp_freebsd/bsdtcp/tcp_timer.h | 249 ++ .../tcp_freebsd/bsdtcp/tcp_timewait.c | 503 +++ .../tcp_freebsd/bsdtcp/tcp_usrreq.c | 753 ++++ .../tcp_freebsd/bsdtcp/tcp_var.h | 917 ++++ .../tcp_freebsd/bsdtcp/types.h | 79 + 
.../transport_layer/tcp_freebsd/checksum.c | 124 + .../tcp_freebsd/gnrc_tcp_freebsd.c | 653 +++ .../tcp_freebsd/gnrc_tcp_freebsd_internal.h | 118 + .../transport_layer/tcp_freebsd/lib/Makefile | 3 + .../transport_layer/tcp_freebsd/lib/bitmap.c | 131 + .../transport_layer/tcp_freebsd/lib/bitmap.h | 16 + .../transport_layer/tcp_freebsd/lib/cbuf.c | 234 + .../transport_layer/tcp_freebsd/lib/cbuf.h | 41 + .../transport_layer/tcp_freebsd/lib/lbuf.c | 90 + .../transport_layer/tcp_freebsd/lib/lbuf.h | 65 + .../tcp_freebsd/socket_allocator.c | 437 ++ sys/pm_layered/pm.c | 2 +- sys/posix/include/sys/socket.h | 10 + sys/posix/sockets/posix_sockets.c | 127 +- sys/task_sched/Makefile | 3 + sys/task_sched/task_sched.c | 224 + sys/xtimer/xtimer_core.c | 6 +- 99 files changed, 20796 insertions(+), 78 deletions(-) create mode 100755 boards/samr21-xpro/dist/debug.sh create mode 100755 boards/samr21-xpro/dist/flash.sh create mode 100755 boards/samr21-xpro/dist/reset.sh create mode 100644 core/condition.c create mode 100644 core/include/condition.h create mode 100755 dist/tools/rethos/rethos create mode 100644 drivers/include/rethos.h create mode 100644 drivers/rethos/Makefile create mode 100644 drivers/rethos/rethos.c create mode 100644 sys/include/net/gnrc/ipv6/autoconf_onehop.h create mode 100644 sys/include/net/gnrc/tcp_freebsd.h create mode 100644 sys/include/net/sock/tcp_freebsd.h create mode 100644 sys/include/net/tcp_freebsd.h create mode 100644 sys/include/task_sched.h create mode 100644 sys/net/gnrc/link_layer/dutymac/gnrc_netdev2_duty_leaf.c create mode 100644 sys/net/gnrc/link_layer/dutymac/gnrc_netdev2_duty_router.c create mode 100644 sys/net/gnrc/link_layer/dutymac/send.h create mode 100644 sys/net/gnrc/link_layer/dutymac/send_with_csma.c create mode 100644 sys/net/gnrc/link_layer/dutymac/send_with_retries.c create mode 100644 sys/net/gnrc/network_layer/ipv6/autoconf_onehop/Makefile create mode 100644 sys/net/gnrc/network_layer/ipv6/autoconf_onehop/gnrc_ipv6_autoconf.c create mode 100644 sys/net/gnrc/sock/tcp_freebsd/Makefile create mode 100644 sys/net/gnrc/sock/tcp_freebsd/gnrc_sock_tcp_freebsd.c create mode 100644 sys/net/gnrc/sock/tcp_freebsd/zone/Makefile create mode 100644 sys/net/gnrc/sock/tcp_freebsd/zone/gnrc_sock_tcp_freebsd_zalloc.h create mode 100644 sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.c create mode 100644 sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/Makefile create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/blip/Makefile create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/blip/iovec.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/blip/iovec.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/Makefile create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/_types.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/Makefile create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/cc_module.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/cc_newreno.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/icmp_var.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/ip.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/ip6.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/sys/queue.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp.h create mode 100644 
sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_const.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_fsm.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_input.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_output.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_reass.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_sack.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_seq.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_subr.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timer.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timer.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timewait.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_usrreq.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_var.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/types.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/checksum.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/gnrc_tcp_freebsd.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/gnrc_tcp_freebsd_internal.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/lib/Makefile create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/lib/bitmap.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/lib/bitmap.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/lib/cbuf.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/lib/cbuf.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.c create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.h create mode 100644 sys/net/gnrc/transport_layer/tcp_freebsd/socket_allocator.c create mode 100644 sys/task_sched/Makefile create mode 100644 sys/task_sched/task_sched.c diff --git a/Makefile.dep b/Makefile.dep index 6654a948fbf4..3aeeaa6ca424 100644 --- a/Makefile.dep +++ b/Makefile.dep @@ -86,6 +86,26 @@ ifneq (,$(filter gnrc_netapi_mbox,$(USEMODULE))) USEMODULE += core_mbox endif +ifneq (,$(filter gnrc_sock_tcp_freebsd,$(USEMODULE))) + USEMODULE += gnrc_tcp_freebsd + USEMODULE += gnrc_sock_tcp_freebsd_zalloc + USEMODULE += sock_tcp_freebsd +endif + +ifneq (,$(filter gnrc_tcp_freebsd,$(USEMODULE))) + USEMODULE += gnrc_tcp_freebsd_internal + USEMODULE += gnrc_tcp_freebsd_blip + USEMODULE += task_sched +endif + +ifneq (,$(filter gnrc_tcp_freebsd_internal,$(USEMODULE))) + USEMODULE += gnrc_tcp_freebsd_internal_cc +endif + +ifneq (,$(filter task_sched,$(USEMODULE))) + USEMODULE += xtimer +endif + ifneq (,$(filter netdev_tap,$(USEMODULE))) USEMODULE += netif USEMODULE += netdev_eth @@ -503,6 +523,10 @@ ifneq (,$(filter %_conn_tcp,$(USEMODULE))) USEMODULE += conn_tcp endif +ifneq (,$(filter %_conn_tcp_freebsd,$(USEMODULE))) + USEMODULE += conn_tcp_freebsd +endif + ifneq (,$(filter %_conn_udp,$(USEMODULE))) USEMODULE += conn_udp endif diff --git a/boards/hamilton/include/periph_conf.h b/boards/hamilton/include/periph_conf.h index 9d59657457aa..e791ec13ff3a 100644 --- a/boards/hamilton/include/periph_conf.h +++ b/boards/hamilton/include/periph_conf.h @@ -79,6 +79,53 @@ extern "C" { #define PM_BLOCKER_INITIAL { .val_u32=0x00000000 } +/** + * @name Network configuration + * @{ + */ +#ifndef DUTYCYCLE_EN +#define DUTYCYCLE_EN (1) +#endif + +#ifndef DUTYCYCLE_WAKEUP_INTERVAL +#define DUTYCYCLE_WAKEUP_INTERVAL 
20000UL /* Don't change it w/o particular reasons */ +#endif + +#ifndef DUTYCYCLE_SLEEP_INTERVAL +#define DUTYCYCLE_SLEEP_INTERVAL 2000000UL +#endif + +#ifndef DUTYCYCLE_SLEEP_INTERVAL_MIN +#define DUTYCYCLE_SLEEP_INTERVAL_MIN 20000UL +#endif + +#ifndef DUTYCYCLE_SLEEP_INTERVAL_MAX +#define DUTYCYCLE_SLEEP_INTERVAL_MAX 5000000UL /* 1) When it is ZERO, a leaf node does not send beacons + (i.e., extremely low duty-cycle, + but downlink transmission is disabled) + 2) Router and leaf node should have same sleep interval. + Router does not sleep + but uses the value for downlink transmissions */ +#endif + +#define ROUTER (0) /* Plugged-in router */ +#define LEAF_NODE (1-ROUTER) /* Duty-cycling node */ + +#define HARDWARE_CSMA_EN (0) + +#define HARDWARE_CSMA_MAX_TRIES (5) +#define HARDWARE_CSMA_MIN_BACKOFF_EXP (3) /* Hardware default. */ +#define HARDWARE_CSMA_MAX_BACKOFF_EXP (5) /* Hardware default. */ +#define HARDWARE_MAX_FRAME_RETRIES (0) /* No delay between these. */ + +#define SOFTWARE_MAX_FRAME_RETRIES (3) +#define SOFTWARE_FRAME_RETRY_DELAY_MICROS (0) +#define SOFTWARE_CSMA_MAX_TRIES (5) +#define SOFTWARE_CSMA_BACKOFF_MICROS (320) +#define SOFTWARE_CSMA_MIN_BACKOFF_EXP (3) /* Hardware default. */ +#define SOFTWARE_CSMA_MAX_BACKOFF_EXP (5) /* Hardware default. */ +/** @} */ + /** * @name Timer peripheral configuration * @{ diff --git a/boards/samr21-xpro/Makefile.include b/boards/samr21-xpro/Makefile.include index 7f68e0eaa1bf..ea4c06297346 100644 --- a/boards/samr21-xpro/Makefile.include +++ b/boards/samr21-xpro/Makefile.include @@ -2,24 +2,45 @@ export CPU = samd21 export CPU_MODEL = samr21g18a -# set default port depending on operating system -PORT_LINUX ?= /dev/ttyACM0 -PORT_DARWIN ?= $(firstword $(sort $(wildcard /dev/tty.usbmodem*))) +# debugger config +export DEBUGGER = $(RIOTBOARD)/$(BOARD)/dist/debug.sh +export DEBUGSERVER = JLinkGDBServer -device atsamr21e18a -if swd +export RESET = $(RIOTBOARD)/$(BOARD)/dist/reset.sh -# setup serial terminal -include $(RIOTMAKE)/tools/serial.inc.mk +export FLASHER = $(RIOTBOARD)/$(BOARD)/dist/flash.sh +export FFLAGS = $(BINDIR) $(HEXFILE) -# Add board selector (USB serial) to OpenOCD options if specified. -# Use /dist/tools/usb-serial/list-ttys.sh to find out serial number. -# Usage: SERIAL="ATML..." BOARD="samr21-xpro" make flash -ifneq (,$(SERIAL)) - export OPENOCD_EXTRA_INIT += "-c cmsis_dap_serial $(SERIAL)" - SERIAL_TTY = $(firstword $(shell $(RIOTBASE)/dist/tools/usb-serial/find-tty.sh $(SERIAL))) - ifeq (,$(SERIAL_TTY)) - $(error Did not find a device with serial $(SERIAL)) - endif - PORT_LINUX := $(SERIAL_TTY) -endif +CFLAGS += -O0 -g3 -# this board uses openocd -include $(RIOTMAKE)/tools/openocd.inc.mk +export OFLAGS = -O binary --gap-fill 0xff +export HEXFILE = $(ELFFILE:.elf=.bin) +export DEBUGGER_FLAGS = $(BINDIR) $(ELFFILE) +export RESET_FLAGS = $(BINDIR) + +export TERMPROG = JLinkRTTClient +export OBJDUMPFLAGS += --disassemble --source --disassembler-options=force-thumb + +# setup the boards dependencies +include $(RIOTBOARD)/$(BOARD)/Makefile.dep + +# # set default port depending on operating system +# PORT_LINUX ?= /dev/ttyACM0 +# PORT_DARWIN ?= $(firstword $(sort $(wildcard /dev/tty.usbmodem*))) +# +# # setup serial terminal +# include $(RIOTBOARD)/Makefile.include.serial +# +# # Add board selector (USB serial) to OpenOCD options if specified. +# # Use /dist/tools/usb-serial/list-ttys.sh to find out serial number. +# # Usage: SERIAL="ATML..." 
BOARD="samr21-xpro" make flash +# ifneq (,$(SERIAL)) +# export OPENOCD_EXTRA_INIT += "-c cmsis_dap_serial $(SERIAL)" +# SERIAL_TTY = $(firstword $(shell $(RIOTBASE)/dist/tools/usb-serial/find-tty.sh $(SERIAL))) +# ifeq (,$(SERIAL_TTY)) +# $(error Did not find a device with serial $(SERIAL)) +# endif +# PORT_LINUX := $(SERIAL_TTY) +# endif +# +# # this board uses openocd +# include $(RIOTBOARD)/Makefile.include.openocd diff --git a/boards/samr21-xpro/board.c b/boards/samr21-xpro/board.c index 9c63ee27b358..bea947eb815b 100644 --- a/boards/samr21-xpro/board.c +++ b/boards/samr21-xpro/board.c @@ -23,6 +23,15 @@ #include "board.h" #include "periph/gpio.h" +const uint64_t* const fb_sentinel = ((const uint64_t* const)0x3fc00); +const uint64_t* const fb_flashed_time = ((const uint64_t* const)0x3fc08); +const uint8_t* const fb_eui64 = ((const uint8_t* const)0x3fc10); +const uint16_t* const fb_device_id = ((const uint16_t* const)0x3fc18); +const uint64_t* const fb_designator = ((const uint64_t* const)0x3fc1c); +const uint8_t* const fb_aes128_key = ((const uint8_t* const)0x3fc30); +const uint8_t* const fb_25519_pub = ((const uint8_t* const)0x3fc40); +const uint8_t* const fb_25519_priv = ((const uint8_t* const)0x3fc60); + void board_init(void) { /* initialize the on-board LED */ diff --git a/boards/samr21-xpro/dist/debug.sh b/boards/samr21-xpro/dist/debug.sh new file mode 100755 index 000000000000..da54cbe639e2 --- /dev/null +++ b/boards/samr21-xpro/dist/debug.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# Start in-circuit debugging on this board: this script starts up the GDB +# client and connects to a GDB server. +# +# Start the GDB server first using the 'make debugserver' target + +# @author Hauke Petersen +# @author Michael Andersen + +BINDIR=$1 +ELFFILE=$2 + +# write GDB config file +echo "target extended-remote 127.0.0.1:2331" > $BINDIR/gdb.cfg + +# run GDB +arm-none-eabi-gdb -tui -command=$BINDIR/gdb.cfg $ELFFILE diff --git a/boards/samr21-xpro/dist/flash.sh b/boards/samr21-xpro/dist/flash.sh new file mode 100755 index 000000000000..36972904ffbc --- /dev/null +++ b/boards/samr21-xpro/dist/flash.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +# This flash script dynamically generates a file with a set of commands which +# have to be handed to the flashing script of SEGGER (JLinkExe >4.84). +# After that, JLinkExe will be executed with that set of commands to flash the +# latest .bin file to the board. + +# @author Hauke Petersen +# @author Michael Andersen + +BINDIR=$1 +HEXFILE=$2 +FLASHADDR=0 + +# setup JLink command file +echo "power on" > $BINDIR/burn.seg +echo "speed 1000" >> $BINDIR/burn.seg +echo "loadbin $HEXFILE $FLASHADDR" >> $BINDIR/burn.seg +echo "r" >> $BINDIR/burn.seg +echo "g" >> $BINDIR/burn.seg +echo "exit" >> $BINDIR/burn.seg + +# flash new binary to the board +JLinkExe -if swd -device atsamr21g18a < $BINDIR/burn.seg +echo "" diff --git a/boards/samr21-xpro/dist/reset.sh b/boards/samr21-xpro/dist/reset.sh new file mode 100755 index 000000000000..3f23ea061261 --- /dev/null +++ b/boards/samr21-xpro/dist/reset.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# This script resets a CC2538SF53 target using JLink called +# with a pre-defined reset sequence. 
+
+# @author Hauke Petersen
+# @author Michael Andersen
+
+BINDIR=$1
+
+# create JLink command file for resetting the board
+echo "r" > $BINDIR/reset.seg
+echo "g" >> $BINDIR/reset.seg
+echo "exit" >> $BINDIR/reset.seg
+
+# reset the board
+JLinkExe -device atsamr21g18a < $BINDIR/reset.seg
+echo ""
diff --git a/boards/samr21-xpro/include/board.h b/boards/samr21-xpro/include/board.h
index fbb71133a964..d4b6e6891902 100644
--- a/boards/samr21-xpro/include/board.h
+++ b/boards/samr21-xpro/include/board.h
@@ -30,6 +30,17 @@
 extern "C" {
 #endif
 
+extern const uint64_t* const fb_sentinel;
+extern const uint64_t* const fb_flashed_time;
+extern const uint8_t* const fb_eui64;
+extern const uint16_t* const fb_device_id;
+extern const uint64_t* const fb_designator;
+extern const uint8_t* const fb_aes128_key;
+extern const uint8_t* const fb_25519_pub;
+extern const uint8_t* const fb_25519_priv;
+#define FB_SENTINEL_VALUE 0x27c83f60f6b6e7c8
+#define HAS_FACTORY_BLOCK (*fb_sentinel == FB_SENTINEL_VALUE)
+
 /**
  * @name xtimer configuration
  * @{
diff --git a/core/condition.c b/core/condition.c
new file mode 100644
index 000000000000..d5281438f446
--- /dev/null
+++ b/core/condition.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 Sam Kumar
+ *               2016 University of California, Berkeley
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @ingroup     core_sync
+ * @{
+ *
+ * @file
+ * @brief       Kernel condition variable implementation
+ *
+ * @author      Sam Kumar
+ *
+ * @}
+ */
+
+#include "condition.h"
+#include "irq.h"
+#include "mutex.h"
+#include "thread.h"
+
+#define ENABLE_DEBUG (0)
+#include "debug.h"
+
+void cond_wait(condition_t* cond, mutex_t* mutex)
+{
+    unsigned irqstate = irq_disable();
+    thread_t* me = (thread_t*) sched_active_thread;
+    mutex_unlock(mutex);
+    sched_set_status(me, STATUS_CONDITION_BLOCKED);
+    thread_add_to_list(&cond->queue, me);
+    irq_restore(irqstate);
+    thread_yield_higher();
+
+    /*
+     * Once we reach this point, the condition variable was signalled,
+     * and we are free to continue.
+     */
+    mutex_lock(mutex);
+}
+
+void _cond_signal(condition_t* cond, bool broadcast)
+{
+    unsigned irqstate = irq_disable();
+    list_node_t* next;
+
+    uint16_t min_prio = THREAD_PRIORITY_MIN + 1;
+    while ((next = list_remove_head(&cond->queue)) != NULL) {
+        thread_t* process = container_of((clist_node_t*) next, thread_t, rq_entry);
+        sched_set_status(process, STATUS_PENDING);
+        uint16_t process_priority = process->priority;
+        if (process_priority < min_prio) {
+            min_prio = process_priority;
+        }
+
+        if (!broadcast) {
+            break;
+        }
+    }
+
+    irq_restore(irqstate);
+    if (min_prio <= THREAD_PRIORITY_MIN) {
+        sched_switch(min_prio);
+    }
+}
+
+void cond_signal(condition_t* cond)
+{
+    _cond_signal(cond, false);
+}
+
+void cond_broadcast(condition_t* cond)
+{
+    _cond_signal(cond, true);
+}
diff --git a/core/include/condition.h b/core/include/condition.h
new file mode 100644
index 000000000000..01645719f243
--- /dev/null
+++ b/core/include/condition.h
@@ -0,0 +1,92 @@
+/*
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @brief       Condition variable for thread synchronization
+ * @ingroup     core
+ * @{
+ *
+ * @file
+ * @brief       RIOT synchronization API
+ *
+ * @author      Sam Kumar
+ */
+
+#ifndef CONDITION_H_
+#define CONDITION_H_
+
+#include
+
+#include "list.h"
+#include "mutex.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/**
+ * @brief Condition variable structure. Must never be modified by the user.
+ * This condition variable has Mesa semantics, so any waiting thread should
+ * re-check the condition in a loop.
+ */
+typedef struct {
+    /**
+     * @brief The process waiting queue of the condition variable.
+     * @internal
+     */
+    list_node_t queue;
+} condition_t;
+
+/**
+ * @brief Static initializer for condition_t.
+ * @details This initializer is preferable to cond_init().
+ */
+#define COND_INIT { { NULL } }
+
+/**
+ * @brief Initializes a condition variable.
+ * @details For initialization of variables use COND_INIT instead.
+ *          Only use the function call for dynamically allocated condition
+ *          variables.
+ * @param[out] cond    pre-allocated condition structure, must not be NULL.
+ */
+static inline void cond_init(condition_t* cond)
+{
+    cond->queue.next = NULL;
+}
+
+/**
+ * @brief Waits on a condition.
+ *
+ * @param[in] cond     Condition variable to wait on.
+ * @param[in] mutex    Mutex object held by the current thread.
+ */
+void cond_wait(condition_t* cond, mutex_t* mutex);
+
+/**
+ * @brief Wakes up one thread waiting on the condition variable. The thread is
+ * marked as runnable and will only be scheduled later at the scheduler's whim,
+ * so the thread should re-check the condition and wait again if it is not
+ * fulfilled.
+ *
+ * @param[in] cond     Condition variable to signal.
+ */
+void cond_signal(condition_t* cond);
+
+/**
+ * @brief Wakes up all threads waiting on the condition variable. They are
+ * marked as runnable and will only be scheduled later at the scheduler's whim,
+ * so they should re-check the condition and wait again if it is not fulfilled.
+ *
+ * @param[in] cond     Condition variable to broadcast on.
+ */
+void cond_broadcast(condition_t* cond);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
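Since cond_signal() only marks a waiter runnable, the wait must sit in a loop that re-checks the predicate, as the header above stresses. The following usage sketch is illustrative only (not part of the patch); the shared flag and the two thread functions are hypothetical, and only the API declared in condition.h and mutex.h is used:

    #include <stdbool.h>
    #include "condition.h"
    #include "mutex.h"

    static mutex_t lock = MUTEX_INIT;
    static condition_t cond = COND_INIT;
    static bool data_ready = false;

    /* consumer thread: Mesa semantics demand the while loop, because the
     * condition may no longer hold by the time the woken thread runs */
    static void consume(void)
    {
        mutex_lock(&lock);
        while (!data_ready) {
            cond_wait(&cond, &lock);   /* unlocks, sleeps, then relocks */
        }
        data_ready = false;
        mutex_unlock(&lock);
    }

    /* producer thread */
    static void produce(void)
    {
        mutex_lock(&lock);
        data_ready = true;
        cond_signal(&cond);            /* wakes at most one waiter */
        mutex_unlock(&lock);
    }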
diff --git a/core/include/thread.h b/core/include/thread.h
index ffdc77dfd05d..804334f0bf34 100644
--- a/core/include/thread.h
+++ b/core/include/thread.h
@@ -150,6 +150,7 @@
 #define STATUS_FLAG_BLOCKED_ANY 6   /**< waiting for any flag from flag_mask*/
 #define STATUS_FLAG_BLOCKED_ALL 7   /**< waiting for all flags in flag_mask */
 #define STATUS_MBOX_BLOCKED 8       /**< waiting for get/put on mbox */
+#define STATUS_CONDITION_BLOCKED 9  /**< waiting for a condition variable */
 /** @} */
 
 /**
@@ -157,8 +158,8 @@
  * @{*/
 #define STATUS_ON_RUNQUEUE STATUS_RUNNING  /**< to check if on run queue: `st >= STATUS_ON_RUNQUEUE` */
-#define STATUS_RUNNING 9                   /**< currently running */
-#define STATUS_PENDING 10                  /**< waiting to be scheduled to run */
+#define STATUS_RUNNING 10                  /**< currently running */
+#define STATUS_PENDING 11                  /**< waiting to be scheduled to run */
 /** @} */
 /** @} */
diff --git a/cpu/cortexm_common/include/cpu_conf_common.h b/cpu/cortexm_common/include/cpu_conf_common.h
index 8d2d17744e14..ca07e3a2c59b 100644
--- a/cpu/cortexm_common/include/cpu_conf_common.h
+++ b/cpu/cortexm_common/include/cpu_conf_common.h
@@ -53,7 +53,7 @@ extern "C" {
  * @{
  */
 #ifndef ISR_STACKSIZE
-#define ISR_STACKSIZE (512U)
+#define ISR_STACKSIZE (550U)
 #endif
 /** @} */
diff --git a/cpu/sam0_common/periph/cpuid.c b/cpu/sam0_common/periph/cpuid.c
index 364bd262c898..130af0c2dac7 100644
--- a/cpu/sam0_common/periph/cpuid.c
+++ b/cpu/sam0_common/periph/cpuid.c
@@ -36,12 +36,14 @@
 void cpuid_get(void *id)
 {
-    if ( HAS_FACTORY_BLOCK )
+#if defined(HAS_FACTORY_BLOCK)
+    if (HAS_FACTORY_BLOCK) {
+        memset(id, 0, CPUID_LEN);
+        memcpy(id, fb_eui64, 8);
+    } else
+#endif
     {
-        memset(id, 0, CPUID_LEN);
-        memcpy(id, fb_eui64, 8);
-    } else {
-        uint32_t addr[] = { WORD0, WORD1, WORD2, WORD3 };
-        memcpy(id, (void *)addr, CPUID_LEN);
+        uint32_t addr[] = { WORD0, WORD1, WORD2, WORD3 };
+        memcpy(id, (void *)addr, CPUID_LEN);
     }
 }
diff --git a/dist/tools/rethos/rethos b/dist/tools/rethos/rethos
new file mode 100755
index 0000000000000000000000000000000000000000..89e1e64d897313fd50fab7d09de737e05baa476e
GIT binary patch
literal 28712
[28712 bytes of binary patch data omitted: prebuilt `rethos` host tool committed under dist/tools/rethos/]

literal 0
HcmV?d00001
diff --git a/drivers/at86rf2xx/at86rf2xx.c b/drivers/at86rf2xx/at86rf2xx.c
index 1a572aeceb1f..12db235726d0 100644
--- a/drivers/at86rf2xx/at86rf2xx.c
+++ b/drivers/at86rf2xx/at86rf2xx.c
@@ -111,15 +111,15 @@ void at86rf2xx_reset(at86rf2xx_t *dev)
     tmp |= (AT86RF2XX_TRX_CTRL_0_CLKM_CTRL__OFF);
     at86rf2xx_reg_write(dev, AT86RF2XX_REG__TRX_CTRL_0, tmp);
 
-    /* AUTO_CSMA */
-#if AUTO_CSMA_EN
+    /* This is set whether HARDWARE_CSMA_EN is 1 or 0, because we configure the
+     * backoff to be 0 when HARDWARE_CSMA_EN is 0 and then do our own backoff
+     * manually in software.
+     */
     at86rf2xx_set_option(dev, AT86RF2XX_OPT_CSMA, true);
+#if HARDWARE_CSMA_EN
+    at86rf2xx_set_max_retries(dev, HARDWARE_MAX_FRAME_RETRIES);
 #else
-    at86rf2xx_set_option(dev, AT86RF2XX_OPT_CSMA, false);
-    /* CCA setting for manual CSMA */
-    tmp = at86rf2xx_reg_read(dev, AT86RF2XX_REG__PHY_CC_CCA);
-    tmp |= AT86RF2XX_PHY_CC_CCA_DEFAULT__CCA_MODE;
-    at86rf2xx_reg_write(dev, AT86RF2XX_REG__PHY_CC_CCA, tmp);
+    at86rf2xx_set_max_retries(dev, 0);
 #endif
 
     /* enable interrupts */
@@ -179,19 +179,6 @@ void at86rf2xx_tx_exec(at86rf2xx_t *dev)
     /* write frame length field in FIFO */
     at86rf2xx_sram_write(dev, 0, &(dev->tx_frame_len), 1);
 
-#if AUTO_CSMA_EN
-#else
-    while(!at86rf2xx_cca(dev)) {
-        at86rf2xx_set_state(dev, AT86RF2XX_STATE_RX_AACK_ON); /* Listening during backoff */
-        xtimer_usleep((rand()%(2^BE))*320);
-        at86rf2xx_set_state(dev, AT86RF2XX_STATE_TX_ARET_ON);
-        printf("CCA busy %u\n", (2^BE)*320);
-        if (BE < MAX_BE) {
-            BE++;
-        }
-    }
-#endif
-
     /* trigger sending of pre-loaded frame */
     at86rf2xx_reg_write(dev, AT86RF2XX_REG__TRX_STATE,
                         AT86RF2XX_TRX_STATE__TX_START);
diff --git a/drivers/at86rf2xx/at86rf2xx_getset.c b/drivers/at86rf2xx/at86rf2xx_getset.c
index 761f97825202..61c6855df43c 100644
--- a/drivers/at86rf2xx/at86rf2xx_getset.c
+++ b/drivers/at86rf2xx/at86rf2xx_getset.c
@@ -27,6 +27,7 @@
 #include "at86rf2xx_internal.h"
 #include "at86rf2xx_registers.h"
 #include "periph/spi.h"
+#include "pm_layered.h"
 
 #define ENABLE_DEBUG (0)
 #include "debug.h"
@@ -344,8 +345,13 @@ void at86rf2xx_set_option(at86rf2xx_t *dev, uint16_t option, bool state)
                       "(4 retries, min BE: 3 max BE: 5)\n");
                 /* Initialize CSMA seed with hardware address */
                 at86rf2xx_set_csma_seed(dev, dev->netdev.long_addr);
-                at86rf2xx_set_csma_max_retries(dev, 4);
-                at86rf2xx_set_csma_backoff_exp(dev, 3, 5);
+#if HARDWARE_CSMA_EN
+                at86rf2xx_set_csma_max_retries(dev, HARDWARE_CSMA_MAX_TRIES - 1);
+                at86rf2xx_set_csma_backoff_exp(dev, HARDWARE_CSMA_MIN_BACKOFF_EXP, HARDWARE_CSMA_MAX_BACKOFF_EXP);
+#else
+                at86rf2xx_set_csma_max_retries(dev, 0);
+                at86rf2xx_set_csma_backoff_exp(dev, 0, 0);
+#endif
                 break;
             case AT86RF2XX_OPT_PROMISCUOUS:
                 DEBUG("[at86rf2xx] opt: enabling PROMISCUOUS mode\n");
@@ -490,8 +496,13 @@ void at86rf2xx_set_state(at86rf2xx_t *dev, uint8_t state)
         /* Go to SLEEP mode from TRX_OFF */
         gpio_set(dev->params.sleep_pin);
         dev->state = state;
+        /* Allow CPU to go to the full sleep mode */
+        pm_unblock(PM_NUM_MODES-1);
     } else {
         _set_state(dev, state, state);
+        /* Prevent CPU from going to the full sleep mode */
+        if (old_state == AT86RF2XX_STATE_SLEEP)
+            pm_block(PM_NUM_MODES-1);
     }
 }
diff --git a/drivers/at86rf2xx/at86rf2xx_netdev.c b/drivers/at86rf2xx/at86rf2xx_netdev.c
index 8ba52cbb642d..bab68e9f6897 100644
--- a/drivers/at86rf2xx/at86rf2xx_netdev.c
+++ b/drivers/at86rf2xx/at86rf2xx_netdev.c
@@ -555,6 +555,13 @@ static void _isr(netdev_t *netdev)
                      * there are none */
                     assert(dev->pending_tx != 0);
                     if ((--dev->pending_tx) == 0) {
+#if DUTYCYCLE_EN
+#if LEAF_NODE
+                        if (trac_status == AT86RF2XX_TRX_STATE__TRAC_SUCCESS_DATA_PENDING) {
+                            dev->idle_state = AT86RF2XX_STATE_RX_AACK_ON;
+                        }
+#endif
+#endif
                         at86rf2xx_set_state(dev, dev->idle_state);
                         DEBUG("[at86rf2xx] return to state 0x%x\n", dev->idle_state);
                     }
diff --git a/drivers/include/rethos.h b/drivers/include/rethos.h
new file mode 100644
index 000000000000..548a2e777881
--- /dev/null
+++ b/drivers/include/rethos.h
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2016 Michael Andersen
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License v2.1. See the file LICENSE in the top level directory for more
+ * details.
+ */
+
+/**
+ * @defgroup    drivers_ethos rethos
+ * @ingroup     drivers_netdev
+ * @brief       Driver for the Really EveryTHing over-serial module
+ * @{
+ *
+ * @file
+ * @brief       Interface definition for the rethos module
+ *
+ * @author      Michael Andersen
+ */
+
+#ifndef RETHOS_H
+#define RETHOS_H
+
+#include "kernel_types.h"
+#include "periph/uart.h"
+#include "net/netdev2.h"
+#include "tsrb.h"
+#include "mutex.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* rethos is a drop-in replacement for ethos, so it shares some of ethos'
+   symbols. The difference between an ETHOS and a RETHOS frame is as follows:
+   a RETHOS frame looks like
+
+     [ESC FRAME_START] [type] [seqno low] [seqno high] [channel] .. frame .. [ESC FRAME_END] [cksum low] [cksum high]
+
+   where the Fletcher-16 checksum covers the type, sequence number, channel
+   and payload bytes.
+
+   A RETHOS implementation may drop frames whose types it does not recognize,
+   or frames on channels that have no registered listener.
+
+   Note that it is illegal for any character in the preamble to be 0xBE (the
+   escape), so that implies that even the preamble needs to be escaped when
+   written.
+*/
+
+/* if using ethos + stdio, use UART_STDIO values unless overridden */
+#ifdef USE_ETHOS_FOR_STDIO
+#include "uart_stdio.h"
+#ifndef ETHOS_UART
+#define ETHOS_UART     UART_STDIO_DEV
+#endif
+#ifndef ETHOS_BAUDRATE
+#define ETHOS_BAUDRATE UART_STDIO_BAUDRATE
+#endif
+#endif
+
+#ifndef RETHOS_TX_BUF_SZ
+#define RETHOS_TX_BUF_SZ 2048
+#endif
+
+#ifndef RETHOS_RX_BUF_SZ
+#define RETHOS_RX_BUF_SZ 2048
+#endif
+
+/**
+ * @name    Escape char definitions
+ * @{
+ */
+#define RETHOS_ESC_CHAR    (0xBE)
+/* This means that a stream of ESC_CHAR still keeps us inside the escape state */
+#define RETHOS_LITERAL_ESC (0x55)
+#define RETHOS_FRAME_START (0xEF)
+#define RETHOS_FRAME_END   (0xE5)
+
+#define RETHOS_FRAME_TYPE_DATA     (0x1)
+
+#define RETHOS_FRAME_TYPE_HB       (0x2)
+#define RETHOS_FRAME_TYPE_HB_REPLY (0x3)
+
+/* Sam: I am going to remove this because I don't use it at all.
+#define RETHOS_FRAME_TYPE_SETMAC (0x4)
+*/
+
+#define RETHOS_FRAME_TYPE_ACK  (0x4)
+#define RETHOS_FRAME_TYPE_NACK (0x5)
+
+#define RETHOS_CHANNEL_CONTROL 0x00
+#define RETHOS_CHANNEL_NETDEV  0x01
+#define RETHOS_CHANNEL_STDIO   0x02
+
+
+/* Retransmit interval in microseconds.
*/ +#define RETHOS_REXMIT_MICROS 100000L + +/** @} */ + +/** + * @brief enum describing line state + */ +typedef enum { + SM_WAIT_FRAMESTART, + SM_WAIT_TYPE, + SM_WAIT_SEQ0, + SM_WAIT_SEQ1, + SM_WAIT_CHANNEL, + SM_IN_FRAME, + SM_WAIT_CKSUM1, + SM_WAIT_CKSUM2, + SM_IN_ESCAPE +} line_state_t; + +struct _rethos_handler; + +typedef struct _rethos_handler rethos_handler_t; + +/** + * @brief ethos netdev2 device + * @extends netdev2_t + */ +typedef struct { + netdev2_t netdev; /**< extended netdev2 structure */ + uart_t uart; /**< UART device the to use */ + uint8_t mac_addr[6]; /**< this device's MAC address */ + uint8_t remote_mac_addr[6]; /**< this device's MAC address */ + + line_state_t state; /**< Line status variable */ + line_state_t fromstate; /**< what you go back to after escape */ + // size_t framesize; /**< size of currently incoming frame */ + // unsigned frametype; /**< type of currently incoming frame */ + // size_t last_framesize; /**< size of last completed frame */ + mutex_t out_mutex; /**< mutex used for locking concurrent sends */ + + tsrb_t netdev_inbuf; /**< ringbuffer for incoming netdev data */ + size_t netdev_packetsz; + + uint8_t rx_buffer [RETHOS_RX_BUF_SZ]; + size_t rx_buffer_index; + uint8_t rx_frame_type; + uint16_t rx_seqno; + uint8_t rx_channel; + uint16_t rx_cksum1; + uint16_t rx_cksum2; + uint16_t rx_actual_cksum; //The data + uint16_t rx_expected_cksum; //The header + + rethos_handler_t *handlers; + + uint32_t stats_rx_cksum_fail; + uint32_t stats_rx_bytes; + uint32_t stats_rx_frames; + + uint32_t stats_tx_bytes; + uint32_t stats_tx_frames; + uint32_t stats_tx_retries; + + // uint8_t txframebuf [RETHOS_TX_BUF]; + uint16_t txseq; + // uint16_t txlen; + uint16_t flsum1; + uint16_t flsum2; + + /* State for retransmissions. */ + uint16_t rexmit_seqno; + uint8_t rexmit_channel; + size_t rexmit_numbytes; + uint8_t rexmit_frame[RETHOS_TX_BUF_SZ]; + bool rexmit_acked; + + bool received_data; + uint16_t last_rcvd_seqno; +} ethos_t; + +struct _rethos_handler { + struct _rethos_handler *_next; + void (*cb)(ethos_t *dev, uint8_t channel, const uint8_t *data, uint16_t length); + uint8_t channel; +}; + +/** + * @brief Struct containing the needed configuration + */ +typedef struct { + uart_t uart; /**< UART device to use */ + uint32_t baudrate; /**< baudrate to UART device */ + uint8_t *buf; /**< buffer for incoming packets */ + size_t bufsize; /**< size of ethos_params_t::buf */ +} ethos_params_t; + +/** + * @brief Setup an ethos based device state. + * + * The supplied buffer *must* have a power-of-two size, and it *must* be large + * enough for the largest expected packet + enough buffer space to buffer + * bytes that arrive while one packet is being handled. + * note that rethos needs a bigger buffer than ethos because it buffers up to TWO frames + * + * E.g., if 1536b ethernet frames are expected, 4096 is probably a good size for @p buf. + * + * @param[out] dev handle of the device to initialize + * @param[in] params parameters for device initialization + */ +void ethos_setup(ethos_t *dev, const ethos_params_t *params); + +void rethos_rexmit_callback(void* arg); + +/** + * @brief send frame over serial port using ethos' framing + * + * This is used by e.g., stdio over ethos to send text frames. 
+ * + * @param[in] dev handle of the device to initialize + * @param[in] data ptr to data to be sent + * @param[in] len nr of bytes to send + * @param[in] frame_type frame channel to use + */ +void ethos_send_frame(ethos_t *dev, const uint8_t *data, size_t len, unsigned channel); + +/** + * @brief send frame over serial port using ethos' framing + * + * This is used by e.g., stdio over ethos to send text frames. + * + * @param[in] dev handle of the device to initialize + * @param[in] data ptr to data to be sent + * @param[in] len nr of bytes to send + * @param[in] frame_type frame channel to use + */ +void rethos_send_frame(ethos_t *dev, const uint8_t *data, size_t len, uint8_t channel, uint8_t frame_type); + +void rethos_rexmit_data_frame(ethos_t* dev); + +void rethos_send_ack_frame(ethos_t* dev, uint16_t seqno); + +void rethos_send_nack_frame(ethos_t* dev); + +/** + * @brief send frame over serial port using ethos' framing + * + * This is used by e.g., stdio over ethos to send text frames. + * + * @param[in] dev handle of the device to initialize + * @param[in] data ptr to data to be sent + * @param[in] thislen nr of bytes to send on this invocation + * @param[in] frame_type frame type to use + */ +void rethos_start_frame(ethos_t *dev, const uint8_t *data, size_t thislen, uint8_t channel, uint8_t frame_type); + +/** + * @brief send frame over serial port using ethos' framing + * + * This is used by e.g., stdio over ethos to send text frames. + * + * @param[in] dev handle of the device to initialize + * @param[in] data ptr to data to be sent + * @param[in] thislen nr of bytes to send on this invocation + */ +void rethos_continue_frame(ethos_t *dev, const uint8_t *data, size_t thislen); + +/** + * @brief send frame over serial port using ethos' framing + * + * This is used by e.g., stdio over ethos to send text frames. + * + * @param[in] dev handle of the device to initialize + * @param[in] data ptr to data to be sent + * @param[in] thislen nr of bytes to send on this invocation + * @param[in] frame_type frame type to use + */ +void rethos_end_frame(ethos_t *dev); + + + +void rethos_register_handler(ethos_t *dev, rethos_handler_t *handler); + +#ifdef __cplusplus +} +#endif +#endif /* RETHOS_H */ +/** @} */ diff --git a/drivers/rethos/Makefile b/drivers/rethos/Makefile new file mode 100644 index 000000000000..48422e909a47 --- /dev/null +++ b/drivers/rethos/Makefile @@ -0,0 +1 @@ +include $(RIOTBASE)/Makefile.base diff --git a/drivers/rethos/rethos.c b/drivers/rethos/rethos.c new file mode 100644 index 000000000000..34f5d4266476 --- /dev/null +++ b/drivers/rethos/rethos.c @@ -0,0 +1,611 @@ +/* + * Copyright (C) 2016 Michael Andersen + * + * This file is subject to the terms and conditions of the GNU Lesser + * General Public License v2.1. See the file LICENSE in the top level + * directory for more details. 
+ */ + +/** + * @ingroup driver_ethos + * @{ + * + * @file + * @brief A re-implementation of ethos (originally by Kaspar Schleiser) + * that creates a reliable multi-channel duplex link over serial + * + * @author Michael Andersen + * + * @} + */ + +#include +#include +#include + +#include "random.h" +#include "rethos.h" +#include "periph/uart.h" +#include "tsrb.h" +#include "irq.h" + +#include "net/netdev2.h" +#include "net/netdev2/eth.h" +#include "net/eui64.h" +#include "net/ethernet.h" + +#include + +#ifdef USE_ETHOS_FOR_STDIO +#include "uart_stdio.h" +#include "isrpipe.h" +extern isrpipe_t uart_stdio_isrpipe; +#endif + +#define ENABLE_DEBUG (0) +#include "debug.h" + +static void _get_mac_addr(netdev2_t *dev, uint8_t* buf); +static void ethos_isr(void *arg, uint8_t c); +static const netdev2_driver_t netdev2_driver_ethos; + +static const uint8_t _esc_esc[] = {RETHOS_ESC_CHAR, RETHOS_LITERAL_ESC}; +static const uint8_t _start_frame[] = {RETHOS_ESC_CHAR, RETHOS_FRAME_START}; +static const uint8_t _end_frame[] = {RETHOS_ESC_CHAR, RETHOS_FRAME_END}; + +xtimer_t rexmit_timer; + +void rethos_send_frame_seqno_norexmit(ethos_t *dev, const uint8_t *data, size_t len, uint8_t channel, uint16_t seqno, uint8_t frame_type); +void rethos_start_frame_seqno_norexmit(ethos_t* dev, const uint8_t* data, size_t thislen, uint8_t channel, uint16_t seqno, uint8_t frame_type); + +static void fletcher16_add(const uint8_t *data, size_t bytes, uint16_t *sum1i, uint16_t *sum2i) +{ + uint16_t sum1 = *sum1i, sum2 = *sum2i; + + while (bytes) { + size_t tlen = bytes > 20 ? 20 : bytes; + bytes -= tlen; + do { + sum2 += sum1 += *data++; + } while (--tlen); + sum1 = (sum1 & 0xff) + (sum1 >> 8); + sum2 = (sum2 & 0xff) + (sum2 >> 8); + } + *sum1i = sum1; + *sum2i = sum2; +} + +static uint16_t fletcher16_fin(uint16_t sum1, uint16_t sum2) +{ + sum1 = (sum1 & 0xff) + (sum1 >> 8); + sum2 = (sum2 & 0xff) + (sum2 >> 8); + return (sum2 << 8) | sum1; +} + +void ethos_setup(ethos_t *dev, const ethos_params_t *params) +{ + dev->netdev.driver = &netdev2_driver_ethos; + dev->uart = params->uart; + dev->state = SM_WAIT_FRAMESTART; + dev->netdev_packetsz = 0; + dev->rx_buffer_index = 0; + dev->handlers = NULL; + dev->txseq = 0; + dev->stats_tx_frames = 0; + dev->stats_tx_retries = 0; + dev->stats_tx_bytes = 0; + dev->stats_rx_frames = 0; + dev->stats_rx_cksum_fail = 0; + dev->stats_rx_bytes = 0; + + dev->rexmit_acked = true; + dev->received_data = false; + + tsrb_init(&dev->netdev_inbuf, (char*)params->buf, params->bufsize); + mutex_init(&dev->out_mutex); + + uint32_t a = random_uint32(); + memcpy(dev->mac_addr, (char*)&a, 4); + a = random_uint32(); + memcpy(dev->mac_addr+4, (char*)&a, 2); + + dev->mac_addr[0] &= (0x2); /* unset globally unique bit */ + dev->mac_addr[0] &= ~(0x1); /* set unicast bit*/ + + rexmit_timer.callback = rethos_rexmit_callback; + + uart_init(params->uart, params->baudrate, ethos_isr, (void*)dev); + + //TODO send mac address + // + // uint8_t frame_delim = ETHOS_FRAME_DELIMITER; + // uart_write(dev->uart, &frame_delim, 1); + // ethos_send_frame(dev, dev->mac_addr, 6, ETHOS_FRAME_TYPE_HELLO); +} + +static void sm_invalidate(ethos_t *dev) +{ + dev->state = SM_WAIT_FRAMESTART; + dev->rx_buffer_index = 0; +} +static void process_frame(ethos_t *dev) +{ + /* Sam: Michael, I have no idea what you're doing here. 
+    if (dev->rx_frame_type == RETHOS_FRAME_TYPE_SETMAC)
+    {
+        memcpy(&dev->remote_mac_addr, dev->rx_buffer, 6);
+        rethos_send_frame(dev, dev->mac_addr, 6, RETHOS_CHANNEL_CONTROL, RETHOS_FRAME_TYPE_SETMAC);
+    }
+    */
+    if (dev->rx_frame_type != RETHOS_FRAME_TYPE_DATA)
+    {
+        /* All ACKs and NACKs happen on the RETHOS-reserved channel. */
+        if (dev->rx_channel == RETHOS_CHANNEL_CONTROL)
+        {
+            if (dev->rx_frame_type == RETHOS_FRAME_TYPE_ACK)
+            {
+                if (dev->rx_seqno == dev->rexmit_seqno)
+                {
+                    dev->rexmit_acked = true;
+                    xtimer_remove(&rexmit_timer);
+                }
+            }
+            else if (dev->rx_frame_type == RETHOS_FRAME_TYPE_NACK)
+            {
+                if (dev->rexmit_acked)
+                {
+                    /* They've already ACKed the last thing we sent, so either one of NACKs got
+                     * corrupted or one of our ACKs got corrupted.
+                     *
+                     * Sending a NACK here could cause a NACK storm, so instead just ACK the last
+                     * thing we received.
+                     */
+                    if (dev->received_data)
+                    {
+                        /* ACK the seqno of the last data frame we received; a
+                         * NACK itself carries seqno 0, so dev->rx_seqno would
+                         * be the wrong value here. */
+                        rethos_send_ack_frame(dev, dev->last_rcvd_seqno);
+                    }
+                }
+                else
+                {
+                    /* Retransmit the last data frame we sent. */
+                    rethos_rexmit_data_frame(dev);
+                }
+            }
+        }
+
+        return; //Other types are internal to rethos
+    }
+
+    dev->received_data = true;
+    dev->last_rcvd_seqno = dev->rx_seqno;
+
+    /* ACK the frame we just received. */
+    rethos_send_ack_frame(dev, dev->rx_seqno);
+
+    //Handle the special channels
+    switch(dev->rx_channel) {
+    case RETHOS_CHANNEL_NETDEV:
+        tsrb_add(&dev->netdev_inbuf, (char*) dev->rx_buffer, dev->rx_buffer_index);
+        dev->netdev_packetsz = dev->rx_buffer_index;
+        dev->netdev.event_callback((netdev2_t*) dev, NETDEV2_EVENT_ISR);
+        break;
+    case RETHOS_CHANNEL_STDIO:
+        for (size_t i = 0; i < dev->rx_buffer_index; i++)
+        {
+            //uart_stdio_rx_cb(NULL, dev->rx_buffer[i]);
+            isrpipe_write_one(&uart_stdio_isrpipe, dev->rx_buffer[i]);
+        }
+        break;
+    default:
+        break;
+    }
+    //And all registered handlers
+    rethos_handler_t *h = dev->handlers;
+    while (h != NULL)
+    {
+        if (h->channel == dev->rx_channel) {
+            h->cb(dev, dev->rx_channel, dev->rx_buffer, dev->rx_buffer_index);
+        }
+        h = h->_next;
+    }
+}
+
+static void sm_char(ethos_t *dev, uint8_t c)
+{
+    switch (dev->state)
+    {
+    case SM_WAIT_TYPE:
+        dev->rx_frame_type = c;
+        fletcher16_add(&c, 1, &dev->rx_cksum1, &dev->rx_cksum2);
+        dev->state = SM_WAIT_SEQ0;
+        return;
+    case SM_WAIT_SEQ0:
+        dev->rx_seqno = c;
+        fletcher16_add(&c, 1, &dev->rx_cksum1, &dev->rx_cksum2);
+        dev->state = SM_WAIT_SEQ1;
+        return;
+    case SM_WAIT_SEQ1:
+        dev->rx_seqno |= (((uint16_t)c)<<8);
+        fletcher16_add(&c, 1, &dev->rx_cksum1, &dev->rx_cksum2);
+        dev->state = SM_WAIT_CHANNEL;
+        return;
+    case SM_WAIT_CHANNEL:
+        dev->rx_channel = c;
+        fletcher16_add(&c, 1, &dev->rx_cksum1, &dev->rx_cksum2);
+        dev->state = SM_IN_FRAME;
+        return;
+    case SM_IN_FRAME:
+        dev->rx_buffer[dev->rx_buffer_index] = c;
+        fletcher16_add(&c, 1, &dev->rx_cksum1, &dev->rx_cksum2);
+        if ((++dev->rx_buffer_index) >= RETHOS_RX_BUF_SZ) {
+            sm_invalidate(dev);
+        }
+        return;
+    case SM_WAIT_CKSUM1:
+        dev->rx_expected_cksum = c;
+        dev->state = SM_WAIT_CKSUM2;
+        return;
+    case SM_WAIT_CKSUM2:
+        dev->rx_expected_cksum |= (((uint16_t)c)<<8);
+        if (dev->rx_expected_cksum != dev->rx_actual_cksum)
+        {
+            dev->stats_rx_cksum_fail++;
+            //SAM: do nack or something
+            rethos_send_nack_frame(dev);
+        } else {
+            dev->stats_rx_frames++;
+            dev->stats_rx_bytes += dev->rx_buffer_index;
+            process_frame(dev);
+        }
+        sm_invalidate(dev);
+        return;
+    default:
+        return;
+    }
+}
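For orientation while reading sm_char(): the sender side (in _start_frame_seqno() and rethos_end_frame() below) produces exactly the byte stream this state machine consumes. A condensed, illustrative encoder follows; it is not the patch's code, and emit_byte() is a hypothetical stand-in for the UART write. It mirrors the real transmit path: checksum seeded with 0xFF/0xFF, covering type, seqno (little-endian), channel and payload, with every byte after the unescaped start delimiter escaped if it equals RETHOS_ESC_CHAR:

    /* Hypothetical stand-in for uart_write() of a single byte. */
    extern void emit_byte(uint8_t b);

    static void emit_escaped(uint8_t b)
    {
        if (b == RETHOS_ESC_CHAR) {
            emit_byte(RETHOS_ESC_CHAR);
            emit_byte(RETHOS_LITERAL_ESC);
        } else {
            emit_byte(b);
        }
    }

    static void emit_frame(uint8_t type, uint16_t seqno, uint8_t channel,
                           const uint8_t *payload, size_t len)
    {
        uint16_t s1 = 0xFF, s2 = 0xFF;
        uint8_t hdr[4] = { type, seqno & 0xFF, seqno >> 8, channel };

        emit_byte(RETHOS_ESC_CHAR);        /* unescaped start delimiter */
        emit_byte(RETHOS_FRAME_START);
        fletcher16_add(hdr, 4, &s1, &s2);  /* checksum covers type..payload */
        for (size_t i = 0; i < 4; i++) {
            emit_escaped(hdr[i]);
        }
        fletcher16_add(payload, len, &s1, &s2);
        for (size_t i = 0; i < len; i++) {
            emit_escaped(payload[i]);
        }
        emit_byte(RETHOS_ESC_CHAR);        /* end delimiter, then checksum */
        emit_byte(RETHOS_FRAME_END);
        uint16_t ck = fletcher16_fin(s1, s2);
        emit_escaped(ck & 0xFF);
        emit_escaped(ck >> 8);
    }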
+static void sm_frame_start(ethos_t *dev)
+{
+    //Drop everything, we are beginning a new frame reception
+    dev->state = SM_WAIT_TYPE;
+    dev->rx_buffer_index = 0;
+    dev->rx_cksum1 = 0xFF;
+    dev->rx_cksum2 = 0xFF;
+}
+//This is not quite the real end of the frame, we still expect the checksum
+static void sm_frame_end(ethos_t *dev)
+{
+    uint16_t cksum = fletcher16_fin(dev->rx_cksum1, dev->rx_cksum2);
+    dev->rx_actual_cksum = cksum;
+    dev->state = SM_WAIT_CKSUM1;
+}
+static void ethos_isr(void *arg, uint8_t c)
+{
+    ethos_t *dev = (ethos_t *) arg;
+
+    if (dev->state == SM_IN_ESCAPE) {
+        switch (c) {
+        case RETHOS_LITERAL_ESC:
+            dev->state = dev->fromstate;
+            sm_char(dev, RETHOS_ESC_CHAR);
+            return;
+        case RETHOS_FRAME_START:
+            sm_frame_start(dev);
+            return;
+        case RETHOS_FRAME_END:
+            sm_frame_end(dev);
+            return;
+        default:
+            //any other character is invalid
+            sm_invalidate(dev);
+            return;
+        }
+    } else {
+        switch(c) {
+        case RETHOS_ESC_CHAR:
+            dev->fromstate = dev->state;
+            dev->state = SM_IN_ESCAPE;
+            return;
+        default:
+            sm_char(dev, c);
+            return;
+        }
+    }
+}
+
+//This gets called by netdev2
+static void _isr(netdev2_t *netdev)
+{
+    ethos_t *dev = (ethos_t *) netdev;
+    dev->netdev.event_callback((netdev2_t*) dev, NETDEV2_EVENT_RX_COMPLETE);
+}
+
+static int _init(netdev2_t *encdev)
+{
+    ethos_t *dev = (ethos_t *) encdev;
+    (void)dev;
+    return 0;
+}
+
+static size_t iovec_count_total(const struct iovec *vector, int count)
+{
+    size_t result = 0;
+    while(count--) {
+        result += vector->iov_len;
+        vector++;
+    }
+    return result;
+}
+
+static void _write_escaped(uart_t uart, uint8_t c)
+{
+    uint8_t *out;
+    int n;
+
+    switch(c) {
+    case RETHOS_ESC_CHAR:
+        out = (uint8_t*)_esc_esc;
+        n = 2;
+        break;
+    default:
+        out = &c;
+        n = 1;
+    }
+
+    uart_write(uart, out, n);
+}
+
+void rethos_rexmit_callback(void* arg)
+{
+    ethos_t* dev = (ethos_t*) arg;
+    rethos_rexmit_data_frame(dev);
+
+    xtimer_set(&rexmit_timer, (uint32_t) RETHOS_REXMIT_MICROS);
+}
+
+void _start_frame_seqno(ethos_t* dev, const uint8_t* data, size_t thislen, uint8_t channel, uint16_t seqno, uint8_t frame_type)
+{
+    uint8_t preamble_buffer[6];
+
+    dev->flsum1 = 0xFF;
+    dev->flsum2 = 0xFF;
+
+    preamble_buffer[0] = RETHOS_ESC_CHAR;
+    preamble_buffer[1] = RETHOS_FRAME_START;
+    //This is where the checksum starts
+    preamble_buffer[2] = frame_type;
+    preamble_buffer[3] = seqno & 0xFF; //Little endian cos im a rebel
+    preamble_buffer[4] = seqno >> 8;
+    preamble_buffer[5] = channel;
+
+    dev->stats_tx_bytes += 4 + thislen;
+
+    fletcher16_add(&preamble_buffer[2], 4, &dev->flsum1, &dev->flsum2);
+
+    uart_write(dev->uart, preamble_buffer, 2);
+    for (size_t i = 0; i < 4; i++)
+    {
+        _write_escaped(dev->uart, preamble_buffer[2+i]);
+    }
+
+    if (thislen > 0)
+    {
+        fletcher16_add(data, thislen, &dev->flsum1, &dev->flsum2);
+        //todo replace with a little bit of chunking
+        for (size_t i = 0; i < thislen; i++)
+        {
+            _write_escaped(dev->uart, data[i]);
+        }
+    }
+}
+
+void rethos_start_frame_seqno_norexmit(ethos_t* dev, const uint8_t* data, size_t thislen, uint8_t channel, uint16_t seqno, uint8_t frame_type)
+{
+    if (!irq_is_in()) {
+        mutex_lock(&dev->out_mutex);
+    }
+
+    _start_frame_seqno(dev, data, thislen, channel, seqno, frame_type);
+}
+
+void rethos_start_frame_seqno(ethos_t* dev, const uint8_t* data, size_t thislen, uint8_t channel, uint16_t seqno, uint8_t frame_type)
+{
+    if (!irq_is_in()) {
+        mutex_lock(&dev->out_mutex);
+    }
+
+    /* Store this data, in case we need to retransmit it.
*/ + dev->rexmit_seqno = seqno; + dev->rexmit_channel = (uint8_t) channel; + dev->rexmit_numbytes = thislen; + memcpy(dev->rexmit_frame, data, thislen); + dev->rexmit_acked = true; // We have a partial frame, so don't retransmit it on a NACK + + _start_frame_seqno(dev, data, thislen, channel, seqno, frame_type); +} + +void ethos_send_frame(ethos_t *dev, const uint8_t *data, size_t len, unsigned channel) +{ + rethos_send_frame(dev, data, len, channel, RETHOS_FRAME_TYPE_DATA); +} + +void rethos_send_frame(ethos_t *dev, const uint8_t *data, size_t len, uint8_t channel, uint8_t frame_type) +{ + rethos_start_frame(dev, data, len, channel, frame_type); + rethos_end_frame(dev); +} + +/* We need to copy this because, apparently, both rethos_send_frame and rethos_start_frame are public... */ +void rethos_send_frame_seqno(ethos_t *dev, const uint8_t *data, size_t len, uint8_t channel, uint16_t seqno, uint8_t frame_type) +{ + rethos_start_frame_seqno(dev, data, len, channel, seqno, frame_type); + rethos_end_frame(dev); +} + +void rethos_send_frame_seqno_norexmit(ethos_t *dev, const uint8_t *data, size_t len, uint8_t channel, uint16_t seqno, uint8_t frame_type) +{ + rethos_start_frame_seqno_norexmit(dev, data, len, channel, seqno, frame_type); + rethos_end_frame(dev); +} + +void rethos_rexmit_data_frame(ethos_t* dev) +{ + rethos_send_frame_seqno(dev, dev->rexmit_frame, dev->rexmit_numbytes, dev->rexmit_channel, dev->rexmit_seqno, RETHOS_FRAME_TYPE_DATA); +} + +void rethos_send_ack_frame(ethos_t* dev, uint16_t seqno) +{ + rethos_send_frame_seqno_norexmit(dev, NULL, 0, RETHOS_CHANNEL_CONTROL, seqno, RETHOS_FRAME_TYPE_ACK); +} + +void rethos_send_nack_frame(ethos_t* dev) +{ + rethos_send_frame_seqno_norexmit(dev, NULL, 0, RETHOS_CHANNEL_CONTROL, 0, RETHOS_FRAME_TYPE_NACK); +} + +void rethos_start_frame(ethos_t *dev, const uint8_t *data, size_t thislen, uint8_t channel, uint8_t frame_type) +{ + uint16_t seqno = ++(dev->txseq); + rethos_start_frame_seqno(dev, data, thislen, channel, seqno, frame_type); +} + +void rethos_continue_frame(ethos_t *dev, const uint8_t *data, size_t thislen) +{ + fletcher16_add(data, thislen, &dev->flsum1, &dev->flsum2); + + /* Check if we're going to overflow the rexmit buffer. */ + if (thislen + dev->rexmit_numbytes > RETHOS_TX_BUF_SZ) + { + /* Just stop transmitting data. The checksum should be corrupt anyway, so + * the other side won't think this was valid. We just need to make sure we + * never retransmit. 
+         */
+        dev->rexmit_numbytes = RETHOS_TX_BUF_SZ + 1;
+        return;
+    }
+
+    dev->stats_tx_bytes += thislen;
+    /* TODO: replace with a little bit of chunking. */
+    for (size_t i = 0; i < thislen; i++)
+    {
+        _write_escaped(dev->uart, data[i]);
+    }
+}
+
+void rethos_end_frame(ethos_t *dev)
+{
+    uint16_t cksum = fletcher16_fin(dev->flsum1, dev->flsum2);
+    uart_write(dev->uart, _end_frame, 2);
+    _write_escaped(dev->uart, cksum & 0xFF);
+    _write_escaped(dev->uart, cksum >> 8);
+    dev->stats_tx_frames += 1;
+
+    /* Enable retransmission and set the rexmit timer. */
+    if (dev->rexmit_numbytes <= RETHOS_TX_BUF_SZ)
+    {
+        dev->rexmit_acked = false;
+        rexmit_timer.arg = dev;
+        xtimer_set(&rexmit_timer, (uint32_t) RETHOS_REXMIT_MICROS);
+    }
+
+    if (!irq_is_in())
+    {
+        mutex_unlock(&dev->out_mutex);
+    }
+}
+
+static int _send(netdev2_t *netdev, const struct iovec *vector, unsigned count)
+{
+    ethos_t *dev = (ethos_t *) netdev;
+
+    rethos_start_frame(dev, NULL, 0, RETHOS_CHANNEL_NETDEV, RETHOS_FRAME_TYPE_DATA);
+
+    /* count total packet length */
+    size_t pktlen = iovec_count_total(vector, count);
+
+    while (count--) {
+        size_t n = vector->iov_len;
+        uint8_t *ptr = vector->iov_base;
+        rethos_continue_frame(dev, ptr, n);
+        vector++;
+    }
+
+    rethos_end_frame(dev);
+
+    return pktlen;
+}
+
+static void _get_mac_addr(netdev2_t *encdev, uint8_t* buf)
+{
+    ethos_t *dev = (ethos_t *) encdev;
+    memcpy(buf, dev->mac_addr, 6);
+}
+
+static int _recv(netdev2_t *netdev, void *buf, size_t len, void* info)
+{
+    (void) info;
+    ethos_t *dev = (ethos_t *) netdev;
+
+    if (buf) {
+        if (len < (int) dev->netdev_packetsz) {
+            DEBUG("ethos _recv(): receive buffer too small.\n");
+            return -1;
+        }
+
+        len = dev->netdev_packetsz;
+        dev->netdev_packetsz = 0;
+
+        if ((tsrb_get(&dev->netdev_inbuf, buf, len) != len)) {
+            DEBUG("ethos _recv(): inbuf doesn't contain enough bytes.\n");
+            return -1;
+        }
+
+        return (int) len;
+    }
+    else {
+        return dev->netdev_packetsz;
+    }
+}
+
+static int _get(netdev2_t *dev, netopt_t opt, void *value, size_t max_len)
+{
+    int res = 0;
+
+    switch (opt) {
+        case NETOPT_ADDRESS:
+            if (max_len < ETHERNET_ADDR_LEN) {
+                res = -EINVAL;
+            }
+            else {
+                _get_mac_addr(dev, (uint8_t*) value);
+                res = ETHERNET_ADDR_LEN;
+            }
+            break;
+        default:
+            res = netdev2_eth_get(dev, opt, value, max_len);
+            break;
+    }
+
+    return res;
+}
+
+void rethos_register_handler(ethos_t *dev, rethos_handler_t *handler)
+{
+    rethos_handler_t *h = dev->handlers;
+    handler->_next = NULL;
+    if (h == NULL) {
+        dev->handlers = handler;
+    } else {
+        while (h->_next != NULL) {
+            h = h->_next;
+        }
+        h->_next = handler;
+    }
+}
+
+/* netdev2 interface */
+static const netdev2_driver_t netdev2_driver_ethos = {
+    .send = _send,
+    .recv = _recv,
+    .init = _init,
+    .isr = _isr,
+    .get = _get,
+    .set = netdev2_eth_set
+};
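The driver above delimits every frame with a Fletcher-16 checksum computed over the type, sequence number, channel, and payload bytes (`fletcher16_add`/`fletcher16_fin`, both seeded with 0xFF). For readers unfamiliar with the scheme, here is a minimal standalone sketch of what such an accumulator looks like; the function names mirror the driver's, but this is an illustration, not the code from this patch:

```c
#include <stdint.h>
#include <stddef.h>

/* Accumulate bytes into the two running Fletcher-16 sums (seeded with 0xFF). */
static void fletcher16_add_sketch(const uint8_t *data, size_t len,
                                  uint8_t *sum1, uint8_t *sum2)
{
    uint16_t s1 = *sum1, s2 = *sum2;
    for (size_t i = 0; i < len; i++) {
        s1 = (s1 + data[i]) % 255;   /* simple sum */
        s2 = (s2 + s1) % 255;        /* sum of sums, position-sensitive */
    }
    *sum1 = (uint8_t) s1;
    *sum2 = (uint8_t) s2;
}

/* Combine the two sums into the 16-bit value sent after FRAME_END. */
static uint16_t fletcher16_fin_sketch(uint8_t sum1, uint8_t sum2)
{
    return (uint16_t) (((uint16_t) sum2 << 8) | sum1);
}
```

Because the second sum weights each byte by its position, Fletcher-16 catches reordered and swapped bytes that a plain additive checksum would miss, which is why it is a reasonable fit for a byte-stuffed UART framing protocol like this one.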
diff --git a/makefiles/pseudomodules.inc.mk b/makefiles/pseudomodules.inc.mk
index 0ecf459a027b..c3f6582edbfb 100644
--- a/makefiles/pseudomodules.inc.mk
+++ b/makefiles/pseudomodules.inc.mk
@@ -2,6 +2,7 @@ PSEUDOMODULES += auto_init_gnrc_rpl
 PSEUDOMODULES += conn
 PSEUDOMODULES += conn_ip
 PSEUDOMODULES += conn_tcp
+PSEUDOMODULES += conn_tcp_freebsd
 PSEUDOMODULES += conn_udp
 PSEUDOMODULES += core_%
 PSEUDOMODULES += emb6_router
@@ -56,6 +57,7 @@ PSEUDOMODULES += sock
 PSEUDOMODULES += sock_ip
 PSEUDOMODULES += sock_tcp
 PSEUDOMODULES += sock_udp
+PSEUDOMODULES += sock_tcp_freebsd
 
 # include variants of the AT86RF2xx drivers as pseudo modules
 PSEUDOMODULES += at86rf23%
diff --git a/sys/auto_init/auto_init.c b/sys/auto_init/auto_init.c
index 30fc3be9de85..52eba02e422b 100644
--- a/sys/auto_init/auto_init.c
+++ b/sys/auto_init/auto_init.c
@@ -68,6 +68,10 @@
 #include "net/gnrc/tcp.h"
 #endif
 
+#ifdef MODULE_GNRC_TCP_FREEBSD
+#include "net/gnrc/tcp_freebsd.h"
+#endif
+
 #ifdef MODULE_LWIP
 #include "lwip.h"
 #endif
@@ -136,6 +140,15 @@ void auto_init(void)
     DEBUG("Auto init TCP module\n");
     gnrc_tcp_init();
 #endif
+#ifdef MODULE_GNRC_TCP_FREEBSD
+    DEBUG("Auto init FreeBSD TCP module.\n");
+    gnrc_tcp_freebsd_init();
+#endif
+#ifdef MODULE_DHT
+    DEBUG("Auto init DHT devices.\n");
+    extern void dht_auto_init(void);
+    dht_auto_init();
+#endif
 #ifdef MODULE_LWIP
     DEBUG("Bootstraping lwIP.\n");
     lwip_bootstrap();
@@ -178,7 +191,7 @@ void auto_init(void)
     auto_init_enc28j60();
 #endif
 
-#ifdef MODULE_ETHOS
+#if defined(MODULE_ETHOS) || defined(MODULE_RETHOS)
     extern void auto_init_ethos(void);
     auto_init_ethos();
 #endif
diff --git a/sys/auto_init/netif/auto_init_ethos.c b/sys/auto_init/netif/auto_init_ethos.c
index b3fe15fef400..cd671bc82c65 100644
--- a/sys/auto_init/netif/auto_init_ethos.c
+++ b/sys/auto_init/netif/auto_init_ethos.c
@@ -17,7 +17,7 @@
  * @author  Kaspar Schleiser
  */
 
-#ifdef MODULE_ETHOS
+#if defined(MODULE_ETHOS) || defined(MODULE_RETHOS)
 
 #include "log.h"
 #include "debug.h"
diff --git a/sys/include/net/conn.h b/sys/include/net/conn.h
index 7ea944f3c08f..84df941ea282 100644
--- a/sys/include/net/conn.h
+++ b/sys/include/net/conn.h
@@ -77,7 +77,11 @@
 #define NET_CONN_H
 
 #include "net/conn/ip.h"
+
+#ifdef MODULE_GNRC_CONN_TCP
 #include "net/conn/tcp.h"
+#endif
+
 #include "net/conn/udp.h"
 #include "net/ipv6/addr.h"
 
diff --git a/sys/include/net/gnrc/conn.h b/sys/include/net/gnrc/conn.h
index 478f68c378ca..2078e1f90bc2 100644
--- a/sys/include/net/gnrc/conn.h
+++ b/sys/include/net/gnrc/conn.h
@@ -24,10 +24,15 @@
 #include <stdbool.h>
 #include <stdint.h>
 
+#include "cib.h"
+#include "condition.h"
+#include "mutex.h"
 #include "net/ipv6/addr.h"
 #include "net/gnrc.h"
 #include "sched.h"
 
+#include "net/tcp_freebsd.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/sys/include/net/gnrc/ipv6/autoconf_onehop.h b/sys/include/net/gnrc/ipv6/autoconf_onehop.h
new file mode 100644
index 000000000000..9157e465340e
--- /dev/null
+++ b/sys/include/net/gnrc/ipv6/autoconf_onehop.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2015 Martine Lenders
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License v2.1. See the file LICENSE in the top level directory for
+ * more details.
+ */
+
+/**
+ * @defgroup    net_gnrc_ipv6_autoconf_onehop   IPv6 one-hop address autoconfiguration
+ * @ingroup     net_gnrc_ipv6
+ * @brief       Derives IPv6 addresses from link-layer addresses, and back, for
+ *              one-hop routing.
+ * @{
+ *
+ * @file
+ * @brief   One-hop address autoconfiguration definitions.
+ *
+ * @author  Martine Lenders
+ */
+
+#ifndef GNRC_IPV6_AUTOCONF_ONEHOP_H
+#define GNRC_IPV6_AUTOCONF_ONEHOP_H
+
+#include "net/gnrc/ipv6.h"
+#include "net/ipv6/addr.h"
+#include "net/gnrc/ipv6/netif.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+kernel_pid_t get_6lowpan_pid(void);
+
+/* ipv6addr should already have the top 64 bits for the prefix set.
+ */
+int gnrc_ipv6_autoconf_l2addr_to_ipv6(ipv6_addr_t* ipv6addr, eui64_t* l2addr);
+
+void gnrc_ipv6_autoconf_ipv6_to_l2addr(eui64_t* l2addr, ipv6_addr_t* ipv6addr);
+
+kernel_pid_t gnrc_ipv6_autoconf_next_hop_l2addr(uint8_t *l2addr, uint8_t *l2addr_len, kernel_pid_t iface, ipv6_addr_t *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GNRC_IPV6_AUTOCONF_ONEHOP_H */
+/**
+ * @}
+ */
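For context, this style of stateless autoconfiguration conventionally forms the interface identifier by taking the EUI-64 link-layer address, inverting its universal/local bit, and placing it in the lower 64 bits of the address (RFC 4291, Appendix A). The actual implementation lives in `gnrc_ipv6_autoconf.c` (not shown in this excerpt); the following is only a plausible sketch of what `gnrc_ipv6_autoconf_l2addr_to_ipv6` does, under that assumption:

```c
#include <string.h>
#include "net/ipv6/addr.h"
#include "net/eui64.h"

/* Hypothetical sketch: ipv6addr's upper 64 bits already hold the prefix;
 * fill the lower 64 bits with the EUI-64-derived interface identifier. */
int l2addr_to_ipv6_sketch(ipv6_addr_t *ipv6addr, const eui64_t *l2addr)
{
    memcpy(&ipv6addr->u8[8], l2addr, sizeof(eui64_t));
    ipv6addr->u8[8] ^= 0x02;    /* invert the universal/local bit (RFC 4291) */
    return 0;
}
```

The inverse mapping (`gnrc_ipv6_autoconf_ipv6_to_l2addr`) would flip the same bit back, which is what makes one-hop next-hop resolution possible without a neighbor cache.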
diff --git a/sys/include/net/gnrc/netdev.h b/sys/include/net/gnrc/netdev.h
index b7d6d480c1cd..ba5e1d7c89a9 100644
--- a/sys/include/net/gnrc/netdev.h
+++ b/sys/include/net/gnrc/netdev.h
@@ -55,6 +55,27 @@ extern "C" {
  */
 #define NETDEV_MSG_TYPE_EVENT 0x1234
 
+/**
+ * @brief   Type for @ref msg_t if the device updates its duty-cycle state
+ */
+#define GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT (0x1235U)
+/**
+ * @brief   Type for @ref msg_t if a queued packet should be sent
+ */
+#define GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_SND (0x1236U)
+/**
+ * @brief   Type for @ref msg_t if the head of the packet queue should be removed
+ */
+#define GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_REMOVE_QUEUE (0x1237U)
+/**
+ * @brief   Type for @ref msg_t if a link-layer retransmission or CSMA attempt needs to be performed
+ */
+#define GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_LINK_RETRANSMIT (0x1238U)
+/**
+ * @brief   Type for @ref msg_t if the packet queue should be checked for pending packets
+ */
+#define GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_CHECK_QUEUE (0x1239U)
+
 /**
  * @brief   Mask for @ref gnrc_mac_tx_feedback_t
  */
@@ -83,6 +104,16 @@ typedef struct gnrc_netdev {
      */
     int (*send)(struct gnrc_netdev *dev, gnrc_pktsnip_t *snip);
 
+    int (*send_without_release)(struct gnrc_netdev *dev, gnrc_pktsnip_t *snip, bool set_pending_bit);
+    int (*resend_without_release)(struct gnrc_netdev *dev, gnrc_pktsnip_t *snip, bool set_pending_bit);
+
+    /**
+     * @brief   Send a beacon using this device
+     *
+     * This function should make a beacon for the corresponding link layer type.
+     */
+    int (*send_beacon)(struct gnrc_netdev *dev);
+
     /**
      * @brief   Receive a pktsnip from this device
      *
diff --git a/sys/include/net/gnrc/nettype.h b/sys/include/net/gnrc/nettype.h
index 6bc8777b9d47..c6c8925d387f 100644
--- a/sys/include/net/gnrc/nettype.h
+++ b/sys/include/net/gnrc/nettype.h
@@ -80,6 +80,9 @@ typedef enum {
 #ifdef MODULE_GNRC_TCP
     GNRC_NETTYPE_TCP,           /**< Protocol is TCP */
 #endif
+#ifdef MODULE_GNRC_TCP_FREEBSD
+    GNRC_NETTYPE_TCP,           /**< Protocol is TCP */
+#endif
 #ifdef MODULE_GNRC_UDP
     GNRC_NETTYPE_UDP,           /**< Protocol is UDP */
 #endif
@@ -183,7 +186,7 @@ static inline gnrc_nettype_t gnrc_nettype_from_protnum(uint8_t num)
         case PROTNUM_IPV6:
             return GNRC_NETTYPE_IPV6;
 #endif
-#ifdef MODULE_GNRC_TCP
+#if defined(MODULE_GNRC_TCP) || defined(MODULE_GNRC_TCP_FREEBSD)
         case PROTNUM_TCP:
             return GNRC_NETTYPE_TCP;
 #endif
diff --git a/sys/include/net/gnrc/tcp_freebsd.h b/sys/include/net/gnrc/tcp_freebsd.h
new file mode 100644
index 000000000000..25eb76729f23
--- /dev/null
+++ b/sys/include/net/gnrc/tcp_freebsd.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2015 Freie Universität Berlin
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @defgroup    net_gnrc_tcp_freebsd    TCP_FREEBSD
+ * @ingroup     net_gnrc
+ * @brief       FreeBSD TCP Frontend for GNRC
+ *
+ * @{
+ *
+ * @file
+ * @brief   TCP GNRC definition
+ *
+ * @author  Sam Kumar
+ *
+ * This file is largely based on sys/include/net/gnrc/udp.h.
+ */
+
+#ifndef GNRC_TCP_FREEBSD_H_
+#define GNRC_TCP_FREEBSD_H_
+
+#include <stdint.h>
+
+#include "byteorder.h"
+#include "net/gnrc.h"
+//#include "net/udp.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief   Default message queue size for the TCP thread
+ */
+#ifndef GNRC_TCP_FREEBSD_MSG_QUEUE_SIZE
+#define GNRC_TCP_FREEBSD_MSG_QUEUE_SIZE (8U)
+#endif
+
+/**
+ * @brief   Priority of the TCP thread
+ */
+#ifndef GNRC_TCP_FREEBSD_PRIO
+#define GNRC_TCP_FREEBSD_PRIO (THREAD_PRIORITY_MAIN - 2)
+#endif
+
+/**
+ * @brief   Default stack size to use for the TCP thread
+ */
+#ifndef GNRC_TCP_FREEBSD_STACK_SIZE
+#define GNRC_TCP_FREEBSD_STACK_SIZE (THREAD_STACKSIZE_DEFAULT)
+#endif
+
+/**
+ * @brief   Calculate the TCP checksum for the given packet
+ *
+ * @param[in] hdr           Pointer to the TCP header
+ * @param[in] pseudo_hdr    Pointer to the network layer header
+ *
+ * @return  0 on success
+ * @return  -EBADMSG if @p hdr is not of type GNRC_NETTYPE_TCP
+ * @return  -EFAULT if @p hdr or @p pseudo_hdr is NULL
+ * @return  -ENOENT if gnrc_pktsnip_t::type of @p pseudo_hdr is not known
+ */
+int gnrc_tcp_calc_csum(const gnrc_pktsnip_t *hdr, const gnrc_pktsnip_t *pseudo_hdr);
+
+#if 0
+/**
+ * @brief   Allocate and initialize a fresh UDP header in the packet buffer
+ *
+ * @param[in] payload   Payload contained in the UDP packet
+ * @param[in] src       Source port in host byte order
+ * @param[in] dst       Destination port in host byte order
+ *
+ * @return  pointer to the newly created (and allocated) header
+ * @return  NULL on `src == NULL`, `dst == NULL`, `src_len != 2`, `dst_len != 2`
+ *          or on allocation error
+ */
+gnrc_pktsnip_t *gnrc_udp_hdr_build(gnrc_pktsnip_t *payload, uint16_t src,
+                                   uint16_t dst);
+#endif
+
+/**
+ * @brief   Initialize and start TCP
+ *
+ * @return  PID of the TCP thread
+ * @return  negative value on error
+ */
+int gnrc_tcp_freebsd_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GNRC_TCP_FREEBSD_H_ */
+/** @} */
diff --git a/sys/include/net/netopt.h b/sys/include/net/netopt.h
index 1d224a103b62..4fab0de68530 100644
--- a/sys/include/net/netopt.h
+++ b/sys/include/net/netopt.h
@@ -240,6 +240,11 @@ typedef enum {
      */
     NETOPT_RF_TESTMODE,
 
+    /**
+     * @brief   en/disable radio duty-cycling
+     */
+    NETOPT_DUTYCYCLE,
+
     /* add more options if needed */
 
     /**
diff --git a/sys/include/net/sock/tcp_freebsd.h b/sys/include/net/sock/tcp_freebsd.h
new file mode 100644
index 000000000000..23c290d57470
--- /dev/null
+++ b/sys/include/net/sock/tcp_freebsd.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2017 Sam Kumar
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @defgroup    net_sock_tcp_freebsd    TCP connections (FreeBSD stack)
+ * @ingroup     net_sock
+ * @brief       Connection submodule for TCP connections over the ported FreeBSD stack
+ * @{
+ *
+ * @file
+ * @brief   TCP connection definitions
+ *
+ * @author  Sam Kumar
+ */
+#ifndef NET_SOCK_TCP_FREEBSD_H_
+#define NET_SOCK_TCP_FREEBSD_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "net/sock.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief   Forward declaration of @ref sock_tcp_freebsd_t to allow for external definition.
+ */
+struct sock_tcp_freebsd;
+
+/**
+ * @brief   Implementation-specific type of a TCP connection object
+ */
+typedef struct sock_tcp_freebsd sock_tcp_freebsd_t;
+
+/**
+ * @brief   Creates a new TCP connection object
+ *
+ * @param[out] conn     Preallocated connection object. Must fill the size of the stack-specific
+ *                      connection descriptor.
+ * @param[in] addr The local network layer address for @p conn. + * @param[in] addr_len The length of @p addr. Must be fitting for the @p family. + * @param[in] family The family of @p addr (see @ref net_af). + * @param[in] port The local TCP port for @p conn. + * + * @return 0 on success. + * @return any other negative number in case of an error. For portability implementations should + * draw inspiration of the errno values from the POSIX' bind() function specification. + */ +int sock_tcp_freebsd_create(sock_tcp_freebsd_t *conn, const void *addr, size_t addr_len, int family, + uint16_t port); + +/** + * @brief Closes a TCP connection + * + * @param[in,out] conn A TCP connection object. + */ +void sock_tcp_freebsd_close(sock_tcp_freebsd_t *conn); + +/** + * @brief Gets the local address of a TCP connection + * + * @param[in] conn A TCP connection object. + * @param[out] addr The local network layer address. Must have space for any address of + * the connection's family. + * @param[out] port The local TCP port. + * + * @return length of @p addr on success. + * @return any other negative number in case of an error. For portability implementations should + * draw inspiration of the errno values from the POSIX' getsockname() function + * specification. + */ +int sock_tcp_freebsd_getlocaladdr(sock_tcp_freebsd_t *conn, void *addr, uint16_t *port); + +/** + * @brief Gets the address of the connected peer of a TCP connection + * + * @param[in] conn A TCP connection object. + * @param[out] addr The network layer address of the connected peer. Must have space for any + * address of the connection's family. + * @param[out] port The TCP port of the connected peer. + * + * @return length of @p addr on success. + * @return any other negative number in case of an error. For portability implementations should + * draw inspiration of the errno values from the POSIX' getpeername() function + * specification. + */ +int sock_tcp_freebsd_getpeeraddr(sock_tcp_freebsd_t *conn, void *addr, uint16_t *port); + +/** + * @brief Connects to a remote TCP peer + * + * @param[in] conn A TCP connection object. + * @param[in] addr The remote network layer address for @p conn. + * @param[in] addr_len Length of @p addr. + * @param[in] port The remote TCP port for @p conn. + * + * @return 0 on success. + * @return any other negative number in case of an error. For portability implementations should + * draw inspiration of the errno values from the POSIX' connect() function specification. + */ +int sock_tcp_freebsd_connect(sock_tcp_freebsd_t *conn, const void *addr, size_t addr_len, uint16_t port); + +/** + * @brief Marks connection to listen for a connection request by a remote TCP peer + * + * @param[in] conn A TCP connection object. + * @param[in] queue_len Maximum length of the queue for connection requests. + * An implementation may choose to silently adapt this value to its needs + * (setting it to a minimum or maximum value). Any negative number must be + * set at least to 0. + * + * @return 0 on success. + * @return any other negative number in case of an error. For portability implementations should + * draw inspiration of the errno values from the POSIX' listen() function specification. + */ +int sock_tcp_freebsd_listen(sock_tcp_freebsd_t *conn, int queue_len); + +/** + * @brief Receives and handles TCP connection requests from other peers + * + * @param[in] conn A TCP connection object. + * @param[out] out_conn A new TCP connection object for the established connection. + * + * @return 0 on success. 
+ * @return  any other negative number in case of an error. For portability implementations should
+ *          draw inspiration of the errno values from the POSIX' accept() function specification.
+ */
+int sock_tcp_freebsd_accept(sock_tcp_freebsd_t *conn, sock_tcp_freebsd_t *out_conn);
+
+/**
+ * @brief   Receives a TCP message
+ *
+ * @param[in] conn      A TCP connection object.
+ * @param[out] data     Pointer where the received data should be stored.
+ * @param[in] max_len   Maximum space available at @p data.
+ *
+ * @note    Function may block.
+ *
+ * @return  The number of bytes received on success.
+ * @return  0, if no received data is available, but everything is in order.
+ * @return  any other negative number in case of an error. For portability, implementations should
+ *          draw inspiration of the errno values from the POSIX' recv(), recvfrom(), or recvmsg()
+ *          function specification.
+ */
+int sock_tcp_freebsd_recv(sock_tcp_freebsd_t *conn, void *data, size_t max_len);
+
+/**
+ * @brief   Sends a TCP message
+ *
+ * @param[in] conn  A TCP connection object.
+ * @param[in] data  Pointer to the data to be sent.
+ * @param[in] len   Length of the data at @p data.
+ *
+ * @note    Function may block.
+ *
+ * @return  The number of bytes sent on success.
+ * @return  any other negative number in case of an error. For portability, implementations should
+ *          draw inspiration of the errno values from the POSIX' send(), sendto(), or sendmsg()
+ *          function specification.
+ */
+int sock_tcp_freebsd_send(sock_tcp_freebsd_t *conn, const void *data, size_t len);
+
+#include "sock_types.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NET_SOCK_TCP_FREEBSD_H_ */
+/** @} */
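Taken together, the declarations above support the usual blocking client pattern. Below is a hedged usage sketch of that flow; the address, port, and binding choices (unspecified local address, ephemeral port 0) are illustrative assumptions, and error handling is elided:

```c
#include "net/af.h"
#include "net/ipv6/addr.h"
#include "net/sock/tcp_freebsd.h"

/* Hypothetical client: connect to [fe80::1]:80, send a request, read a reply. */
static void tcp_freebsd_client_example(void)
{
    static sock_tcp_freebsd_t conn;
    ipv6_addr_t remote;
    uint8_t buf[64];

    ipv6_addr_from_str(&remote, "fe80::1");

    /* Bind to the unspecified address and an ephemeral port (assumed to be 0). */
    sock_tcp_freebsd_create(&conn, &ipv6_addr_unspecified,
                            sizeof(ipv6_addr_t), AF_INET6, 0);
    sock_tcp_freebsd_connect(&conn, &remote, sizeof(ipv6_addr_t), 80);

    sock_tcp_freebsd_send(&conn, "hello", 5);
    int n = sock_tcp_freebsd_recv(&conn, buf, sizeof(buf));
    (void) n;

    sock_tcp_freebsd_close(&conn);
}
```

A server would instead call `sock_tcp_freebsd_listen()` after creating the connection object and then hand each incoming connection to a fresh object via `sock_tcp_freebsd_accept()`.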
diff --git a/sys/include/net/tcp_freebsd.h b/sys/include/net/tcp_freebsd.h
new file mode 100644
index 000000000000..83f27b145f7b
--- /dev/null
+++ b/sys/include/net/tcp_freebsd.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2016 University of California, Berkeley
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @ingroup     net_gnrc_tcp_freebsd
+ * @{
+ *
+ * @file
+ * @brief   External API to the TCP stack
+ *
+ * @author  Sam Kumar
+ *
+ * This file provides the "raw" event-based API to the TCP stack.
+ * A cleaner version supporting the BSD Socket API will be implemented
+ * in the conn module.
+ * @}
+ */
+
+#ifndef TCP_FREEBSD_H_
+#define TCP_FREEBSD_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <netinet/in.h>
+#include "../net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.h"
+
+typedef struct {
+    int asockid;
+    uint8_t* recvbuf;
+    size_t recvbuflen;
+    uint8_t* reassbmp;
+} acceptArgs_t;
+
+typedef void (*connectDone_t)(uint8_t, struct sockaddr_in6*, void*);
+typedef void (*sendDone_t)(uint8_t, uint32_t, void*);
+typedef void (*receiveReady_t)(uint8_t, int, void*);
+typedef void (*connectionLost_t)(acceptArgs_t*, uint8_t, void*);
+typedef acceptArgs_t (*acceptReady_t)(uint8_t, void*);
+typedef bool (*acceptDone_t)(uint8_t, struct sockaddr_in6*, acceptArgs_t*, void*);
+
+bool gnrc_tcp_freebsd_portisfree(uint16_t port);
+int bsdtcp_active_socket(connectDone_t cd, sendDone_t sd, receiveReady_t rr, connectionLost_t cl, void* ctx);
+int bsdtcp_passive_socket(acceptReady_t ar, acceptDone_t ad, void* ctx);
+int bsdtcp_set_ctx(int fd, void* newctx);
+int bsdtcp_bind(int fd, uint16_t port);
+int bsdtcp_connect(int fd, struct sockaddr_in6* faddrport, uint8_t* recvbuf, size_t recvbuflen, uint8_t* reassbmp);
+int bsdtcp_listen(int fd);
+int bsdtcp_send(int fd, struct lbufent* data, int* status);
+int bsdtcp_receive(int fd, uint8_t* buffer, size_t length, size_t* numbytes);
+int bsdtcp_shutdown(int fd, int how);
+int bsdtcp_close(int fd);
+int bsdtcp_abort(int fd);
+int bsdtcp_isestablished(int fd);
+int bsdtcp_hasrcvdfin(int fd);
+int bsdtcp_peerinfo(int fd, struct in6_addr** addrptr, uint16_t** portptr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+/** @} */
diff --git a/sys/include/task_sched.h b/sys/include/task_sched.h
new file mode 100644
index 000000000000..4b793da6030e
--- /dev/null
+++ b/sys/include/task_sched.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 University of California, Berkeley
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @ingroup     task_sched
+ * @{
+ *
+ * @file
+ * @brief   TinyOS-style task scheduler
+ *
+ * @author  Sam Kumar
+ *
+ * This module, built on top of the xtimer module, emulates a TinyOS-style task
+ * scheduler in a single thread.
+ *
+ * Tasks in TinyOS execute in an event loop. When a task is "posted", it is
+ * placed at the back of the event queue, unless that task is already on the
+ * event queue, in which case nothing happens.
+ *
+ * This is slightly different from messages in RIOT's IPC mechanism in that a
+ * task can only exist in the event queue in one place. This has the advantage
+ * that the memory needed for the event queue is bounded by the number of
+ * different tasks.
+ * @}
+ */
+
+#ifndef TASK_SCHED_H
+#define TASK_SCHED_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "mutex.h"
+#include "xtimer.h"
+
+struct task {
+    /* All fields are for internal use by the task_sched module. */
+    uint64_t _min_exec_time;
+    uint64_t _req_exec_time;
+    int _next;
+    int _prev;
+};
+
+struct task_sched {
+    /* These fields must be set before calling start_task_sched. */
+    int coalesce_shift;
+    int64_t max_coalesce_time_delta;
+    struct task* tasks;
+    int num_tasks;
+    char* thread_stack;
+    size_t thread_stack_size;
+    char thread_priority;
+    char* thread_name;
+    void (*task_handler)(int task);
+
+    /* These fields are for internal use by the task_sched module.
+     */
+    kernel_pid_t _pid;
+    mutex_t _lock;
+    int _first;
+    xtimer_t _timer;
+    bool _in_process_loop;
+};
+
+kernel_pid_t start_task_sched(struct task_sched* args);
+
+int sched_task(struct task_sched* sched, int taskid, int64_t delay);
+int cancel_task(struct task_sched* sched, int taskid);
+
+#endif /* TASK_SCHED_H */
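To make the posting semantics concrete, here is a hedged usage sketch: one scheduler thread driving two tasks, where posting an already-queued task is a no-op by design. The handler body, stack sizing, coalescing values, and the assumption that `delay` is in microseconds are all illustrative, not part of this patch:

```c
#include <stdio.h>
#include "task_sched.h"
#include "thread.h"

#define MY_NUM_TASKS 2

static struct task _tasks[MY_NUM_TASKS];
static char _sched_stack[THREAD_STACKSIZE_DEFAULT];
static struct task_sched _sched;

/* Called in the scheduler thread with the ID of the task that fired. */
static void _handle_task(int task)
{
    printf("task %d ran\n", task);
}

void task_sched_example_start(void)
{
    _sched.coalesce_shift = 3;              /* assumed: ~1/8 of delay as slack */
    _sched.max_coalesce_time_delta = 10000; /* assumed: at most 10 ms of slack */
    _sched.tasks = _tasks;
    _sched.num_tasks = MY_NUM_TASKS;
    _sched.thread_stack = _sched_stack;
    _sched.thread_stack_size = sizeof(_sched_stack);
    _sched.thread_priority = THREAD_PRIORITY_MAIN - 1;
    _sched.thread_name = (char *) "example_sched";
    _sched.task_handler = _handle_task;

    start_task_sched(&_sched);

    sched_task(&_sched, 0, 0);      /* post task 0 to run as soon as possible */
    sched_task(&_sched, 1, 10000);  /* post task 1 ~10 ms out (usec assumed) */
    sched_task(&_sched, 1, 10000);  /* already queued: this should be a no-op */
}
```

The bounded-memory property described above is what makes this attractive for the TCP port: each protocol timer maps to a fixed task slot, so no allocation happens on the hot path.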
diff --git a/sys/luid/luid.c b/sys/luid/luid.c
index 56ed86cf14b3..6a7ec3d917ca 100644
--- a/sys/luid/luid.c
+++ b/sys/luid/luid.c
@@ -54,20 +54,20 @@ void luid_base(void *buf, size_t len)
 
     memset(buf, LUID_BACKUP_SEED, len);
 
-    if ( HAS_FACTORY_BLOCK )
-    {
-        memcpy(buf, fb_eui64, 8);
-    }
-    else
+#if defined(HAS_FACTORY_BLOCK)
+    if (HAS_FACTORY_BLOCK) {
+        memcpy(buf, fb_eui64, 8);
+    } else
+#endif
     {
-    #if CPUID_LEN
-    uint8_t *out = (uint8_t *)buf;
-    uint8_t cid[CPUID_LEN];
+#if CPUID_LEN
+        uint8_t *out = (uint8_t *)buf;
+        uint8_t cid[CPUID_LEN];
 
-    cpuid_get(cid);
-    for (size_t i = 0; i < CPUID_LEN; i++) {
-        out[i % len] ^= cid[i];
-    }
-    #endif
+        cpuid_get(cid);
+        for (size_t i = 0; i < CPUID_LEN; i++) {
+            out[i % len] ^= cid[i];
+        }
+#endif
     }
 }
diff --git a/sys/net/gnrc/Makefile b/sys/net/gnrc/Makefile
index 601ada6ca0d1..d07c93600324 100644
--- a/sys/net/gnrc/Makefile
+++ b/sys/net/gnrc/Makefile
@@ -7,6 +7,9 @@ endif
 ifneq (,$(filter gnrc_conn_udp,$(USEMODULE)))
   DIRS += conn/udp
 endif
+ifneq (,$(filter gnrc_csma_sender,$(USEMODULE)))
+  DIRS += link_layer/csma_sender
+endif
 ifneq (,$(filter gnrc_icmpv6,$(USEMODULE)))
   DIRS += network_layer/icmpv6
 endif
@@ -28,6 +31,9 @@ endif
 ifneq (,$(filter gnrc_ipv6_nc,$(USEMODULE)))
   DIRS += network_layer/ipv6/nc
 endif
+ifneq (,$(filter gnrc_ipv6_autoconf_onehop,$(USEMODULE)))
+  DIRS += network_layer/ipv6/autoconf_onehop
+endif
 ifneq (,$(filter gnrc_ipv6_netif,$(USEMODULE)))
   DIRS += network_layer/ipv6/netif
 endif
@@ -124,12 +130,22 @@ endif
 ifneq (,$(filter gnrc_sock_udp,$(USEMODULE)))
   DIRS += sock/udp
 endif
+ifneq (,$(filter gnrc_sock_tcp_freebsd,$(USEMODULE)))
+  DIRS += sock/tcp_freebsd
+  DIRS += sock/tcp_freebsd/zone
+endif
 ifneq (,$(filter gnrc_udp,$(USEMODULE)))
   DIRS += transport_layer/udp
 endif
 ifneq (,$(filter gnrc_tcp,$(USEMODULE)))
   DIRS += transport_layer/tcp
 endif
+ifneq (,$(filter gnrc_tcp_freebsd,$(USEMODULE)))
+  DIRS += transport_layer/tcp_freebsd
+endif
+ifneq (,$(filter gnrc_zep,$(USEMODULE)))
+  DIRS += application_layer/zep
+endif
 ifneq (,$(filter gnrc_tftp,$(USEMODULE)))
   DIRS += application_layer/tftp
 endif
diff --git a/sys/net/gnrc/link_layer/dutymac/gnrc_netdev2_duty_leaf.c b/sys/net/gnrc/link_layer/dutymac/gnrc_netdev2_duty_leaf.c
new file mode 100644
index 000000000000..4c2e9f007316
--- /dev/null
+++ b/sys/net/gnrc/link_layer/dutymac/gnrc_netdev2_duty_leaf.c
@@ -0,0 +1,723 @@
+/*
+ * Copyright (C) 2015 Freie Universität Berlin
+ *               2015 Kaspar Schleiser
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @{
+ * @ingroup     net
+ * @file
+ * @brief   Glue for netdev devices to netapi (duty-cycling protocol for leaf nodes)
+ *          Duty-cycling protocol of Thread network
+ *
+ * @author  Hyung-Sin Kim
+ * @}
+ */
+
+#include <errno.h>
+
+#include "msg.h"
+#include "thread.h"
+
+#include "net/gnrc.h"
+#include "net/gnrc/nettype.h"
+#include "net/netdev.h"
+
+#include "net/gnrc/netdev.h"
+#include "net/ethernet/hdr.h"
+#include "random.h"
+#include "net/ieee802154.h"
+#include <assert.h>
+
+#include "xtimer.h"
+
+#include "send.h"
+
+#if DUTYCYCLE_EN
+#if LEAF_NODE
+
+#define ENABLE_DEBUG (0)
+#include "debug.h"
+
+#if defined(MODULE_OD) && ENABLE_DEBUG
+#include "od.h"
+#endif
+
+#define NETDEV_NETAPI_MSG_QUEUE_SIZE 16
+#define NETDEV_PKT_QUEUE_SIZE 128
+
+static uint8_t sleep_interval_shift = 0;
+
+static void reset_sleep_interval(void) {
+    int state = irq_disable();
+    sleep_interval_shift = 0;
+    irq_restore(state);
+}
+static void backoff_sleep_interval(void) {
+    int state = irq_disable();
+    uint32_t interval = (DUTYCYCLE_SLEEP_INTERVAL_MIN << sleep_interval_shift);
+    if (interval < DUTYCYCLE_SLEEP_INTERVAL_MAX) {
+        assert((interval << 1) >= interval); /* check for overflow */
+        sleep_interval_shift++;
+    }
+    irq_restore(state);
+}
+static uint32_t get_sleep_interval(void) {
+    uint32_t interval = (DUTYCYCLE_SLEEP_INTERVAL_MIN << sleep_interval_shift);
+    if (interval > DUTYCYCLE_SLEEP_INTERVAL_MAX) {
+        interval = DUTYCYCLE_SLEEP_INTERVAL_MAX;
+    }
+    return interval;
+}
+
+static void _pass_on_packet(gnrc_pktsnip_t *pkt);
+
+/** 1) For a leaf node (battery-powered), 'dutycycling' is set to NETOPT_ENABLE by the application
+ *  2) For a router (wall-powered), 'dutycycling' remains NETOPT_DISABLE
+ */
+netopt_enable_t dutycycling = NETOPT_DISABLE;
+
+/* Dutycycle state (INIT, SLEEP, TXBEACON, TXDATA, and LISTEN) */
+typedef enum {
+    DUTY_INIT,
+    DUTY_SLEEP,
+    DUTY_TX_BEACON,
+    DUTY_TX_DATA,
+    DUTY_TX_DATA_BEFORE_BEACON,
+    DUTY_LISTEN,
+} dutycycle_state_t;
+dutycycle_state_t dutycycle_state = DUTY_INIT;
+
+/** 1) For a leaf node, 'timer' is used for wake-up scheduling
+ *  2) For a router, 'timer' is used for broadcasting;
+ *     a router does not discard a broadcast packet during a sleep interval
+ */
+xtimer_t timer;
+uint8_t pending_num = 0;
+
+kernel_pid_t dutymac_netdev_pid;
+
+/* A packet can be sent only when radio_busy = 0 */
+bool radio_busy = false;
+
+/* This is the packet being sent by the radio now */
+gnrc_pktsnip_t *current_pkt;
+
+bool additional_wakeup = false;
+
+bool retry_rexmit = false;
+void send_packet(gnrc_pktsnip_t* pkt, gnrc_netdev_t* gnrc_dutymac_netdev, bool retransmission) {
+    retry_rexmit = retransmission;
+    msg_t msg;
+    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_LINK_RETRANSMIT;
+    msg.content.ptr = pkt;
+    if (msg_send(&msg, dutymac_netdev_pid) <= 0) {
+        assert(false);
+    }
+}
+
+bool sending_beacon = false;
+
+void send_packet_csma(gnrc_pktsnip_t* pkt, gnrc_netdev_t* gnrc_dutymac_netdev, bool retransmission) {
+    send_with_csma(pkt, send_packet, gnrc_dutymac_netdev, retransmission, sending_beacon);
+}
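The sleep-interval helpers above implement a capped exponential backoff: every beacon that elicits no data doubles the sleep interval up to `DUTYCYCLE_SLEEP_INTERVAL_MAX`, and any real traffic resets it to the minimum. The following self-contained demo reproduces that arithmetic with made-up MIN/MAX values so the progression is easy to verify; it is an illustration, not code from this patch:

```c
#include <stdio.h>

#define SLEEP_MIN 100u   /* stands in for DUTYCYCLE_SLEEP_INTERVAL_MIN */
#define SLEEP_MAX 1000u  /* stands in for DUTYCYCLE_SLEEP_INTERVAL_MAX */

int main(void)
{
    unsigned shift = 0;
    for (int wakeup = 0; wakeup < 6; wakeup++) {
        unsigned interval = SLEEP_MIN << shift;
        if (interval > SLEEP_MAX) {
            interval = SLEEP_MAX;   /* get_sleep_interval() clamps at MAX */
        }
        printf("wakeup %d: sleep %u\n", wakeup, interval);
        if ((SLEEP_MIN << shift) < SLEEP_MAX) {
            shift++;                /* backoff_sleep_interval() doubles */
        }
    }
    return 0;   /* prints 100, 200, 400, 800, 1000, 1000 */
}
```

Also note the transmit-path layering visible in `send_packet_csma()`: link-layer retries wrap software CSMA, and the innermost callback (`send_packet()`) defers the actual radio operation to the MAC thread via a `LINK_RETRANSMIT` message, so the radio is never driven from timer or interrupt context.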
+// FIFO QUEUE
+int msg_queue_add(msg_t* msg_queue, msg_t* msg) {
+    if (pending_num < NETDEV_PKT_QUEUE_SIZE) {
+        /* Add a packet to the last entry of the queue */
+        msg_queue[pending_num].sender_pid = msg->sender_pid;
+        msg_queue[pending_num].type = msg->type;
+        msg_queue[pending_num].content.ptr = msg->content.ptr;
+        DEBUG("\nqueue add success [%u/%u/%4x]\n", pending_num, msg_queue[pending_num].sender_pid,
+              msg_queue[pending_num].type);
+        pending_num++; /* Number of packets in the queue */
+        return 1;
+    } else {
+        DEBUG("Queue loss at netdev\n");
+        return 0;
+    }
+}
+
+void msg_queue_remove_head(msg_t* msg_queue) {
+    /* Remove queue head */
+    DEBUG("remove queue [%u]\n", pending_num-1);
+    gnrc_pktbuf_release(msg_queue[0].content.ptr);
+    pending_num--;
+    if (pending_num < 0) {
+        DEBUG("NETDEV: Pending number error\n");
+    }
+
+    /* Update queue when more pending packets exist */
+    if (pending_num) {
+        for (int i = 0; i < pending_num; i++) {
+            msg_queue[i].sender_pid = msg_queue[i+1].sender_pid;
+            msg_queue[i].type = msg_queue[i+1].type;
+            msg_queue[i].content.ptr = msg_queue[i+1].content.ptr;
+        }
+    }
+}
+
+void msg_queue_send(msg_t* msg_queue, gnrc_netdev_t* gnrc_dutymac_netdev) {
+    gnrc_pktsnip_t* pkt = msg_queue[0].content.ptr;
+    radio_busy = true; /* radio is now busy */
+    current_pkt = pkt;
+    send_with_retries(pkt, -1, send_packet_csma, gnrc_dutymac_netdev, false);
+}
+
+/**
+ * @brief   Function called when the dutycycle timer expires
+ *
+ * @param[in] arg   the gnrc_netdev device
+ */
+void dutycycle_cb(void* arg) {
+    gnrc_netdev_t* gnrc_dutymac_netdev = (gnrc_netdev_t*) arg;
+    msg_t msg;
+    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT;
+    switch (dutycycle_state) {
+        case DUTY_SLEEP: /* Sleep ends: wake up and send a beacon */
+            dutycycle_state = DUTY_TX_BEACON;
+            msg_send(&msg, gnrc_dutymac_netdev->pid);
+            break;
+        case DUTY_LISTEN:
+            if (pending_num > 0) {
+                xtimer_set(&timer, get_sleep_interval());
+                dutycycle_state = DUTY_TX_DATA;
+                msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_CHECK_QUEUE;
+                msg_send(&msg, gnrc_dutymac_netdev->pid);
+            } else {
+                dutycycle_state = DUTY_SLEEP;
+                msg_send(&msg, gnrc_dutymac_netdev->pid);
+            }
+            break;
+        case DUTY_TX_DATA: /* Sleep ends while transmitting data: just state change */
+            dutycycle_state = DUTY_TX_DATA_BEFORE_BEACON;
+            break;
+        default:
+            break;
+    }
+}
+
+bool irq_pending = false;
+
+/**
+ * @brief   Function called by the device driver on device events
+ *
+ * @param[in] event     type of event
+ */
+static void _event_cb(netdev_t *dev, netdev_event_t event)
+{
+    gnrc_netdev_t* gnrc_dutymac_netdev = (gnrc_netdev_t*) dev->context;
+    if (event == NETDEV_EVENT_ISR) {
+        irq_pending = true;
+        msg_t msg;
+        msg.type = NETDEV_MSG_TYPE_EVENT;
+        msg.content.ptr = gnrc_dutymac_netdev;
+        if (msg_send(&msg, gnrc_dutymac_netdev->pid) <= 0) {
+            puts("gnrc_netdev: possibly lost interrupt.");
+        }
+    }
+    else if (event == NETDEV_EVENT_RX_PENDING) {
+        additional_wakeup = true;
+    }
+    else {
+        DEBUG("gnrc_netdev: event triggered -> %i\n", event);
+        bool will_retry;
+        switch (event) {
+            case NETDEV_EVENT_RX_COMPLETE:
+            {
+                /* Packet decoding */
+                gnrc_pktsnip_t *pkt = gnrc_dutymac_netdev->recv(gnrc_dutymac_netdev);
+
+                int state = irq_disable();
+                xtimer_remove(&timer);
+
+                msg_t msg;
+
+                if (additional_wakeup) { /* LISTEN for a while for further packet reception */
+                    //printf("Listen after reception...\n");
+                    dutycycle_state = DUTY_LISTEN;
+                    additional_wakeup = false;
+                    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT;
+                } else if (pending_num == 0) { /* SLEEP now */
+                    //printf("Sleep after reception...\n");
+                    dutycycle_state = DUTY_SLEEP;
+                    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT;
+                } else {
+                    //printf("Send data after reception...\n");
+                    xtimer_set(&timer, get_sleep_interval());
+                    dutycycle_state = DUTY_TX_DATA;
+                    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_CHECK_QUEUE;
+                }
+
+                msg_send(&msg, gnrc_dutymac_netdev->pid);
+
+                irq_restore(state);
+
+                if (pkt) {
+                    _pass_on_packet(pkt);
+                }
+                break;
+            }
+            case NETDEV_EVENT_TX_COMPLETE_PENDING: /* Response for Data Request packet */
+            {
+#ifdef MODULE_NETSTATS_L2
+                dev->stats.tx_success++;
+#endif
+                csma_send_succeeded();
+                retry_send_succeeded();
+
+                //printf("sent, with pending\n");
+
+                radio_busy = false;
+
+                /* There will be data in this sleep interval.
*/ + reset_sleep_interval(); + + if (dutycycle_state != DUTY_INIT) { + /* Dutycycle_state must be DUTY_TX_BEACON */ + if (dutycycle_state != DUTY_TX_BEACON) { + DEBUG("gnrc_netdev: SOMETHING IS WRONG\n"); + } + /* LISTEN for a while for packet reception */ + //printf("remove timer 2\n"); + xtimer_remove(&timer); + //printf("Listening after beacon...\n"); + dutycycle_state = DUTY_LISTEN; + msg_t msg; + msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT; + msg_send(&msg, gnrc_dutymac_netdev->pid); + } + break; + } + case NETDEV_EVENT_TX_COMPLETE: + { +#ifdef MODULE_NETSTATS_L2 + dev->stats.tx_success++; +#endif + csma_send_succeeded(); + retry_send_succeeded(); + + radio_busy = false; /* radio is free now */ + + if (dutycycle_state != DUTY_INIT) { + msg_t msg; + if (dutycycle_state == DUTY_TX_BEACON) { /* Sleep again */ + //printf("remove timer 3\n"); + xtimer_remove(&timer); + + /* No data in this interval... */ + backoff_sleep_interval(); + + dutycycle_state = DUTY_SLEEP; + msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT; + msg_send(&msg, gnrc_dutymac_netdev->pid); + } else if (pending_num > 0) { /* Remove the packet from the queue */ + /* We just sent a data-containing packet. */ + reset_sleep_interval(); + + if (dutycycle_state != DUTY_TX_DATA) { + assert(dutycycle_state != DUTY_SLEEP); + //printf("remove timer 4\n"); + xtimer_remove(&timer); + } + msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_REMOVE_QUEUE; + msg_send(&msg, gnrc_dutymac_netdev->pid); + } else if (dutycycle_state == DUTY_TX_DATA) { + msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT; + msg_send(&msg, gnrc_dutymac_netdev->pid); + } + } + break; + } + case NETDEV_EVENT_TX_MEDIUM_BUSY: +#ifdef MODULE_NETSTATS_L2 + dev->stats.tx_failed++; +#endif + will_retry = csma_send_failed(); + if (will_retry) { + break; + } + /* fallthrough intentional */ + case NETDEV_EVENT_TX_NOACK: + if (event == NETDEV_EVENT_TX_NOACK) { + /* CSMA succeeded... */ + csma_send_succeeded(); + } + /* ... but the retry failed. 
+                 */
+                will_retry = retry_send_failed();
+                if (will_retry) {
+                    break;
+                }
+
+                radio_busy = false;
+                {
+#ifdef MODULE_NETSTATS_L2
+                    dev->stats.tx_failed++;
+#endif
+                    if (dutycycle_state != DUTY_INIT) {
+                        msg_t msg;
+                        if (dutycycle_state == DUTY_TX_BEACON) { /* Sleep again */
+                            //printf("remove timer 5\n");
+                            xtimer_remove(&timer);
+                            dutycycle_state = DUTY_SLEEP;
+                            msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT;
+                            msg_send(&msg, gnrc_dutymac_netdev->pid);
+                        } else if (pending_num > 0) { /* Remove the packet from the queue */
+                            if (dutycycle_state != DUTY_TX_DATA) {
+                                assert(dutycycle_state != DUTY_SLEEP);
+                                //printf("remove timer 6\n");
+                                xtimer_remove(&timer);
+                            }
+                            msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_REMOVE_QUEUE;
+                            msg_send(&msg, gnrc_dutymac_netdev->pid);
+                        } else if (dutycycle_state == DUTY_TX_DATA) {
+                            msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT;
+                            msg_send(&msg, gnrc_dutymac_netdev->pid);
+                        }
+                    }
+                    break;
+                }
+            default:
+                DEBUG("gnrc_netdev: warning: unhandled event %u.\n", event);
+        }
+    }
+}
+
+static bool is_receiving(netdev_t* dev) {
+    netopt_state_t state;
+    int rv = dev->driver->get(dev, NETOPT_STATE, &state, sizeof(state));
+    if (rv != sizeof(state)) {
+        assert(false);
+    }
+    return state == NETOPT_STATE_RX;
+}
+
+static void _pass_on_packet(gnrc_pktsnip_t *pkt)
+{
+    /* throw away packet if no one is interested */
+    if (!gnrc_netapi_dispatch_receive(pkt->type, GNRC_NETREG_DEMUX_CTX_ALL, pkt)) {
+        DEBUG("gnrc_netdev: unable to forward packet of type %i\n", pkt->type);
+        gnrc_pktbuf_release(pkt);
+        return;
+    }
+}
+
+bool beacon_pending = false;
+static void send_beacon_safely(gnrc_netdev_t* gnrc_dutymac_netdev) {
+    if (radio_busy || irq_pending || is_receiving(gnrc_dutymac_netdev->dev)) {
+        beacon_pending = true;
+    } else {
+        //printf("sending beacon...\n");
+        radio_busy = true;
+        sending_beacon = true;
+        send_with_retries(NULL, -1, send_packet_csma, gnrc_dutymac_netdev, false);
+    }
+}
+
+msg_t pkt_queue[NETDEV_PKT_QUEUE_SIZE];
+
+/**
+ * @brief   Startup code and event loop of the gnrc_netdev layer
+ *
+ * @param[in] args  expects a pointer to the underlying netdev device
+ *
+ * @return  never returns
+ */
+static void *_gnrc_netdev_duty_thread(void *args)
+{
+    DEBUG("gnrc_netdev: starting thread\n");
+
+    gnrc_netdev_t* gnrc_dutymac_netdev = (gnrc_netdev_t*) args;
+    netdev_t *dev = gnrc_dutymac_netdev->dev;
+    gnrc_dutymac_netdev->pid = thread_getpid();
+    dutymac_netdev_pid = gnrc_dutymac_netdev->pid;
+
+    timer.callback = dutycycle_cb;
+    timer.arg = (void*) gnrc_dutymac_netdev;
+    netopt_state_t sleepstate;
+    uint16_t src_len = IEEE802154_SHORT_ADDRESS_LEN;
+
+    gnrc_netapi_opt_t *opt;
+    int res;
+
+    /* setup the MAC layer's message queue (general purpose) */
+    msg_t msg, reply, msg_queue[NETDEV_NETAPI_MSG_QUEUE_SIZE];
+    msg_init_queue(msg_queue, NETDEV_NETAPI_MSG_QUEUE_SIZE);
+
+    /* setup the MAC layer's packet queue (only for packet transmission) */
+    for (int i = 0; i < NETDEV_PKT_QUEUE_SIZE; i++) {
+        pkt_queue[i].content.ptr = NULL;
+    }
+
+    /* register the event callback with the device driver */
+    dev->event_callback = _event_cb;
+    dev->context = (void*) gnrc_dutymac_netdev;
+
+    /* register the device to the network stack */
+    gnrc_netif_add(thread_getpid());
+
+    /* initialize low-level driver */
+    dev->driver->init(dev);
+
+    /* start the event loop */
+    while (1) {
+        DEBUG("gnrc_netdev: waiting for incoming messages\n");
+        msg_receive(&msg);
+
+        /* dispatch NETDEV and NETAPI messages */
+        switch (msg.type) {
+            case GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT:
+                /* radio dutycycling control */
+                DEBUG("gnrc_netdev: GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT received\n");
+                if (dutycycling == NETOPT_ENABLE) {
                    switch (dutycycle_state) {
+                        case DUTY_INIT: /* Start dutycycling from sleep state */
+                            dutycycling = NETOPT_ENABLE;
+                            dutycycle_state = DUTY_SLEEP;
+                            sleepstate = NETOPT_STATE_SLEEP;
+                            dev->driver->set(dev, NETOPT_STATE, &sleepstate, sizeof(netopt_state_t));
+                            dev->driver->set(dev, NETOPT_SRC_LEN, &src_len, sizeof(src_len));
+                            xtimer_set(&timer, random_uint32_range(0, DUTYCYCLE_SLEEP_INTERVAL_MAX));
+                            DEBUG("gnrc_netdev: INIT DUTYCYCLE\n");
+                            break;
+                        case DUTY_TX_BEACON: /* Tx a beacon after wake-up */
+                            //printf("remove timer 7\n");
+                            xtimer_remove(&timer);
+                            send_beacon_safely(gnrc_dutymac_netdev);
+                            DEBUG("gnrc_netdev: SEND BEACON\n");
+                            break;
+                        case DUTY_TX_DATA: /* After Tx of all data packets */
+                            /* Timer is running in this state; when it expires we move to
+                             * DUTY_TX_DATA_BEFORE_BEACON. */
+                            dutycycle_state = DUTY_SLEEP;
+                            sleepstate = NETOPT_STATE_SLEEP;
+                            dev->driver->set(dev, NETOPT_STATE, &sleepstate, sizeof(netopt_state_t));
+                            DEBUG("gnrc_netdev: RADIO OFF\n\n");
+                            break;
+                        case DUTY_TX_DATA_BEFORE_BEACON:
+                            //printf("remove timer 8\n");
+                            xtimer_remove(&timer);
+                            if (!radio_busy && !irq_pending && !is_receiving(dev)) {
+                                msg_queue_send(pkt_queue, gnrc_dutymac_netdev);
+                            }
+                            DEBUG("gnrc_netdev: SEND DATA BEFORE BEACON\n");
+                            break;
+                        case DUTY_LISTEN: /* Idle listening after transmission or reception */
+                            //radio_busy = false;
+                            dev->driver->get(dev, NETOPT_STATE, &sleepstate, sizeof(netopt_state_t));
+                            sleepstate = NETOPT_STATE_IDLE;
+                            dev->driver->set(dev, NETOPT_STATE, &sleepstate, sizeof(netopt_state_t));
+                            //printf("wakeup timer set\n");
+                            xtimer_set(&timer, DUTYCYCLE_WAKEUP_INTERVAL);
+                            DEBUG("gnrc_netdev: RADIO REMAINS ON\n");
+                            break;
+                        case DUTY_SLEEP: /* Go to sleep */
+                            //gpio_write(GPIO_PIN(0,19),0);
+                            sleepstate = NETOPT_STATE_SLEEP;
+                            dev->driver->set(dev, NETOPT_STATE, &sleepstate, sizeof(netopt_state_t));
+                            xtimer_set(&timer, get_sleep_interval());
+                            DEBUG("gnrc_netdev: RADIO OFF\n\n");
+                            break;
+                        default:
+                            break;
+                    }
+                } else {
+                    /* something is wrong */
+                    DEBUG("gnrc_netdev: SOMETHING IS WRONG\n");
+                }
+                break;
+            case GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_REMOVE_QUEUE:
+                /* Remove a packet from the packet queue */
+                msg_queue_remove_head(pkt_queue);
+                /* Send a packet in the packet queue */
+                if (pending_num) {
+                    if (!radio_busy && !irq_pending && !is_receiving(dev)) {
+                        /* Send any packet */
+                        msg_queue_send(pkt_queue, gnrc_dutymac_netdev);
+                    }
+                } else {
+                    if (dutycycle_state == DUTY_TX_DATA_BEFORE_BEACON) {
+                        dutycycle_state = DUTY_TX_BEACON;
+                        send_beacon_safely(gnrc_dutymac_netdev);
+                        DEBUG("gnrc_netdev: SEND BEACON AFTER DATA\n");
+                    } else if (dutycycle_state == DUTY_TX_DATA) {
+                        /*msg_t nmsg;
+                        nmsg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_EVENT;
+                        msg_send_to_self(&nmsg);*/
+                        //printf("Emptied send queue, going to sleep...\n");
+                        dutycycle_state = DUTY_SLEEP;
+                        sleepstate = NETOPT_STATE_SLEEP;
+                        dev->driver->set(dev, NETOPT_STATE, &sleepstate, sizeof(netopt_state_t));
+                        DEBUG("gnrc_netdev: RADIO OFF\n\n");
+                    }
+                }
+                break;
+            case GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_CHECK_QUEUE:
+                if (dutycycle_state != DUTY_LISTEN && pending_num != 0 && !radio_busy && !irq_pending && !is_receiving(dev)) {
+                    if (dutycycle_state == DUTY_SLEEP) {
+                        dutycycle_state = DUTY_TX_DATA;
+                    }
+                    msg_queue_send(pkt_queue, gnrc_dutymac_netdev);
+                }
+                break;
+            case NETDEV_MSG_TYPE_EVENT:
+                DEBUG("gnrc_netdev: GNRC_NETDEV_MSG_TYPE_EVENT received\n");
+                irq_pending = false;
+                dev->driver->isr(dev);
+                if (beacon_pending && !radio_busy) {
+                    //printf("sending pending beacon\n");
+                    beacon_pending = false;
+                    radio_busy = true;
+                    sending_beacon = true;
+                    send_with_retries(NULL, -1, send_packet_csma, gnrc_dutymac_netdev, false);
+                }
+                {
+                    msg_t nmsg;
+                    nmsg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_CHECK_QUEUE;
+                    msg_send_to_self(&nmsg);
+                }
+                break;
+            case GNRC_NETAPI_MSG_TYPE_SND:
+                DEBUG("gnrc_netdev: GNRC_NETAPI_MSG_TYPE_SND received\n");
+                /* Queue it no matter what. */
+                msg_queue_add(pkt_queue, &msg);
+                if (dutycycle_state == DUTY_INIT) {
+                    msg_queue_send(pkt_queue, gnrc_dutymac_netdev);
+                    DEBUG("gnrc_netdev: SENDING IMMEDIATELY\n");
+                } else {
+                    if (/*_xtimer_usec_from_ticks(timer.target - xtimer_now().ticks32) < 50000 ||*/
+                        pending_num > 1 || radio_busy) {
+                        DEBUG("gnrc_netdev: QUEUEING %lu\n", _xtimer_usec_from_ticks(timer.target - xtimer_now().ticks32));
+                    } else if (!radio_busy && !irq_pending && !is_receiving(dev) && dutycycle_state == DUTY_SLEEP) {
+                        /* Send a packet now */
+                        dutycycle_state = DUTY_TX_DATA;
+                        msg_queue_send(pkt_queue, gnrc_dutymac_netdev);
+                        DEBUG("gnrc_netdev: SENDING IMMEDIATELY %lu\n", _xtimer_usec_from_ticks(timer.target - xtimer_now().ticks32));
+                    }
+                }
+                break;
+            case GNRC_NETAPI_MSG_TYPE_SET:
+                /* read incoming options */
+                opt = msg.content.ptr;
+                DEBUG("gnrc_netdev: GNRC_NETAPI_MSG_TYPE_SET received. opt=%s\n",
+                      netopt2str(opt->opt));
+                if (opt->opt == NETOPT_DUTYCYCLE) {
+                    dutycycling = *(netopt_enable_t*) opt->data;
+                    //printf("remove timer 9\n");
+                    xtimer_remove(&timer);
+                    if (dutycycling == NETOPT_ENABLE) {
+                        /* Dutycycle start triggered by application layer */
+                        dutycycle_state = DUTY_SLEEP;
+                        sleepstate = NETOPT_STATE_SLEEP;
+                        xtimer_set(&timer, random_uint32_range(0, DUTYCYCLE_SLEEP_INTERVAL_MAX));
+                        DEBUG("gnrc_netdev: INIT DUTYCYCLE\n");
+                    } else {
+                        /* Dutycycle end triggered by application layer */
+                        dutycycle_state = DUTY_INIT;
+                        sleepstate = NETOPT_STATE_SLEEP;
+                    }
+                    /* We use short addresses for duty-cycling */
+                    dev->driver->set(dev, NETOPT_SRC_LEN, &src_len, sizeof(src_len));
+                    opt->opt = NETOPT_STATE;
+                    opt->data = &sleepstate;
+                }
+                /* set option for device driver */
+                res = dev->driver->set(dev, opt->opt, opt->data, opt->data_len);
+                DEBUG("gnrc_netdev: response of netdev->set: %i\n", res);
+                /* send reply to calling thread */
+                reply.type = GNRC_NETAPI_MSG_TYPE_ACK;
+                reply.content.value = (uint32_t) res;
+                msg_reply(&msg, &reply);
+                break;
+            case GNRC_NETAPI_MSG_TYPE_GET:
+                /* read incoming options */
+                opt = msg.content.ptr;
+                DEBUG("gnrc_netdev: GNRC_NETAPI_MSG_TYPE_GET received. opt=%s\n",
+                      netopt2str(opt->opt));
+                /* get option from device driver */
+                res = dev->driver->get(dev, opt->opt, opt->data, opt->data_len);
+                DEBUG("gnrc_netdev: response of netdev->get: %i\n", res);
+                /* send reply to calling thread */
+                reply.type = GNRC_NETAPI_MSG_TYPE_ACK;
+                reply.content.value = (uint32_t) res;
+                msg_reply(&msg, &reply);
+                break;
+            case GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_LINK_RETRANSMIT:
+                if (!irq_pending && !is_receiving(dev)) {
+                    if (sending_beacon) {
+                        res = gnrc_dutymac_netdev->send_beacon(gnrc_dutymac_netdev);
+                    } else {
+                        if (retry_rexmit) {
+                            res = gnrc_dutymac_netdev->resend_without_release(gnrc_dutymac_netdev, msg.content.ptr, false);
+                        } else {
+                            res = gnrc_dutymac_netdev->send_without_release(gnrc_dutymac_netdev, msg.content.ptr, false);
+                        }
+                    }
+                    if (res < 0) {
+                        _event_cb(dev, NETDEV_EVENT_TX_MEDIUM_BUSY);
+                    }
+                } else {
+                    msg_t nmsg;
+                    nmsg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_LINK_RETRANSMIT;
+                    nmsg.content.ptr = msg.content.ptr;
+                    msg_send_to_self(&nmsg);
+                }
+                break;
+            default:
+                DEBUG("gnrc_netdev: Unknown command %" PRIu16 "\n", msg.type);
+                break;
+        }
+    }
+    /* never reached */
+    return NULL;
+}
+
+kernel_pid_t gnrc_netdev_dutymac_init(char *stack, int stacksize, char priority,
+                                      const char *name, gnrc_netdev_t *gnrc_netdev)
+{
+    kernel_pid_t res;
+
+    retry_init();
+    csma_init();
+
+    /* check if given netdev device is defined and the driver is set */
+    if (gnrc_netdev == NULL || gnrc_netdev->dev == NULL) {
+        return -ENODEV;
+    }
+
+    /* create new gnrc_netdev thread */
+    res = thread_create(stack, stacksize, priority, THREAD_CREATE_STACKTEST,
+                        _gnrc_netdev_duty_thread, (void *) gnrc_netdev, name);
+
+    if (res <= 0) {
+        return -EINVAL;
+    }
+
+    return res;
+}
+#endif
+#endif
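Before moving on to the router side, it may help to summarize the leaf node's duty-cycle behavior. The transition table below is a reading aid distilled from the code above (the wake-up arm of `dutycycle_cb` sits in a damaged region of this patch and is partly reconstructed), not an authoritative specification:

```c
/* Reading aid: principal transitions of dutycycle_state in the leaf MAC.
 *
 *   trigger                               from                          to
 *   sleep timer fires                     DUTY_SLEEP                 -> DUTY_TX_BEACON
 *   beacon ACKed with frame pending      DUTY_TX_BEACON             -> DUTY_LISTEN
 *   beacon ACKed, nothing pending        DUTY_TX_BEACON             -> DUTY_SLEEP  (interval doubles)
 *   rx done, tx queue non-empty          any                        -> DUTY_TX_DATA
 *   rx done, tx queue empty              any                        -> DUTY_SLEEP
 *   wakeup timer fires, queue non-empty  DUTY_LISTEN                -> DUTY_TX_DATA
 *   wakeup timer fires, queue empty      DUTY_LISTEN                -> DUTY_SLEEP
 *   sleep timer fires during tx          DUTY_TX_DATA               -> DUTY_TX_DATA_BEFORE_BEACON
 *   queue drained                        DUTY_TX_DATA               -> DUTY_SLEEP  (interval reset)
 *   queue drained                        DUTY_TX_DATA_BEFORE_BEACON -> DUTY_TX_BEACON
 */
```

The essential invariant is that the radio is powered only in `DUTY_TX_*` and `DUTY_LISTEN`, and every path eventually falls back to `DUTY_SLEEP`, which is what bounds the duty cycle.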
diff --git a/sys/net/gnrc/link_layer/dutymac/gnrc_netdev2_duty_router.c b/sys/net/gnrc/link_layer/dutymac/gnrc_netdev2_duty_router.c
new file mode 100644
index 000000000000..cd4b288b9d18
--- /dev/null
+++ b/sys/net/gnrc/link_layer/dutymac/gnrc_netdev2_duty_router.c
@@ -0,0 +1,640 @@
+/*
+ * Copyright (C) 2015 Freie Universität Berlin
+ *               2015 Kaspar Schleiser
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @{
+ * @ingroup     net
+ * @file
+ * @brief   Glue for netdev devices to netapi (duty-cycling protocol for routers)
+ *
+ * @author  Hyung-Sin Kim
+ * @}
+ */
+
+#include <errno.h>
+
+#include "msg.h"
+#include "thread.h"
+
+#include "net/gnrc.h"
+#include "net/gnrc/nettype.h"
+#include "net/netdev.h"
+
+#include "net/gnrc/netdev.h"
+#include "net/ethernet/hdr.h"
+#include "random.h"
+#include "net/ieee802154.h"
+#include "xtimer.h"
+
+#include "send.h"
+
+#if DUTYCYCLE_EN
+#if ROUTER
+
+#define ENABLE_DEBUG (0)
+#include "debug.h"
+
+#if defined(MODULE_OD) && ENABLE_DEBUG
+#include "od.h"
+#endif
+
+#define ENABLE_BROADCAST_QUEUEING 0
+
+#define NETDEV_NETAPI_MSG_QUEUE_SIZE 8
+#define NETDEV_PKT_QUEUE_SIZE 64
+
+#define NEIGHBOR_TABLE_SIZE 10
+typedef struct {
+    uint16_t addr;
+    uint16_t dutycycle;
+    int8_t rssi;
+    uint8_t lqi;
+    uint8_t etx;
+} link_neighbor_table_t;
+link_neighbor_table_t neighbor_table[NEIGHBOR_TABLE_SIZE];
+uint8_t neighbor_num = 0;
+
+static void _pass_on_packet(gnrc_pktsnip_t *pkt);
+
+/** 1) For a leaf node, 'timer' is used for wake-up scheduling
+ *  2) For a router, 'timer' is used for broadcasting;
+ *     a router does not discard a broadcast packet during a sleep interval
+ */
+xtimer_t timer;
+bool broadcasting = false;
+uint8_t pending_num = 0;
+uint8_t broadcasting_num = 0;
+uint8_t sending_pkt_key = 0xFF;
+/** [This is for bursty transmission.]
+ *  After a router sends a packet, if it has another packet to send to the same destination
+ *  (=recent_dst_l2addr), it does not have to wait for another sleep interval but sends immediately.
+ *  To this end, a leaf node wakes up for a while after transmitting or receiving a packet.
+ */
+uint16_t recent_dst_l2addr = 0;
+
+/* A packet can be sent only when radio_busy = 0 */
+bool radio_busy = false;
+
+/* This is the packet being sent by the radio now */
+
+/* Rx data request command from a leaf node: I can send data to the leaf node */
+bool rx_data_request = false;
+
+kernel_pid_t dutymac_netdev_pid;
+
+/* TODO: this should take a MAC address and return whether that is a duty-cycled
+ * node sending beacons to this router. For now, just hardcode to true or false.
+ */ +static bool addr_is_dutycycled(uint16_t addr) { + (void) addr; + return false; +} + +bool retry_rexmit = false; +void send_packet(gnrc_pktsnip_t* pkt, gnrc_netdev_t* gnrc_dutymac_netdev, bool retransmission) { + retry_rexmit = retransmission; + msg_t msg; + msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_LINK_RETRANSMIT; + msg.content.ptr = pkt; + if (msg_send(&msg, dutymac_netdev_pid) <= 0) { + assert(false); + } +} + +void send_packet_csma(gnrc_pktsnip_t* pkt, gnrc_netdev_t* gnrc_dutymac_netdev, bool retransmission) { + send_with_csma(pkt, send_packet, gnrc_dutymac_netdev, retransmission, false); +} + +// Exhaustive search version +int msg_queue_add(msg_t* msg_queue, msg_t* msg, gnrc_netdev_t* gnrc_dutymac_netdev) { + if (pending_num < NETDEV_PKT_QUEUE_SIZE) { + gnrc_pktsnip_t *pkt = msg->content.ptr; + gnrc_netif_hdr_t* hdr = pkt->data; + + // 1) Broadcasting packet (Insert head of the queue) + if (hdr->flags & (GNRC_NETIF_HDR_FLAGS_BROADCAST | GNRC_NETIF_HDR_FLAGS_MULTICAST)) { +#if ENABLE_BROADCAST_QUEUEING + (void) gnrc_dutymac_netdev; + if (broadcasting_num < pending_num) { + for (int i=pending_num-1; i>= broadcasting_num; i--) { + msg_queue[i+1].sender_pid = msg_queue[i].sender_pid; + msg_queue[i+1].type = msg_queue[i].type; + msg_queue[i+1].content.ptr = msg_queue[i].content.ptr; + } + } + msg_queue[broadcasting_num].sender_pid = msg->sender_pid; + msg_queue[broadcasting_num].type = msg->type; + msg_queue[broadcasting_num].content.ptr = msg->content.ptr; + + /** When it is the first and broadcasting packet and the nodes is a router, + * MAC maintains the packet for a sleep interval to send it to all neighbors + */ + if (broadcasting_num == 0) { + xtimer_set(&timer, DUTYCYCLE_SLEEP_INTERVAL+100); + broadcasting = true; + sending_pkt_key = 0; + printf("broadcast starts\n"); + } + broadcasting_num++; +#else + /* Send it right away. 
+ */
+        if (!radio_busy) {
+            radio_busy = true;
+            msg_queue[pending_num].sender_pid = msg->sender_pid;
+            msg_queue[pending_num].type = msg->type;
+            msg_queue[pending_num].content.ptr = msg->content.ptr;
+            sending_pkt_key = pending_num;
+            pending_num++;
+            send_with_retries(pkt, 0, send_packet_csma, gnrc_dutymac_netdev, false);
+            return 1;
+        }
+        return 0;
+#endif
+        }
+        // 2) Unicast packet
+        else {
+            /* Add a packet to the last entry of the queue */
+            msg_queue[pending_num].sender_pid = msg->sender_pid;
+            msg_queue[pending_num].type = msg->type;
+            msg_queue[pending_num].content.ptr = msg->content.ptr;
+            DEBUG("\nqueue add success [%u/%u/%4x]\n", pending_num, msg_queue[pending_num].sender_pid,
+                  msg_queue[pending_num].type);
+        }
+        pending_num++; /* Number of packets in the queue */
+        return 1;
+    } else {
+        DEBUG("Queue loss at netdev\n");
+        return 0;
+    }
+}
+
+void msg_queue_remove(msg_t* msg_queue) {
+    /* Remove a sent packet from the MAC queue */
+    if (sending_pkt_key == 0xFF)
+        return;
+
+    DEBUG("NETDEV: Remove queue [%u, %u/%u]\n", sending_pkt_key, broadcasting_num, pending_num-1);
+
+    gnrc_pktbuf_release(msg_queue[sending_pkt_key].content.ptr);
+    pending_num--;
+    if (pending_num < 0) {
+        DEBUG("NETDEV: Pending number error\n");
+    }
+
+    /* Update queue when more pending packets exist */
+    if (pending_num) {
+        for (int i = sending_pkt_key; i < pending_num; i++) {
+            msg_queue[i].sender_pid = msg_queue[i+1].sender_pid;
+            msg_queue[i].type = msg_queue[i+1].type;
+            msg_queue[i].content.ptr = msg_queue[i+1].content.ptr;
+        }
+        /* If a broadcast packet is now at the head of the queue, start broadcasting */
+        if (broadcasting_num > 0) {
+            xtimer_set(&timer, DUTYCYCLE_SLEEP_INTERVAL+100);
+            broadcasting = true;
+            sending_pkt_key = 0;
+            printf("broadcast starts\n");
+            return;
+        }
+    }
+    sending_pkt_key = 0xFF;
+    return;
+}
+
+/* If to_dutycycled_dest is true, then we know that a dutycycled node is listening and
+ * are trying to find packets destined for that node.
+ * If to_dutycycled_dest is false, then we are looking for packets destined for a
+ * neighboring always-on node.
+ */
+void msg_queue_send(msg_t* msg_queue, bool to_dutycycled_dest, uint16_t dst_l2addr, gnrc_netdev_t* gnrc_dutymac_netdev) {
+    gnrc_pktsnip_t *pkt = NULL;
+
+    if (broadcasting) { // broadcasting
+        pkt = msg_queue[0].content.ptr;
+        sending_pkt_key = 0;
+        recent_dst_l2addr = 0xFFFF;
+
+    } else { // unicasting
+        gnrc_pktsnip_t *temp_pkt;
+        gnrc_netif_hdr_t *temp_hdr;
+        uint16_t pkt_dst_l2addr;
+        uint8_t* dst;
+        for (int i = 0; i < pending_num; i++) {
+            temp_pkt = msg_queue[i].content.ptr;
+            temp_hdr = temp_pkt->data;
+            dst = gnrc_netif_hdr_get_dst_addr(temp_hdr);
+            if (temp_hdr->dst_l2addr_len == IEEE802154_SHORT_ADDRESS_LEN) {
+                pkt_dst_l2addr = (((uint16_t) dst[1]) << 8) | (uint16_t) dst[0];
+            } else {
+                pkt_dst_l2addr = (((uint16_t) dst[7]) << 8) | (uint16_t) dst[6];
+            }
+
+            if ((to_dutycycled_dest && pkt_dst_l2addr == dst_l2addr) || (!to_dutycycled_dest && !addr_is_dutycycled(pkt_dst_l2addr))) {
+                pkt = msg_queue[i].content.ptr;
+                recent_dst_l2addr = pkt_dst_l2addr;
+                sending_pkt_key = i;
+                break;
+            }
+        }
+    }
+
+    assert(!radio_busy);
+
+    if (pkt != NULL && sending_pkt_key != 0xFF) {
+        //printf("sending %u to %4x (%u/%u)\n", sending_pkt_key, recent_dst_l2addr, broadcasting_num, pending_num);
+
+        radio_busy = true; /* radio is now busy */
+        //send_packet(pkt, gnrc_dutymac_netdev);
+        //send_with_retries(pkt, send_packet, gnrc_dutymac_netdev, false);
+        send_with_retries(pkt, -1, send_packet_csma, gnrc_dutymac_netdev, false);
+    }
+}
+
+/**
+ * @brief   Function called by the broadcast timer
+ *
+ * @param[in] arg   the gnrc_netdev device
+ */
+void broadcast_cb(void* arg) {
+    gnrc_netdev_t* gnrc_dutymac_netdev = (gnrc_netdev_t*) arg;
+    msg_t msg;
+    /* Broadcast msg maintenance for routers */
+    broadcasting = false;
+    broadcasting_num--;
+    printf("broadcast ends\n");
+    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_REMOVE_QUEUE;
+    msg_send(&msg, gnrc_dutymac_netdev->pid);
+}
+
+void neighbor_table_update(uint16_t l2addr, gnrc_netif_hdr_t *hdr) {
+    uint8_t key = 0xFF;
+    for (int8_t i = 0; i < neighbor_num; i++) {
+        if (neighbor_table[i].addr == l2addr) {
+            key = i;
+            break;
+        }
+    }
+    if (key == 0xFF) { /* new neighbor */
+        neighbor_table[neighbor_num].addr = l2addr;
+        neighbor_table[neighbor_num].rssi = -94 + 3*hdr->rssi; /* when using AT86RF233 transceiver */
+        neighbor_table[neighbor_num].lqi = hdr->lqi;
+        neighbor_table[neighbor_num].dutycycle = 1;
+        neighbor_num++;
+    } else {
+        neighbor_table[key].rssi = (8*neighbor_table[key].rssi + 2*(-94+3*hdr->rssi))/10; /* when using AT86RF233 transceiver */
+        neighbor_table[key].lqi = (8*neighbor_table[key].lqi + 2*hdr->lqi)/10;
+    }
+    //printf("neighbor: addr %4x, rssi %d, lqi %u\n", neighbor_table[key].addr, neighbor_table[key].rssi, neighbor_table[key].lqi);
+}
+
+static bool is_receiving(netdev_t* dev) {
+    netopt_state_t state;
+    int rv = dev->driver->get(dev, NETOPT_STATE, &state, sizeof(state));
+    if (rv != sizeof(state)) {
+        assert(false);
+    }
+    return state == NETOPT_STATE_RX;
+}
+
+uint16_t global_src_l2addr;
+bool irq_pending = false;
+/**
+ * @brief   Function called by the device driver on device events
+ *
+ * @param[in] event     type of event
+ */
+static void _event_cb(netdev_t *dev, netdev_event_t event)
+{
+    gnrc_netdev_t* gnrc_dutymac_netdev = (gnrc_netdev_t*) dev->context;
+    if (event == NETDEV_EVENT_ISR) {
+        irq_pending = true;
+        msg_t msg;
+        msg.type = NETDEV_MSG_TYPE_EVENT;
+        msg.content.ptr = gnrc_dutymac_netdev;
+        if (msg_send(&msg, gnrc_dutymac_netdev->pid) <= 0) {
+            puts("gnrc_netdev: possibly lost interrupt.");
+        }
+    }
+    else if (event == NETDEV_EVENT_RX_DATAREQ) {
+        rx_data_request = true;
+    }
+    else {
+        DEBUG("gnrc_netdev: event triggered -> %i\n", event);
+        bool will_retry;
+        switch (event) {
+            case NETDEV_EVENT_RX_COMPLETE:
+            {
+                gnrc_pktsnip_t *pkt = gnrc_dutymac_netdev->recv(gnrc_dutymac_netdev);
+
+                /* Extract src addr and update neighbor table */
+                gnrc_pktsnip_t *temp_pkt = pkt;
+                while (temp_pkt->next) { temp_pkt = temp_pkt->next; }
+                gnrc_netif_hdr_t *hdr = temp_pkt->data;
+                uint8_t* src_addr = gnrc_netif_hdr_get_src_addr(hdr);
+                uint16_t src_l2addr = 0;
+                if (hdr->src_l2addr_len == IEEE802154_SHORT_ADDRESS_LEN) {
+                    src_l2addr = (((uint16_t) src_addr[1]) << 8) | (uint16_t) src_addr[0];
+                } else {
+                    src_l2addr = (((uint16_t) src_addr[7]) << 8) | (uint16_t) src_addr[6];
+                }
+                neighbor_table_update(src_l2addr, hdr);
+
+                global_src_l2addr = src_l2addr;
+
+                /* Send packets when receiving a data req from a leaf node */
+                if (rx_data_request && pending_num) {
+                    msg_t msg;
+                    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_SND;
+                    msg.content.ptr = &global_src_l2addr;
+                    msg_send(&msg, gnrc_dutymac_netdev->pid);
+                }
+                rx_data_request = false;
+
+                if (pkt) {
+                    _pass_on_packet(pkt);
+                }
+                break;
+            }
+            case NETDEV_EVENT_TX_COMPLETE:
+#ifdef MODULE_NETSTATS_L2
+                dev->stats.tx_success++;
+#endif
+                csma_send_succeeded();
+                retry_send_succeeded();
+                radio_busy = false; /* radio is free now */
+                /* Remove only unicast packets; broadcast packets are removed when the timer expires */
+                if (broadcasting) {
+                    recent_dst_l2addr = 0xffff;
+                } else {
+                    msg_t msg;
+                    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_REMOVE_QUEUE;
+                    msg_send(&msg, gnrc_dutymac_netdev->pid);
+                }
+                break;
+            case NETDEV_EVENT_TX_MEDIUM_BUSY:
+#ifdef MODULE_NETSTATS_L2
+                dev->stats.tx_failed++;
+#endif
+                will_retry = csma_send_failed();
+                if (will_retry) {
+                    break;
+                }
+                /* Fallthrough intentional */
+            case NETDEV_EVENT_TX_NOACK:
+                if (event == NETDEV_EVENT_TX_NOACK) {
+                    /* CSMA succeeded... */
+                    csma_send_succeeded();
+                }
+                /* ... but the retry failed. */
+                will_retry = retry_send_failed();
+                if (will_retry) {
+                    break;
+                }
+
+                radio_busy = false; /* radio is free now */
+                /* Remove only unicast packets; broadcast packets are removed when the timer expires */
+                if (broadcasting) {
+                    recent_dst_l2addr = 0xffff;
+                } else {
+                    msg_t msg;
+                    msg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_REMOVE_QUEUE;
+                    msg_send(&msg, gnrc_dutymac_netdev->pid);
+                }
+                break;
+            default:
+                printf("gnrc_netdev: warning: unhandled event %u.\n", event);
+        }
+    }
+}
+
+static void _pass_on_packet(gnrc_pktsnip_t *pkt)
+{
+    /* throw away packet if no one is interested */
+    if (!gnrc_netapi_dispatch_receive(pkt->type, GNRC_NETREG_DEMUX_CTX_ALL, pkt)) {
+        DEBUG("gnrc_netdev: unable to forward packet of type %i\n", pkt->type);
+        gnrc_pktbuf_release(pkt);
+        return;
+    }
+}
+
+msg_t pkt_queue[NETDEV_PKT_QUEUE_SIZE];
+
+/**
+ * @brief   Startup code and event loop of the gnrc_netdev layer
+ *
+ * @param[in] args  expects a pointer to the underlying netdev device
+ *
+ * @return          never returns
+ */
+static void *_gnrc_netdev_duty_thread(void *args)
+{
+    DEBUG("gnrc_netdev: starting thread\n");
+
+    gnrc_netdev_t* gnrc_dutymac_netdev = (gnrc_netdev_t*) args;
+    netdev_t *dev = gnrc_dutymac_netdev->dev;
+    gnrc_dutymac_netdev->pid = thread_getpid();
+    dutymac_netdev_pid = gnrc_dutymac_netdev->pid;
+
+    timer.callback = broadcast_cb;
+    timer.arg = (void*) gnrc_dutymac_netdev;
+
+    gnrc_netapi_opt_t *opt;
+    int res;
+
+    /* setup the MAC layers message queue (general purpose) */
+    msg_t msg, reply, msg_queue[NETDEV_NETAPI_MSG_QUEUE_SIZE];
+    msg_init_queue(msg_queue, NETDEV_NETAPI_MSG_QUEUE_SIZE);
+
+    /* setup the MAC layers packet queue (only for packet transmission) */
+    for (int i = 0; i < NETDEV_PKT_QUEUE_SIZE; i++) {
+        pkt_queue[i].content.ptr = NULL;
+    }
+
+    /* register the event callback with the device */
+    dev->event_callback = _event_cb;
+    dev->context = (void*) gnrc_dutymac_netdev;
+
+    /* register the device to the network stack*/
+    gnrc_netif_add(thread_getpid());
+
+    /* initialize low-level driver (listening mode) */
+    dev->driver->init(dev);
+    netopt_state_t sleepstate = NETOPT_STATE_IDLE;
+    dev->driver->set(dev, NETOPT_STATE, &sleepstate, sizeof(netopt_state_t));
+
+    {
+        bool pending = false;
+        dev->driver->set(dev, NETOPT_ACK_PENDING, &pending, sizeof(bool));
+    }
+
+    /* start the event loop */
+    while (1) {
+        DEBUG("gnrc_netdev: waiting for incoming messages\n");
+        msg_receive(&msg);
+
+        /* dispatch NETDEV and NETAPI messages */
+        switch (msg.type) {
+            case GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_SND:
+                /* Send a packet in the packet queue if its destination matches the input address */
+                if (pending_num && !radio_busy) {
+                    msg_queue_send(pkt_queue, true, *((uint16_t*)msg.content.ptr), gnrc_dutymac_netdev);
+                }
+                break;
+            case GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_REMOVE_QUEUE:
+                /* Remove a packet from the packet queue */
+                msg_queue_remove(pkt_queue);
+                /* Send a packet in the packet queue */
+                if (pending_num && !radio_busy && recent_dst_l2addr != 0xffff && !irq_pending && !is_receiving(dev)) {
+                    /* Send a packet to the same destination */
+                    msg_queue_send(pkt_queue, true, recent_dst_l2addr, gnrc_dutymac_netdev);
+                    if (!radio_busy && !irq_pending && !is_receiving(dev)) {
+                        /* If there are no packets with the same destination, check for packets destined for always-on nodes. */
+                        msg_queue_send(pkt_queue, false, 0, gnrc_dutymac_netdev);
+                    }
+                } else if (!pending_num) {
+                    bool pending = false;
+                    dev->driver->set(dev, NETOPT_ACK_PENDING, &pending, sizeof(bool));
+                }
+                break;
+            case GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_CHECK_QUEUE:
+                if (!radio_busy && !irq_pending && !is_receiving(dev)) {
+                    msg_queue_send(pkt_queue, false, 0, gnrc_dutymac_netdev);
+                }
+                break;
+            case NETDEV_MSG_TYPE_EVENT:
+                DEBUG("gnrc_netdev: GNRC_NETDEV_MSG_TYPE_EVENT received\n");
+                irq_pending = false;
+                dev->driver->isr(dev);
+                {
+                    msg_t nmsg;
+                    nmsg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_CHECK_QUEUE;
+                    nmsg.content.ptr = NULL;
+                    msg_send_to_self(&nmsg);
+                }
+                break;
+            case GNRC_NETAPI_MSG_TYPE_SND:
+                DEBUG("gnrc_netdev: GNRC_NETAPI_MSG_TYPE_SND received\n");
+                /* ToDo: We need to distinguish sending operation according to the destination
+                   characteristics: duty-cycling or always-on */
+                /* Queue a packet */
+                if (msg_queue_add(pkt_queue, &msg, gnrc_dutymac_netdev)) {
+                    /* If a packet exists, send ACKs with the pending bit set */
+                    bool pending = true;
+                    dev->driver->set(dev, NETOPT_ACK_PENDING, &pending, sizeof(bool));
+
+                    if (!radio_busy && !irq_pending && !is_receiving(dev)) {
+                        /* If we added something to the queue, check for packets destined for always-on nodes.
+                         * If the radio is busy now, it's OK. We will do this same check whenever the radio
+                         * goes from busy to not busy.
+                         */
+                        msg_queue_send(pkt_queue, false, 0, gnrc_dutymac_netdev);
+                    }
+                } else {
+                    gnrc_pktbuf_release(msg.content.ptr);
+                }
+                break;
+            case GNRC_NETAPI_MSG_TYPE_SET:
+                /* read incoming options */
+                opt = msg.content.ptr;
+                DEBUG("gnrc_netdev: GNRC_NETAPI_MSG_TYPE_SET received. opt=%s\n",
+                      netopt2str(opt->opt));
+                /* set option for device driver */
+                res = dev->driver->set(dev, opt->opt, opt->data, opt->data_len);
+                DEBUG("gnrc_netdev: response of netdev->set: %i\n", res);
+                /* send reply to calling thread */
+                reply.type = GNRC_NETAPI_MSG_TYPE_ACK;
+                reply.content.value = (uint32_t)res;
+                msg_reply(&msg, &reply);
+                break;
+            case GNRC_NETAPI_MSG_TYPE_GET:
+                /* read incoming options */
+                opt = msg.content.ptr;
+                DEBUG("gnrc_netdev: GNRC_NETAPI_MSG_TYPE_GET received. opt=%s\n",
+                      netopt2str(opt->opt));
+                /* get option from device driver */
+                res = dev->driver->get(dev, opt->opt, opt->data, opt->data_len);
+                DEBUG("gnrc_netdev: response of netdev->get: %i\n", res);
+                /* send reply to calling thread */
+                reply.type = GNRC_NETAPI_MSG_TYPE_ACK;
+                reply.content.value = (uint32_t)res;
+                msg_reply(&msg, &reply);
+                break;
+            case GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_LINK_RETRANSMIT:
+                if (!irq_pending && !is_receiving(dev)) {
+                    if (retry_rexmit) {
+                        res = gnrc_dutymac_netdev->resend_without_release(gnrc_dutymac_netdev, msg.content.ptr, pending_num > 1);
+                    } else {
+                        res = gnrc_dutymac_netdev->send_without_release(gnrc_dutymac_netdev, msg.content.ptr, pending_num > 1);
+                    }
+                    if (res < 0) {
+                        _event_cb(dev, NETDEV_EVENT_TX_MEDIUM_BUSY);
+                    }
+                } else {
+                    msg_t nmsg;
+                    nmsg.type = GNRC_NETDEV_DUTYCYCLE_MSG_TYPE_LINK_RETRANSMIT;
+                    nmsg.content.ptr = msg.content.ptr;
+                    msg_send_to_self(&nmsg);
+                }
+                break;
+            default:
+                DEBUG("gnrc_netdev: Unknown command %" PRIu16 "\n", msg.type);
+                break;
+        }
+    }
+    /* never reached */
+    return NULL;
+}
+
+kernel_pid_t gnrc_netdev_dutymac_init(char *stack, int stacksize, char priority,
+                                      const char *name, gnrc_netdev_t *gnrc_netdev)
+{
+    kernel_pid_t res;
+
+    retry_init();
+    csma_init();
+
+    /* check if given netdev device is defined and the driver is set */
+    if (gnrc_netdev == NULL || gnrc_netdev->dev == NULL) {
+        return -ENODEV;
+    }
+
+    /* create new gnrc_netdev thread */
+    res = thread_create(stack, stacksize, priority, THREAD_CREATE_STACKTEST,
+                        _gnrc_netdev_duty_thread, (void *)gnrc_netdev, name);
+
+    if (res <= 0) {
+        return -EINVAL;
+    }
+
+    return res;
+}
+#endif
+#endif
diff --git a/sys/net/gnrc/link_layer/dutymac/send.h b/sys/net/gnrc/link_layer/dutymac/send.h
new file mode 100644
index 000000000000..f4381c0f52e7
--- /dev/null
+++ b/sys/net/gnrc/link_layer/dutymac/send.h
@@ -0,0 +1,14 @@
+#include "net/gnrc.h"
+#include "net/gnrc/netdev2.h"
+
+void retry_init(void);
+/* Always returns 0. */
+int send_with_retries(gnrc_pktsnip_t* pkt, int num_retries, void (*send_packet_fn)(gnrc_pktsnip_t*, gnrc_netdev2_t*, bool), gnrc_netdev2_t* gnrc_dutymac_netdev2, bool rexmit);
+void retry_send_succeeded(void);
+bool retry_send_failed(void);
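
Both halves of this header follow the same contract: the caller starts exactly one send, then feeds the driver's TX events back into the module. A condensed sketch of that protocol, mirroring the _event_cb() handler in gnrc_netdev2_duty_router.c above (the wrapper function itself is illustrative, not part of this patch):

    static void on_tx_event(netdev_event_t event)
    {
        switch (event) {
            case NETDEV_EVENT_TX_COMPLETE:
                csma_send_succeeded();
                retry_send_succeeded();     /* slot is free for the next send */
                break;
            case NETDEV_EVENT_TX_MEDIUM_BUSY:
                if (csma_send_failed()) {   /* true: another backoff is scheduled */
                    break;
                }
                /* fall through: CSMA gave up, treat as a failed attempt */
            case NETDEV_EVENT_TX_NOACK:
                if (event == NETDEV_EVENT_TX_NOACK) {
                    csma_send_succeeded();  /* the frame did make it onto the air */
                }
                if (!retry_send_failed()) {
                    /* out of retries: drop or requeue the packet */
                }
                break;
            default:
                break;
        }
    }
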
+
+void csma_init(void);
+/* Always returns 0. */
+int send_with_csma(gnrc_pktsnip_t* pkt, void (*send_packet_fn)(gnrc_pktsnip_t*, gnrc_netdev2_t*, bool), gnrc_netdev2_t* gnrc_dutymac_netdev2, bool rexmit, bool skip_first_backoff);
+void csma_send_succeeded(void);
+bool csma_send_failed(void);
diff --git a/sys/net/gnrc/link_layer/dutymac/send_with_csma.c b/sys/net/gnrc/link_layer/dutymac/send_with_csma.c
new file mode 100644
index 000000000000..4c17f3ca3e20
--- /dev/null
+++ b/sys/net/gnrc/link_layer/dutymac/send_with_csma.c
@@ -0,0 +1,84 @@
+#include "net/gnrc.h"
+#include "net/gnrc/netdev2.h"
+#include "net/netdev2.h"
+#include "random.h"
+#include "xtimer.h"
+
+#define ENABLE_DEBUG (0)
+#include "debug.h"
+
+#define MIN_BE SOFTWARE_CSMA_MIN_BACKOFF_EXP
+#define MAX_BE SOFTWARE_CSMA_MAX_BACKOFF_EXP
+
+#if HARDWARE_CSMA_EN
+
+#define MAX_TRIES 1
+#define BACKOFF_PERIOD_MICROS 100
+
+#else
+
+#define MAX_TRIES SOFTWARE_CSMA_MAX_TRIES // Back off 5 times before giving up
+#define BACKOFF_PERIOD_MICROS SOFTWARE_CSMA_BACKOFF_MICROS
+
+#endif
+
+static xtimer_t backoff_timer;
+static uint8_t num_tries;
+static bool send_in_progress = false;
+static void (*send_packet)(gnrc_pktsnip_t*, gnrc_netdev2_t*, bool);
+static gnrc_netdev2_t* dev;
+static bool is_rexmit;
+
+static void try_send_packet(void* pkt) {
+    send_packet(pkt, dev, is_rexmit);
+    is_rexmit = false;
+}
+
+void backoff_and_send(void) {
+    uint8_t be = MIN_BE + num_tries;
+    if (be > MAX_BE) {
+        be = MAX_BE;
+    }
+    uint32_t max_possible_backoff = ((uint32_t) BACKOFF_PERIOD_MICROS) << be;
+    if (max_possible_backoff == 0) {
+        try_send_packet(backoff_timer.arg);
+    } else {
+        uint32_t micros_to_wait = random_uint32_range(0, max_possible_backoff);
+        xtimer_set(&backoff_timer, micros_to_wait);
+    }
+}
+
+int send_with_csma(gnrc_pktsnip_t* pkt, void (*send_packet_fn)(gnrc_pktsnip_t*, gnrc_netdev2_t*, bool), gnrc_netdev2_t* gnrc_dutymac_netdev2, bool rexmit, bool skip_first_backoff) {
+    assert(!send_in_progress);
+    backoff_timer.arg = pkt;
+    num_tries = 0;
+    send_packet = send_packet_fn;
+    dev = gnrc_dutymac_netdev2;
+    is_rexmit = rexmit;
+    send_in_progress = true;
+
+    if (skip_first_backoff) {
+        try_send_packet(pkt);
+    } else {
+        backoff_and_send();
+    }
+    return 0;
+}
+
+void csma_send_succeeded(void) {
+    send_in_progress = false;
+}
+
+bool csma_send_failed(void) {
+    num_tries++;
+    if (num_tries >= MAX_TRIES) {
+        send_in_progress = false;
+        return false;
+    }
+    backoff_and_send();
+    return true;
+}
+
+void csma_init(void) {
+    backoff_timer.callback = try_send_packet;
+}
diff --git a/sys/net/gnrc/link_layer/dutymac/send_with_retries.c b/sys/net/gnrc/link_layer/dutymac/send_with_retries.c
new file mode 100644
index 000000000000..eb3486370fa4
--- /dev/null
+++ b/sys/net/gnrc/link_layer/dutymac/send_with_retries.c
@@ -0,0 +1,61 @@
+#include "net/gnrc.h"
+#include "net/gnrc/netdev2.h"
+#include "net/netdev2.h"
+#include "xtimer.h"
+
+#define ENABLE_DEBUG (0)
+#include "debug.h"
+
+#define NUM_RETRIES SOFTWARE_MAX_FRAME_RETRIES
+#define DELAY_MICROS SOFTWARE_FRAME_RETRY_DELAY_MICROS
+
+static xtimer_t retry_timer;
+static uint8_t tries_left;
+static bool send_in_progress = false;
+static void (*send_packet)(gnrc_pktsnip_t*, gnrc_netdev2_t*, bool);
+static gnrc_netdev2_t* dev;
+
+static void try_send_packet(void* pkt) {
+    send_packet(pkt, dev, true);
+}
+
+void retry_init(void) {
+    retry_timer.callback = try_send_packet;
+}
+
+int send_with_retries(gnrc_pktsnip_t* pkt, int num_retries, void (*send_packet_fn)(gnrc_pktsnip_t*, gnrc_netdev2_t*, bool), gnrc_netdev2_t* gnrc_dutymac_netdev2, bool rexmit)
{ + assert(!send_in_progress); + DEBUG("[send_with_retries] Initiating send...\n"); + retry_timer.arg = pkt; + if (num_retries < 0) { + tries_left = NUM_RETRIES; + } else { + tries_left = num_retries; + } + send_packet = send_packet_fn; + dev = gnrc_dutymac_netdev2; + send_in_progress = true; + + send_packet(pkt, dev, rexmit); + return 0; +} + +/* Informs this module that the packet was sent successfully on this try. */ +void retry_send_succeeded(void) { + assert(send_in_progress); + DEBUG("[send_with_retries] Send successful!\n"); + send_in_progress = false; +} + +/* Informs this module that the packet was not sent successfully on this try. */ +bool retry_send_failed(void) { + assert(send_in_progress); + DEBUG("[send_with_retries] Send failed. %d attempts left...\n", tries_left); + if (tries_left == 0) { + send_in_progress = false; + return false; + } + tries_left--; + xtimer_set(&retry_timer, DELAY_MICROS); + return true; +} diff --git a/sys/net/gnrc/link_layer/netdev/gnrc_netdev_ieee802154.c b/sys/net/gnrc/link_layer/netdev/gnrc_netdev_ieee802154.c index 662150add120..148fb5cf5624 100644 --- a/sys/net/gnrc/link_layer/netdev/gnrc_netdev_ieee802154.c +++ b/sys/net/gnrc/link_layer/netdev/gnrc_netdev_ieee802154.c @@ -26,11 +26,17 @@ static gnrc_pktsnip_t *_recv(gnrc_netdev_t *gnrc_netdev); static int _send(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt); +static int _send_without_release(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt, bool set_pending_bit); +static int _resend_without_release(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt, bool set_pending_bit); +static int _send_beacon(gnrc_netdev_t *gnrc_netdev); int gnrc_netdev_ieee802154_init(gnrc_netdev_t *gnrc_netdev, netdev_ieee802154_t *dev) { gnrc_netdev->send = _send; + gnrc_netdev->send_without_release = _send_without_release; + gnrc_netdev->resend_without_release = _resend_without_release; + gnrc_netdev->send_beacon = _send_beacon; gnrc_netdev->recv = _recv; gnrc_netdev->dev = (netdev_t *)dev; @@ -138,7 +144,7 @@ static gnrc_pktsnip_t *_recv(gnrc_netdev_t *gnrc_netdev) return pkt; } -static int _send(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt) +static int _send_impl(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt, bool retransmission, bool release_pkt, bool set_pending_bit) { netdev_t *netdev = gnrc_netdev->dev; netdev_ieee802154_t *state = (netdev_ieee802154_t *)gnrc_netdev->dev; @@ -160,6 +166,9 @@ static int _send(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt) DEBUG("_send_ieee802154: first header is not generic netif header\n"); return -EBADMSG; } + if (set_pending_bit) { + flags |= IEEE802154_FCF_FRAME_PEND; + } netif_hdr = pkt->data; /* prepare destination address */ if (netif_hdr->flags & /* If any of these flags is set so this is correct */ @@ -183,10 +192,25 @@ static int _send(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt) src_len = IEEE802154_SHORT_ADDRESS_LEN; src = state->short_addr; } +#if DUTYCYCLE_EN + /* ToDo: Current version does not use a neighbor discovery protocol, which cannot support unicast. 
+       We can manually set a destination (router's address) here */
+#if LEAF_NODE
+    //int16_t ddd = 0x166d;
+    //dst = (uint8_t*)&ddd;
+#endif
+#if ROUTER
+    //int16_t ddd = 0x1e17;
+    //dst = (uint8_t*)&ddd;
+#endif
+#endif
+    if (!retransmission) {
+        state->seq++;
+    }
     /* fill MAC header, seq should be set by device */
     if ((res = ieee802154_set_frame_hdr(mhr, src, src_len,
                                         dst, dst_len, dev_pan,
-                                        dev_pan, flags, state->seq++)) == 0) {
+                                        dev_pan, flags, state->seq)) == 0) {
         DEBUG("_send_ieee802154: Error preperaring frame\n");
         return -EINVAL;
     }
@@ -213,9 +237,70 @@ static int _send(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt)
     else {
         return -ENOBUFS;
     }
+
+    /* If release_pkt is false, then only release the iovec, not the rest. */
+    if (!release_pkt) {
+        pkt->next = NULL;
+    }
     /* release old data */
     gnrc_pktbuf_release(pkt);
     return res;
 }
 
+static int _send(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt) {
+    return _send_impl(gnrc_netdev, pkt, false, true, false);
+}
+
+static int _send_without_release(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt, bool set_pending_bit) {
+    return _send_impl(gnrc_netdev, pkt, false, false, set_pending_bit);
+}
+
+static int _resend_without_release(gnrc_netdev_t *gnrc_netdev, gnrc_pktsnip_t *pkt, bool set_pending_bit) {
+    return _send_impl(gnrc_netdev, pkt, true, false, set_pending_bit);
+}
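
The set_pending_bit parameter is how the duty-cycling MAC tells a sleepy leaf that more unicast frames are queued for it: _send_impl() above ORs IEEE802154_FCF_FRAME_PEND into the FCF when it is set. A minimal sketch of how a caller chooses the flag, mirroring the pending_num > 1 logic in the duty-cycle event loop (variable names as in this patch):

    /* Keep the leaf's radio awake when at least one more frame waits
     * behind the one being (re)transmitted. */
    bool more_queued = (pending_num > 1);
    if (retry_rexmit) {
        res = gnrc_dutymac_netdev->resend_without_release(gnrc_dutymac_netdev, pkt, more_queued);
    } else {
        res = gnrc_dutymac_netdev->send_without_release(gnrc_dutymac_netdev, pkt, more_queued);
    }
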
+/* hskim: send Data Request MAC command for MAC operation */
+static int _send_beacon(gnrc_netdev_t *gnrc_netdev)
+{
+    netdev_t *netdev = gnrc_netdev->dev;
+    netdev_ieee802154_t *state = (netdev_ieee802154_t *)gnrc_netdev->dev;
+    struct iovec vector;
+    const uint8_t *src, *dst = NULL;
+    int res = 0;
+    size_t src_len, dst_len;
+    uint8_t mhr[IEEE802154_MAX_HDR_LEN+1];
+    uint8_t command_id = 4; /* Data Request command ID */
+    uint8_t flags = (uint8_t)(state->flags & NETDEV_IEEE802154_SEND_MASK);
+    le_uint16_t dev_pan = byteorder_btols(byteorder_htons(state->pan));
+
+    flags |= (IEEE802154_FCF_ACK_REQ | IEEE802154_FCF_TYPE_MACCMD);
+
+    src_len = IEEE802154_SHORT_ADDRESS_LEN;
+    src = state->short_addr;
+
+    /* ToDo: Current version does not use a neighbor discovery protocol, so unicast cannot
+       be supported. We manually set a destination (the router's address) here. */
+    dst_len = IEEE802154_SHORT_ADDRESS_LEN;
+    int16_t ddd = 0x7976;
+    dst = (uint8_t*)&ddd;
+
+    /* fill MAC header, seq should be set by device */
+    if ((res = ieee802154_set_frame_hdr(mhr, src, src_len,
+                                        dst, dst_len, dev_pan,
+                                        dev_pan, flags, state->seq++)) == 0) {
+        DEBUG("_send_ieee802154: Error preparing frame\n");
+        return -EINVAL;
+    }
+    mhr[res++] = command_id; /* MAC command ID: Data Request */
+
+    DEBUG("[Tx DataReq] %u/%2x%2x->%u/%2x%2x, flag %2x, seq %u\n", src_len, src[0],src[1], dst_len, dst[0],dst[1], flags, state->seq-1);
+
+    /* prepare packet for sending */
+    vector.iov_base = mhr;
+    vector.iov_len = (size_t)res;
+    res = netdev->driver->send(netdev, &vector, 1);
+
+    return res;
+}
+
 /** @} */
diff --git a/sys/net/gnrc/netreg/gnrc_netreg.c b/sys/net/gnrc/netreg/gnrc_netreg.c
index 8ec47e7d336f..a806f6778a96 100644
--- a/sys/net/gnrc/netreg/gnrc_netreg.c
+++ b/sys/net/gnrc/netreg/gnrc_netreg.c
@@ -25,6 +25,10 @@
 #include "net/gnrc/udp.h"
 #include "net/gnrc/tcp.h"
 
+#ifdef MODULE_GNRC_TCP_FREEBSD
+#include "net/gnrc/tcp_freebsd.h"
+#endif
+
 #define _INVALID_TYPE(type) (((type) < GNRC_NETTYPE_UNDEF) || ((type) >= GNRC_NETTYPE_NUMOF))
 
 /* The registry as lookup table by gnrc_nettype_t */
@@ -131,7 +135,7 @@ int gnrc_netreg_calc_csum(gnrc_pktsnip_t *hdr, gnrc_pktsnip_t *pseudo_hdr)
         case GNRC_NETTYPE_ICMPV6:
             return gnrc_icmpv6_calc_csum(hdr, pseudo_hdr);
 #endif
-#ifdef MODULE_GNRC_TCP
+#if defined(MODULE_GNRC_TCP) || defined(MODULE_GNRC_TCP_FREEBSD)
         case GNRC_NETTYPE_TCP:
             return gnrc_tcp_calc_csum(hdr, pseudo_hdr);
 #endif
diff --git a/sys/net/gnrc/network_layer/ipv6/autoconf_onehop/Makefile b/sys/net/gnrc/network_layer/ipv6/autoconf_onehop/Makefile
new file mode 100644
index 000000000000..73cbb8103e3f
--- /dev/null
+++ b/sys/net/gnrc/network_layer/ipv6/autoconf_onehop/Makefile
@@ -0,0 +1,3 @@
+MODULE = gnrc_ipv6_autoconf_onehop
+
+include $(RIOTBASE)/Makefile.base
diff --git a/sys/net/gnrc/network_layer/ipv6/autoconf_onehop/gnrc_ipv6_autoconf.c b/sys/net/gnrc/network_layer/ipv6/autoconf_onehop/gnrc_ipv6_autoconf.c
new file mode 100644
index 000000000000..937450db9bc4
--- /dev/null
+++ b/sys/net/gnrc/network_layer/ipv6/autoconf_onehop/gnrc_ipv6_autoconf.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2017 Sam Kumar
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @{
+ *
+ * @file
+ */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "net/gnrc/ipv6.h"
+#include "net/ipv6/addr.h"
+#include "net/gnrc/ipv6/autoconf_onehop.h"
+#include "net/gnrc/ipv6/netif.h"
+
+#define ENABLE_DEBUG (0)
+#include "debug.h"
+
+#if ENABLE_DEBUG
+/* For PRIu8 etc. */
+#include <inttypes.h>
+
+static char addr_str[IPV6_ADDR_MAX_STR_LEN];
+#endif
+
+static inline void _revert_iid(uint8_t* iid) {
+    iid[0] ^= 0x02;
+}
+
+kernel_pid_t get_6lowpan_pid(void) {
+    kernel_pid_t ifs[GNRC_NETIF_NUMOF];
+    size_t ifnum = gnrc_netif_get(ifs);
+    for (unsigned i = 0; i < ifnum; i++) {
+        gnrc_ipv6_netif_t *ipv6_if = gnrc_ipv6_netif_get(ifs[i]);
+        if ((ipv6_if != NULL) && (ipv6_if->flags & GNRC_IPV6_NETIF_FLAGS_SIXLOWPAN)) {
+            /* always take the first 6LoWPAN interface we can find */
+            return ipv6_if->pid;
+        }
+    }
+    return KERNEL_PID_UNDEF;
+}
+
+/* ipv6addr should already have the top 64 bits for the prefix set.
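+ * The conversion is the EUI-64-to-IID mapping of RFC 4291 Appendix A: the EUI-64
+ * is copied into bytes 8..15 and the universal/local bit (0x02 of the first IID
+ * byte) is inverted, which is what _revert_iid() above does. For example, an
+ * EUI-64 starting 00:12:4b:... yields an IID starting 02:12:4b:...; applying the
+ * same flip again recovers the link-layer address, so the mapping is its own
+ * inverse (used in the reverse direction by gnrc_ipv6_autoconf_ipv6_to_l2addr).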
*/ +int gnrc_ipv6_autoconf_l2addr_to_ipv6(ipv6_addr_t* ipv6addr, eui64_t* l2addr) { + memcpy(&ipv6addr->u8[8], l2addr, sizeof(eui64_t)); + _revert_iid(&ipv6addr->u8[8]); + return 0; +} + +void gnrc_ipv6_autoconf_ipv6_to_l2addr(eui64_t* l2addr, ipv6_addr_t* ipv6addr) { + memcpy(l2addr, &ipv6addr->u8[8], sizeof(eui64_t)); + _revert_iid((uint8_t*) l2addr); +} + +kernel_pid_t gnrc_ipv6_autoconf_next_hop_l2addr(uint8_t* l2addr, uint8_t* l2addr_len, kernel_pid_t iface, ipv6_addr_t *dst) { + static kernel_pid_t sixlowpan_pid = KERNEL_PID_UNDEF; + if (sixlowpan_pid == KERNEL_PID_UNDEF) { + sixlowpan_pid = get_6lowpan_pid(); + } + + + if (ipv6_addr_is_link_local(dst)) { + *l2addr_len = sizeof(eui64_t); + gnrc_ipv6_autoconf_ipv6_to_l2addr((eui64_t*) l2addr, dst); + return sixlowpan_pid; + } + +#ifdef I_AM_HAMILTON_BORDER_ROUTER + + ipv6_addr_t* longest_prefix_match; + kernel_pid_t matching_iface_pid = gnrc_ipv6_netif_find_by_prefix(&longest_prefix_match, dst); + if (matching_iface_pid == KERNEL_PID_UNDEF) { + return KERNEL_PID_UNDEF; + } + + /* I expect the interface to always be the 6LoWPAN interface... */ + + uint8_t prefix_length_bits = ipv6_addr_match_prefix(longest_prefix_match, dst); + if (prefix_length_bits < 64) { + return KERNEL_PID_UNDEF; + } + + *l2addr_len = sizeof(eui64_t); + gnrc_ipv6_autoconf_ipv6_to_l2addr((eui64_t*) l2addr, dst); + + return matching_iface_pid; + +#else + + static ipv6_addr_t border_router_ip; + static bool filled_border_router_ip = false; + + if (!filled_border_router_ip) { + ipv6_addr_t* rv = ipv6_addr_from_str(&border_router_ip, HAMILTON_BORDER_ROUTER_ADDRESS); + if (rv == NULL) { + printf("The HAMILTON_BORDER_ROUTER_ADDRESS is malformed! Check its definition (probably in the Makefile)?\n"); + } + assert(rv != NULL); + filled_border_router_ip = true; + } + + *l2addr_len = sizeof(eui64_t); + gnrc_ipv6_autoconf_ipv6_to_l2addr((eui64_t*) l2addr, &border_router_ip); + + return sixlowpan_pid; + +#endif +} + +/** @} */ diff --git a/sys/net/gnrc/network_layer/ipv6/gnrc_ipv6.c b/sys/net/gnrc/network_layer/ipv6/gnrc_ipv6.c index 68c1b12126b6..3f7570896f69 100644 --- a/sys/net/gnrc/network_layer/ipv6/gnrc_ipv6.c +++ b/sys/net/gnrc/network_layer/ipv6/gnrc_ipv6.c @@ -25,6 +25,7 @@ #include "net/gnrc/sixlowpan/ctx.h" #include "net/gnrc/sixlowpan/nd.h" #include "net/gnrc/sixlowpan/nd/router.h" +#include "net/gnrc/ipv6/autoconf_onehop.h" #include "net/protnum.h" #include "thread.h" #include "utlist.h" @@ -636,6 +637,12 @@ static inline kernel_pid_t _next_hop_l2addr(uint8_t *l2addr, uint8_t *l2addr_len return found_iface; } #endif +#if defined(MODULE_GNRC_IPV6_AUTOCONF_ONEHOP) + found_iface = gnrc_ipv6_autoconf_next_hop_l2addr(l2addr, l2addr_len, iface, dst); + if (found_iface > KERNEL_PID_UNDEF) { + return found_iface; + } +#endif #if defined(MODULE_GNRC_NDP_NODE) found_iface = gnrc_ndp_node_next_hop_l2addr(l2addr, l2addr_len, iface, dst, pkt); #elif !defined(MODULE_GNRC_SIXLOWPAN_ND) && defined(MODULE_GNRC_IPV6_NC) diff --git a/sys/net/gnrc/pktbuf_static/gnrc_pktbuf_static.c b/sys/net/gnrc/pktbuf_static/gnrc_pktbuf_static.c index fe10bc4728eb..80a6ebd0cb02 100644 --- a/sys/net/gnrc/pktbuf_static/gnrc_pktbuf_static.c +++ b/sys/net/gnrc/pktbuf_static/gnrc_pktbuf_static.c @@ -426,6 +426,8 @@ static gnrc_pktsnip_t *_create_snip(gnrc_pktsnip_t *next, void *data, size_t siz return pkt; } +static int null_allocs = 0; + static void *_pktbuf_alloc(size_t size) { _unused_t *prev = NULL, *ptr = _first_unused; @@ -437,6 +439,7 @@ static void *_pktbuf_alloc(size_t size) } if (ptr == NULL) { 
DEBUG("pktbuf: no space left in packet buffer\n"); + null_allocs += 1; return NULL; } /* _unused_t struct would fit => add new space at ptr */ diff --git a/sys/net/gnrc/sock/include/sock_types.h b/sys/net/gnrc/sock/include/sock_types.h index 82e446d414b4..88694da80a9c 100644 --- a/sys/net/gnrc/sock/include/sock_types.h +++ b/sys/net/gnrc/sock/include/sock_types.h @@ -32,6 +32,10 @@ #include "net/sock/ip.h" #include "net/sock/udp.h" +/* These two are needed for FreeBSD TCP. */ +#include "condition.h" +#include "net/tcp_freebsd.h" + #ifdef __cplusplus extern "C" { #endif @@ -75,6 +79,65 @@ struct sock_udp { uint16_t flags; /**< option flags */ }; +/* + * @brief Used in TCP FREEBSD sock type + * @internal + */ +struct sock_tcp_freebsd_send_state { + size_t buflen; + struct sock_tcp_freebsd_send_state* next; + struct lbufent entry; +}; + +/* + * @brief Used in TCP FREEBSD sock type + * @internal + */ +struct sock_tcp_freebsd_accept_queue_entry { + int asockid; + void* recvbuf; +}; + +/* + * @brief TCP FREEBSD sock type + * @internal + */ +struct sock_tcp_freebsd { + gnrc_nettype_t l3_type; + gnrc_nettype_t l4_type; + gnrc_netreg_entry_t netreg_entry; // to follow the inheritance + + ipv6_addr_t local_addr; + uint16_t local_port; + + mutex_t lock; + union { + struct { + int asock; + void* recvbuf; + mutex_t connect_lock; + condition_t connect_cond; + condition_t receive_cond; + condition_t send_cond; + + struct sock_tcp_freebsd_send_state* send_head; + struct sock_tcp_freebsd_send_state* send_tail; + size_t in_send_buffer; + } active; + struct { + int psock; + condition_t accept_cond; + + /* Circular buffer for accept queue. */ + cib_t accept_cib; + struct sock_tcp_freebsd_accept_queue_entry* accept_queue; + } passive; + } sfields; /* specific fields */ + int errstat; + bool hasactive; + bool haspassive; +}; + #ifdef __cplusplus } #endif diff --git a/sys/net/gnrc/sock/tcp_freebsd/Makefile b/sys/net/gnrc/sock/tcp_freebsd/Makefile new file mode 100644 index 000000000000..6dbb2eb1d5e2 --- /dev/null +++ b/sys/net/gnrc/sock/tcp_freebsd/Makefile @@ -0,0 +1,5 @@ +MODULE = gnrc_sock_tcp_freebsd + +DIRS += zone + +include $(RIOTBASE)/Makefile.base diff --git a/sys/net/gnrc/sock/tcp_freebsd/gnrc_sock_tcp_freebsd.c b/sys/net/gnrc/sock/tcp_freebsd/gnrc_sock_tcp_freebsd.c new file mode 100644 index 000000000000..dce5c4ab31c1 --- /dev/null +++ b/sys/net/gnrc/sock/tcp_freebsd/gnrc_sock_tcp_freebsd.c @@ -0,0 +1,671 @@ +/** +* This file is subject to the terms and conditions of the GNU Lesser +* General Public License v2.1. See the file LICENSE in the top level +* directory for more details. +*/ + +/** + * @{ + * + * @file + * @brief Implementation of conn API for GNRC TCP derived from FreeBSD + * + * @author Sam Kumar + */ + +#include +#include "gnrc_sock_internal.h" +#include "net/af.h" +#include "net/sock.h" +#include "net/gnrc/ipv6.h" +#include "net/gnrc/tcp_freebsd.h" +#include "net/sock/tcp_freebsd.h" +#include "net/tcp_freebsd.h" +#include "zone/gnrc_sock_tcp_freebsd_zalloc.h" + +#define ENABLE_DEBUG (0) + +#include "debug.h" + +#define RECV_BUF_LEN 2449 +#define REASS_BMP_LEN ((RECV_BUF_LEN + 7) >> 3) + +#define SENDMAXCOPY 52 +#define COPYBUFSIZE (SENDMAXCOPY << 1) +#define SENDBUFSIZE 2448 + +#ifndef SOCK_HAS_IPV6 +#error "TCP FREEBSD requires IPv6" +#endif + +/* Cached copy buffer, which may help us avoid a dynamic memory allocation. 
*/ +struct sock_tcp_freebsd_send_state* extracopybuf = NULL; + +static uint32_t _free_sendstates(sock_tcp_freebsd_t* conn, uint32_t howmany) { + uint32_t totalbytesremoved = 0; + uint32_t i; + + struct sock_tcp_freebsd_send_state* head; + struct sock_tcp_freebsd_send_state* newhead; + for (i = 0; (i < howmany) && (conn->sfields.active.send_head != NULL); i++) { + head = conn->sfields.active.send_head; + newhead = head->next; + totalbytesremoved += (head->buflen - head->entry.extraspace); + if (head->buflen == COPYBUFSIZE && extracopybuf == NULL) { + /* Hang on to the reference, to avoid a future memory allocation. */ + extracopybuf = head; + } else { + sock_tcp_freebsd_zfree(head); + } + conn->sfields.active.send_head = newhead; + } + if (conn->sfields.active.send_head == NULL) { + conn->sfields.active.send_tail = NULL; + } + assert(totalbytesremoved <= conn->sfields.active.in_send_buffer); + conn->sfields.active.in_send_buffer -= totalbytesremoved; + return totalbytesremoved; +} + +static void sock_tcp_freebsd_connectDone(uint8_t ai, struct sockaddr_in6* faddr, void* ctx) +{ + assert(ctx != NULL); + (void) ai; + (void) faddr; + sock_tcp_freebsd_t* conn = ctx; + mutex_lock(&conn->lock); + assert(conn->hasactive && !conn->haspassive); + conn->errstat = 0; + cond_signal(&conn->sfields.active.connect_cond); + mutex_unlock(&conn->lock); +} + +static void sock_tcp_freebsd_sendDone(uint8_t ai, uint32_t tofree, void* ctx) +{ + (void) ai; + uint32_t freed; + + sock_tcp_freebsd_t* conn = ctx; + assert(conn != NULL); + + mutex_lock(&conn->lock); + assert(conn->hasactive && !conn->haspassive); + freed = _free_sendstates(conn, tofree); + if (freed > 0) { + cond_broadcast(&conn->sfields.active.send_cond); + } + mutex_unlock(&conn->lock); +} + +static void sock_tcp_freebsd_receiveReady(uint8_t ai, int gotfin, void* ctx) +{ + // ctx might actually be NULL, in which case we just ignore this. + (void) ai; + (void) gotfin; + sock_tcp_freebsd_t* conn = ctx; + if (conn == NULL) { + // We got data on a socket on the accept queue that hasn't been accepted yet + return; + } + mutex_lock(&conn->lock); + assert(conn->hasactive && !conn->haspassive); + conn->errstat = 0; + cond_signal(&conn->sfields.active.receive_cond); + mutex_unlock(&conn->lock); +} + +static void sock_tcp_freebsd_connectionLost(acceptArgs_t* lost, uint8_t how, void* ctx) +{ + (void) how; + if (ctx == NULL) { + /* This could happen if we get a SYN, so that acceptReady is called, but the + * connection dies before a SYN-ACK is received, so the socket never hits + * the accept queue. + * In that case, we need to free the receive buffer, which we allocated. + */ + sock_tcp_freebsd_zfree(lost->recvbuf); + return; + } + sock_tcp_freebsd_t* conn = ctx; + mutex_lock(&conn->lock); + assert(conn->hasactive && !conn->haspassive); + conn->errstat = -((int) how); + cond_broadcast(&conn->sfields.active.connect_cond); + cond_broadcast(&conn->sfields.active.receive_cond); + cond_broadcast(&conn->sfields.active.send_cond); + mutex_unlock(&conn->lock); +} + +static acceptArgs_t sock_tcp_freebsd_acceptReady(uint8_t pi, void* ctx) +{ + /* To be returned after filling in members. 
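+     * On success, the TCP stack is handed a single zone allocation serving two
+     * purposes: bytes [0, RECV_BUF_LEN) are the receive buffer and the trailing
+     * REASS_BMP_LEN bytes (RECV_BUF_LEN / 8, rounded up) are the reassembly
+     * bitmap, i.e. reassbmp = (uint8_t*) recvbuf + RECV_BUF_LEN, exactly as
+     * filled in below. On failure, every member is set to NULL/-1/0 so the
+     * caller can tell the two cases apart.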
*/ + acceptArgs_t args; + + assert(ctx != NULL); + sock_tcp_freebsd_t* conn = ctx; + + mutex_lock(&conn->lock); + assert(conn->haspassive && !conn->hasactive); + + void* recvbuf = sock_tcp_freebsd_zalloc(RECV_BUF_LEN + REASS_BMP_LEN); + if (recvbuf == NULL) { + DEBUG("Out of memory in acceptReady\n"); + goto fail; + } + + int asockid = bsdtcp_active_socket(sock_tcp_freebsd_connectDone, + sock_tcp_freebsd_sendDone, sock_tcp_freebsd_receiveReady, + sock_tcp_freebsd_connectionLost, NULL); + + if (asockid == -1) { + sock_tcp_freebsd_zfree(recvbuf); + goto fail; + } + + args.asockid = asockid; + args.recvbuf = recvbuf; + args.recvbuflen = RECV_BUF_LEN; + args.reassbmp = ((uint8_t*) args.recvbuf) + RECV_BUF_LEN; + +done: + mutex_unlock(&conn->lock); + return args; + +fail: + args.asockid = -1; + args.recvbuf = NULL; + args.recvbuflen = 0; + args.reassbmp = NULL; + + goto done; +} + +static bool sock_tcp_freebsd_acceptDone(uint8_t pi, struct sockaddr_in6* faddr, acceptArgs_t* accepted, void* ctx) +{ + (void) pi; + (void) faddr; + + int putidx; + + sock_tcp_freebsd_t* conn = ctx; + assert(conn != NULL); + + mutex_lock(&conn->lock); + assert(conn->haspassive && !conn->hasactive); + + + putidx = cib_put(&conn->sfields.passive.accept_cib); + if (putidx == -1) { + DEBUG("accept queue full\n"); + mutex_unlock(&conn->lock); + return false; + } + struct sock_tcp_freebsd_accept_queue_entry* queue_slot = &conn->sfields.passive.accept_queue[putidx]; + queue_slot->asockid = accepted->asockid; + queue_slot->recvbuf = accepted->recvbuf; + + cond_signal(&conn->sfields.passive.accept_cond); + + mutex_unlock(&conn->lock); + + return true; +} + + +static void sock_tcp_freebsd_general_init(sock_tcp_freebsd_t* conn, uint16_t port) +{ + conn->l3_type = GNRC_NETTYPE_IPV6; + conn->l4_type = GNRC_NETTYPE_TCP; + + conn->local_port = port; + + mutex_init(&conn->lock); + conn->errstat = 0; + conn->hasactive = false; + conn->haspassive = false; +} + +static void sock_tcp_freebsd_passive_clear(sock_tcp_freebsd_t* conn) +{ + if (conn->haspassive) { + int asockidx; + + conn->haspassive = false; + bsdtcp_close(conn->sfields.passive.psock); + cond_broadcast(&conn->sfields.passive.accept_cond); + + struct sock_tcp_freebsd_accept_queue_entry* queue_slot; + while ((asockidx = cib_get(&conn->sfields.passive.accept_cib)) != -1) { + queue_slot = &conn->sfields.passive.accept_queue[asockidx]; + sock_tcp_freebsd_zfree(queue_slot->recvbuf); + bsdtcp_close(queue_slot->asockid); + } + sock_tcp_freebsd_zfree(conn->sfields.passive.accept_queue); + } +} + +static void sock_tcp_freebsd_active_clear(sock_tcp_freebsd_t* conn) +{ + if (conn->hasactive) { + conn->hasactive = false; + mutex_lock(&conn->sfields.active.connect_lock); + bsdtcp_close(conn->sfields.active.asock); + sock_tcp_freebsd_zfree(conn->sfields.active.recvbuf); + cond_broadcast(&conn->sfields.active.connect_cond); + cond_broadcast(&conn->sfields.active.receive_cond); + cond_broadcast(&conn->sfields.active.send_cond); + _free_sendstates(conn, (uint32_t) 0xFFFFFFFFu); + assert(conn->sfields.active.in_send_buffer == 0); + } +} + +static bool sock_tcp_freebsd_active_set(sock_tcp_freebsd_t* conn, int asock) +{ + sock_tcp_freebsd_passive_clear(conn); + if (!conn->hasactive) { + conn->hasactive = true; + if (asock == -1) { + conn->sfields.active.asock = bsdtcp_active_socket(sock_tcp_freebsd_connectDone, + sock_tcp_freebsd_sendDone, sock_tcp_freebsd_receiveReady, + sock_tcp_freebsd_connectionLost, conn); + if (conn->sfields.active.asock == -1) { + conn->hasactive = false; + return false; 
+ } + + conn->sfields.active.recvbuf = sock_tcp_freebsd_zalloc(RECV_BUF_LEN + REASS_BMP_LEN); + if (conn->sfields.active.recvbuf == NULL) { + conn->hasactive = false; + bsdtcp_close(conn->sfields.active.asock); + conn->sfields.active.asock = -1; + return false; + } + + } else { + conn->sfields.active.asock = asock; + /* How to get the recvbuf? */ + } + bsdtcp_bind(conn->sfields.active.asock, conn->local_port); + + mutex_init(&conn->sfields.active.connect_lock); + cond_init(&conn->sfields.active.connect_cond); + cond_init(&conn->sfields.active.receive_cond); + cond_init(&conn->sfields.active.send_cond); + + conn->sfields.active.send_head = NULL; + conn->sfields.active.send_tail = NULL; + conn->sfields.active.in_send_buffer = 0; + } + return true; +} + +static bool sock_tcp_freebsd_passive_set(sock_tcp_freebsd_t* conn, int queue_len) +{ + assert(queue_len >= 0 && queue_len < (1 << (8 * sizeof(int) - 2))); + sock_tcp_freebsd_active_clear(conn); + if (!conn->haspassive) { + conn->haspassive = true; + conn->sfields.passive.psock = bsdtcp_passive_socket(sock_tcp_freebsd_acceptReady, sock_tcp_freebsd_acceptDone, conn); + if (conn->sfields.passive.psock == -1) { + conn->haspassive = false; + return false; + } + bsdtcp_bind(conn->sfields.passive.psock, conn->local_port); + + cond_init(&conn->sfields.passive.accept_cond); + + /* Set adj_queue_len to the power of two above queue_len. */ + unsigned int adj_queue_len = 1; + while (queue_len != 0) { + queue_len >>= 1; + adj_queue_len <<= 1; + } + adj_queue_len >>= 1; + + cib_init(&conn->sfields.passive.accept_cib, adj_queue_len); + + conn->sfields.passive.accept_queue = sock_tcp_freebsd_zalloc(adj_queue_len * sizeof(struct sock_tcp_freebsd_accept_queue_entry)); + if (conn->sfields.passive.accept_queue == NULL && adj_queue_len != 0) { + conn->haspassive = false; + bsdtcp_close(conn->sfields.passive.psock); + conn->sfields.passive.psock = -1; + return false; + } + } + return true; +} + +/* This used to be in sys/net/gnrc/conn/gnrc_conn.c, as the function + * gnrc_conn6_set_local_addr. I'm duplicating the code here, as conn is + * deprecated and so I don't want to pull it in as a dependency. + */ +bool sock_tcp_freebsd_set_local_ipv6_addr(uint8_t *conn_addr, const ipv6_addr_t *addr) +{ + ipv6_addr_t *tmp; + if (!ipv6_addr_is_unspecified(addr) && + !ipv6_addr_is_loopback(addr) && + gnrc_ipv6_netif_find_by_addr(&tmp, addr) == KERNEL_PID_UNDEF) { + return false; + } + else if (ipv6_addr_is_loopback(addr) || ipv6_addr_is_unspecified(addr)) { + ipv6_addr_set_unspecified((ipv6_addr_t *)conn_addr); + } + else { + memcpy(conn_addr, addr, sizeof(ipv6_addr_t)); + } + return true; +} + +static uint16_t _dyn_port_next = 0; +/** + * @brief returns a UDP port, and checks for reuse if required + * I copied this from the sock_udp module, with a minor modification. 
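+ * The walk below is a stride-based scan over the dynamic range:
+ *     port = GNRC_SOCK_DYN_PORTRANGE_MIN + (next * OFF) % NUM
+ * with _dyn_port_next incremented on every probe, so successive sockets land
+ * on well-spread ports. A port already owned by a FreeBSD TCP socket is
+ * skipped via gnrc_tcp_freebsd_portisfree(), and after NUM failed probes the
+ * sentinel GNRC_SOCK_DYN_PORTRANGE_ERR is returned.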
+ * + * complies to RFC 6056, see https://tools.ietf.org/html/rfc6056#section-3.3.3 + */ +static uint16_t _get_dyn_port(sock_tcp_freebsd_t *sock) +{ + uint16_t port; + unsigned count = GNRC_SOCK_DYN_PORTRANGE_NUM; + do { + port = GNRC_SOCK_DYN_PORTRANGE_MIN + + (_dyn_port_next * GNRC_SOCK_DYN_PORTRANGE_OFF) % GNRC_SOCK_DYN_PORTRANGE_NUM; + _dyn_port_next++; + if ((sock == NULL) || gnrc_tcp_freebsd_portisfree(port)) { + return port; + } + --count; + } while (count > 0); + return GNRC_SOCK_DYN_PORTRANGE_ERR; +} + +int sock_tcp_freebsd_create(sock_tcp_freebsd_t *conn, const void *addr, size_t addr_len, int family, + uint16_t port) +{ + conn->l4_type = GNRC_NETTYPE_TCP; + switch (family) { +#ifdef MODULE_GNRC_IPV6 + case AF_INET6: + if (addr_len != sizeof(ipv6_addr_t)) { + return -EINVAL; + } + if (port == 0) { + port = _get_dyn_port(conn); + if (port == GNRC_SOCK_DYN_PORTRANGE_ERR) { + return -EADDRINUSE; + } + } + if (sock_tcp_freebsd_set_local_ipv6_addr((uint8_t*) &conn->local_addr, addr)) { + sock_tcp_freebsd_general_init(conn, port); + } + else { + return -EADDRNOTAVAIL; + } + break; +#endif + default: + (void)addr; + (void)addr_len; + (void)port; + return -EAFNOSUPPORT; + } + return 0; +} + +void sock_tcp_freebsd_close(sock_tcp_freebsd_t *conn) +{ + mutex_lock(&conn->lock); + assert(!(conn->hasactive && conn->haspassive)); + sock_tcp_freebsd_active_clear(conn); + sock_tcp_freebsd_passive_clear(conn); + mutex_unlock(&conn->lock); +} + +int sock_tcp_freebsd_getlocaladdr(sock_tcp_freebsd_t *conn, void *addr, uint16_t *port) +{ + mutex_lock(&conn->lock); + memcpy(addr, &conn->local_addr, sizeof(ipv6_addr_t)); + *port = conn->local_port; + mutex_unlock(&conn->lock); + return 0; +} + +int sock_tcp_freebsd_getpeeraddr(sock_tcp_freebsd_t *conn, void *addr, uint16_t *port) +{ + struct in6_addr* addrptr; + uint16_t* portptr; + mutex_lock(&conn->lock); + if (!conn->hasactive || !bsdtcp_isestablished(conn->sfields.active.asock)) + { + mutex_unlock(&conn->lock); + return -ENOTCONN; + } + bsdtcp_peerinfo(conn->sfields.active.asock, &addrptr, &portptr); + memcpy(addr, addrptr, sizeof(struct in6_addr)); + *port = *portptr; + mutex_unlock(&conn->lock); + return 0; +} + +int sock_tcp_freebsd_connect(sock_tcp_freebsd_t *conn, const void *addr, size_t addr_len, uint16_t port) +{ + int rv; + + struct sockaddr_in6 faddrport; + + mutex_lock(&conn->lock); + if (addr_len != sizeof(struct in6_addr)) { + rv = -EAFNOSUPPORT; + goto unlockreturn; + } + memcpy(&faddrport.sin6_addr, addr, addr_len); + faddrport.sin6_port = htons(port); + bool res = sock_tcp_freebsd_active_set(conn, -1); + if (!res) { + rv = -ENOMEM; + goto unlockreturn; + } + + mutex_lock(&conn->sfields.active.connect_lock); + if (bsdtcp_isestablished(conn->sfields.active.asock)) { + rv = -EISCONN; + goto unlockboth; + } + conn->errstat = 0; + int error = bsdtcp_connect(conn->sfields.active.asock, &faddrport, + conn->sfields.active.recvbuf, RECV_BUF_LEN, + ((uint8_t*) conn->sfields.active.recvbuf) + RECV_BUF_LEN); + if (error != 0) { + rv = -error; + goto unlockboth; + } + + /* Wait until either connection done OR connection lost */ + cond_wait(&conn->sfields.active.connect_cond, &conn->lock); + + rv = conn->errstat; + +unlockboth: + mutex_unlock(&conn->sfields.active.connect_lock); +unlockreturn: + mutex_unlock(&conn->lock); + return rv; +} + +int sock_tcp_freebsd_listen(sock_tcp_freebsd_t *conn, int queue_len) +{ + int rv; + mutex_lock(&conn->lock); + bool res = sock_tcp_freebsd_passive_set(conn, queue_len); + if (!res) { + rv = -ENOMEM; + goto 
unlockreturn; + } + + rv = -bsdtcp_listen(conn->sfields.passive.psock); + +unlockreturn: + mutex_unlock(&conn->lock); + return rv; +} + +int sock_tcp_freebsd_accept(sock_tcp_freebsd_t* conn, sock_tcp_freebsd_t* out_conn) +{ + mutex_lock(&conn->lock); + if (!conn->haspassive) { + mutex_unlock(&conn->lock); + return -EINVAL; + } + + assert(!conn->hasactive); + + int asockidx; + while ((asockidx = cib_get(&conn->sfields.passive.accept_cib)) == -1) { + cond_wait(&conn->sfields.passive.accept_cond, &conn->lock); + } + + struct sock_tcp_freebsd_accept_queue_entry* queue_slot = &conn->sfields.passive.accept_queue[asockidx]; + int asock = queue_slot->asockid; + + memcpy(&out_conn->local_addr, &conn->local_addr, sizeof(ipv6_addr_t)); + sock_tcp_freebsd_general_init(out_conn, conn->local_port); + + mutex_lock(&out_conn->lock); + out_conn->sfields.active.recvbuf = queue_slot->recvbuf; + sock_tcp_freebsd_active_set(out_conn, asock); + int rv = bsdtcp_set_ctx(asock, out_conn); + assert(rv == 0); + (void) rv; + mutex_unlock(&out_conn->lock); + + mutex_unlock(&conn->lock); + return 0; +} + +int sock_tcp_freebsd_recv(sock_tcp_freebsd_t *conn, void *data, size_t max_len) +{ + size_t bytes_read; + int error; + + assert(conn->hasactive && !conn->haspassive); + mutex_lock(&conn->lock); + + conn->errstat = 0; + error = bsdtcp_receive(conn->sfields.active.asock, data, max_len, &bytes_read); + while (bytes_read == 0 && error == 0 && conn->errstat == 0 && !bsdtcp_hasrcvdfin(conn->sfields.active.asock)) { + cond_wait(&conn->sfields.active.receive_cond, &conn->lock); + error = bsdtcp_receive(conn->sfields.active.asock, data, max_len, &bytes_read); + } + + mutex_unlock(&conn->lock); + + if (error != 0) { + return -error; + } else if (conn->errstat != 0) { + return conn->errstat; + } + return (int) bytes_read; +} + +/* SEND POLICY + * If a buffer is smaller than or equal to SENDMAXCOPY bytes, then + * COPYBUFSIZE bytes are allocated, and the buffer is copied into the + * space. This allows the TCP stack to coalesce small buffers within the + * remaining space in COPYBUFSIZE. + * Otherwise, the TCP stack is provided a reference to the buffer, with no + * extra space. + */ +int sock_tcp_freebsd_send(sock_tcp_freebsd_t *conn, const void* data, size_t len) +{ + int error = 0; + struct lbufent* bufent; + struct sock_tcp_freebsd_send_state* sstate; + + mutex_lock(&conn->lock); + assert(conn->hasactive && !conn->haspassive); + + const char* buffer = data; + + while (len > 0 && error == 0) { + /* + * Look at the remaining space in the send buffer to figure out how much + * we can send. 
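+         * in_send_buffer counts bytes handed to the stack but not yet freed by
+         * sendDone(), and is capped at SENDBUFSIZE (2448). For example, with
+         * 2400 bytes outstanding, a 100-byte send first queues a 48-byte
+         * chunk, then blocks on send_cond until acked data is freed, and then
+         * queues the remaining 52 bytes.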
+ */ + while (conn->sfields.active.in_send_buffer >= SENDBUFSIZE) { + assert(conn->sfields.active.in_send_buffer == SENDBUFSIZE); + cond_wait(&conn->sfields.active.send_cond, &conn->lock); + } + size_t buflen = SENDBUFSIZE - conn->sfields.active.in_send_buffer; + if (len < buflen) { + buflen = len; + } + + bool copy = (buflen <= SENDMAXCOPY); + if (copy) { + if (extracopybuf == NULL) { + sstate = sock_tcp_freebsd_zalloc(sizeof(*sstate) + COPYBUFSIZE); + if (sstate != NULL) { + sstate->buflen = COPYBUFSIZE; + } + } else { + sstate = extracopybuf; + assert(sstate->buflen == COPYBUFSIZE); + extracopybuf = NULL; + } + } else { + sstate = sock_tcp_freebsd_zalloc(sizeof(*sstate) + buflen); + if (sstate != NULL) { + sstate->buflen = buflen; + } + } + + if (sstate == NULL) { + error = ENOMEM; + goto unlockreturn; + } + sstate->next = NULL; + + bufent = &sstate->entry; + bufent->iov.iov_next = NULL; + bufent->iov.iov_len = buflen; + + bufent->iov.iov_base = (uint8_t*) (sstate + 1); + bufent->extraspace = (copy ? (COPYBUFSIZE - buflen) : 0); + memcpy(bufent->iov.iov_base, buffer, buflen); + + int state; + + error = (int) bsdtcp_send(conn->sfields.active.asock, bufent, &state); + + if (state == 1) { + /* The TCP stack has a reference to this buffer, and we must keep track of it. */ + if (conn->sfields.active.send_tail == NULL) { + conn->sfields.active.send_head = sstate; + } else { + conn->sfields.active.send_tail->next = sstate; + } + conn->sfields.active.send_tail = sstate; + } else { + /* Either the send failed, or this was copied into the last buffer already. */ + if (copy) { + assert(extracopybuf == NULL); + /* Cache this copy, to avoid another dynamic memory allocation */ + extracopybuf = sstate; + } else { + sock_tcp_freebsd_zfree(sstate); + } + } + + if (state != 0) { + /* The send didn't fail, so we need to keep track of queued bytes. */ + conn->sfields.active.in_send_buffer += buflen; + } + + buffer += buflen; + len -= buflen; + } + +unlockreturn: + mutex_unlock(&conn->lock); + return -error; +} diff --git a/sys/net/gnrc/sock/tcp_freebsd/zone/Makefile b/sys/net/gnrc/sock/tcp_freebsd/zone/Makefile new file mode 100644 index 000000000000..53559cba3e46 --- /dev/null +++ b/sys/net/gnrc/sock/tcp_freebsd/zone/Makefile @@ -0,0 +1,3 @@ +MODULE = gnrc_sock_tcp_freebsd_zalloc + +include $(RIOTBASE)/Makefile.base diff --git a/sys/net/gnrc/sock/tcp_freebsd/zone/gnrc_sock_tcp_freebsd_zalloc.h b/sys/net/gnrc/sock/tcp_freebsd/zone/gnrc_sock_tcp_freebsd_zalloc.h new file mode 100644 index 000000000000..b4a31f4db2a0 --- /dev/null +++ b/sys/net/gnrc/sock/tcp_freebsd/zone/gnrc_sock_tcp_freebsd_zalloc.h @@ -0,0 +1,50 @@ +/* + * This file is subject to the terms and conditions of the GNU Lesser + * General Public License v2.1. See the file LICENSE in the top level + * directory for more details. 
+ */ + +#ifndef GNRC_SOCK_TCP_FREEBSD_ZALLOC_H_ +#define GNRC_SOCK_TCP_FREEBSD_ZALLOC_H_ + +#include "memmgr.h" +#include "mutex.h" + +#include "debug.h" + +static bool initialized = false; + +mutex_t sock_tcp_freebsd_zalloc_mutex = MUTEX_INIT; + +static inline void sock_tcp_freebsd_zone_init(void) +{ + if (!initialized) { + initialized = true; + memmgr_init(); + } +} + +static inline void* sock_tcp_freebsd_zalloc(unsigned long numbytes) { + if (numbytes == 0) { + return NULL; + } + mutex_lock(&sock_tcp_freebsd_zalloc_mutex); + sock_tcp_freebsd_zone_init(); + void* p = memmgr_alloc(numbytes); + mutex_unlock(&sock_tcp_freebsd_zalloc_mutex); + DEBUG("Allocating %lu bytes: %p\n", numbytes, p); + return p; +} + +static inline void sock_tcp_freebsd_zfree(void* ptr) { + if (ptr == NULL) { + return; + } + mutex_lock(&sock_tcp_freebsd_zalloc_mutex); + assert(initialized); + memmgr_free(ptr); + mutex_unlock(&sock_tcp_freebsd_zalloc_mutex); + DEBUG("Freeing %p\n", ptr); +} + +#endif diff --git a/sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.c b/sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.c new file mode 100644 index 000000000000..0103840dba46 --- /dev/null +++ b/sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.c @@ -0,0 +1,253 @@ +//---------------------------------------------------------------- +// Statically-allocated memory manager +// +// by Eli Bendersky (eliben@gmail.com) +// +// This code is in the public domain. +//---------------------------------------------------------------- +#include +#include "memmgr.h" + +typedef ulong Align; + +union mem_header_union +{ + struct + { + // Pointer to the next block in the free list + // + union mem_header_union* next; + + // Size of the block (in quantas of sizeof(mem_header_t)) + // + ulong size; + } s; + + // Used to align headers in memory to a boundary + // + Align align_dummy; +}; + +typedef union mem_header_union mem_header_t; + +// Initial empty list +// +static mem_header_t base; + +// Start of free list +// +static mem_header_t* freep = 0; + +// Static pool for new allocations +// +static byte pool[POOL_SIZE] = {0}; +static ulong pool_free_pos = 0; + + +void memmgr_init(void) +{ + base.s.next = 0; + base.s.size = 0; + freep = 0; + pool_free_pos = 0; +} + + +void memmgr_print_stats(void) +{ + #ifdef DEBUG_MEMMGR_SUPPORT_STATS + mem_header_t* p; + + printf("------ Memory manager stats ------\n\n"); + printf( "Pool: free_pos = %lu (%lu bytes left)\n\n", + pool_free_pos, POOL_SIZE - pool_free_pos); + + p = (mem_header_t*) pool; + + while (p < (mem_header_t*) (pool + pool_free_pos)) + { + printf( " * Addr: 0x%8lu; Size: %8lu\n", + p, p->s.size); + + p += p->s.size; + } + + printf("\nFree list:\n\n"); + + if (freep) + { + p = freep; + + while (1) + { + printf( " * Addr: 0x%8lu; Size: %8lu; Next: 0x%8lu\n", + p, p->s.size, p->s.next); + + p = p->s.next; + + if (p == freep) + break; + } + } + else + { + printf("Empty\n"); + } + + printf("\n"); + #endif // DEBUG_MEMMGR_SUPPORT_STATS +} + + +static mem_header_t* get_mem_from_pool(ulong nquantas) +{ + ulong total_req_size; + + mem_header_t* h; + + if (nquantas < MIN_POOL_ALLOC_QUANTAS) + nquantas = MIN_POOL_ALLOC_QUANTAS; + + total_req_size = nquantas * sizeof(mem_header_t); + + if (pool_free_pos + total_req_size <= POOL_SIZE) + { + h = (mem_header_t*) (pool + pool_free_pos); + h->s.size = nquantas; + memmgr_free((void*) (h + 1)); + pool_free_pos += total_req_size; + } + else + { + return 0; + } + + return freep; +} + + +// Allocations are done in 'quantas' of header size. 
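+// For example, with an 8-byte mem_header_t, a request for nbytes = 10 needs
+// (10 + 8 - 1) / 8 + 1 = 3 quantas (24 bytes): two quantas to cover the
+// rounded-up payload and one for the block header (see memmgr_alloc below).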
+// The search for a free block of adequate size begins at the point 'freep' +// where the last block was found. +// If a too-big block is found, it is split and the tail is returned (this +// way the header of the original needs only to have its size adjusted). +// The pointer returned to the user points to the free space within the block, +// which begins one quanta after the header. +// +void* memmgr_alloc(ulong nbytes) +{ + mem_header_t* p; + mem_header_t* prevp; + + // Calculate how many quantas are required: we need enough to house all + // the requested bytes, plus the header. The -1 and +1 are there to make sure + // that if nbytes is a multiple of nquantas, we don't allocate too much + // + ulong nquantas = (nbytes + sizeof(mem_header_t) - 1) / sizeof(mem_header_t) + 1; + + // First alloc call, and no free list yet ? Use 'base' for an initial + // denegerate block of size 0, which points to itself + // + if ((prevp = freep) == 0) + { + base.s.next = freep = prevp = &base; + base.s.size = 0; + } + + for (p = prevp->s.next; ; prevp = p, p = p->s.next) + { + // big enough ? + if (p->s.size >= nquantas) + { + // exactly ? + if (p->s.size == nquantas) + { + // just eliminate this block from the free list by pointing + // its prev's next to its next + // + prevp->s.next = p->s.next; + } + else // too big + { + p->s.size -= nquantas; + p += p->s.size; + p->s.size = nquantas; + } + + freep = prevp; + return (void*) (p + 1); + } + // Reached end of free list ? + // Try to allocate the block from the pool. If that succeeds, + // get_mem_from_pool adds the new block to the free list and + // it will be found in the following iterations. If the call + // to get_mem_from_pool doesn't succeed, we've run out of + // memory + // + else if (p == freep) + { + if ((p = get_mem_from_pool(nquantas)) == 0) + { + #ifdef DEBUG_MEMMGR_FATAL + printf("!! Memory allocation failed !!\n"); + #endif + return 0; + } + } + } +} + + +// Scans the free list, starting at freep, looking the the place to insert the +// free block. This is either between two existing blocks or at the end of the +// list. In any case, if the block being freed is adjacent to either neighbor, +// the adjacent blocks are combined. +// +void memmgr_free(void* ap) +{ + mem_header_t* block; + mem_header_t* p; + + // acquire pointer to block header + block = ((mem_header_t*) ap) - 1; + + // Find the correct place to place the block in (the free list is sorted by + // address, increasing order) + // + for (p = freep; !(block > p && block < p->s.next); p = p->s.next) + { + // Since the free list is circular, there is one link where a + // higher-addressed block points to a lower-addressed block. 
+ // This condition checks if the block should be actually + // inserted between them + // + if (p >= p->s.next && (block > p || block < p->s.next)) + break; + } + + // Try to combine with the higher neighbor + // + if (block + block->s.size == p->s.next) + { + block->s.size += p->s.next->s.size; + block->s.next = p->s.next->s.next; + } + else + { + block->s.next = p->s.next; + } + + // Try to combine with the lower neighbor + // + if (p + p->s.size == block) + { + p->s.size += block->s.size; + p->s.next = block->s.next; + } + else + { + p->s.next = block; + } + + freep = p; +} diff --git a/sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.h b/sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.h new file mode 100644 index 000000000000..c178b5e610bd --- /dev/null +++ b/sys/net/gnrc/sock/tcp_freebsd/zone/memmgr.h @@ -0,0 +1,96 @@ +//---------------------------------------------------------------- +// Statically-allocated memory manager +// +// by Eli Bendersky (eliben@gmail.com) +// +// This code is in the public domain. +//---------------------------------------------------------------- +#ifndef MEMMGR_H +#define MEMMGR_H + +// +// Memory manager: dynamically allocates memory from +// a fixed pool that is allocated statically at link-time. +// +// Usage: after calling memmgr_init() in your +// initialization routine, just use memmgr_alloc() instead +// of malloc() and memmgr_free() instead of free(). +// Naturally, you can use the preprocessor to define +// malloc() and free() as aliases to memmgr_alloc() and +// memmgr_free(). This way the manager will be a drop-in +// replacement for the standard C library allocators, and can +// be useful for debugging memory allocation problems and +// leaks. +// +// Preprocessor flags you can define to customize the +// memory manager: +// +// DEBUG_MEMMGR_FATAL +// Allow printing out a message when allocations fail +// +// DEBUG_MEMMGR_SUPPORT_STATS +// Allow printing out of stats in function +// memmgr_print_stats When this is disabled, +// memmgr_print_stats does nothing. +// +// Note that in production code on an embedded system +// you'll probably want to keep those undefined, because +// they cause printf to be called. +// +// POOL_SIZE +// Size of the pool for new allocations. This is +// effectively the heap size of the application, and can +// be changed in accordance with the available memory +// resources. +// +// MIN_POOL_ALLOC_QUANTAS +// Internally, the memory manager allocates memory in +// quantas roughly the size of two ulong objects. To +// minimize pool fragmentation in case of multiple allocations +// and deallocations, it is advisable to not allocate +// blocks that are too small. +// This flag sets the minimal ammount of quantas for +// an allocation. If the size of a ulong is 4 and you +// set this flag to 16, the minimal size of an allocation +// will be 4 * 2 * 16 = 128 bytes +// If you have a lot of small allocations, keep this value +// low to conserve memory. If you have mostly large +// allocations, it is best to make it higher, to avoid +// fragmentation. +// +// Notes: +// 1. This memory manager is *not thread safe*. Use it only +// for single thread/task applications. +// + +//#define DEBUG_MEMMGR_SUPPORT_STATS 1 + +#define POOL_SIZE (7 * 1024 + 256) +#define MIN_POOL_ALLOC_QUANTAS 4 + + +typedef unsigned char byte; +typedef unsigned long ulong; + + + +// Initialize the memory manager. This function should be called +// only once in the beginning of the program. 
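+// A minimal usage sketch (names from this header; error handling elided):
+//
+//     memmgr_init();
+//     byte* buf = (byte*) memmgr_alloc(128);
+//     if (buf != 0) {
+//         /* ... use buf ... */
+//         memmgr_free(buf);
+//     }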
+// +void memmgr_init(void); + +// 'malloc' clone +// +void* memmgr_alloc(ulong nbytes); + +// 'free' clone +// +void memmgr_free(void* ap); + +// Prints statistics about the current state of the memory +// manager +// +void memmgr_print_stats(void); + + +#endif // MEMMGR_H diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/Makefile b/sys/net/gnrc/transport_layer/tcp_freebsd/Makefile new file mode 100644 index 000000000000..404045ddb1de --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/Makefile @@ -0,0 +1,7 @@ +MODULE = gnrc_tcp_freebsd + +DIRS += blip +DIRS += bsdtcp +DIRS += lib + +include $(RIOTBASE)/Makefile.base diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/blip/Makefile b/sys/net/gnrc/transport_layer/tcp_freebsd/blip/Makefile new file mode 100644 index 000000000000..b8881ebf1c45 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/blip/Makefile @@ -0,0 +1,3 @@ +MODULE = gnrc_tcp_freebsd_blip + +include $(RIOTBASE)/Makefile.base diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/blip/iovec.c b/sys/net/gnrc/transport_layer/tcp_freebsd/blip/iovec.c new file mode 100644 index 000000000000..18cf5532dbda --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/blip/iovec.c @@ -0,0 +1,80 @@ +#include +#include +#include + +#include "iovec.h" + +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +/** + * read len bytes starting at offset into the buffer pointed to by buf + * + * + */ +int iov_read(struct ip_iovec *iov, int offset, int len, uint8_t *buf) { + int cur_offset = 0, written = 0; + // printf("iov_read iov: %p offset: %i len: %i buf: %p\n", iov, offset, len, buf); + + while (iov != NULL && cur_offset + iov->iov_len <= offset) { + cur_offset += iov->iov_len; + iov = iov->iov_next; + } + if (!iov) goto done; + + while (len > 0) { + int start, len_here; + start = offset - cur_offset; + len_here = MIN(iov->iov_len - start, len); + + // copy + memcpy(buf, iov->iov_base + start, len_here); + // printf("iov_read: %i/%i\n", len_here, len); + + cur_offset += start + len_here; + offset += len_here; + written += len_here; + len -= len_here; + buf += len_here; + iov = iov->iov_next; + + if (!iov) { + goto done; + } + } + done: + return written; +} + +int iov_len(struct ip_iovec *iov) { + int rv = 0; + while (iov) { + rv += iov->iov_len; + iov = iov->iov_next; + } + return rv; +} + +void iov_prefix(struct ip_iovec *iov, struct ip_iovec *new, uint8_t *buf, size_t len) { + new->iov_base = buf; + new->iov_len = len; + new->iov_next = iov; +} + +int iov_update(struct ip_iovec *iov, int offset, int len, uint8_t *buf) { + int written = 0; + + /* advance to the first block where we could write */ + while (offset >= iov->iov_len) { + offset -= iov->iov_len; + iov = iov->iov_next; + } + + while (iov != NULL && written < len) { + int writelen = MIN(iov->iov_len - offset, len); + memcpy(iov->iov_base + offset, buf, writelen); + buf += writelen; + len -= writelen; + offset = 0; + iov = iov->iov_next; + } + return written; +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/blip/iovec.h b/sys/net/gnrc/transport_layer/tcp_freebsd/blip/iovec.h new file mode 100644 index 000000000000..d3fd92e65abe --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/blip/iovec.h @@ -0,0 +1,19 @@ +#ifndef IOVEC_H_ +#define IOVEC_H_ + +#include +#include + +struct ip_iovec { + uint8_t *iov_base; + size_t iov_len; + struct ip_iovec *iov_next; +}; + +int iov_read(struct ip_iovec *iov, int offset, int len, uint8_t *buf); +int iov_len(struct ip_iovec *iov); +void iov_prefix(struct ip_iovec 
*iov, struct ip_iovec *new_iov, uint8_t *buf, size_t len); +int iov_update(struct ip_iovec *iov, int offset, int len, uint8_t *buf); +void iov_print(struct ip_iovec *iov); + +#endif diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/Makefile b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/Makefile new file mode 100644 index 000000000000..a1586abc3899 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/Makefile @@ -0,0 +1,5 @@ +MODULE = gnrc_tcp_freebsd_internal + +DIRS += cc + +include $(RIOTBASE)/Makefile.base diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/_types.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/_types.h new file mode 100644 index 000000000000..e5c5399e59da --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/_types.h @@ -0,0 +1,120 @@ +/*- + * Copyright (c) 2002 Mike Barcroft <mike@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS__TYPES_H_ +#define _SYS__TYPES_H_ + +//#include <sys/cdefs.h> +//#include <machine/_types.h> +#include "types.h" + +#if 0 +/* + * Standard type definitions. + */ + +typedef __int32_t __blksize_t; /* file block size */ +typedef __int64_t __blkcnt_t; /* file block count */ +typedef __int32_t __clockid_t; /* clock_gettime()... */ +typedef __uint32_t __fflags_t; /* file flags */ +typedef __uint64_t __fsblkcnt_t; +typedef __uint64_t __fsfilcnt_t; +typedef __uint32_t __gid_t; +typedef __int64_t __id_t; /* can hold a gid_t, pid_t, or uid_t */ +typedef __uint32_t __ino_t; /* inode number */ +typedef long __key_t; /* IPC key (for Sys V IPC) */ +typedef __int32_t __lwpid_t; /* Thread ID (a.k.a. LWP) */ +typedef __uint16_t __mode_t; /* permissions */ +typedef int __accmode_t; /* access permissions */ +typedef int __nl_item; +typedef __uint16_t __nlink_t; /* link count */ +typedef __int64_t __off_t; /* file offset */ +typedef __int32_t __pid_t; /* process [group] */ +typedef __int64_t __rlim_t; /* resource limit - intentionally */ + /* signed, because of legacy code */ + /* that uses -1 for RLIM_INFINITY */ +typedef __uint8_t __sa_family_t; +typedef __uint32_t __socklen_t; +typedef long __suseconds_t; /* microseconds (signed) */ +typedef struct __timer *__timer_t; /* timer_gettime()... */ +typedef struct __mq *__mqd_t; /* mq_open()...
*/ +typedef __uint32_t __uid_t; +typedef unsigned int __useconds_t; /* microseconds (unsigned) */ +typedef int __cpuwhich_t; /* which parameter for cpuset. */ +typedef int __cpulevel_t; /* level parameter for cpuset. */ +typedef int __cpusetid_t; /* cpuset identifier. */ + +/* + * Unusual type definitions. + */ +/* + * rune_t is declared to be an ``int'' instead of the more natural + * ``unsigned long'' or ``long''. Two things are happening here. It is not + * unsigned so that EOF (-1) can be naturally assigned to it and used. Also, + * it looks like 10646 will be a 31 bit standard. This means that if your + * ints cannot hold 32 bits, you will be in trouble. The reason an int was + * chosen over a long is that the is*() and to*() routines take ints (says + * ANSI C), but they use __ct_rune_t instead of int. + * + * NOTE: rune_t is not covered by ANSI nor other standards, and should not + * be instantiated outside of lib/libc/locale. Use wchar_t. wint_t and + * rune_t must be the same type. Also, wint_t should be able to hold all + * members of the largest character set plus one extra value (WEOF), and + * must be at least 16 bits. + */ +typedef int __ct_rune_t; /* arg type for ctype funcs */ +typedef __ct_rune_t __rune_t; /* rune_t (see above) */ +typedef __ct_rune_t __wint_t; /* wint_t (see above) */ + +/* Clang already provides these types as built-ins, but only in C++ mode. */ +#if !defined(__clang__) || !defined(__cplusplus) +typedef __uint_least16_t __char16_t; +typedef __uint_least32_t __char32_t; +#endif +/* In C++11, char16_t and char32_t are built-in types. */ +#if defined(__cplusplus) && __cplusplus >= 201103L +#define _CHAR16_T_DECLARED +#define _CHAR32_T_DECLARED +#endif + +typedef __uint32_t __dev_t; /* device number */ + +typedef __uint32_t __fixpt_t; /* fixed point number */ + +/* + * mbstate_t is an opaque object to keep conversion state during multibyte + * stream conversions. + */ +typedef union { + char __mbstate8[128]; + __int64_t _mbstateL; /* for alignment */ +} __mbstate_t; + +#endif + +#endif /* !_SYS__TYPES_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc.h new file mode 100644 index 000000000000..a8fddf32d414 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc.h @@ -0,0 +1,188 @@ +/*- + * Copyright (c) 2007-2008 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart and + * James Healy, made possible in part by a grant from the Cisco University + * Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This software was first released in 2007 by James Healy and Lawrence Stewart + * whilst working on the NewTCP research project at Swinburne University of + * Technology's Centre for Advanced Internet Architectures, Melbourne, + * Australia, which was made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * More details are available at: + * http://caia.swin.edu.au/urp/newtcp/ + */ + +#ifndef _NETINET_CC_H_ +#define _NETINET_CC_H_ + +/* XXX: TCP_CA_NAME_MAX define lives in tcp.h for compat reasons. */ +//#include <netinet/tcp.h> +#include "tcp.h" + +#if 0 +/* Global CC vars. */ +extern STAILQ_HEAD(cc_head, cc_algo) cc_list; +extern const int tcprexmtthresh; +#endif +extern const struct cc_algo newreno_cc_algo; + +#if 0 +/* Per-netstack bits. */ +VNET_DECLARE(struct cc_algo *, default_cc_ptr); +#define V_default_cc_ptr VNET(default_cc_ptr) + +/* Define the new net.inet.tcp.cc sysctl tree. */ +SYSCTL_DECL(_net_inet_tcp_cc); +#endif + +#if 0 +// Defined in cc/cc_newreno.c +extern struct cc_algo* V_default_cc_ptr; +#endif + +#if 0 +/* CC housekeeping functions. */ +int cc_register_algo(struct cc_algo *add_cc); +int cc_deregister_algo(struct cc_algo *remove_cc); +#endif +/* + * Wrapper around transport structs that contain same-named congestion + * control variables. Allows algos to be shared amongst multiple CC-aware + * transports. + */ +struct cc_var { + void *cc_data; /* Per-connection private CC algorithm data. */ + int bytes_this_ack; /* # bytes acked by the current ACK. */ + tcp_seq curack; /* Most recent ACK. */ + uint32_t flags; /* Flags for cc_var (see below) */ +// int type; /* Indicates which ptr is valid in ccvc. */ + union ccv_container { + struct tcpcb *tcp; + struct sctp_nets *sctp; + } ccvc; +}; + +/* cc_var flags. */ +#define CCF_ABC_SENTAWND 0x0001 /* ABC counted cwnd worth of bytes? */ +#define CCF_CWND_LIMITED 0x0002 /* Are we currently cwnd limited? */ +#define CCF_DELACK 0x0004 /* Is this ack delayed? */ +#define CCF_ACKNOW 0x0008 /* Will this ack be sent now? */ +#define CCF_IPHDR_CE 0x0010 /* Does this packet set CE bit? */ +#define CCF_TCPHDR_CWR 0x0020 /* Does this packet set CWR bit? */ + +/* ACK types passed to the ack_received() hook. */ +#define CC_ACK 0x0001 /* Regular in sequence ACK. */ +#define CC_DUPACK 0x0002 /* Duplicate ACK. */ +#define CC_PARTIALACK 0x0004 /* Not yet. */ +#define CC_SACK 0x0008 /* Not yet. */ + +/* + * Congestion signal types passed to the cong_signal() hook.
The highest order 8 + * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own + * congestion signal types. + */ +#define CC_ECN 0x00000001 /* ECN marked packet received. */ +#define CC_RTO 0x00000002 /* RTO fired. */ +#define CC_RTO_ERR 0x00000004 /* RTO fired in error. */ +#define CC_NDUPACK 0x00000008 /* Threshold of dupack's reached. */ + +#define CC_SIGPRIVMASK 0xFF000000 /* Mask to check if sig is private. */ + +/* + * Structure to hold data and function pointers that together represent a + * congestion control algorithm. + */ +struct cc_algo { + char name[TCP_CA_NAME_MAX]; + + /* Init global module state on kldload. */ + int (*mod_init)(void); + + /* Cleanup global module state on kldunload. */ + int (*mod_destroy)(void); + + /* Init CC state for a new control block. */ + int (*cb_init)(struct cc_var *ccv); + + /* Cleanup CC state for a terminating control block. */ + void (*cb_destroy)(struct cc_var *ccv); + + /* Init variables for a newly established connection. */ + void (*conn_init)(struct cc_var *ccv); + + /* Called on receipt of an ack. */ + void (*ack_received)(struct cc_var *ccv, uint16_t type); + + /* Called on detection of a congestion signal. */ + void (*cong_signal)(struct cc_var *ccv, uint32_t type); + + /* Called after exiting congestion recovery. */ + void (*post_recovery)(struct cc_var *ccv); + + /* Called when data transfer resumes after an idle period. */ + void (*after_idle)(struct cc_var *ccv); + + /* Called for an additional ECN processing apart from RFC3168. */ + void (*ecnpkt_handler)(struct cc_var *ccv); + +// STAILQ_ENTRY (cc_algo) entries; +}; + +/* Macro to obtain the CC algo's struct ptr. */ +//#define CC_ALGO(tp) ((tp)->cc_algo) +#define CC_ALGO(tp) (&newreno_cc_algo) // This allows the #defines in cc_newreno.c to work as intended + +/* Macro to obtain the CC algo's data ptr. */ +#define CC_DATA(tp) ((tp)->ccv->cc_data) + +/* Macro to obtain the system default CC algo's struct ptr. */ +//#define CC_DEFAULT() V_default_cc_ptr + +#if 0 +extern struct rwlock cc_list_lock; +#define CC_LIST_LOCK_INIT() rw_init(&cc_list_lock, "cc_list") +#define CC_LIST_LOCK_DESTROY() rw_destroy(&cc_list_lock) +#define CC_LIST_RLOCK() rw_rlock(&cc_list_lock) +#define CC_LIST_RUNLOCK() rw_runlock(&cc_list_lock) +#define CC_LIST_WLOCK() rw_wlock(&cc_list_lock) +#define CC_LIST_WUNLOCK() rw_wunlock(&cc_list_lock) +#define CC_LIST_LOCK_ASSERT() rw_assert(&cc_list_lock, RA_LOCKED) +#endif + +#endif /* _NETINET_CC_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/Makefile b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/Makefile new file mode 100644 index 000000000000..df785177ed75 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/Makefile @@ -0,0 +1,3 @@ +MODULE = gnrc_tcp_freebsd_internal_cc + +include $(RIOTBASE)/Makefile.base diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/cc_module.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/cc_module.h new file mode 100644 index 000000000000..dffcf41e9b18 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/cc_module.h @@ -0,0 +1,72 @@ +/*- + * Copyright (c) 2009-2010 Lawrence Stewart + * All rights reserved. + * + * This software was developed by Lawrence Stewart while studying at the Centre + * for Advanced Internet Architectures, Swinburne University of Technology, made + * possible in part by a grant from the Cisco University Research Program Fund + * at Community Foundation Silicon Valley. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This software was first released in 2009 by Lawrence Stewart as part of the + * NewTCP research project at Swinburne University of Technology's Centre for + * Advanced Internet Architectures, Melbourne, Australia, which was made + * possible in part by a grant from the Cisco University Research Program Fund + * at Community Foundation Silicon Valley. More details are available at: + * http://caia.swin.edu.au/urp/newtcp/ + */ + +#ifndef _NETINET_CC_MODULE_H_ +#define _NETINET_CC_MODULE_H_ + +/* + * Allows a CC algorithm to manipulate a commonly named CC variable regardless + * of the transport protocol and associated C struct. + * XXXLAS: Out of action until the work to support SCTP is done. + * +#define CCV(ccv, what) \ +(*( \ + (ccv)->type == IPPROTO_TCP ? &(ccv)->ccvc.tcp->what : \ + &(ccv)->ccvc.sctp->what \ +)) + */ +#define CCV(ccv, what) (ccv)->ccvc.tcp->what + +#if 0 +#define DECLARE_CC_MODULE(ccname, ccalgo) \ + static moduledata_t cc_##ccname = { \ + .name = #ccname, \ + .evhand = cc_modevent, \ + .priv = ccalgo \ + }; \ + DECLARE_MODULE(ccname, cc_##ccname, \ + SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY) + +int cc_modevent(module_t mod, int type, void *data); +#endif + +#endif /* _NETINET_CC_MODULE_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/cc_newreno.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/cc_newreno.c new file mode 100644 index 000000000000..8f0461c3c231 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/cc/cc_newreno.c @@ -0,0 +1,259 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. + * Copyright (c) 2007-2008,2010 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart, James + * Healy and David Hayes, made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. 
+ * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This software was first released in 2007 by James Healy and Lawrence Stewart + * whilst working on the NewTCP research project at Swinburne University of + * Technology's Centre for Advanced Internet Architectures, Melbourne, + * Australia, which was made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * More details are available at: + * http://caia.swin.edu.au/urp/newtcp/ + */ + +#if 0 +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <net/vnet.h> + +#include <netinet/cc.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_var.h> + +#include <netinet/cc/cc_module.h> +#endif + +#include "../cc.h" +#include "../tcp.h" +#include "../tcp_seq.h" +#include "../tcp_var.h" +#include "cc_module.h" + +#include "../tcp_const.h" + +static int min(int a, int b) { return (a < b) ? a : b; } + +static void newreno_ack_received(struct cc_var *ccv, uint16_t type); +static void newreno_after_idle(struct cc_var *ccv); +static void newreno_cong_signal(struct cc_var *ccv, uint32_t type); +static void newreno_post_recovery(struct cc_var *ccv); + +const struct cc_algo newreno_cc_algo = { + .name = "newreno", + .ack_received = newreno_ack_received, + .after_idle = newreno_after_idle, + .cong_signal = newreno_cong_signal, + .post_recovery = newreno_post_recovery, +}; + +#if 0 +// Normally, this is done in cc.c, but this is the only line from that file that I need. +struct cc_algo* V_default_cc_ptr = &newreno_cc_algo; +#endif + +// Constant that is referenced (may want to change this later) +enum { + V_tcp_do_rfc3465 = 1 +}; + +static void +newreno_ack_received(struct cc_var *ccv, uint16_t type) +{ + if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && + (ccv->flags & CCF_CWND_LIMITED)) { + u_int cw = CCV(ccv, snd_cwnd); + u_int incr = CCV(ccv, t_maxseg); + + /* + * Regular in-order ACK, open the congestion window.
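+ * (For instance, in congestion avoidance without ABC the increment + * computed below is maxseg * maxseg / cwnd: with, say, maxseg = 536 and + * cwnd = 4288, that is 536 * 536 / 4288 = 67 bytes per ACK, i.e. roughly + * one maxseg of growth per congestion window's worth of ACKs.)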
+ * Method depends on which congestion control state we're + * in (slow start or cong avoid) and if ABC (RFC 3465) is + * enabled. + * + * slow start: cwnd <= ssthresh + * cong avoid: cwnd > ssthresh + * + * slow start and ABC (RFC 3465): + * Grow cwnd exponentially by the amount of data + * ACKed capping the max increment per ACK to + * (abc_l_var * maxseg) bytes. + * + * slow start without ABC (RFC 5681): + * Grow cwnd exponentially by maxseg per ACK. + * + * cong avoid and ABC (RFC 3465): + * Grow cwnd linearly by maxseg per RTT for each + * cwnd worth of ACKed data. + * + * cong avoid without ABC (RFC 5681): + * Grow cwnd linearly by approximately maxseg per RTT using + * maxseg^2 / cwnd per ACK as the increment. + * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to + * avoid capping cwnd. + */ + if (cw > CCV(ccv, snd_ssthresh)) { + if (V_tcp_do_rfc3465) { + if (ccv->flags & CCF_ABC_SENTAWND) + ccv->flags &= ~CCF_ABC_SENTAWND; + else + incr = 0; + } else + incr = max((incr * incr / cw), 1); + } else if (V_tcp_do_rfc3465) { + /* + * In slow-start with ABC enabled and no RTO in sight? + * (Must not use abc_l_var > 1 if slow starting after + * an RTO. On RTO, snd_nxt = snd_una, so the + * snd_nxt == snd_max check is sufficient to + * handle this). + * + * XXXLAS: Find a way to signal SS after RTO that + * doesn't rely on tcpcb vars. + */ + if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) + incr = min(ccv->bytes_this_ack, + V_tcp_abc_l_var * CCV(ccv, t_maxseg)); + else + incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg)); + } + /* ABC is on by default, so incr equals 0 frequently. */ + if (incr > 0) + CCV(ccv, snd_cwnd) = min(cw + incr, + TCP_MAXWIN << CCV(ccv, snd_scale)); + } +} + +static void +newreno_after_idle(struct cc_var *ccv) +{ + int rw; + + /* + * If we've been idle for more than one retransmit timeout the old + * congestion window is no longer current and we have to reduce it to + * the restart window before we can transmit again. + * + * The restart window is the initial window or the last CWND, whichever + * is smaller. + * + * This is done to prevent us from flooding the path with a full CWND at + * wirespeed, overloading router and switch buffers along the way. + * + * See RFC5681 Section 4.1. "Restarting Idle Connections". + */ + if (V_tcp_do_rfc3390) + rw = min(4 * CCV(ccv, t_maxseg), + max(2 * CCV(ccv, t_maxseg), 4380)); + else + rw = CCV(ccv, t_maxseg) * 2; + + CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd)); +} + +/* + * Perform any necessary tasks before we enter congestion recovery. + */ +static void +newreno_cong_signal(struct cc_var *ccv, uint32_t type) +{ + u_int win; + + /* Catch algos which mistakenly leak private signal types. */ + KASSERT((type & CC_SIGPRIVMASK) == 0, + ("%s: congestion signal type 0x%08x is private\n", __func__, (unsigned int) type)); + + win = max(CCV(ccv, snd_cwnd) / 2 / CCV(ccv, t_maxseg), 2) * + CCV(ccv, t_maxseg); + + switch (type) { + case CC_NDUPACK: + if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { + if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) + CCV(ccv, snd_ssthresh) = win; + ENTER_RECOVERY(CCV(ccv, t_flags)); + } + break; + case CC_ECN: + if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { + CCV(ccv, snd_ssthresh) = win; + CCV(ccv, snd_cwnd) = win; + ENTER_CONGRECOVERY(CCV(ccv, t_flags)); + } + break; + } +} + +/* + * Perform any necessary tasks before we exit congestion recovery. 
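+ * (For NewReno this means deflating the window: fast recovery leaves + * snd_cwnd inflated by the duplicate ACKs, so the code below brings it + * back to roughly snd_ssthresh, bounded by the data actually outstanding + * so that the sender does not release a sudden burst.)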
+ */ +static void +newreno_post_recovery(struct cc_var *ccv) +{ + if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { + /* + * Fast recovery will conclude after returning from this + * function. Window inflation should have left us with + * approximately snd_ssthresh outstanding data. But in case we + * would be inclined to send a burst, better to do it via the + * slow start mechanism. + * + * XXXLAS: Find a way to do this without needing curack + */ + if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh), + CCV(ccv, snd_max))) + CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) - + ccv->curack + CCV(ccv, t_maxseg); + else + CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); + } +} + + +//DECLARE_CC_MODULE(newreno, &newreno_cc_algo); diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/icmp_var.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/icmp_var.h new file mode 100644 index 000000000000..b16c32a4e29f --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/icmp_var.h @@ -0,0 +1,102 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)icmp_var.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NETINET_ICMP_VAR_H_ +#define _NETINET_ICMP_VAR_H_ + +#if 0 +/* + * Variables related to this implementation + * of the internet control message protocol. 
+ */ +struct icmpstat { +/* statistics related to icmp packets generated */ + u_long icps_error; /* # of calls to icmp_error */ + u_long icps_oldshort; /* no error 'cuz old ip too short */ + u_long icps_oldicmp; /* no error 'cuz old was icmp */ + u_long icps_outhist[ICMP_MAXTYPE + 1]; +/* statistics related to input messages processed */ + u_long icps_badcode; /* icmp_code out of range */ + u_long icps_tooshort; /* packet < ICMP_MINLEN */ + u_long icps_checksum; /* bad checksum */ + u_long icps_badlen; /* calculated bound mismatch */ + u_long icps_reflect; /* number of responses */ + u_long icps_inhist[ICMP_MAXTYPE + 1]; + u_long icps_bmcastecho; /* b/mcast echo requests dropped */ + u_long icps_bmcasttstamp; /* b/mcast tstamp requests dropped */ + u_long icps_badaddr; /* bad return address */ + u_long icps_noroute; /* no route back */ +}; + +#ifdef _KERNEL +#include + +VNET_PCPUSTAT_DECLARE(struct icmpstat, icmpstat); +/* + * In-kernel consumers can use these accessor macros directly to update + * stats. + */ +#define ICMPSTAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct icmpstat, icmpstat, name, (val)) +#define ICMPSTAT_INC(name) ICMPSTAT_ADD(name, 1) + +/* + * Kernel module consumers must use this accessor macro. + */ +void kmod_icmpstat_inc(int statnum); +#define KMOD_ICMPSTAT_INC(name) \ + kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(uint64_t)) +#endif +#endif +/* + * Identifiers for ICMP sysctl nodes + */ +#define ICMPCTL_MASKREPL 1 /* allow replies to netmask requests */ +#define ICMPCTL_STATS 2 /* statistics (read-only) */ +#define ICMPCTL_ICMPLIM 3 + +//#ifdef _KERNEL +//SYSCTL_DECL(_net_inet_icmp); + +//extern int badport_bandlim(int); +#define BANDLIM_UNLIMITED -1 +#define BANDLIM_ICMP_UNREACH 0 +#define BANDLIM_ICMP_ECHO 1 +#define BANDLIM_ICMP_TSTAMP 2 +#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ +#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ +#define BANDLIM_ICMP6_UNREACH 5 +#define BANDLIM_SCTP_OOTB 6 +#define BANDLIM_MAX 6 +//#endif + +#endif diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/ip.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/ip.h new file mode 100644 index 000000000000..63ea35c760d5 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/ip.h @@ -0,0 +1,232 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip.h 8.2 (Berkeley) 6/1/94 + * $FreeBSD$ + */ + +#ifndef _NETINET_IP_H_ +#define _NETINET_IP_H_ + + +#if 0 +#include <sys/cdefs.h> + +/* + * Definitions for internet protocol version 4. + * + * Per RFC 791, September 1981. + */ +#define IPVERSION 4 + +/* + * Structure of an internet header, naked of options. + */ +struct ip { +//#if BYTE_ORDER == LITTLE_ENDIAN + u_char ip_hl:4, /* header length */ + ip_v:4; /* version */ +//#endif +#if 0 +#if BYTE_ORDER == BIG_ENDIAN + u_char ip_v:4, /* version */ + ip_hl:4; /* header length */ +#endif +#endif + u_char ip_tos; /* type of service */ + u_short ip_len; /* total length */ + u_short ip_id; /* identification */ + u_short ip_off; /* fragment offset field */ +#define IP_RF 0x8000 /* reserved fragment flag */ +#define IP_DF 0x4000 /* don't fragment flag */ +#define IP_MF 0x2000 /* more fragments flag */ +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + u_char ip_ttl; /* time to live */ + u_char ip_p; /* protocol */ + u_short ip_sum; /* checksum */ + struct in_addr ip_src,ip_dst; /* source and dest address */ +} __packed __aligned(2); +#endif +#define IP_MAXPACKET 65535 /* maximum packet size */ + +/* + * Definitions for IP type of service (ip_tos). + */ +#define IPTOS_LOWDELAY 0x10 +#define IPTOS_THROUGHPUT 0x08 +#define IPTOS_RELIABILITY 0x04 +#define IPTOS_MINCOST 0x02 + +/* + * Definitions for IP precedence (also in ip_tos) (deprecated). + */ +#define IPTOS_PREC_NETCONTROL IPTOS_DSCP_CS7 +#define IPTOS_PREC_INTERNETCONTROL IPTOS_DSCP_CS6 +#define IPTOS_PREC_CRITIC_ECP IPTOS_DSCP_CS5 +#define IPTOS_PREC_FLASHOVERRIDE IPTOS_DSCP_CS4 +#define IPTOS_PREC_FLASH IPTOS_DSCP_CS3 +#define IPTOS_PREC_IMMEDIATE IPTOS_DSCP_CS2 +#define IPTOS_PREC_PRIORITY IPTOS_DSCP_CS1 +#define IPTOS_PREC_ROUTINE IPTOS_DSCP_CS0 + +/* + * Definitions for DiffServ Codepoints as per RFC2474 and RFC5865. + */ +#define IPTOS_DSCP_CS0 0x00 +#define IPTOS_DSCP_CS1 0x20 +#define IPTOS_DSCP_AF11 0x28 +#define IPTOS_DSCP_AF12 0x30 +#define IPTOS_DSCP_AF13 0x38 +#define IPTOS_DSCP_CS2 0x40 +#define IPTOS_DSCP_AF21 0x48 +#define IPTOS_DSCP_AF22 0x50 +#define IPTOS_DSCP_AF23 0x58 +#define IPTOS_DSCP_CS3 0x60 +#define IPTOS_DSCP_AF31 0x68 +#define IPTOS_DSCP_AF32 0x70 +#define IPTOS_DSCP_AF33 0x78 +#define IPTOS_DSCP_CS4 0x80 +#define IPTOS_DSCP_AF41 0x88 +#define IPTOS_DSCP_AF42 0x90 +#define IPTOS_DSCP_AF43 0x98 +#define IPTOS_DSCP_CS5 0xa0 +#define IPTOS_DSCP_VA 0xb0 +#define IPTOS_DSCP_EF 0xb8 +#define IPTOS_DSCP_CS6 0xc0 +#define IPTOS_DSCP_CS7 0xe0 + +/* + * ECN (Explicit Congestion Notification) codepoints in RFC3168 mapped to the + * lower 2 bits of the TOS field. + */ +#define IPTOS_ECN_NOTECT 0x00 /* not-ECT */ +#define IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */ +#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */ +#define IPTOS_ECN_CE 0x03 /* congestion experienced */ +#define IPTOS_ECN_MASK 0x03 /* ECN field mask */ + +/* + * Definitions for options.
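+ * + * The option type octet encodes a copied flag, a class, and a number, + * which the macros below extract; for example, IPOPT_LSRR is 131 = 0x83: + * copied bit (0x80) set, class IPOPT_CONTROL, option number 3.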
+ */ +#define IPOPT_COPIED(o) ((o)&0x80) +#define IPOPT_CLASS(o) ((o)&0x60) +#define IPOPT_NUMBER(o) ((o)&0x1f) + +#define IPOPT_CONTROL 0x00 +#define IPOPT_RESERVED1 0x20 +#define IPOPT_DEBMEAS 0x40 +#define IPOPT_RESERVED2 0x60 + +#define IPOPT_EOL 0 /* end of option list */ +#define IPOPT_NOP 1 /* no operation */ + +#define IPOPT_RR 7 /* record packet route */ +#define IPOPT_TS 68 /* timestamp */ +#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ +#define IPOPT_LSRR 131 /* loose source route */ +#define IPOPT_ESO 133 /* extended security */ +#define IPOPT_CIPSO 134 /* commercial security */ +#define IPOPT_SATID 136 /* satnet id */ +#define IPOPT_SSRR 137 /* strict source route */ +#define IPOPT_RA 148 /* router alert */ + +/* + * Offsets to fields in options other than EOL and NOP. + */ +#define IPOPT_OPTVAL 0 /* option ID */ +#define IPOPT_OLEN 1 /* option length */ +#define IPOPT_OFFSET 2 /* offset within option */ +#define IPOPT_MINOFF 4 /* min value of above */ + +#if 0 +/* + * Time stamp option structure. + */ +struct ip_timestamp { + u_char ipt_code; /* IPOPT_TS */ + u_char ipt_len; /* size of structure (variable) */ + u_char ipt_ptr; /* index of current entry */ +#if BYTE_ORDER == LITTLE_ENDIAN + u_char ipt_flg:4, /* flags, see below */ + ipt_oflw:4; /* overflow counter */ +#endif +#if BYTE_ORDER == BIG_ENDIAN + u_char ipt_oflw:4, /* overflow counter */ + ipt_flg:4; /* flags, see below */ +#endif + union ipt_timestamp { + uint32_t ipt_time[1]; /* network format */ + struct ipt_ta { + struct in_addr ipt_addr; + uint32_t ipt_time; /* network format */ + } ipt_ta[1]; + } ipt_timestamp; +}; + +#endif + +/* Flag bits for ipt_flg. */ +#define IPOPT_TS_TSONLY 0 /* timestamps only */ +#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ +#define IPOPT_TS_PRESPEC 3 /* specified modules only */ + +/* Bits for security (not byte swapped). */ +#define IPOPT_SECUR_UNCLASS 0x0000 +#define IPOPT_SECUR_CONFID 0xf135 +#define IPOPT_SECUR_EFTO 0x789a +#define IPOPT_SECUR_MMMM 0xbc4d +#define IPOPT_SECUR_RESTR 0xaf13 +#define IPOPT_SECUR_SECRET 0xd788 +#define IPOPT_SECUR_TOPSECRET 0x6bc5 + +/* + * Internet implementation parameters. + */ +#define MAXTTL 255 /* maximum time to live (seconds) */ +#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ +#define IPFRAGTTL 60 /* time to live for frags, slowhz */ +#define IPTTLDEC 1 /* subtracted when forwarding */ +#define IP_MSS 576 /* default maximum segment size */ + +#if 0 +/* + * This is the real IPv4 pseudo header, used for computing the TCP and UDP + * checksums. For the Internet checksum, struct ipovly can be used instead. + * For stronger checksums, the real thing must be used. + */ +struct ippseudo { + struct in_addr ippseudo_src; /* source internet address */ + struct in_addr ippseudo_dst; /* destination internet address */ + u_char ippseudo_pad; /* pad, must be zero */ + u_char ippseudo_p; /* protocol */ + u_short ippseudo_len; /* protocol length */ +}; +#endif + +#endif diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/ip6.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/ip6.h new file mode 100644 index 000000000000..6feb80a27411 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/ip6.h @@ -0,0 +1,401 @@ +/* $FreeBSD$ */ +/* $KAME: ip6.h,v 1.18 2001/03/29 05:34:30 itojun Exp $ */ + +/*- + * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip.h 8.1 (Berkeley) 6/10/93 + */ + +#ifndef _NETINET_IP6_H_ +#define _NETINET_IP6_H_ + +#include "types.h" +#include "netinet/in.h" + +/* + * Definition for internet protocol version 6. 
+ * RFC 2460 + */ +struct ip6_hdr { + union { + struct ip6_hdrctl { + u_int32_t ip6_un1_flow; /* 20 bits of flow-ID */ + u_int16_t ip6_un1_plen; /* payload length */ + u_int8_t ip6_un1_nxt; /* next header */ + u_int8_t ip6_un1_hlim; /* hop limit */ + } ip6_un1; + u_int8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ + } ip6_ctlun; + struct in6_addr ip6_src; /* source address */ + struct in6_addr ip6_dst; /* destination address */ +} __packed; + +#define ip6_vfc ip6_ctlun.ip6_un2_vfc +#define ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow +#define ip6_plen ip6_ctlun.ip6_un1.ip6_un1_plen +#define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt +#define ip6_hlim ip6_ctlun.ip6_un1.ip6_un1_hlim +#define ip6_hops ip6_ctlun.ip6_un1.ip6_un1_hlim + +#define IPV6_VERSION 0x60 +#define IPV6_VERSION_MASK 0xf0 + +#define BYTE_ORDER LITTLE_ENDIAN // This is true on Storm + +#if BYTE_ORDER == BIG_ENDIAN +#define IPV6_FLOWINFO_MASK 0x0fffffff /* flow info (28 bits) */ +#define IPV6_FLOWLABEL_MASK 0x000fffff /* flow label (20 bits) */ +#else +#if BYTE_ORDER == LITTLE_ENDIAN +#define IPV6_FLOWINFO_MASK 0xffffff0f /* flow info (28 bits) */ +#define IPV6_FLOWLABEL_MASK 0xffff0f00 /* flow label (20 bits) */ +#endif /* LITTLE_ENDIAN */ +#endif +#if 1 +/* ECN bits proposed by Sally Floyd */ +#define IP6TOS_CE 0x01 /* congestion experienced */ +#define IP6TOS_ECT 0x02 /* ECN-capable transport */ +#endif + +// Copied from in6.h +#define IN6_ARE_ADDR_EQUAL(a, b) \ + (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) + +// Use the RIOT-defined macros where possible +#if 0 +/* Multicast */ +#define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff) + +/* + * Unspecified + */ +#define IN6_IS_ADDR_UNSPECIFIED(a) \ + ((a)->__u6_addr.__u6_addr32[0] == 0 && \ + (a)->__u6_addr.__u6_addr32[1] == 0 && \ + (a)->__u6_addr.__u6_addr32[2] == 0 && \ + (a)->__u6_addr.__u6_addr32[3] == 0) + +/* + * Loopback + */ +#define IN6_IS_ADDR_LOOPBACK(a) \ + ((a)->__u6_addr.__u6_addr32[0] == 0 && \ + (a)->__u6_addr.__u6_addr32[1] == 0 && \ + (a)->__u6_addr.__u6_addr32[2] == 0 && \ + (a)->__u6_addr.__u6_addr32[3] == ntohl(1)) + +/* + * Unicast Scope + * Note that we must check topmost 10 bits only, not 16 bits (see RFC2373). + */ +#define IN6_IS_ADDR_LINKLOCAL(a) \ + (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80)) +#define IN6_IS_ADDR_SITELOCAL(a) \ + (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0)) + +/* + * Mapped + */ + +#define IN6_IS_ADDR_V4MAPPED(a) \ + ((a)->__u6_addr.__u6_addr32[0] == 0 && \ + (a)->__u6_addr.__u6_addr32[1] == 0 && \ + (a)->__u6_addr.__u6_addr32[2] == ntohl(0x0000ffff)) +#endif + + +/* For compatibility between BSD's in6_addr struct and TinyOS's in6_addr struct. */ +#define __u6_addr in6_u +#define __u6_addr32 u6_addr32 + +/* + * Extension Headers + */ +#if 0 // Already defined in TinyOS library +struct ip6_ext { + u_int8_t ip6e_nxt; + u_int8_t ip6e_len; +} __packed; + +/* Hop-by-Hop options header */ +/* XXX should we pad it to force alignment on an 8-byte boundary? */ +struct ip6_hbh { + u_int8_t ip6h_nxt; /* next header */ + u_int8_t ip6h_len; /* length in units of 8 octets */ + /* followed by options */ +} __packed; + +/* Destination options header */ +/* XXX should we pad it to force alignment on an 8-byte boundary? 
*/ +struct ip6_dest { + u_int8_t ip6d_nxt; /* next header */ + u_int8_t ip6d_len; /* length in units of 8 octets */ + /* followed by options */ +} __packed; +#endif +/* Option types and related macros */ +#define IP6OPT_PAD1 0x00 /* 00 0 00000 */ +#define IP6OPT_PADN 0x01 /* 00 0 00001 */ +#define IP6OPT_JUMBO 0xC2 /* 11 0 00010 = 194 */ +#define IP6OPT_NSAP_ADDR 0xC3 /* 11 0 00011 */ +#define IP6OPT_TUNNEL_LIMIT 0x04 /* 00 0 00100 */ +#ifndef _KERNEL +#define IP6OPT_RTALERT 0x05 /* 00 0 00101 (KAME definition) */ +#endif +#define IP6OPT_ROUTER_ALERT 0x05 /* 00 0 00101 (RFC3542, recommended) */ + +#define IP6OPT_RTALERT_LEN 4 +#define IP6OPT_RTALERT_MLD 0 /* Datagram contains an MLD message */ +#define IP6OPT_RTALERT_RSVP 1 /* Datagram contains an RSVP message */ +#define IP6OPT_RTALERT_ACTNET 2 /* contains an Active Networks msg */ +#define IP6OPT_MINLEN 2 + +#define IP6OPT_EID 0x8a /* 10 0 01010 */ + +#define IP6OPT_TYPE(o) ((o) & 0xC0) +#define IP6OPT_TYPE_SKIP 0x00 +#define IP6OPT_TYPE_DISCARD 0x40 +#define IP6OPT_TYPE_FORCEICMP 0x80 +#define IP6OPT_TYPE_ICMP 0xC0 + +#define IP6OPT_MUTABLE 0x20 + +/* IPv6 options: common part */ +struct ip6_opt { + u_int8_t ip6o_type; + u_int8_t ip6o_len; +} __packed; + +/* Jumbo Payload Option */ +struct ip6_opt_jumbo { + u_int8_t ip6oj_type; + u_int8_t ip6oj_len; + u_int8_t ip6oj_jumbo_len[4]; +} __packed; +#define IP6OPT_JUMBO_LEN 6 + +/* NSAP Address Option */ +struct ip6_opt_nsap { + u_int8_t ip6on_type; + u_int8_t ip6on_len; + u_int8_t ip6on_src_nsap_len; + u_int8_t ip6on_dst_nsap_len; + /* followed by source NSAP */ + /* followed by destination NSAP */ +} __packed; + +/* Tunnel Limit Option */ +struct ip6_opt_tunnel { + u_int8_t ip6ot_type; + u_int8_t ip6ot_len; + u_int8_t ip6ot_encap_limit; +} __packed; + +/* Router Alert Option */ +struct ip6_opt_router { + u_int8_t ip6or_type; + u_int8_t ip6or_len; + u_int8_t ip6or_value[2]; +} __packed; +/* Router alert values (in network byte order) */ +#if BYTE_ORDER == BIG_ENDIAN +#define IP6_ALERT_MLD 0x0000 +#define IP6_ALERT_RSVP 0x0001 +#define IP6_ALERT_AN 0x0002 +#else +#if BYTE_ORDER == LITTLE_ENDIAN +#define IP6_ALERT_MLD 0x0000 +#define IP6_ALERT_RSVP 0x0100 +#define IP6_ALERT_AN 0x0200 +#endif /* LITTLE_ENDIAN */ +#endif + +/* Routing header */ +struct ip6_rthdr { + u_int8_t ip6r_nxt; /* next header */ + u_int8_t ip6r_len; /* length in units of 8 octets */ + u_int8_t ip6r_type; /* routing type */ + u_int8_t ip6r_segleft; /* segments left */ + /* followed by routing type specific data */ +} __packed; + +/* Type 0 Routing header, deprecated by RFC 5095. 
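+ * Per RFC 5095, a node receiving this header with a non-zero segments-left + * field must discard the packet and send an ICMP Parameter Problem.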
*/ +struct ip6_rthdr0 { + u_int8_t ip6r0_nxt; /* next header */ + u_int8_t ip6r0_len; /* length in units of 8 octets */ + u_int8_t ip6r0_type; /* always zero */ + u_int8_t ip6r0_segleft; /* segments left */ + u_int32_t ip6r0_reserved; /* reserved field */ + /* followed by up to 127 struct in6_addr */ +} __packed; + +/* Fragment header */ +struct ip6_frag { + u_int8_t ip6f_nxt; /* next header */ + u_int8_t ip6f_reserved; /* reserved field */ + u_int16_t ip6f_offlg; /* offset, reserved, and flag */ + u_int32_t ip6f_ident; /* identification */ +} __packed; + +#if BYTE_ORDER == BIG_ENDIAN +#define IP6F_OFF_MASK 0xfff8 /* mask out offset from _offlg */ +#define IP6F_RESERVED_MASK 0x0006 /* reserved bits in ip6f_offlg */ +#define IP6F_MORE_FRAG 0x0001 /* more-fragments flag */ +#else /* BYTE_ORDER == LITTLE_ENDIAN */ +#define IP6F_OFF_MASK 0xf8ff /* mask out offset from _offlg */ +#define IP6F_RESERVED_MASK 0x0600 /* reserved bits in ip6f_offlg */ +#define IP6F_MORE_FRAG 0x0100 /* more-fragments flag */ +#endif /* BYTE_ORDER == LITTLE_ENDIAN */ + +/* + * Internet implementation parameters. + */ +#define IPV6_MAXHLIM 255 /* maximum hoplimit */ +#define IPV6_DEFHLIM 64 /* default hlim */ +#define IPV6_FRAGTTL 120 /* ttl for fragment packets, in slowtimo tick */ +#define IPV6_HLIMDEC 1 /* subtracted when forwarding */ + +#define IPV6_MMTU 1280 /* minimal MTU and reassembly. 1024 + 256 */ +#define IPV6_MAXPACKET 65535 /* ip6 max packet size without Jumbo payload*/ +#define IPV6_MAXOPTHDR 2048 /* max option header size, 256 64-bit words */ + +#ifdef _KERNEL +/* + * IP6_EXTHDR_CHECK ensures that the region between the IP6 header and the + * target header (including IPv6 itself, extension headers and + * TCP/UDP/ICMP6 headers) is contiguous. KAME requires drivers + * to store incoming data into one internal mbuf or one or more external + * mbufs (never into two or more internal mbufs). Thus, the third case is + * supposed never to be matched but is prepared just in case. + */ + +#define IP6_EXTHDR_CHECK(m, off, hlen, ret) \ +do { \ + if ((m)->m_next != NULL) { \ + if (((m)->m_flags & M_LOOP) && \ + ((m)->m_len < (off) + (hlen)) && \ + (((m) = m_pullup((m), (off) + (hlen))) == NULL)) { \ + IP6STAT_INC(ip6s_exthdrtoolong); \ + return ret; \ + } else { \ + if ((m)->m_len < (off) + (hlen)) { \ + IP6STAT_INC(ip6s_exthdrtoolong); \ + m_freem(m); \ + return ret; \ + } \ + } \ + } else { \ + if ((m)->m_len < (off) + (hlen)) { \ + IP6STAT_INC(ip6s_tooshort); \ + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); \ + m_freem(m); \ + return ret; \ + } \ + } \ +} while (/*CONSTCOND*/ 0) + +/* + * IP6_EXTHDR_GET ensures that the intermediate protocol header (from "off" to + * "len") is located in a single mbuf, in a contiguous memory region. + * The pointer to the region will be returned in the pointer variable "val", + * with type "typ". + * IP6_EXTHDR_GET0 does the same, except that it aligns the structure at the + * very top of the mbuf. GET0 is more likely to make a memory copy than GET.
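+ * + * A typical call sequence (sketch, in the style of the FreeBSD input + * paths; the names m and off are assumed from the macro parameters): + * + *	struct tcphdr *th; + *	IP6_EXTHDR_GET(th, struct tcphdr *, m, off, sizeof(*th)); + *	if (th == NULL) + *		return;		(m_pulldown() failed; the chain was freed)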
+ * + * XXX we're now testing this, needs m_pulldown() + */ +#define IP6_EXTHDR_GET(val, typ, m, off, len) \ +do { \ + struct mbuf *t; \ + int tmp; \ + if ((m)->m_len >= (off) + (len)) \ + (val) = (typ)(mtod((m), caddr_t) + (off)); \ + else { \ + t = m_pulldown((m), (off), (len), &tmp); \ + if (t) { \ + if (t->m_len < tmp + (len)) \ + panic("m_pulldown malfunction"); \ + (val) = (typ)(mtod(t, caddr_t) + tmp); \ + } else { \ + (val) = (typ)NULL; \ + (m) = NULL; \ + } \ + } \ +} while (/*CONSTCOND*/ 0) + +#define IP6_EXTHDR_GET0(val, typ, m, off, len) \ +do { \ + struct mbuf *t; \ + if ((off) == 0) \ + (val) = (typ)mtod(m, caddr_t); \ + else { \ + t = m_pulldown((m), (off), (len), NULL); \ + if (t) { \ + if (t->m_len < (len)) \ + panic("m_pulldown malfunction"); \ + (val) = (typ)mtod(t, caddr_t); \ + } else { \ + (val) = (typ)NULL; \ + (m) = NULL; \ + } \ + } \ +} while (/*CONSTCOND*/ 0) + +#endif /*_KERNEL*/ + +#endif /* not _NETINET_IP6_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/sys/queue.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/sys/queue.h new file mode 100644 index 000000000000..3d23bc9755a7 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/sys/queue.h @@ -0,0 +1,753 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD$ + */ + +#ifndef _SYS_QUEUE_H_ +#define _SYS_QUEUE_H_ + +//#include + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. 
A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. + * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may be traversed in either direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. 
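+ * + * For example, a minimal SLIST usage sketch (hypothetical element type, + * following the pattern documented in queue(3)): + * + *	struct entry { + *		int value; + *		SLIST_ENTRY(entry) entries; + *	}; + *	SLIST_HEAD(entryhead, entry) head = SLIST_HEAD_INITIALIZER(head); + * + *	struct entry *np = malloc(sizeof(*np)); + *	SLIST_INSERT_HEAD(&head, np, entries); + *	SLIST_FOREACH(np, &head, entries) + *		np->value = 0;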
+ * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _CLASS_HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _CLASS_ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - + - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_FROM + + + + + * _FOREACH_SAFE + + + + + * _FOREACH_FROM_SAFE + + + + + * _FOREACH_REVERSE - - - + + * _FOREACH_REVERSE_FROM - - - + + * _FOREACH_REVERSE_SAFE - - - + + * _FOREACH_REVERSE_FROM_SAFE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_AFTER + - + - + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * _SWAP + + + + + * + */ +#ifdef QUEUE_MACRO_DEBUG +/* Store the last 2 places the queue element or head was altered */ +struct qm_trace { + unsigned long lastline; + unsigned long prevline; + const char *lastfile; + const char *prevfile; +}; + +#define TRACEBUF struct qm_trace trace; +#define TRACEBUF_INITIALIZER { __LINE__, 0, __FILE__, NULL } , +#define TRASHIT(x) do {(x) = (void *)-1;} while (0) +#define QMD_SAVELINK(name, link) void **name = (void *)&(link) + +#define QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = (elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define QMD_TRACE_ELEM(elem) +#define QMD_TRACE_HEAD(head) +#define QMD_SAVELINK(name, link) +#define TRACEBUF +#define TRACEBUF_INITIALIZER +#define TRASHIT(x) +#endif /* QUEUE_MACRO_DEBUG */ + +#ifdef __cplusplus +/* + * In C++ there can be structure lists and class lists: + */ +#define QUEUE_TYPEOF(type) type +#else +#define QUEUE_TYPEOF(type) struct type +#endif + +/* + * Singly-linked List declarations. + */ +#define SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define SLIST_CLASS_HEAD(name, type) \ +struct name { \ + class type *slh_first; /* first element */ \ +} + +#define SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +#define SLIST_CLASS_ENTRY(type) \ +struct { \ + class type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. + */ +#define SLIST_EMPTY(head) ((head)->slh_first == NULL) + +#define SLIST_FIRST(head) ((head)->slh_first) + +#define SLIST_FOREACH(var, head, field) \ + for ((var) = SLIST_FIRST((head)); \ + (var); \ + (var) = SLIST_NEXT((var), field)) + +#define SLIST_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? (var) : SLIST_FIRST((head))); \ + (var); \ + (var) = SLIST_NEXT((var), field)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST((head)); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define SLIST_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? 
(var) : SLIST_FIRST((head))); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ + for ((varp) = &SLIST_FIRST((head)); \ + ((var) = *(varp)) != NULL; \ + (varp) = &SLIST_NEXT((var), field)) + +#define SLIST_INIT(head) do { \ + SLIST_FIRST((head)) = NULL; \ +} while (0) + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ + SLIST_NEXT((slistelm), field) = (elm); \ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ + SLIST_FIRST((head)) = (elm); \ +} while (0) + +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_REMOVE(head, elm, type, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.sle_next); \ + if (SLIST_FIRST((head)) == (elm)) { \ + SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + QUEUE_TYPEOF(type) *curelm = SLIST_FIRST(head); \ + while (SLIST_NEXT(curelm, field) != (elm)) \ + curelm = SLIST_NEXT(curelm, field); \ + SLIST_REMOVE_AFTER(curelm, field); \ + } \ + TRASHIT(*oldnext); \ +} while (0) + +#define SLIST_REMOVE_AFTER(elm, field) do { \ + SLIST_NEXT(elm, field) = \ + SLIST_NEXT(SLIST_NEXT(elm, field), field); \ +} while (0) + +#define SLIST_REMOVE_HEAD(head, field) do { \ + SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ +} while (0) + +#define SLIST_SWAP(head1, head2, type) do { \ + QUEUE_TYPEOF(type) *swap_first = SLIST_FIRST(head1); \ + SLIST_FIRST(head1) = SLIST_FIRST(head2); \ + SLIST_FIRST(head2) = swap_first; \ +} while (0) + +/* + * Singly-linked Tail queue declarations. + */ +#define STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define STAILQ_CLASS_HEAD(name, type) \ +struct name { \ + class type *stqh_first; /* first element */ \ + class type **stqh_last; /* addr of last next element */ \ +} + +#define STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +#define STAILQ_CLASS_ENTRY(type) \ +struct { \ + class type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. + */ +#define STAILQ_CONCAT(head1, head2) do { \ + if (!STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_INIT((head2)); \ + } \ +} while (0) + +#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define STAILQ_FIRST(head) ((head)->stqh_first) + +#define STAILQ_FOREACH(var, head, field) \ + for((var) = STAILQ_FIRST((head)); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + +#define STAILQ_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? (var) : STAILQ_FIRST((head))); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + +#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = STAILQ_FIRST((head)); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? 
(var) : STAILQ_FIRST((head))); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_INIT(head) do { \ + STAILQ_FIRST((head)) = NULL; \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_NEXT((tqelm), field) = (elm); \ +} while (0) + +#define STAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_FIRST((head)) = (elm); \ +} while (0) + +#define STAILQ_INSERT_TAIL(head, elm, field) do { \ + STAILQ_NEXT((elm), field) = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_LAST(head, type, field) \ + (STAILQ_EMPTY((head)) ? NULL : \ + __containerof((head)->stqh_last, \ + QUEUE_TYPEOF(type), field.stqe_next)) + +#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define STAILQ_REMOVE(head, elm, type, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.stqe_next); \ + if (STAILQ_FIRST((head)) == (elm)) { \ + STAILQ_REMOVE_HEAD((head), field); \ + } \ + else { \ + QUEUE_TYPEOF(type) *curelm = STAILQ_FIRST(head); \ + while (STAILQ_NEXT(curelm, field) != (elm)) \ + curelm = STAILQ_NEXT(curelm, field); \ + STAILQ_REMOVE_AFTER(head, curelm, field); \ + } \ + TRASHIT(*oldnext); \ +} while (0) + +#define STAILQ_REMOVE_AFTER(head, elm, field) do { \ + if ((STAILQ_NEXT(elm, field) = \ + STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_REMOVE_HEAD(head, field) do { \ + if ((STAILQ_FIRST((head)) = \ + STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_SWAP(head1, head2, type) do { \ + QUEUE_TYPEOF(type) *swap_first = STAILQ_FIRST(head1); \ + QUEUE_TYPEOF(type) **swap_last = (head1)->stqh_last; \ + STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_FIRST(head2) = swap_first; \ + (head2)->stqh_last = swap_last; \ + if (STAILQ_EMPTY(head1)) \ + (head1)->stqh_last = &STAILQ_FIRST(head1); \ + if (STAILQ_EMPTY(head2)) \ + (head2)->stqh_last = &STAILQ_FIRST(head2); \ +} while (0) + + +/* + * List declarations. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_CLASS_HEAD(name, type) \ +struct name { \ + class type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +#define LIST_CLASS_ENTRY(type) \ +struct { \ + class type *le_next; /* next element */ \ + class type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. 
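 *
 * (Illustrative note for this port: because list elements are doubly
 * linked through le_next and le_prev, removal needs neither the list
 * head nor a traversal; for a hypothetical element elm whose
 * LIST_ENTRY field is named entries, LIST_REMOVE(elm, entries)
 * unlinks it directly.)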
+ */ + +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_LIST_CHECK_HEAD(head, field) do { \ + if (LIST_FIRST((head)) != NULL && \ + LIST_FIRST((head))->field.le_prev != \ + &LIST_FIRST((head))) \ + panic("Bad list head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_LIST_CHECK_NEXT(elm, field) do { \ + if (LIST_NEXT((elm), field) != NULL && \ + LIST_NEXT((elm), field)->field.le_prev != \ + &((elm)->field.le_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_LIST_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.le_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_LIST_CHECK_HEAD(head, field) +#define QMD_LIST_CHECK_NEXT(elm, field) +#define QMD_LIST_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define LIST_FIRST(head) ((head)->lh_first) + +#define LIST_FOREACH(var, head, field) \ + for ((var) = LIST_FIRST((head)); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? (var) : LIST_FIRST((head))); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? (var) : LIST_FIRST((head))); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_INIT(head) do { \ + LIST_FIRST((head)) = NULL; \ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + QMD_LIST_CHECK_NEXT(listelm, field); \ + if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ + LIST_NEXT((listelm), field)->field.le_prev = \ + &LIST_NEXT((elm), field); \ + LIST_NEXT((listelm), field) = (elm); \ + (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_LIST_CHECK_PREV(listelm, field); \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + LIST_NEXT((elm), field) = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \ + QMD_LIST_CHECK_HEAD((head), field); \ + if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ + LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ + LIST_FIRST((head)) = (elm); \ + (elm)->field.le_prev = &LIST_FIRST((head)); \ +} while (0) + +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_PREV(elm, head, type, field) \ + ((elm)->field.le_prev == &LIST_FIRST((head)) ? 
NULL : \ + __containerof((elm)->field.le_prev, \ + QUEUE_TYPEOF(type), field.le_next)) + +#define LIST_REMOVE(elm, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.le_next); \ + QMD_SAVELINK(oldprev, (elm)->field.le_prev); \ + QMD_LIST_CHECK_NEXT(elm, field); \ + QMD_LIST_CHECK_PREV(elm, field); \ + if (LIST_NEXT((elm), field) != NULL) \ + LIST_NEXT((elm), field)->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = LIST_NEXT((elm), field); \ + TRASHIT(*oldnext); \ + TRASHIT(*oldprev); \ +} while (0) + +#define LIST_SWAP(head1, head2, type, field) do { \ + QUEUE_TYPEOF(type) *swap_tmp = LIST_FIRST(head1); \ + LIST_FIRST((head1)) = LIST_FIRST((head2)); \ + LIST_FIRST((head2)) = swap_tmp; \ + if ((swap_tmp = LIST_FIRST((head1))) != NULL) \ + swap_tmp->field.le_prev = &LIST_FIRST((head1)); \ + if ((swap_tmp = LIST_FIRST((head2))) != NULL) \ + swap_tmp->field.le_prev = &LIST_FIRST((head2)); \ +} while (0) + +/* + * Tail queue declarations. + */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_CLASS_HEAD(name, type) \ +struct name { \ + class type *tqh_first; /* first element */ \ + class type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first, TRACEBUF_INITIALIZER } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +#define TAILQ_CLASS_ENTRY(type) \ +struct { \ + class type *tqe_next; /* next element */ \ + class type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +/* + * Tail queue functions. + */ +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_TAILQ_CHECK_HEAD(head, field) do { \ + if (!TAILQ_EMPTY(head) && \ + TAILQ_FIRST((head))->field.tqe_prev != \ + &TAILQ_FIRST((head))) \ + panic("Bad tailq head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_TAIL(head, field) do { \ + if (*(head)->tqh_last != NULL) \ + panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \ + if (TAILQ_NEXT((elm), field) != NULL && \ + TAILQ_NEXT((elm), field)->field.tqe_prev != \ + &((elm)->field.tqe_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_TAILQ_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.tqe_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_TAILQ_CHECK_HEAD(head, field) +#define QMD_TAILQ_CHECK_TAIL(head, headname) +#define QMD_TAILQ_CHECK_NEXT(elm, field) +#define QMD_TAILQ_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head1); \ + QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH(var, head, field) \ + for ((var) = TAILQ_FIRST((head)); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? 
(var) : TAILQ_FIRST((head))); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \ + for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \ + for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_INIT(head) do { \ + TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + QMD_TAILQ_CHECK_NEXT(listelm, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + } \ + TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&(listelm)->field); \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_TAILQ_CHECK_PREV(listelm, field); \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&(listelm)->field); \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + QMD_TAILQ_CHECK_HEAD(head, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ + TAILQ_FIRST((head))->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + TAILQ_FIRST((head)) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + QMD_TAILQ_CHECK_TAIL(head, field); \ + TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define TAILQ_REMOVE(head, elm, field) do { \ + QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \ + QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \ + QMD_TAILQ_CHECK_NEXT(elm, field); \ + QMD_TAILQ_CHECK_PREV(elm, field); 
\ + if ((TAILQ_NEXT((elm), field)) != NULL) \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else { \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + QMD_TRACE_HEAD(head); \ + } \ + *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ + TRASHIT(*oldnext); \ + TRASHIT(*oldprev); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_SWAP(head1, head2, type, field) do { \ + QUEUE_TYPEOF(type) *swap_first = (head1)->tqh_first; \ + QUEUE_TYPEOF(type) **swap_last = (head1)->tqh_last; \ + (head1)->tqh_first = (head2)->tqh_first; \ + (head1)->tqh_last = (head2)->tqh_last; \ + (head2)->tqh_first = swap_first; \ + (head2)->tqh_last = swap_last; \ + if ((swap_first = (head1)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head1)->tqh_first; \ + else \ + (head1)->tqh_last = &(head1)->tqh_first; \ + if ((swap_first = (head2)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head2)->tqh_first; \ + else \ + (head2)->tqh_last = &(head2)->tqh_first; \ +} while (0) + +#endif /* !_SYS_QUEUE_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp.h new file mode 100644 index 000000000000..fa09a73511f5 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp.h @@ -0,0 +1,261 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NETINET_TCP_H_ +#define _NETINET_TCP_H_ + +#include +#include + +#define __func__ "BSD TCP function" + +// From ip.h: +#define IP_MAXPACKET 65535 + +#define KASSERT(COND, MSG) if (!(COND)) printf MSG + +/*#include +#include */ + +//#if __BSD_VISIBLE + +typedef uint32_t tcp_seq; + +#define tcp6_seq tcp_seq /* for KAME src sync over BSD*'s */ +#define tcp6hdr tcphdr /* for KAME src sync over BSD*'s */ + +/* + * TCP header. + * Per RFC 793, September, 1981. 
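 *
 * (Porting note: th_off counts 32-bit words, so the header length in
 * bytes, including options, is th_off << 2. tcp_input() below drops
 * segments unless sizeof(struct tcphdr) <= (th_off << 2) <= tlen, and
 * computes optlen as (th_off << 2) - sizeof(struct tcphdr).)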
+ */ +struct tcphdr { + uint16_t th_sport; /* source port */ + uint16_t th_dport; /* destination port */ + tcp_seq th_seq; /* sequence number */ + tcp_seq th_ack; /* acknowledgement number */ +#if 1 //BYTE_ORDER == LITTLE_ENDIAN + uint8_t th_x2:4, /* (unused) */ + th_off:4; /* data offset */ +#endif +#if 0 //BYTE_ORDER == BIG_ENDIAN + u_char th_off:4, /* data offset */ + th_x2:4; /* (unused) */ +#endif + uint8_t th_flags; +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 +#define TH_ECE 0x40 +#define TH_CWR 0x80 +#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR) +#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR" + + uint16_t th_win; /* window */ + uint16_t th_sum; /* checksum */ + uint16_t th_urp; /* urgent pointer */ +}; + +#define TCPOPT_EOL 0 +#define TCPOLEN_EOL 1 +#define TCPOPT_PAD 0 /* padding after EOL */ +#define TCPOLEN_PAD 1 +#define TCPOPT_NOP 1 +#define TCPOLEN_NOP 1 +#define TCPOPT_MAXSEG 2 +#define TCPOLEN_MAXSEG 4 +#define TCPOPT_WINDOW 3 +#define TCPOLEN_WINDOW 3 +#define TCPOPT_SACK_PERMITTED 4 +#define TCPOLEN_SACK_PERMITTED 2 +#define TCPOPT_SACK 5 +#define TCPOLEN_SACKHDR 2 +#define TCPOLEN_SACK 8 /* 2*sizeof(tcp_seq) */ +#define TCPOPT_TIMESTAMP 8 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */ +#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */ +#define TCPOLEN_SIGNATURE 18 + +/* Miscellaneous constants */ +#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */ +#define TCP_MAX_SACK 4 /* MAX # SACKs sent in any segment */ + + +/* + * The default maximum segment size (MSS) to be used for new TCP connections + * when path MTU discovery is not enabled. + * + * RFC879 derives the default MSS from the largest datagram size hosts are + * minimally required to handle directly or through IP reassembly minus the + * size of the IP and TCP header. With IPv6 the minimum MTU is specified + * in RFC2460. + * + * For IPv4 the MSS is 576 - sizeof(struct tcpiphdr) + * For IPv6 the MSS is IPV6_MMTU - sizeof(struct ip6_hdr) - sizeof(struct tcphdr) + * + * We use explicit numerical definition here to avoid header pollution. + */ +#define TCP_MSS 536 +#define TCP6_MSS 1220 + +/* + * Limit the lowest MSS we accept for path MTU discovery and the TCP SYN MSS + * option. Allowing low values of MSS can consume significant resources and + * be used to mount a resource exhaustion attack. + * Connections requesting lower MSS values will be rounded up to this value + * and the IP_DF flag will be cleared to allow fragmentation along the path. + * + * See tcp_subr.c tcp_minmss SYSCTL declaration for more comments. Setting + * it to "0" disables the minmss check. + * + * The default value is fine for TCP across the Internet's smallest official + * link MTU (256 bytes for AX.25 packet radio). However, a connection is very + * unlikely to come across such low MTU interfaces these days (anno domini 2003). 
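 *
 * (Worked check of the MSS defaults above: for IPv4,
 * 576 - 40 bytes of combined IP and TCP headers = 536 = TCP_MSS;
 * for IPv6, 1280 (IPV6_MMTU) - 40 (struct ip6_hdr)
 * - 20 (struct tcphdr) = 1220 = TCP6_MSS.)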
+ */ +#define TCP_MINMSS 216 + +#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ +#define TTCP_CLIENT_SND_WND 4096 /* dflt send window for T/TCP client */ + +#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ + +#define TCP_MAXBURST 4 /* maximum segments in a burst */ + +#define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */ +#define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr)) + /* max space left for options */ +//#endif /* __BSD_VISIBLE */ + +/* + * User-settable options (used with setsockopt). These are discrete + * values and are not masked together. Some values appear to be + * bitmasks for historical reasons. + */ +#define TCP_NODELAY 1 /* don't delay send to coalesce packets */ +//#if __BSD_VISIBLE +#define TCP_MAXSEG 2 /* set maximum segment size */ +#define TCP_NOPUSH 4 /* don't push last block of write */ +#define TCP_NOOPT 8 /* don't use TCP options */ +#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */ +#define TCP_INFO 32 /* retrieve tcp_info structure */ +#define TCP_CONGESTION 64 /* get/set congestion control algorithm */ +#define TCP_KEEPINIT 128 /* N, time to establish connection */ +#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */ +#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */ +#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */ +#define TCP_PCAP_OUT 2048 /* number of output packets to keep */ +#define TCP_PCAP_IN 4096 /* number of input packets to keep */ + +/* Start of reserved space for third-party user-settable options. */ +#define TCP_VENDOR SO_VENDOR + +#define TCP_CA_NAME_MAX 16 /* max congestion control name length */ + +#define TCPI_OPT_TIMESTAMPS 0x01 +#define TCPI_OPT_SACK 0x02 +#define TCPI_OPT_WSCALE 0x04 +#define TCPI_OPT_ECN 0x08 +#define TCPI_OPT_TOE 0x10 + +/* + * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits + * the caller to query certain information about the state of a TCP + * connection. We provide an overlapping set of fields with the Linux + * implementation, but since this is a fixed size structure, room has been + * left for growth. In order to maximize potential future compatibility with + * the Linux API, the same variable names and order have been adopted, and + * padding left to make room for omitted fields in case they are added later. + * + * XXX: This is currently an unstable ABI/API, in that it is expected to + * change. + */ +#if 0 +struct tcp_info { + u_int8_t tcpi_state; /* TCP FSM state. */ + u_int8_t __tcpi_ca_state; + u_int8_t __tcpi_retransmits; + u_int8_t __tcpi_probes; + u_int8_t __tcpi_backoff; + u_int8_t tcpi_options; /* Options enabled on conn. */ + u_int8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */ + tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */ + + u_int32_t tcpi_rto; /* Retransmission timeout (usec). */ + u_int32_t __tcpi_ato; + u_int32_t tcpi_snd_mss; /* Max segment size for send. */ + u_int32_t tcpi_rcv_mss; /* Max segment size for receive. */ + + u_int32_t __tcpi_unacked; + u_int32_t __tcpi_sacked; + u_int32_t __tcpi_lost; + u_int32_t __tcpi_retrans; + u_int32_t __tcpi_fackets; + + /* Times; measurements in usecs. */ + u_int32_t __tcpi_last_data_sent; + u_int32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */ + u_int32_t tcpi_last_data_recv; /* Time since last recv data. */ + u_int32_t __tcpi_last_ack_recv; + + /* Metrics; variable units. */ + u_int32_t __tcpi_pmtu; + u_int32_t __tcpi_rcv_ssthresh; + u_int32_t tcpi_rtt; /* Smoothed RTT in usecs. 
*/ + u_int32_t tcpi_rttvar; /* RTT variance in usecs. */ + u_int32_t tcpi_snd_ssthresh; /* Slow start threshold. */ + u_int32_t tcpi_snd_cwnd; /* Send congestion window. */ + u_int32_t __tcpi_advmss; + u_int32_t __tcpi_reordering; + + u_int32_t __tcpi_rcv_rtt; + u_int32_t tcpi_rcv_space; /* Advertised recv window. */ + + /* FreeBSD extensions to tcp_info. */ + u_int32_t tcpi_snd_wnd; /* Advertised send window. */ + u_int32_t tcpi_snd_bwnd; /* No longer used. */ + u_int32_t tcpi_snd_nxt; /* Next egress seqno */ + u_int32_t tcpi_rcv_nxt; /* Next ingress seqno */ + u_int32_t tcpi_toe_tid; /* HWTID for TOE endpoints */ + u_int32_t tcpi_snd_rexmitpack; /* Retransmitted packets */ + u_int32_t tcpi_rcv_ooopack; /* Out-of-order packets */ + u_int32_t tcpi_snd_zerowin; /* Zero-sized windows sent */ + + /* Padding to grow without breaking ABI. */ + u_int32_t __tcpi_pad[26]; /* Padding. */ +}; +#endif +//#endif + +#endif /* !_NETINET_TCP_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_const.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_const.h new file mode 100644 index 000000000000..9eea802b0347 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_const.h @@ -0,0 +1,76 @@ +/** + * @ingroup net_gnrc_tcp_freebsd + * @{ + * + * @file + * @brief Constants used by FreeBSD TCP Protocol logic + * + * @author Sam Kumar + * + * I created this file to store many of the constants shared by the + * various files in the FreeBSD protocol logic. I've changed the + * definitions to be enumerations rather than globals, to save + * some memory. + * @} + */ + +#ifndef _TCP_CONST_H_ +#define _TCP_CONST_H_ + +#include "../gnrc_tcp_freebsd_internal.h" + +#include "tcp_var.h" +#include "tcp_timer.h" + +#define MSS_6LOWPAN ((FRAMES_PER_SEG * FRAMECAP_6LOWPAN) - IP6HDR_SIZE - sizeof(struct tcphdr)) + +// I may change some of these flags later +enum tcp_input_consts { + tcp_keepcnt = TCPTV_KEEPCNT, + tcp_fast_finwait2_recycle = 0, + tcprexmtthresh = 3, + V_drop_synfin = 0, + V_tcp_do_ecn = 0, + V_tcp_do_rfc3042 = 0, + V_path_mtu_discovery = 0, + V_tcp_delack_enabled = 1, + V_tcp_initcwnd_segments = 0, + V_tcp_do_rfc3390 = 0, + V_tcp_abc_l_var = 2 // this is what was in the original tcp_input.c +}; + +enum tcp_subr_consts { + tcp_delacktime = TCPTV_DELACK, + tcp_keepinit = TCPTV_KEEP_INIT, + tcp_keepidle = TCPTV_KEEP_IDLE, + tcp_keepintvl = TCPTV_KEEPINTVL, + tcp_maxpersistidle = TCPTV_KEEP_IDLE, + tcp_msl = TCPTV_MSL, + tcp_rexmit_slop = TCPTV_CPU_VAR, + tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT, + + V_tcp_do_rfc1323 = 1, + V_tcp_v6mssdflt = MSS_6LOWPAN, + /* Normally, this is used to prevent DoS attacks by sending tiny MSS values in the options. */ + V_tcp_minmss = TCP_MAXOLEN + 1, // Must have enough space for TCP options, and one more byte for data. Default is 216. + V_tcp_do_sack = 1 +}; + +enum tcp_timer_consts { +// V_tcp_v6pmtud_blackhole_mss = FRAMECAP_6LOWPAN - sizeof(struct ip6_hdr) - sizeof(struct tcphdr), // Doesn't matter unless blackhole_detect is 1. + tcp_rexmit_drop_options = 1, // drop options after a few retransmits + always_keepalive = 1, +}; + +/* + * Force a time value to be in a certain range. 
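 * For example, cc_conn_init() in tcp_input.c seeds the retransmission
 * timeout with
 *
 *	TCPT_RANGESET(tp->t_rxtcur,
 *	    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
 *	    tp->t_rttmin, TCPTV_REXMTMAX);
 *
 * i.e. the computed value (plus tcp_rexmit_slop) is clamped to the
 * range [t_rttmin, TCPTV_REXMTMAX].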
+ */ +#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \ + (tv) = (value) + tcp_rexmit_slop; \ + if ((u_long)(tv) < (u_long)(tvmin)) \ + (tv) = (tvmin); \ + if ((u_long)(tv) > (u_long)(tvmax)) \ + (tv) = (tvmax); \ +} while(0) + +#endif diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_fsm.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_fsm.h new file mode 100644 index 000000000000..d66c41344995 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_fsm.h @@ -0,0 +1,114 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NETINET_TCP_FSM_H_ +#define _NETINET_TCP_FSM_H_ + +#include "types.h" + +/* + * TCP FSM state definitions. + * + * Per RFC793, September, 1981. 
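 *
 * (Orientation sketch, per RFC 793; not upstream text. The common
 * paths through the states defined below are:
 *	active open:  CLOSED -> SYN_SENT -> ESTABLISHED
 *	passive open: CLOSED -> LISTEN -> SYN_RECEIVED -> ESTABLISHED
 *	local close:  ESTABLISHED -> FIN_WAIT_1 -> FIN_WAIT_2 ->
 *	              TIME_WAIT -> CLOSED
 *	remote close: ESTABLISHED -> CLOSE_WAIT -> LAST_ACK -> CLOSED)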
+ */ + +#define TCP_NSTATES 11 + +#define TCPS_CLOSED 0 /* closed */ +#define TCPS_LISTEN 1 /* listening for connection */ +#define TCPS_SYN_SENT 2 /* active, have sent syn */ +#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */ +/* states < TCPS_ESTABLISHED are those where connections not established */ +#define TCPS_ESTABLISHED 4 /* established */ +#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ +/* states > TCPS_CLOSE_WAIT are those where user has closed */ +#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ +#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ +#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ +/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ +#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ +#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ + +/* for KAME src sync over BSD*'s */ +#define TCP6_NSTATES TCP_NSTATES +#define TCP6S_CLOSED TCPS_CLOSED +#define TCP6S_LISTEN TCPS_LISTEN +#define TCP6S_SYN_SENT TCPS_SYN_SENT +#define TCP6S_SYN_RECEIVED TCPS_SYN_RECEIVED +#define TCP6S_ESTABLISHED TCPS_ESTABLISHED +#define TCP6S_CLOSE_WAIT TCPS_CLOSE_WAIT +#define TCP6S_FIN_WAIT_1 TCPS_FIN_WAIT_1 +#define TCP6S_CLOSING TCPS_CLOSING +#define TCP6S_LAST_ACK TCPS_LAST_ACK +#define TCP6S_FIN_WAIT_2 TCPS_FIN_WAIT_2 +#define TCP6S_TIME_WAIT TCPS_TIME_WAIT + +#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) +#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) +#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) + +//#ifdef TCPOUTFLAGS +/* + * Flags used when sending segments in tcp_output. Basic flags (TH_RST, + * TH_ACK,TH_SYN,TH_FIN) are totally determined by state, with the proviso + * that TH_FIN is sent only if all data queued for output is included in the + * segment. + */ +static const u_char tcp_outflags[TCP_NSTATES] = { + TH_RST|TH_ACK, /* 0, CLOSED */ + 0, /* 1, LISTEN */ + TH_SYN, /* 2, SYN_SENT */ + TH_SYN|TH_ACK, /* 3, SYN_RECEIVED */ + TH_ACK, /* 4, ESTABLISHED */ + TH_ACK, /* 5, CLOSE_WAIT */ + TH_FIN|TH_ACK, /* 6, FIN_WAIT_1 */ + TH_FIN|TH_ACK, /* 7, CLOSING */ + TH_FIN|TH_ACK, /* 8, LAST_ACK */ + TH_ACK, /* 9, FIN_WAIT_2 */ + TH_ACK, /* 10, TIME_WAIT */ +}; +//#endif + +#ifdef KPROF +int tcp_acounts[TCP_NSTATES][PRU_NREQ]; +#endif + +//#ifdef TCPSTATES +static char const * const tcpstates[] = { + "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD", + "ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING", + "LAST_ACK", "FIN_WAIT_2", "TIME_WAIT", +}; +//#endif + +#endif diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_input.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_input.c new file mode 100644 index 000000000000..f687a2bbf623 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_input.c @@ -0,0 +1,3923 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2007-2008,2010 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * Portions of this software were developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart, + * James Healy and David Hayes, made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. 
+ * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 + */ + + +/* + * Determine a reasonable value for maxseg size. + * If the route is known, check route for mtu. + * If none, use an mss that can be handled on the outgoing interface + * without forcing IP to fragment. If no route is found, route has no mtu, + * or the destination isn't local, use a default, hopefully conservative + * size (usually 512 or the default IP max size, but no more than the mtu + * of the interface), as we can't discover anything about intervening + * gateways or networks. We also initialize the congestion/slow start + * window to be a single segment if the destination isn't local. + * While looking at the routing entry, we also initialize other path-dependent + * parameters from pre-set or cached values in the routing entry. + * + * Also take into account the space needed for options that we + * send regularly. Make maxseg shorter by that amount to assure + * that we can send maxseg amount of data even when the options + * are present. Store the upper limit of the length of options plus + * data in maxopd. + * + * NOTE that this routine is only called when we process an incoming + * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS + * settings are handled in tcp_mssopt(). 
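 *
 * (Porting note: the route and hostcache lookups this description
 * refers to are stubbed out in this port -- tcp_hc_get() below always
 * reports a miss -- so MSS selection effectively falls back to the
 * constants in tcp_const.h, where V_tcp_v6mssdflt is MSS_6LOWPAN.)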
+ */ + +#include + +#include "tcp.h" +#include "tcp_fsm.h" +#include "tcp_seq.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "../lib/bitmap.h" +#include "../lib/cbuf.h" +#include "icmp_var.h" +#include "ip.h" +#include "ip6.h" +#include "sys/queue.h" + +#include "tcp_const.h" + +#define ENABLE_DEBUG (0) +#include "debug.h" + +// Copied from in.h +#define IPPROTO_DONE 267 + +// Copied from sys/libkern.h +static int imax(int a, int b) { return (a > b ? a : b); } +static int imin(int a, int b) { return (a < b ? a : b); } + +static int min(int a, int b) { return imin(a, b); } + +static void tcp_dooptions(struct tcpopt *, u_char *, int, int); +static void +tcp_do_segment(struct ip6_hdr* ip6, struct tcphdr *th, + struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, + uint8_t* signals, uint32_t* freedentries); +static void tcp_xmit_timer(struct tcpcb *, int); +void tcp_hc_get(/*struct in_conninfo *inc*/ struct tcpcb* tp, struct hc_metrics_lite *hc_metrics_lite); +static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *); + +/* + * CC wrapper hook functions + */ +static void inline +cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type) +{ +// INP_WLOCK_ASSERT(tp->t_inpcb); + + tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); + if (tp->snd_cwnd <= tp->snd_wnd) + tp->ccv->flags |= CCF_CWND_LIMITED; + else + tp->ccv->flags &= ~CCF_CWND_LIMITED; + + if (type == CC_ACK) { + if (tp->snd_cwnd > tp->snd_ssthresh) { + tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, + V_tcp_abc_l_var * tp->t_maxseg); + if (tp->t_bytes_acked >= tp->snd_cwnd) { + tp->t_bytes_acked -= tp->snd_cwnd; + tp->ccv->flags |= CCF_ABC_SENTAWND; + } + } else { + tp->ccv->flags &= ~CCF_ABC_SENTAWND; + tp->t_bytes_acked = 0; + } + } + + if (CC_ALGO(tp)->ack_received != NULL) { + /* XXXLAS: Find a way to live without this */ + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->ack_received(tp->ccv, type); + } +} + +static void inline +cc_conn_init(struct tcpcb *tp) +{ + struct hc_metrics_lite metrics; +// struct inpcb *inp = tp->t_inpcb; + int rtt; + +// INP_WLOCK_ASSERT(tp->t_inpcb); + + tcp_hc_get(/*&inp->inp_inc*/tp, &metrics); + + if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { + tp->t_srtt = rtt; + tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; +// TCPSTAT_INC(tcps_usedrtt); + if (metrics.rmx_rttvar) { + tp->t_rttvar = metrics.rmx_rttvar; +// TCPSTAT_INC(tcps_usedrttvar); + } else { + /* default variation is +- 1 rtt */ + tp->t_rttvar = + tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; + } + TCPT_RANGESET(tp->t_rxtcur, + ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, + tp->t_rttmin, TCPTV_REXMTMAX); + } + if (metrics.rmx_ssthresh) { + /* + * There's some sort of gateway or interface + * buffer limit on the path. Use this to set + * the slow start threshhold, but set the + * threshold to no less than 2*mss. + */ + tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh); +// TCPSTAT_INC(tcps_usedssthresh); + } + + /* + * Set the initial slow-start flight size. + * + * RFC5681 Section 3.1 specifies the default conservative values. + * RFC3390 specifies slightly more aggressive values. + * RFC6928 increases it to ten segments. + * Support for user specified value for initial flight size. + * + * If a SYN or SYN/ACK was lost and retransmitted, we have to + * reduce the initial CWND to one segment as congestion is likely + * requiring us to be cautious. 
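 *
 * (Worked example for this port, assuming the 6LoWPAN-sized t_maxseg
 * stays at or below 1095 bytes: with V_tcp_initcwnd_segments and
 * V_tcp_do_rfc3390 both 0 in tcp_const.h, the RFC 5681 branch below
 * is taken and the initial cwnd comes out to 4 * t_maxseg, unless a
 * lost SYN already forced it down to one segment.)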
+ */ + if (tp->snd_cwnd == 1) + tp->snd_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ + else if (V_tcp_initcwnd_segments) + tp->snd_cwnd = min(V_tcp_initcwnd_segments * tp->t_maxseg, + max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460)); + else if (V_tcp_do_rfc3390) + tp->snd_cwnd = min(4 * tp->t_maxseg, + max(2 * tp->t_maxseg, 4380)); + else { + /* Per RFC5681 Section 3.1 */ + if (tp->t_maxseg > 2190) + tp->snd_cwnd = 2 * tp->t_maxseg; + else if (tp->t_maxseg > 1095) + tp->snd_cwnd = 3 * tp->t_maxseg; + else + tp->snd_cwnd = 4 * tp->t_maxseg; + } + + if (CC_ALGO(tp)->conn_init != NULL) + CC_ALGO(tp)->conn_init(tp->ccv); +} + +void inline +cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) +{ +// INP_WLOCK_ASSERT(tp->t_inpcb); + + switch(type) { + case CC_NDUPACK: + if (!IN_FASTRECOVERY(tp->t_flags)) { + tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_ECN_PERMIT) + tp->t_flags |= TF_ECN_SND_CWR; + } + break; + case CC_ECN: + if (!IN_CONGRECOVERY(tp->t_flags)) { +// TCPSTAT_INC(tcps_ecn_rcwnd); + tp->snd_recover = tp->snd_max; + if (tp->t_flags & TF_ECN_PERMIT) + tp->t_flags |= TF_ECN_SND_CWR; + } + break; + case CC_RTO: + tp->t_dupacks = 0; + tp->t_bytes_acked = 0; + EXIT_RECOVERY(tp->t_flags); + tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 / + tp->t_maxseg) * tp->t_maxseg; + tp->snd_cwnd = tp->t_maxseg; + break; + case CC_RTO_ERR: +// TCPSTAT_INC(tcps_sndrexmitbad); + /* RTO was unnecessary, so reset everything. */ + tp->snd_cwnd = tp->snd_cwnd_prev; + tp->snd_ssthresh = tp->snd_ssthresh_prev; + tp->snd_recover = tp->snd_recover_prev; + if (tp->t_flags & TF_WASFRECOVERY) + ENTER_FASTRECOVERY(tp->t_flags); + if (tp->t_flags & TF_WASCRECOVERY) + ENTER_CONGRECOVERY(tp->t_flags); + tp->snd_nxt = tp->snd_max; + tp->t_flags &= ~TF_PREVVALID; + tp->t_badrxtwin = 0; + break; + } + + if (CC_ALGO(tp)->cong_signal != NULL) { + if (th != NULL) + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->cong_signal(tp->ccv, type); + } +} + +static void inline +cc_post_recovery(struct tcpcb *tp, struct tcphdr *th) +{ +// INP_WLOCK_ASSERT(tp->t_inpcb); + + /* XXXLAS: KASSERT that we're in recovery? */ + if (CC_ALGO(tp)->post_recovery != NULL) { + tp->ccv->curack = th->th_ack; + CC_ALGO(tp)->post_recovery(tp->ccv); + } + /* XXXLAS: EXIT_RECOVERY ? */ + tp->t_bytes_acked = 0; +} + + +/* + * Indicate whether this ack should be delayed. We can delay the ack if + * following conditions are met: + * - There is no delayed ack timer in progress. + * - Our last ack wasn't a 0-sized window. We never want to delay + * the ack that opens up a 0-sized window. + * - LRO wasn't used for this segment. We make sure by checking that the + * segment size is not larger than the MSS. + * - Delayed acks are enabled or this is a half-synchronized T/TCP + * connection. 
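 *
 * Illustrative use (paraphrasing the SYN processing in
 * tcp_do_segment(), not a verbatim excerpt): when the predicate
 * holds, the ACK is deferred on the delayed-ACK timer; otherwise it
 * is sent immediately.
 *
 *	if (DELAY_ACK(tp, tlen))
 *		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 *	else
 *		tp->t_flags |= TF_ACKNOW;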
+ */ +#define DELAY_ACK(tp, tlen) \ + ((!tcp_timer_active(tp, TT_DELACK) && \ + (tp->t_flags & TF_RXWIN0SENT) == 0) && \ + (tlen <= tp->t_maxopd) && \ + (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) + +static void inline +cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos) +{ +// INP_WLOCK_ASSERT(tp->t_inpcb); + + if (CC_ALGO(tp)->ecnpkt_handler != NULL) { + switch (iptos & IPTOS_ECN_MASK) { + case IPTOS_ECN_CE: + tp->ccv->flags |= CCF_IPHDR_CE; + break; + case IPTOS_ECN_ECT0: + tp->ccv->flags &= ~CCF_IPHDR_CE; + break; + case IPTOS_ECN_ECT1: + tp->ccv->flags &= ~CCF_IPHDR_CE; + break; + } + + if (th->th_flags & TH_CWR) + tp->ccv->flags |= CCF_TCPHDR_CWR; + else + tp->ccv->flags &= ~CCF_TCPHDR_CWR; + + if (tp->t_flags & TF_DELACK) + tp->ccv->flags |= CCF_DELACK; + else + tp->ccv->flags &= ~CCF_DELACK; + + CC_ALGO(tp)->ecnpkt_handler(tp->ccv); + + if (tp->ccv->flags & CCF_ACKNOW) + tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); + } +} + +/* + * External function: look up an entry in the hostcache and fill out the + * supplied TCP metrics structure. Fills in NULL when no entry was found or + * a value is not set. + * Taken from tcp_hostcache.c. + * Sam: I changed this to always act as if there is a miss. + */ +void +tcp_hc_get(/*struct in_conninfo *inc*/ struct tcpcb* tp, struct hc_metrics_lite *hc_metrics_lite) +{ +#if 0 + struct hc_metrics *hc_entry; + + /* + * Find the right bucket. + */ + hc_entry = tcp_hc_lookup(inc); + + /* + * If we don't have an existing object. + */ + if (hc_entry == NULL) { +#endif + bzero(hc_metrics_lite, sizeof(*hc_metrics_lite)); +#if 0 + return; + } + hc_entry->rmx_hits++; + hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ + + hc_metrics_lite->rmx_mtu = hc_entry->rmx_mtu; + hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh; + hc_metrics_lite->rmx_rtt = hc_entry->rmx_rtt; + hc_metrics_lite->rmx_rttvar = hc_entry->rmx_rttvar; + hc_metrics_lite->rmx_bandwidth = hc_entry->rmx_bandwidth; + hc_metrics_lite->rmx_cwnd = hc_entry->rmx_cwnd; + hc_metrics_lite->rmx_sendpipe = hc_entry->rmx_sendpipe; + hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe; + + /* + * Unlock bucket row. + */ + THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); +#endif +} + +/* + * External function: look up an entry in the hostcache and return the + * discovered path MTU. Returns NULL if no entry is found or value is not + * set. + * Taken from tcp_hostcache.c. + * Sam: I changed this always act as if there is a miss. + */ +u_long +tcp_hc_getmtu(/*struct in_conninfo *inc*/ struct tcpcb* tp) +{ +#if 0 + struct hc_metrics *hc_entry; + u_long mtu; + + hc_entry = tcp_hc_lookup(inc); + if (hc_entry == NULL) { +#endif + return 0; +#if 0 + } + hc_entry->rmx_hits++; + hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ + + mtu = hc_entry->rmx_mtu; + THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); + return mtu; +#endif +} + + +/* + * Issue RST and make ACK acceptable to originator of segment. + * The mbuf must still include the original packet header. + * tp may be NULL. + */ +/* Original signature was: + static void tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, + int tlen, int rstreason) +*/ +void +tcp_dropwithreset(struct ip6_hdr* ip6, struct tcphdr *th, struct tcpcb *tp, + int tlen, int rstreason) +{ +#if 0 +#ifdef INET + struct ip *ip; +#endif +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + + if (tp != NULL) { + INP_WLOCK_ASSERT(tp->t_inpcb); + } + + /* Don't bother if destination was broadcast/multicast. 
*/ + if ((th->th_flags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST)) + goto drop; +#ifdef INET6 + if (mtod(m, struct ip *)->ip_v == 6) { + ip6 = mtod(m, struct ip6_hdr *); + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || + IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) + goto drop; + /* IPv6 anycast check is done at tcp6_input() */ + } +#endif +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + { + ip = mtod(m, struct ip *); + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || + IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || + ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || + in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) + goto drop; + } +#endif + + /* Perform bandwidth limiting. */ + if (badport_bandlim(rstreason) < 0) + goto drop; +#endif + /* tcp_respond consumes the mbuf chain. */ + if (th->th_flags & TH_ACK) { +// tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, +// th->th_ack, TH_RST); + tcp_respond(tp, ip6, th, (tcp_seq) 0, th->th_ack, TH_RST); + } else { + if (th->th_flags & TH_SYN) + tlen++; +// tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen, +// (tcp_seq)0, TH_RST|TH_ACK); + tcp_respond(tp, ip6, th, th->th_seq + tlen, (tcp_seq) 0, TH_RST | TH_ACK); + } + return; +/* +drop: + m_freem(m); +*/ +} + +/* + * TCP input handling is split into multiple parts: + * tcp6_input is a thin wrapper around tcp_input for the extended + * ip6_protox[] call format in ip6_input + * tcp_input handles primary segment validation, inpcb lookup and + * SYN processing on listen sockets + * tcp_do_segment processes the ACK and text of the segment for + * establishing, established and closing connections + */ +/* The signature of this function was originally: + tcp_input(struct mbuf **mp, int *offp, int proto) */ +/* NOTE: tcp_fields_to_host(th) must be called before this function is called. */ +int +tcp_input(struct ip6_hdr* ip6, struct tcphdr* th, struct tcpcb* tp, struct tcpcb_listen* tpl, + uint8_t* signals, uint32_t* freedentries) +{ + int tlen = 0, off; + int thflags; + uint8_t iptos = 0; + int drop_hdrlen; + int rstreason = 0; + //uint32_t ticks = get_ticks(); + struct tcpopt to; /* options in this segment */ + u_char* optp = NULL; + int optlen = 0; + to.to_flags = 0; + KASSERT(tp || tpl, ("One of tp and tpl must be positive")); +#if 0 + struct mbuf *m = *mp; + struct tcphdr *th = NULL; + struct ip *ip = NULL; + struct inpcb *inp = NULL; + struct tcpcb *tp = NULL; + struct socket *so = NULL; + u_char *optp = NULL; + int off0; /* It seems that this is the offset of the TCP header from the IP header. */ + int optlen = 0; +#ifdef INET + int len; +#endif + int tlen = 0, off; + int drop_hdrlen; + int thflags; + int rstreason = 0; /* For badport_bandlim accounting purposes */ +#ifdef TCP_SIGNATURE + uint8_t sig_checked = 0; +#endif + uint8_t iptos = 0; + struct m_tag *fwd_tag = NULL; +#ifdef INET6 + struct ip6_hdr *ip6 = NULL; + int isipv6; +#else + const void *ip6 = NULL; +#endif /* INET6 */ + struct tcpopt to; /* options in this segment */ + char *s = NULL; /* address and port logging */ + int ti_locked; +#define TI_UNLOCKED 1 +#define TI_RLOCKED 2 +#endif +#if 0 // DON'T NEED THE PREPROCESSING; I'LL DO THAT MYSELF +#ifdef TCPDEBUG + /* + * The size of tcp_saveipgen must be the size of the max ip header, + * now IPv6. + */ + u_char tcp_saveipgen[IP6_HDR_LEN]; + struct tcphdr tcp_savetcp; + short ostate = 0; +#endif + +#ifdef INET6 + isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 
1 : 0; +#endif + + off0 = *offp; + m = *mp; + *mp = NULL; + to.to_flags = 0; + TCPSTAT_INC(tcps_rcvtotal); + +#ifdef INET6 + if (isipv6) { + /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */ + + if (m->m_len < (sizeof(*ip6) + sizeof(*th))) { + m = m_pullup(m, sizeof(*ip6) + sizeof(*th)); + if (m == NULL) { + TCPSTAT_INC(tcps_rcvshort); + return (IPPROTO_DONE); + } + } + + ip6 = mtod(m, struct ip6_hdr *); + th = (struct tcphdr *)((caddr_t)ip6 + off0); + tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) + th->th_sum = m->m_pkthdr.csum_data; + else + th->th_sum = in6_cksum_pseudo(ip6, tlen, + IPPROTO_TCP, m->m_pkthdr.csum_data); + th->th_sum ^= 0xffff; + } else + th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen); + if (th->th_sum) { + TCPSTAT_INC(tcps_rcvbadsum); + goto drop; + } + + /* + * Be proactive about unspecified IPv6 address in source. + * As we use all-zero to indicate unbounded/unconnected pcb, + * unspecified IPv6 address can be used to confuse us. + * + * Note that packets with unspecified IPv6 destination is + * already dropped in ip6_input. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { + /* XXX stat */ + goto drop; + } + } +#endif +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + { + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. + */ + if (off0 > sizeof (struct ip)) { + ip_stripoptions(m); + off0 = sizeof(struct ip); + } + if (m->m_len < sizeof (struct tcpiphdr)) { + if ((m = m_pullup(m, sizeof (struct tcpiphdr))) + == NULL) { + TCPSTAT_INC(tcps_rcvshort); + return (IPPROTO_DONE); + } + } + ip = mtod(m, struct ip *); + th = (struct tcphdr *)((caddr_t)ip + off0); + tlen = ntohs(ip->ip_len) - off0; + + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) + th->th_sum = m->m_pkthdr.csum_data; + else + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + htonl(m->m_pkthdr.csum_data + tlen + + IPPROTO_TCP)); + th->th_sum ^= 0xffff; + } else { + struct ipovly *ipov = (struct ipovly *)ip; + + /* + * Checksum extended TCP header and data. + */ + len = off0 + tlen; + bzero(ipov->ih_x1, sizeof(ipov->ih_x1)); + ipov->ih_len = htons(tlen); + th->th_sum = in_cksum(m, len); + /* Reset length for SDT probes. */ + ip->ip_len = htons(tlen + off0); + } + + if (th->th_sum) { + TCPSTAT_INC(tcps_rcvbadsum); + goto drop; + } + /* Re-initialization for later version check */ + ip->ip_v = IPVERSION; + } +#endif /* INET */ +#endif + tlen = ntohs(ip6->ip6_plen); // assume *off == sizeof(*ip6) +/* +#ifdef INET6 + if (isipv6) +*/ + iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; +/* +#endif +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + iptos = ip->ip_tos; +#endif +*/ + /* + * Check that TCP offset makes sense, + * pull out TCP options and adjust length. XXX + */ + off = th->th_off << 2; + if (off < sizeof (struct tcphdr) || off > tlen) { +// TCPSTAT_INC(tcps_rcvbadoff); + goto drop; + } + tlen -= off; /* tlen is used instead of ti->ti_len */ + // It seems that now tlen is the length of the data + + if (off > sizeof (struct tcphdr)) { +#if 0 /* OMIT HANDLING OF EXTRA OPTIONS. 
*/ +#ifdef INET6 + if (isipv6) { + IP6_EXTHDR_CHECK(m, off0, off, IPPROTO_DONE); + ip6 = mtod(m, struct ip6_hdr *); + th = (struct tcphdr *)((caddr_t)ip6 + off0); + } +#endif +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + { + if (m->m_len < sizeof(struct ip) + off) { + if ((m = m_pullup(m, sizeof (struct ip) + off)) + == NULL) { + TCPSTAT_INC(tcps_rcvshort); + return (IPPROTO_DONE); + } + ip = mtod(m, struct ip *); + th = (struct tcphdr *)((caddr_t)ip + off0); + } + } +#endif +#endif + optlen = off - sizeof (struct tcphdr); + optp = (u_char *)(th + 1); + } + + thflags = th->th_flags; + + /* + * Convert TCP protocol specific fields to host format. + * Sam: I moved this call before this function, in case we return early on a time-wait socket and start over. + */ + //tcp_fields_to_host(th); + + /* + * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options. + * Sam: My TCP header is in a different buffer from the IP header. + * drop_hdrlen is only meaningful as an offset into the TCP buffer, + * because it is used to determine how much of the packet to discard + * before copying it into the receive buffer. Therefore, my offset does + * not include the length of IP header and options, only the length of + * the TCP header and options. + */ + drop_hdrlen = /*off0 +*/ off; + + /* + * Locate pcb for segment; if we're likely to add or remove a + * connection then first acquire pcbinfo lock. There are three cases + * where we might discover later we need a write lock despite the + * flags: ACKs moving a connection out of the syncache, ACKs for a + * connection in TIMEWAIT and SYNs not targeting a listening socket. + */ +#if 0 // Ignore synchronization code + if ((thflags & (TH_FIN | TH_RST)) != 0) { + INP_INFO_RLOCK(&V_tcbinfo); + ti_locked = TI_RLOCKED; + } else + ti_locked = TI_UNLOCKED; +#endif +/* I BELIEVE THAT THIS IS ALREADY HANDLED AT A LOWER LAYER ON STORM */ +#if 0 + /* + * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. + */ + if ( +#ifdef INET6 + (isipv6 && (m->m_flags & M_IP6_NEXTHOP)) +#ifdef INET + || (!isipv6 && (m->m_flags & M_IP_NEXTHOP)) +#endif +#endif +#if defined(INET) && !defined(INET6) + (m->m_flags & M_IP_NEXTHOP) +#endif + ) + fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); +#endif +//findpcb: +#if 0 // I DON'T NEED ANY OF THEIR SYNCHRONIZATION CODE +#ifdef INVARIANTS + if (ti_locked == TI_RLOCKED) { + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + } else { + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + } +#endif +#endif +#if 0 // THIS IS THE CODE TO LOOK UP THE SOCKET. I'VE ALREADY DONE THAT +#ifdef INET6 + if (isipv6 && fwd_tag != NULL) { + struct sockaddr_in6 *next_hop6; + + next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); + /* + * Transparently forwarded. Pretend to be the destination. + * Already got one like this? + */ + inp = in6_pcblookup_mbuf(&V_tcbinfo, + &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, + INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); + if (!inp) { + /* + * It's new. Try to find the ambushing socket. + * Because we've rewritten the destination address, + * any hardware-generated hash is ignored. + */ + inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src, + th->th_sport, &next_hop6->sin6_addr, + next_hop6->sin6_port ? 
ntohs(next_hop6->sin6_port) : + th->th_dport, INPLOOKUP_WILDCARD | + INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); + } + } else if (isipv6) { + inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, + th->th_sport, &ip6->ip6_dst, th->th_dport, + INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, + m->m_pkthdr.rcvif, m); + } +#endif /* INET6 */ +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + if (fwd_tag != NULL) { + struct sockaddr_in *next_hop; + + next_hop = (struct sockaddr_in *)(fwd_tag+1); + /* + * Transparently forwarded. Pretend to be the destination. + * already got one like this? + */ + inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, + ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB, + m->m_pkthdr.rcvif, m); + if (!inp) { + /* + * It's new. Try to find the ambushing socket. + * Because we've rewritten the destination address, + * any hardware-generated hash is ignored. + */ + inp = in_pcblookup(&V_tcbinfo, ip->ip_src, + th->th_sport, next_hop->sin_addr, + next_hop->sin_port ? ntohs(next_hop->sin_port) : + th->th_dport, INPLOOKUP_WILDCARD | + INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); + } + } else + inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, + th->th_sport, ip->ip_dst, th->th_dport, + INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, + m->m_pkthdr.rcvif, m); +#endif /* INET */ + + /* + * If the INPCB does not exist then all data in the incoming + * segment is discarded and an appropriate RST is sent back. + * XXX MRT Send RST using which routing table? + */ + if (inp == NULL) { + /* + * Log communication attempts to ports that are not + * in use. + */ + if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) || + tcp_log_in_vain == 2) { + if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6))) + log(LOG_INFO, "%s; %s: Connection attempt " + "to closed port\n", s, __func__); + } + /* + * When blackholing do not respond with a RST but + * completely ignore the segment and drop it. + */ + if ((V_blackhole == 1 && (thflags & TH_SYN)) || + V_blackhole == 2) + goto dropunlock; + + rstreason = BANDLIM_RST_CLOSEDPORT; + goto dropwithreset; + } + INP_WLOCK_ASSERT(inp); + if ((inp->inp_flowtype == M_HASHTYPE_NONE) && + (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) && + ((inp->inp_socket == NULL) || + (inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) { + inp->inp_flowid = m->m_pkthdr.flowid; + inp->inp_flowtype = M_HASHTYPE_GET(m); + } +#ifdef IPSEC +#ifdef INET6 + if (isipv6 && ipsec6_in_reject(m, inp)) { + goto dropunlock; + } else +#endif /* INET6 */ + if (ipsec4_in_reject(m, inp) != 0) { + goto dropunlock; + } +#endif /* IPSEC */ + + /* + * Check the minimum TTL for socket. + */ + if (inp->inp_ip_minttl != 0) { +#ifdef INET6 + if (isipv6 && inp->inp_ip_minttl > ip6->ip6_hlim) + goto dropunlock; + else +#endif + if (inp->inp_ip_minttl > ip->ip_ttl) + goto dropunlock; + } +#endif + /* + * A previous connection in TIMEWAIT state is supposed to catch stray + * or duplicate segments arriving late. If this segment was a + * legitimate new connection attempt, the old INPCB gets removed and + * we can try again to find a listening socket. + * + * At this point, due to earlier optimism, we may hold only an inpcb + * lock, and not the inpcbinfo write lock. If so, we need to try to + * acquire it, or if that fails, acquire a reference on the inpcb, + * drop all locks, acquire a global write lock, and then re-acquire + * the inpcb lock. We may at that point discover that another thread + * has tried to free the inpcb, in which case we need to loop back + * and try to find a new inpcb to deliver to. 
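+ * In this port the relocking dance collapses into a return code:
+ * tcp_twcheck() below decides whether the segment belongs to the old
+ * time-wait connection or is a legitimate new connection attempt, and
+ * returning RELOOKUP_REQUIRED replaces "goto findpcb", pushing the
+ * retry to the caller.  A hypothetical caller (names illustrative,
+ * not part of this patch) would loop along these lines:
+ *
+ *	while (process_segment(ip6, th, tp) == RELOOKUP_REQUIRED) {
+ *		tp = match_tcpcb(ip6, th);	// redo the socket match
+ *	}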
+ * + * XXXRW: It may be time to rethink timewait locking. + */ +//relocked: + if (tp && /*inp->inp_flags & INP_TIMEWAIT*/tp->t_state == TCP6S_TIME_WAIT) { +#if 0 // REMOVE SYNCHRONIZATION + if (ti_locked == TI_UNLOCKED) { + if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { + in_pcbref(inp); +// INP_WUNLOCK(inp); +// INP_INFO_RLOCK(&V_tcbinfo); +// ti_locked = TI_RLOCKED; +// INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) { + inp = NULL; + goto findpcb; + } + } else + ti_locked = TI_RLOCKED; + } + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); +#endif +// if (thflags & TH_SYN) +// tcp_dooptions(&to, optp, optlen, TO_SYN); + /* + * NB: tcp_twcheck unlocks the INP and frees the mbuf. + */ + if (tcp_twcheck(tp,/*inp, &to,*/ th, /*m,*/ tlen)) + //goto findpcb; + return (RELOOKUP_REQUIRED); +// INP_INFO_RUNLOCK(&V_tcbinfo); + return (IPPROTO_DONE); + } + /* + * The TCPCB may no longer exist if the connection is winding + * down or it is in the CLOSED state. Either way we drop the + * segment and send an appropriate response. + */ +#if 0 + tp = intotcpcb(inp); + if (tp == NULL || tp->t_state == TCPS_CLOSED) { + rstreason = BANDLIM_RST_CLOSEDPORT; + goto dropwithreset; + } + +#ifdef TCP_OFFLOAD + if (tp->t_flags & TF_TOE) { + tcp_offload_input(tp, m); + m = NULL; /* consumed by the TOE driver */ + goto dropunlock; + } +#endif +#endif + /* + * We've identified a valid inpcb, but it could be that we need an + * inpcbinfo write lock but don't hold it. In this case, attempt to + * acquire using the same strategy as the TIMEWAIT case above. If we + * relock, we have to jump back to 'relocked' as the connection might + * now be in TIMEWAIT. + */ +#if 0 +#ifdef INVARIANTS + if ((thflags & (TH_FIN | TH_RST)) != 0) + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); +#endif + if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) || + (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) { + if (ti_locked == TI_UNLOCKED) { + if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) { + in_pcbref(inp); + INP_WUNLOCK(inp); + INP_INFO_RLOCK(&V_tcbinfo); + ti_locked = TI_RLOCKED; + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) { + inp = NULL; + goto findpcb; + } + goto relocked; + } else + ti_locked = TI_RLOCKED; + } + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + } + +#ifdef MAC + INP_WLOCK_ASSERT(inp); + if (mac_inpcb_check_deliver(inp, m)) + goto dropunlock; +#endif + so = inp->inp_socket; + KASSERT(so != NULL, ("%s: so == NULL", __func__)); +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) { + ostate = tp->t_state; +#ifdef INET6 + if (isipv6) { + bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6)); + } else +#endif + bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip)); + tcp_savetcp = *th; + } +#endif /* TCPDEBUG */ +#endif + /* + * When the socket is accepting connections (the INPCB is in LISTEN + * state) we look into the SYN cache if this is a new connection + * attempt or the completion of a previous one. 
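+ * In this port there is no syncache: a listening socket is indicated
+ * by tp == NULL, with the listener itself passed separately as tpl,
+ * and instead of creating a compressed syncache entry the code below
+ * claims a full tcpcb right away via accept_ready(), so SYN-RECEIVED
+ * handshake state lives in an ordinary connection block.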
+ */ + + if (/*so->so_options & SO_ACCEPTCONN*/tp == NULL) { + KASSERT(tpl->t_state == TCP6S_LISTEN, ("listen socket must be in listening state!")); +#if 0 // HANDLING OF SYN_RECEIVED HAPPENS NORMALLY, EVEN IF THIS BRANCH ISN'T TAKEN + //struct in_conninfo inc; + struct syncache_ent inc; + +// KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but " +// "tp not listening", __func__)); +// bzero(&inc, sizeof(inc)); + memset(&inc, 0, sizeof(inc)); +//#ifdef INET6 +// if (isipv6) { +// inc.inc_flags |= INC_ISIPV6; +// inc.inc6_faddr = ip6->ip6_src; +// inc.inc6_laddr = ip6->ip6_dst; + inc.faddr = ip6->ip6_src; + inc.laddr = ip6->ip6_dst; +// } else +//#endif +// { +// inc.inc_faddr = ip->ip_src; +// inc.inc_laddr = ip->ip_dst; +// } +// inc.inc_fport = th->th_sport; +// inc.inc_lport = th->th_dport; +// inc.inc_fibnum = so->so_fibnum; + inc.fport = th->th_sport; + inc.lport = th->th_dport; + + /* + * Check for an existing connection attempt in syncache if + * the flag is only ACK. A successful lookup creates a new + * socket appended to the listen queue in SYN_RECEIVED state. + */ + if (tp->state == TCP6S_SYN_RECEIVED && (thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + +// INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + /* + * Parse the TCP options here because + * syncookies need access to the reflected + * timestamp. + */ + tcp_dooptions(&to, optp, optlen, 0); +#if 0 + /* + * NB: syncache_expand() doesn't unlock + * inp and tcpinfo locks. + */ + if (!syncache_expand(&inc,/* &to, */th, tp, tp->acceptinto/*m*/)) { + /* + * No syncache entry or ACK was not + * for our SYN/ACK. Send a RST. + * NB: syncache did its own logging + * of the failure cause. + */ + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + if (so == NULL) { + /* + * We completed the 3-way handshake + * but could not allocate a socket + * either due to memory shortage, + * listen queue length limits or + * global socket limits. Send RST + * or wait and have the remote end + * retransmit the ACK for another + * try. + */ + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Socket allocation failed due to " + "limits or memory shortage, %s\n", + s, __func__, + V_tcp_sc_rst_sock_fail ? + "sending RST" : "try again"); + if (V_tcp_sc_rst_sock_fail) { + rstreason = BANDLIM_UNLIMITED; + goto dropwithreset; + } else + goto dropunlock; + } +#endif + /* + * Socket is created in state SYN_RECEIVED. + * Unlock the listen socket, lock the newly + * created socket and update the tp variable. + */ +// INP_WUNLOCK(inp); /* listen socket */ +// inp = sotoinpcb(so); + /* + * New connection inpcb is already locked by + * syncache_expand(). + */ +// INP_WLOCK_ASSERT(inp); +// tp = intotcpcb(inp); + tp = tp->acceptinto; + KASSERT(tp->t_state == TCPS_SYN_RECEIVED, + ("%s: ", __func__)); +#if 0 +#ifdef TCP_SIGNATURE + if (sig_checked == 0) { + tcp_dooptions(&to, optp, optlen, + (thflags & TH_SYN) ? TO_SYN : 0); + if (!tcp_signature_verify_input(m, off0, tlen, + optlen, &to, th, tp->t_flags)) { + + /* + * In SYN_SENT state if it receives an + * RST, it is allowed for further + * processing. + */ + if ((thflags & TH_RST) == 0 || + (tp->t_state == TCPS_SYN_SENT) == 0) + goto dropunlock; + } + sig_checked = 1; + } +#endif +#endif + /* + * Process the segment and the data it + * contains. tcp_do_segment() consumes + * the mbuf chain and unlocks the inpcb. 
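+ * (In this port the disabled syncache_expand() path above is replaced
+ * by ordinary TCPS_SYN_RECEIVED processing in tcp_do_segment(): the
+ * final ACK of the handshake is matched against the tcpcb that
+ * accept_ready() handed out, rather than being expanded out of a
+ * syncache entry.)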
+ */ + tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, + iptos, ti_locked); +// INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + return (IPPROTO_DONE); + } + /* + * Segment flag validation for new connection attempts: + * + * Our (SYN|ACK) response was rejected. + * Check with syncache and remove entry to prevent + * retransmits. + * + * NB: syncache_chkrst does its own logging of failure + * causes. + */ + if (thflags & TH_RST) { + //syncache_chkrst(&inc, th); + goto dropunlock; + } +#endif + /* + * We can't do anything without SYN. + */ + if ((thflags & TH_SYN) == 0) { + //if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + DEBUG(/*log(LOG_DEBUG, */"%s; %s: Listen socket: " + "SYN is missing, segment ignored\n", + /*s*/"note", __func__); +// TCPSTAT_INC(tcps_badsyn); + goto dropunlock; + } + /* + * (SYN|ACK) is bogus on a listen socket. + */ + if (thflags & TH_ACK) { + //if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + DEBUG(/*log(LOG_DEBUG, */"%s; %s: Listen socket: " + "SYN|ACK invalid, segment rejected\n", + /*s*/"note", __func__); +// syncache_badack(&inc); /* XXX: Not needed! */ +// TCPSTAT_INC(tcps_badsyn); + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + /* + * If the drop_synfin option is enabled, drop all + * segments with both the SYN and FIN bits set. + * This prevents e.g. nmap from identifying the + * TCP/IP stack. + * XXX: Poor reasoning. nmap has other methods + * and is constantly refining its stack detection + * strategies. + * XXX: This is a violation of the TCP specification + * and was used by RFC1644. + */ + if ((thflags & TH_FIN) && V_drop_synfin) { + //if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + DEBUG(/*log(LOG_DEBUG, */"%s; %s: Listen socket: " + "SYN|FIN segment ignored (based on " + "sysctl setting)\n", /*s*/"note", __func__); +// TCPSTAT_INC(tcps_badsyn); + goto dropunlock; + } + /* + * Segment's flags are (SYN) or (SYN|FIN). + * + * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored + * as they do not affect the state of the TCP FSM. + * The data pointed to by TH_URG and th_urp is ignored. + */ + KASSERT((thflags & (TH_RST|TH_ACK)) == 0, + ("%s: Listen socket: TH_RST or TH_ACK set", __func__)); + KASSERT(thflags & (TH_SYN), + ("%s: Listen socket: TH_SYN not set", __func__)); +#if 0 +#ifdef INET6 + /* + * If deprecated address is forbidden, + * we do not accept SYN to deprecated interface + * address to prevent any new inbound connection from + * getting established. + * When we do not accept SYN, we send a TCP RST, + * with deprecated source address (instead of dropping + * it). We compromise it as it is much better for peer + * to send a RST, and RST will be the final packet + * for the exchange. + * + * If we do not forbid deprecated addresses, we accept + * the SYN packet. RFC2462 does not suggest dropping + * SYN in this case. + * If we decipher RFC2462 5.5.4, it says like this: + * 1. use of deprecated addr with existing + * communication is okay - "SHOULD continue to be + * used" + * 2. use of it with new communication: + * (2a) "SHOULD NOT be used if alternate address + * with sufficient scope is available" + * (2b) nothing mentioned otherwise. + * Here we fall into (2b) case as we have no choice in + * our source address selection - we must obey the peer. + * + * The wording in RFC2462 is confusing, and there are + * multiple description text for deprecated address + * handling - worse, they are not exactly the same. + * I believe 5.5.4 is the best one, so we follow 5.5.4. 
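+		   Dropping the SYN is safe because the peer's retransmit
+		   timer will resend it (with backoff), so a later attempt
+		   can succeed once a tcpcb frees up.  This mirrors the
+		   upstream "limits or memory shortage" behavior above,
+		   minus the option of answering with a RST.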
+ */ + if (isipv6 && !V_ip6_use_deprecated) { + struct in6_ifaddr *ia6; + + ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia6 != NULL && + (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { + ifa_free(&ia6->ia_ifa); + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt to deprecated " + "IPv6 address rejected\n", + s, __func__); + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + if (ia6) + ifa_free(&ia6->ia_ifa); + } +#endif /* INET6 */ +#endif + /* + * Basic sanity checks on incoming SYN requests: + * Don't respond if the destination is a link layer + * broadcast according to RFC1122 4.2.3.10, p. 104. + * If it is from this socket it must be forged. + * Don't respond if the source or destination is a + * global or subnet broad- or multicast address. + * Note that it is quite possible to receive unicast + * link-layer packets with a broadcast IP address. Use + * in_broadcast() to find them. + */ +/* + if (m->m_flags & (M_BCAST|M_MCAST)) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt from broad- or multicast " + "link layer address ignored\n", s, __func__); + goto dropunlock; + } +*/ +//#ifdef INET6 +// if (isipv6) { + if (th->th_dport == th->th_sport && + IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) { + //if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + DEBUG(/*log(LOG_DEBUG, */"%s; %s: Listen socket: " + "Connection attempt to/from self " + "ignored\n", /*s*/ "note", __func__); + goto dropunlock; + } + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || + IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { + //if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + DEBUG(/*log(LOG_DEBUG, */"%s; %s: Listen socket: " + "Connection attempt from/to multicast " + "address ignored\n", /*s*/ "note", __func__); + goto dropunlock; + } +// } +//#endif +#if 0 +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + { + if (th->th_dport == th->th_sport && + ip->ip_dst.s_addr == ip->ip_src.s_addr) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt from/to self " + "ignored\n", s, __func__); + goto dropunlock; + } + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || + IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || + ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || + in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt from/to broad- " + "or multicast address ignored\n", + s, __func__); + goto dropunlock; + } + } +#endif + /* + * SYN appears to be valid. Create compressed TCP state + * for syncache. + */ +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, + (void *)tcp_saveipgen, &tcp_savetcp, 0); +#endif + TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); +#endif + tcp_dooptions(&to, optp, optlen, TO_SYN); + + //syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL); + // INSTEAD OF ADDING TO THE SYNCACHE, INITIALIZE THE NEW SOCKET RIGHT AWAY + // CODE IS TAKEN FROM THE syncache_socket FUNCTION + tp = accept_ready(tpl); // Try to allocate an active socket to accept into + if (tp == NULL) { + /* If we couldn't allocate, just ignore the SYN. 
*/ + return IPPROTO_DONE; + } + tcp_state_change(tp, TCPS_SYN_RECEIVED); + tpmarkpassiveopen(tp); + tp->t_flags |= TF_ACKNOW; // my addition + tp->iss = tcp_new_isn(tp); + tp->irs = th->th_seq; + tcp_rcvseqinit(tp); + tcp_sendseqinit(tp); + tp->snd_wl1 = th->th_seq; + tp->snd_max = tp->iss/* + 1*/; + tp->snd_nxt = tp->iss/* + 1*/; + tp->rcv_up = th->th_seq + 1; + tp->rcv_wnd = imin(imax(cbuf_free_space(&tp->recvbuf), 0), TCP_MAXWIN); + tp->rcv_adv += tp->rcv_wnd; + tp->last_ack_sent = tp->rcv_nxt; + memcpy(&tp->faddr, &ip6->ip6_src, sizeof(tp->faddr)); + tp->fport = th->th_sport; + tp->lport = tpl->lport; + + tp->t_flags = tp->t_flags & (TF_NOPUSH | TF_NODELAY | TF_NOOPT); +// tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY); +// if (sc->sc_flags & SCF_NOOPT) +// tp->t_flags |= TF_NOOPT; +// else { + if (!(tp->t_flags & TF_NOOPT) && V_tcp_do_rfc1323) { + if (/*sc->sc_flags & SCF_WINSCALE*/to.to_flags & TOF_SCALE) { + int wscale = 0; + + /* + * Pick the smallest possible scaling factor that + * will still allow us to scale up to sb_max, aka + * kern.ipc.maxsockbuf. + * + * We do this because there are broken firewalls that + * will corrupt the window scale option, leading to + * the other endpoint believing that our advertised + * window is unscaled. At scale factors larger than + * 5 the unscaled window will drop below 1500 bytes, + * leading to serious problems when traversing these + * broken firewalls. + * + * With the default maxsockbuf of 256K, a scale factor + * of 3 will be chosen by this algorithm. Those who + * choose a larger maxsockbuf should watch out + * for the compatiblity problems mentioned above. + * + * RFC1323: The Window field in a SYN (i.e., a + * or ) segment itself is never scaled. + */ + /* + while (wscale < TCP_MAX_WINSHIFT && + (TCP_MAXWIN << wscale) < sb_max) + wscale++; + */ + /* I have ~30K of memory. There's no reason I would need + window scaling. */ + + tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE; + tp->snd_scale = /*sc->sc_requested_s_scale*/to.to_wscale; + tp->request_r_scale = wscale; + } + if (/*sc->sc_flags & SCF_TIMESTAMP*/to.to_flags & TOF_TS) { + tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP; + tp->ts_recent = /*sc->sc_tsreflect*/to.to_tsval; + tp->ts_recent_age = tcp_ts_getticks(); + tp->ts_offset = /*sc->sc_tsoff*/0; // No syncookies, so this should always be 0 + } +#if 0 + #ifdef TCP_SIGNATURE + if (sc->sc_flags & SCF_SIGNATURE) + tp->t_flags |= TF_SIGNATURE; + #endif +#endif + if (/*sc->sc_flags & SCF_SACK*/ to.to_flags & TOF_SACKPERM) + tp->t_flags |= TF_SACK_PERMIT; + } + if (/*sc->sc_flags & SCF_ECN*/(th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn) + tp->t_flags |= TF_ECN_PERMIT; + + /* + * Set up MSS and get cached values from tcp_hostcache. + * This might overwrite some of the defaults we just set. + */ + tcp_mss(tp, /*sc->sc_peer_mss*/to.to_mss); + + /* + * Entry added to syncache and mbuf consumed. + * Only the listen socket is unlocked by syncache_add(). + */ +// if (ti_locked == TI_RLOCKED) { +// INP_INFO_RUNLOCK(&V_tcbinfo); +// ti_locked = TI_UNLOCKED; +// } +// INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + tcp_output(tp); // to send the SYN-ACK + + tp->accepted_from = tpl; + return (IPPROTO_DONE); + } else if (tp->t_state == TCPS_LISTEN) { + /* + * When a listen socket is torn down the SO_ACCEPTCONN + * flag is removed first while connections are drained + * from the accept queue in a unlock/lock cycle of the + * ACCEPT_LOCK, opening a race condition allowing a SYN + * attempt go through unhandled. 
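+ * (This branch looks unreachable in this port, where a listening
+ * socket is represented by tp == NULL and handled above; it is kept
+ * for parity with the upstream code.)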
+ */
+		goto dropunlock;
+	}
+
+	KASSERT(tp, ("tp is still NULL!"));
+
+#if 0 // DON'T DO TCP SIGNATURE
+#ifdef TCP_SIGNATURE
+	if (sig_checked == 0) {
+		tcp_dooptions(&to, optp, optlen,
+		    (thflags & TH_SYN) ? TO_SYN : 0);
+		if (!tcp_signature_verify_input(m, off0, tlen, optlen, &to,
+		    th, tp->t_flags)) {
+
+			/*
+			 * In SYN_SENT state if it receives an RST, it is
+			 * allowed for further processing.
+			 */
+			if ((thflags & TH_RST) == 0 ||
+			    (tp->t_state == TCPS_SYN_SENT) == 0)
+				goto dropunlock;
+		}
+		sig_checked = 1;
+	}
+#endif
+#endif
+//	TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+	/*
+	 * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
+	 * state.  tcp_do_segment() always consumes the mbuf chain, unlocks
+	 * the inpcb, and unlocks pcbinfo.
+	 */
+	tcp_do_segment(ip6, th, tp, drop_hdrlen, tlen, iptos, signals, freedentries);
+//	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+	return (IPPROTO_DONE);
+
+dropwithreset:
+/*
+	TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		ti_locked = TI_UNLOCKED;
+	}
+*/
+#if 0
+#ifdef INVARIANTS
+	else {
+		KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
+		    "ti_locked: %d", __func__, ti_locked));
+		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+	}
+#endif
+#endif
+//	if (inp != NULL) {
+	if (tp) {
+		tcp_dropwithreset(ip6, th, tp, tlen, rstreason);
+//		INP_WUNLOCK(inp);
+	} else
+		tcp_dropwithreset(ip6, th, NULL, tlen, rstreason);
+//	m = NULL;	/* mbuf chain got consumed. */
+	goto drop;
+
+dropunlock:
+#if 0
+	if (m != NULL)
+		TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
+
+	if (ti_locked == TI_RLOCKED) {
+		INP_INFO_RUNLOCK(&V_tcbinfo);
+		ti_locked = TI_UNLOCKED;
+	}
+#ifdef INVARIANTS
+	else {
+		KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
+		    "ti_locked: %d", __func__, ti_locked));
+		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+	}
+#endif
+
+	if (inp != NULL)
+		INP_WUNLOCK(inp);
+#endif
+drop:
+#if 0 // I BELIEVE THAT THE MEMORY MANAGEMENT IS DONE FOR ME BY THE IP STACK
+	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+	if (s != NULL)
+		free(s, M_TCPLOG);
+	if (m != NULL)
+		m_freem(m);
+#endif
+	return (IPPROTO_DONE);
+}
+
+/* Original signature
+static void
+tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
+    struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
+    int ti_locked)
+*/
+static void
+tcp_do_segment(struct ip6_hdr* ip6, struct tcphdr *th,
+    struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
+    uint8_t* signals, uint32_t* freedentries)
+{
+	int thflags, acked, ourfinisacked, needoutput = 0;
+	int rstreason, todrop, win;
+	u_long tiwin;
+	//char *s;
+	//struct in_conninfo *inc;
+	//struct mbuf *mfree;
+	struct tcpopt to;
+	uint32_t ticks = get_ticks();
+#if 0
+#ifdef TCPDEBUG
+	/*
+	 * The size of tcp_saveipgen must be the size of the max ip header,
+	 * now IPv6.
+	 */
+	u_char tcp_saveipgen[IP6_HDR_LEN];
+	struct tcphdr tcp_savetcp;
+	short ostate = 0;
+#endif
+#endif
+	thflags = th->th_flags;
+	//inc = &tp->t_inpcb->inp_inc;
+	tp->sackhint.last_sack_ack = 0;
+
+	/*
+	 * If this is either a state-changing packet or current state isn't
+	 * established, we require a write lock on tcbinfo.  Otherwise, we
+	 * allow the tcbinfo to be in either a locked or unlocked state, as
+	 * the caller may have unnecessarily acquired a write lock due to a
+	 * race.
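+	 * In this port that locking discipline is compiled out (see the
+	 * "OMIT THEIR SYNCHRONIZATION" blocks): TCP processing appears to
+	 * be serialized by coarser locking in the surrounding module, so
+	 * the tcbinfo/inpcb assertions have no equivalent here.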
+ */ +#if 0 // OMIT THEIR SYNCHRONIZATION + if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || + tp->t_state != TCPS_ESTABLISHED) { + KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " + "SYN/FIN/RST/!EST", __func__, ti_locked)); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + } else { +#ifdef INVARIANTS + if (ti_locked == TI_RLOCKED) + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + else { + KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " + "ti_locked: %d", __func__, ti_locked)); + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + } +#endif + } + INP_WLOCK_ASSERT(tp->t_inpcb); +#endif + KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", + __func__)); + KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", + __func__)); +#if 0 +#ifdef TCPPCAP + /* Save segment, if requested. */ + tcp_pcap_add(th, m, &(tp->t_inpkts)); +#endif +#endif + /* + * Segment received on connection. + * Reset idle time and keep-alive timer. + * XXX: This should be done after segment + * validation to ignore broken/spoofed segs. + */ + tp->t_rcvtime = ticks; + if (TCPS_HAVEESTABLISHED(tp->t_state)) + tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); + + /* + * Scale up the window into a 32-bit value. + * For the SYN_SENT state the scale is zero. + */ + tiwin = th->th_win << tp->snd_scale; + + /* + * TCP ECN processing. + */ + if (tp->t_flags & TF_ECN_PERMIT) { + if (thflags & TH_CWR) + tp->t_flags &= ~TF_ECN_SND_ECE; + switch (iptos & IPTOS_ECN_MASK) { + case IPTOS_ECN_CE: + tp->t_flags |= TF_ECN_SND_ECE; +// TCPSTAT_INC(tcps_ecn_ce); + break; + case IPTOS_ECN_ECT0: +// TCPSTAT_INC(tcps_ecn_ect0); + break; + case IPTOS_ECN_ECT1: +// TCPSTAT_INC(tcps_ecn_ect1); + break; + } +#if 0 + /* Process a packet differently from RFC3168. */ + cc_ecnpkt_handler(tp, th, iptos); + + /* Congestion experienced. */ + if (thflags & TH_ECE) { + cc_cong_signal(tp, th, CC_ECN); + } +#endif + } + + /* + * Parse options on any incoming segment. + */ + tcp_dooptions(&to, (u_char *)(th + 1), + (th->th_off << 2) - sizeof(struct tcphdr), + (thflags & TH_SYN) ? TO_SYN : 0); + + /* + * If echoed timestamp is later than the current time, + * fall back to non RFC1323 RTT calculation. Normalize + * timestamp if syncookies were used when this connection + * was established. + */ + + if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) { + to.to_tsecr -= tp->ts_offset; + if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) + to.to_tsecr = 0; + } + /* + * If timestamps were negotiated during SYN/ACK they should + * appear on every segment during this session and vice versa. + */ + if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) { +// if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + DEBUG(/*log(LOG_DEBUG, */"%s; %s: Timestamp missing, " + "no action\n", /*s*/"note", __func__); +// free(s, M_TCPLOG); +// } + } + if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) { +// if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + DEBUG(/*log(LOG_DEBUG, */"%s; %s: Timestamp not expected, " + "no action\n", /*s*/"note", __func__); +// free(s, M_TCPLOG); +// } + } + + /* + * Process options only when we get SYN/ACK back. The SYN case + * for incoming connections is handled in tcp_syncache. + * According to RFC1323 the window field in a SYN (i.e., a + * or ) segment itself is never scaled. + * XXX this is traditional behavior, may need to be cleaned up. 
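+ * Concretely: if our SYN requested scaling (TF_REQ_SCALE) and the
+ * SYN,ACK carries TOF_SCALE with to.to_wscale = 7, every later window
+ * advertisement from the peer is left-shifted by 7, so th_win = 512
+ * means a 65536-byte send window; the window field in the SYN and
+ * SYN,ACK themselves is taken unscaled.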
+ */ + if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { + if ((to.to_flags & TOF_SCALE) && + (tp->t_flags & TF_REQ_SCALE)) { + tp->t_flags |= TF_RCVD_SCALE; + tp->snd_scale = to.to_wscale; + } + /* + * Initial send window. It will be updated with + * the next incoming segment to the scaled value. + */ + tp->snd_wnd = th->th_win; + if (to.to_flags & TOF_TS) { + tp->t_flags |= TF_RCVD_TSTMP; + tp->ts_recent = to.to_tsval; + tp->ts_recent_age = tcp_ts_getticks(); + } + if (to.to_flags & TOF_MSS) + tcp_mss(tp, to.to_mss); + if ((tp->t_flags & TF_SACK_PERMIT) && + (to.to_flags & TOF_SACKPERM) == 0) + tp->t_flags &= ~TF_SACK_PERMIT; + } + /* + * Header prediction: check for the two common cases + * of a uni-directional data xfer. If the packet has + * no control flags, is in-sequence, the window didn't + * change and we're not retransmitting, it's a + * candidate. If the length is zero and the ack moved + * forward, we're the sender side of the xfer. Just + * free the data acked & wake any higher level process + * that was blocked waiting for space. If the length + * is non-zero and the ack didn't move, we're the + * receiver side. If we're getting packets in-order + * (the reassembly queue is empty), add the data to + * the socket buffer and note that we need a delayed ack. + * Make sure that the hidden state-flags are also off. + * Since we check for TCPS_ESTABLISHED first, it can only + * be TH_NEEDSYN. + */ + if (tp->t_state == TCPS_ESTABLISHED && + th->th_seq == tp->rcv_nxt && + (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && + tp->snd_nxt == tp->snd_max && + tiwin && tiwin == tp->snd_wnd && + ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && + /*LIST_EMPTY(&tp->t_segq) &&*/ + bmp_isempty(tp->reassbmp, REASSBMP_SIZE(tp)) && // Added by Sam + ((to.to_flags & TOF_TS) == 0 || + TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { + + /* + * If last ACK falls within this segment's sequence numbers, + * record the timestamp. + * NOTE that the test is modified according to the latest + * proposal of the tcplw@cray.com list (Braden 1993/04/26). + */ + if ((to.to_flags & TOF_TS) != 0 && + SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { + tp->ts_recent_age = tcp_ts_getticks(); + tp->ts_recent = to.to_tsval; + } + + if (tlen == 0) { + if (SEQ_GT(th->th_ack, tp->snd_una) && + SEQ_LEQ(th->th_ack, tp->snd_max) && + !IN_RECOVERY(tp->t_flags) && + (to.to_flags & TOF_SACK) == 0 && + TAILQ_EMPTY(&tp->snd_holes)) { + /* + * This is a pure ack for outstanding data. + */ + uint32_t poppedbytes; // Added by Sam + int ntraversed = 0; // Added by Sam +/* + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + + TCPSTAT_INC(tcps_predack); +*/ + /* + * "bad retransmit" recovery. + */ + if (tp->t_rxtshift == 1 && + tp->t_flags & TF_PREVVALID && + (int)(ticks - tp->t_badrxtwin) < 0) { + cc_cong_signal(tp, th, CC_RTO_ERR); + } + + /* + * Recalculate the transmit timer / rtt. + * + * Some boxes send broken timestamp replies + * during the SYN+ACK phase, ignore + * timestamps of 0 or we could calculate a + * huge RTT and blow up the retransmit timer. 
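+ * The to.to_tsecr != 0 guard matters because a zero echo is treated
+ * as "no timestamp": a broken peer echoing 0 would otherwise make
+ * tcp_ts_getticks() - to.to_tsecr span nearly the whole tick counter
+ * and blow up srtt.  With a valid echo the sample is simply
+ * now - tsecr, and TCP_TS_TO_TICKS(t) + 1 keeps a same-tick reply
+ * from yielding a zero RTT sample.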
+ */ + + if ((to.to_flags & TOF_TS) != 0 && + to.to_tsecr) { + u_int t; + + t = tcp_ts_getticks() - to.to_tsecr; + if (!tp->t_rttlow || tp->t_rttlow > t) + tp->t_rttlow = t; + tcp_xmit_timer(tp, + TCP_TS_TO_TICKS(t) + 1); + } else if (tp->t_rtttime && + SEQ_GT(th->th_ack, tp->t_rtseq)) { + if (!tp->t_rttlow || + tp->t_rttlow > ticks - tp->t_rtttime) + tp->t_rttlow = ticks - tp->t_rtttime; + tcp_xmit_timer(tp, + ticks - tp->t_rtttime); + } + + acked = BYTES_THIS_ACK(tp, th); + + /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ +// hhook_run_tcp_est_in(tp, th, &to); + +// TCPSTAT_INC(tcps_rcvackpack); +// TCPSTAT_ADD(tcps_rcvackbyte, acked); +// sbdrop(&so->so_snd, acked); + poppedbytes = lbuf_pop(&tp->sendbuf, acked, &ntraversed); + KASSERT(poppedbytes == acked, ("More bytes were acked than are in the send buffer")); + *freedentries += ntraversed; + if (SEQ_GT(tp->snd_una, tp->snd_recover) && + SEQ_LEQ(th->th_ack, tp->snd_recover)) + tp->snd_recover = th->th_ack - 1; + + /* + * Let the congestion control algorithm update + * congestion control related information. This + * typically means increasing the congestion + * window. + */ + cc_ack_received(tp, th, CC_ACK); + + tp->snd_una = th->th_ack; + /* + * Pull snd_wl2 up to prevent seq wrap relative + * to th_ack. + */ + tp->snd_wl2 = th->th_ack; + tp->t_dupacks = 0; +// m_freem(m); + + /* + * If all outstanding data are acked, stop + * retransmit timer, otherwise restart timer + * using current (possibly backed-off) value. + * If process is waiting for space, + * wakeup/selwakeup/signal. If data + * are ready to send, let tcp_output + * decide between more output or persist. + */ +#if 0 +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, + (void *)tcp_saveipgen, + &tcp_savetcp, 0); +#endif + TCP_PROBE3(debug__input, tp, th, + mtod(m, const char *)); +#endif + if (tp->snd_una == tp->snd_max) + tcp_timer_activate(tp, TT_REXMT, 0); + else if (!tcp_timer_active(tp, TT_PERSIST)) + tcp_timer_activate(tp, TT_REXMT, + tp->t_rxtcur); +// sowwakeup(so); + if (lbuf_used_space(&tp->sendbuf)) +// if (sbavail(&so->so_snd)) + (void) tcp_output(tp); + goto check_delack; + } + } else if (th->th_ack == tp->snd_una && + tlen <= /*sbspace(&so->so_rcv)*/cbuf_free_space(&tp->recvbuf)) { + //int newsize = 0; /* automatic sockbuf scaling */ + + /* + * This is a pure, in-sequence data packet with + * nothing on the reassembly queue and we have enough + * buffer space to take it. + */ +#if 0 + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; +#endif + /* Clean receiver SACK report if present */ + if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks) + tcp_clean_sackreport(tp); +// TCPSTAT_INC(tcps_preddat); + + tp->rcv_nxt += tlen; + /* + * Pull snd_wl1 up to prevent seq wrap relative to + * th_seq. + */ + tp->snd_wl1 = th->th_seq; + /* + * Pull rcv_up up to prevent seq wrap relative to + * rcv_nxt. + */ + tp->rcv_up = tp->rcv_nxt; +#if 0 + TCPSTAT_INC(tcps_rcvpack); + TCPSTAT_ADD(tcps_rcvbyte, tlen); +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, + (void *)tcp_saveipgen, &tcp_savetcp, 0); +#endif + TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); +#endif + /* + * Automatic sizing of receive socket buffer. Often the send + * buffer size is not optimally adjusted to the actual network + * conditions at hand (delay bandwidth product). Setting the + * buffer size too small limits throughput on links with high + * bandwidth and high delay (eg. 
trans-continental/oceanic links). + * + * On the receive side the socket buffer memory is only rarely + * used to any significant extent. This allows us to be much + * more aggressive in scaling the receive socket buffer. For + * the case that the buffer space is actually used to a large + * extent and we run out of kernel memory we can simply drop + * the new segments; TCP on the sender will just retransmit it + * later. Setting the buffer size too big may only consume too + * much kernel memory if the application doesn't read() from + * the socket or packet loss or reordering makes use of the + * reassembly queue. + * + * The criteria to step up the receive buffer one notch are: + * 1. Application has not set receive buffer size with + * SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. + * 2. the number of bytes received during the time it takes + * one timestamp to be reflected back to us (the RTT); + * 3. received bytes per RTT is within seven eighth of the + * current socket buffer size; + * 4. receive buffer size has not hit maximal automatic size; + * + * This algorithm does one step per RTT at most and only if + * we receive a bulk stream w/o packet losses or reorderings. + * Shrinking the buffer during idle times is not necessary as + * it doesn't consume any memory when idle. + * + * TODO: Only step up if the application is actually serving + * the buffer to better manage the socket buffer resources. + */ +#if 0 // Don't bother with this; the receive buffer is statically allocated and can't be resized + if (V_tcp_do_autorcvbuf && + (to.to_flags & TOF_TS) && + to.to_tsecr && + (so->so_rcv.sb_flags & SB_AUTOSIZE)) { + if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) && + to.to_tsecr - tp->rfbuf_ts < hz) { + if (tp->rfbuf_cnt > + (so->so_rcv.sb_hiwat / 8 * 7) && + so->so_rcv.sb_hiwat < + V_tcp_autorcvbuf_max) { + newsize = + min(so->so_rcv.sb_hiwat + + V_tcp_autorcvbuf_inc, + V_tcp_autorcvbuf_max); + } + /* Start over with next RTT. */ + tp->rfbuf_ts = 0; + tp->rfbuf_cnt = 0; + } else + tp->rfbuf_cnt += tlen; /* add up */ + } +#endif + /* Add data to socket buffer. */ +// SOCKBUF_LOCK(&so->so_rcv); +/* if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { + m_freem(m); + } else { */ + /* + * Set new socket buffer size. + * Give up when limit is reached. + */ +#if 0 // The circular buffer isn't resizable + if (newsize) + if (!sbreserve_locked(&so->so_rcv, + newsize, so, NULL)) + so->so_rcv.sb_flags &= ~SB_AUTOSIZE; + m_adj(m, drop_hdrlen); /* delayed header drop */ +#endif + /* Sam: We just add the offset when copying into the receive buffer, + rather than adding it to the th pointer (which would be the closest + thing I could do to trimming an mbuf). */ +// sbappendstream_locked(&so->so_rcv, m, 0); + if (!tpiscantrcv(tp)) { + size_t usedbefore = cbuf_used_space(&tp->recvbuf); + cbuf_write(&tp->recvbuf, ((uint8_t*) th) + drop_hdrlen, tlen); + if (usedbefore == 0 && tlen > 0) { + *signals |= SIG_RECVBUF_NOTEMPTY; + } + } else { + /* Sam: We already know tlen != 0, so if we got here, then it means + that we got data after we called SHUT_RD, or after receiving a FIN. + I'm going to drop the connection in this case. */ + tcp_drop(tp, ECONNABORTED); + goto drop; + } +// } + /* NB: sorwakeup_locked() does an implicit unlock. */ +// sorwakeup_locked(so); + if (DELAY_ACK(tp, tlen)) { + tp->t_flags |= TF_DELACK; + } else { + tp->t_flags |= TF_ACKNOW; + tcp_output(tp); + } + goto check_delack; + } + } + + /* + * Calculate amount of space in receive window, + * and then do TCP input processing. 
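+ * (In this port the rcv queue is a fixed-size circular buffer, so its
+ * space is cbuf_free_space(&tp->recvbuf).  For example, if only 200
+ * bytes are free but we already advertised up to rcv_adv = rcv_nxt +
+ * 500, rcv_wnd stays 500, because an offered window must not be
+ * retracted -- RFC 1122, 4.2.2.16.)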
+ * Receive window is amount of space in rcv queue, + * but not less than advertised window. + */ +// win = sbspace(&so->so_rcv); + win = cbuf_free_space(&tp->recvbuf); + if (win < 0) + win = 0; + tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); + + /* Reset receive buffer auto scaling when not in bulk receive mode. */ +#if 0 + tp->rfbuf_ts = 0; + tp->rfbuf_cnt = 0; +#endif + + switch (tp->t_state) { + + /* + * If the state is SYN_RECEIVED: + * if seg contains an ACK, but not for our SYN/ACK, send a RST. + * (Added by Sam) if seg is resending the original SYN, resend the SYN/ACK + */ + case TCPS_SYN_RECEIVED: + if ((thflags & TH_ACK) && + (SEQ_LEQ(th->th_ack, tp->snd_una) || + SEQ_GT(th->th_ack, tp->snd_max))) { + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } else if ((thflags & TH_SYN) && !(thflags & TH_ACK) && (th->th_seq == tp->irs)) { // this clause was added by Sam + //tp->snd_nxt = tp->snd_una; // Added by Sam, then commented out + tp->t_flags |= TF_ACKNOW;//tcp_output(tp); + } + break; + + /* + * If the state is SYN_SENT: + * if seg contains an ACK, but not for our SYN, drop the input. + * if seg contains a RST, then drop the connection. + * if seg does not contain SYN, then drop it. + * Otherwise this is an acceptable SYN segment + * initialize tp->rcv_nxt and tp->irs + * if seg contains ack then advance tp->snd_una + * if seg contains an ECE and ECN support is enabled, the stream + * is ECN capable. + * if SYN has been acked change to ESTABLISHED else SYN_RCVD state + * arrange for segment to be acked (eventually) + * continue processing rest of data/controls, beginning with URG + */ + case TCPS_SYN_SENT: + if ((thflags & TH_ACK) && + (SEQ_LEQ(th->th_ack, tp->iss) || + SEQ_GT(th->th_ack, tp->snd_max))) { + rstreason = BANDLIM_UNLIMITED; + goto dropwithreset; + } + if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) { +// TCP_PROBE5(connect__refused, NULL, tp, +// mtod(m, const char *), tp, th); + tp = tcp_drop(tp, ECONNREFUSED); + } + if (thflags & TH_RST) + goto drop; + if (!(thflags & TH_SYN)) + goto drop; + + tp->irs = th->th_seq; + tcp_rcvseqinit(tp); + if (thflags & TH_ACK) { +#if 0 + TCPSTAT_INC(tcps_connects); + soisconnected(so); +#ifdef MAC + mac_socketpeer_set_from_mbuf(m, so); +#endif +#endif + /* Do window scaling on this connection? */ + if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + (TF_RCVD_SCALE|TF_REQ_SCALE)) { + tp->rcv_scale = tp->request_r_scale; + } + tp->rcv_adv += imin(tp->rcv_wnd, + TCP_MAXWIN << tp->rcv_scale); + tp->snd_una++; /* SYN is acked */ + /* + * If there's data, delay ACK; if there's also a FIN + * ACKNOW will be turned on later. + */ + if (DELAY_ACK(tp, tlen) && tlen != 0) + tcp_timer_activate(tp, TT_DELACK, + tcp_delacktime); + else + tp->t_flags |= TF_ACKNOW; + + if ((thflags & TH_ECE) && V_tcp_do_ecn) { + tp->t_flags |= TF_ECN_PERMIT; +// TCPSTAT_INC(tcps_ecn_shs); + } + + /* + * Received in SYN_SENT[*] state. + * Transitions: + * SYN_SENT --> ESTABLISHED + * SYN_SENT* --> FIN_WAIT_1 + */ + tp->t_starttime = ticks; + if (tp->t_flags & TF_NEEDFIN) { + tcp_state_change(tp, TCPS_FIN_WAIT_1); + tp->t_flags &= ~TF_NEEDFIN; + thflags &= ~TH_SYN; + } else { + tcp_state_change(tp, TCPS_ESTABLISHED); + *signals |= SIG_CONN_ESTABLISHED; +// TCP_PROBE5(connect__established, NULL, tp, +// mtod(m, const char *), tp, th); + cc_conn_init(tp); + tcp_timer_activate(tp, TT_KEEP, + TP_KEEPIDLE(tp)); + } + } else { + /* + * Received initial SYN in SYN-SENT[*] state => + * simultaneous open. 
+ * If it succeeds, connection is * half-synchronized. + * Otherwise, do 3-way handshake: + * SYN-SENT -> SYN-RECEIVED + * SYN-SENT* -> SYN-RECEIVED* + */ + tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); + tcp_timer_activate(tp, TT_REXMT, 0); + tcp_state_change(tp, TCPS_SYN_RECEIVED); + tp->snd_nxt--; // Sam: We would have incremented snd_nxt in tcp_output when we sent the original SYN, so decrement it here + } +/* + KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: " + "ti_locked %d", __func__, ti_locked)); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(tp->t_inpcb); +*/ + /* + * Advance th->th_seq to correspond to first data byte. + * If data, trim to stay within window, + * dropping FIN if necessary. + */ + th->th_seq++; + if (tlen > tp->rcv_wnd) { + todrop = tlen - tp->rcv_wnd; +#if 0 // m_adj just trims an mbuf. We can just read less, so this isn't necessary + m_adj(m, -todrop); +#endif + tlen = tp->rcv_wnd; + thflags &= ~TH_FIN; +// TCPSTAT_INC(tcps_rcvpackafterwin); +// TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop); + } + tp->snd_wl1 = th->th_seq - 1; + tp->rcv_up = th->th_seq; + /* + * Client side of transaction: already sent SYN and data. + * If the remote host used T/TCP to validate the SYN, + * our data will be ACK'd; if so, enter normal data segment + * processing in the middle of step 5, ack processing. + * Otherwise, goto step 6. + */ + if (thflags & TH_ACK) + goto process_ACK; + + goto step6; + + /* + * If the state is LAST_ACK or CLOSING or TIME_WAIT: + * do normal processing. + * + * NB: Leftover from RFC1644 T/TCP. Cases to be reused later. + */ + case TCPS_LAST_ACK: + case TCPS_CLOSING: + break; /* continue normal processing */ + } + + /* + * States other than LISTEN or SYN_SENT. + * First check the RST flag and sequence number since reset segments + * are exempt from the timestamp and connection count tests. This + * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix + * below which allowed reset segments in half the sequence space + * to fall though and be processed (which gives forged reset + * segments with a random sequence number a 50 percent chance of + * killing a connection). + * Then check timestamp, if present. + * Then check the connection count, if present. + * Then check that at least some bytes of segment are within + * receive window. If segment begins before rcv_nxt, + * drop leading data (and SYN); if nothing left, just ack. + */ + if (thflags & TH_RST) { + /* + * RFC5961 Section 3.2 + * + * - RST drops connection only if SEG.SEQ == RCV.NXT. + * - If RST is in window, we send challenge ACK. + * + * Note: to take into account delayed ACKs, we should + * test against last_ack_sent instead of rcv_nxt. + * Note 2: we handle special case of closed window, not + * covered by the RFC. + */ + if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) && + SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || + (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) { +/* + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_RLOCKED, + ("%s: TH_RST ti_locked %d, th %p tp %p", + __func__, ti_locked, th, tp)); + KASSERT(tp->t_state != TCPS_SYN_SENT, + ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p", + __func__, th, tp)); +*/ + if (/*V_tcp_insecure_rst ||*/ + tp->last_ack_sent == th->th_seq) { + int droperror = 0; +// TCPSTAT_INC(tcps_drops); + /* Drop the connection. 
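+			   Concretely (RFC 5961 blind-reset defense): with
+			   last_ack_sent = 1000 and rcv_wnd = 500, a RST with
+			   seq 1000 tears the connection down right here; seq
+			   1200 is in-window but inexact, so it only elicits
+			   the challenge ACK below, and a genuine peer then
+			   re-sends its RST with seq == 1000; seq 2000 falls
+			   outside the window and is silently dropped.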
*/ + switch (tp->t_state) { + case TCPS_SYN_RECEIVED: +// so->so_error = ECONNREFUSED; + droperror = ECONNREFUSED; + goto close; + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: +// so->so_error = ECONNRESET; + droperror = ECONNRESET; + close: + tcp_state_change(tp, TCPS_CLOSED); + /* FALLTHROUGH */ + default: + tp = tcp_close(tp); + connection_lost(tp, droperror); + } + } else { +// TCPSTAT_INC(tcps_badrst); + /* Send challenge ACK. */ + tcp_respond(tp, ip6, th, tp->rcv_nxt, tp->snd_nxt, TH_ACK); + tp->last_ack_sent = tp->rcv_nxt; +// m = NULL; + } + } + goto drop; + } + + /* + * RFC5961 Section 4.2 + * Send challenge ACK for any SYN in synchronized state. + * (Added by Sam) Don't send if in SYN-RECEIVED + */ + if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT && tp->t_state != TCP6S_SYN_RECEIVED) { +/* KASSERT(ti_locked == TI_RLOCKED, + ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked)); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + + TCPSTAT_INC(tcps_badsyn);*/ +/* DON'T BOTHER WITH THE ORIGINAL INSECURE WAY. ALWAYS SEND THE CHALLENGE ACK. + if (V_tcp_insecure_syn && + SEQ_GEQ(th->th_seq, tp->last_ack_sent) && + SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) { + tp = tcp_drop(tp, ECONNRESET); + rstreason = BANDLIM_UNLIMITED; + } else {*/ + /* Send challenge ACK. */ + DEBUG("Sending challenge ACK\n"); + tcp_respond(tp, ip6, th, tp->rcv_nxt, tp->snd_nxt, TH_ACK); + tp->last_ack_sent = tp->rcv_nxt; +// m = NULL; +// } + goto drop; + } + + /* + * RFC 1323 PAWS: If we have a timestamp reply on this segment + * and it's less than ts_recent, drop it. + */ + if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && + TSTMP_LT(to.to_tsval, tp->ts_recent)) { + + /* Check to see if ts_recent is over 24 days old. */ + if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) { + /* + * Invalidate ts_recent. If this segment updates + * ts_recent, the age will be reset later and ts_recent + * will get a valid value. If it does not, setting + * ts_recent to zero will at least satisfy the + * requirement that zero be placed in the timestamp + * echo reply when ts_recent isn't valid. The + * age isn't reset until we get a valid ts_recent + * because we don't want out-of-order segments to be + * dropped when ts_recent is old. + */ + tp->ts_recent = 0; + } else { +// TCPSTAT_INC(tcps_rcvduppack); +// TCPSTAT_ADD(tcps_rcvdupbyte, tlen); +// TCPSTAT_INC(tcps_pawsdrop); + if (tlen) + goto dropafterack; + goto drop; + } + } + + /* + * In the SYN-RECEIVED state, validate that the packet belongs to + * this connection before trimming the data to fit the receive + * window. Check the sequence number versus IRS since we know + * the sequence numbers haven't wrapped. This is a partial fix + * for the "LAND" DoS attack. + */ + if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } + + todrop = tp->rcv_nxt - th->th_seq; + if (todrop > 0) { + if (thflags & TH_SYN) { + thflags &= ~TH_SYN; + th->th_seq++; + if (th->th_urp > 1) + th->th_urp--; + else + thflags &= ~TH_URG; + todrop--; + } + /* + * Following if statement from Stevens, vol. 2, p. 960. + */ + if (todrop > tlen + || (todrop == tlen && (thflags & TH_FIN) == 0)) { + /* + * Any valid FIN must be to the left of the window. + * At this point the FIN must be a duplicate or out + * of sequence; drop it. + */ + thflags &= ~TH_FIN; + + /* + * Send an ACK to resynchronize and drop any data. + * But keep on processing for RST or ACK. 
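+ * Worked example of the trimming below: with rcv_nxt = 2000, th_seq =
+ * 1900 and tlen = 300, todrop = 100; those 100 bytes were already
+ * delivered, so drop_hdrlen grows by 100, th_seq becomes 2000, tlen
+ * becomes 200, and only the new 200 bytes are processed.  When
+ * tlen <= todrop instead, the segment is a pure duplicate: keep
+ * nothing, but still ACK it (the case handled right here).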
+ */ + tp->t_flags |= TF_ACKNOW; + todrop = tlen; +// TCPSTAT_INC(tcps_rcvduppack); +// TCPSTAT_ADD(tcps_rcvdupbyte, todrop); + } else { +// TCPSTAT_INC(tcps_rcvpartduppack); +// TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop); + } + drop_hdrlen += todrop; /* drop from the top afterwards */ + th->th_seq += todrop; + tlen -= todrop; + if (th->th_urp > todrop) + th->th_urp -= todrop; + else { + thflags &= ~TH_URG; + th->th_urp = 0; + } + } + + /* + * If new data are received on a connection after the + * user processes are gone, then RST the other end. + */ +#if 0 // I don't have to worry about user process state + if ((so->so_state & SS_NOFDREF) && + tp->t_state > TCPS_CLOSE_WAIT && tlen) { + KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && " + "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked)); + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data " + "after socket was closed, " + "sending RST and removing tcpcb\n", + s, __func__, tcpstates[tp->t_state], tlen); + free(s, M_TCPLOG); + } + tp = tcp_close(tp); + TCPSTAT_INC(tcps_rcvafterclose); + rstreason = BANDLIM_UNLIMITED; + goto dropwithreset; + } +#endif + /* + * If segment ends after window, drop trailing data + * (and PUSH and FIN); if nothing left, just ACK. + */ + todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd); + if (todrop > 0) { +// TCPSTAT_INC(tcps_rcvpackafterwin); + if (todrop >= tlen) { +// TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen); + /* + * If window is closed can only take segments at + * window edge, and have to drop data and PUSH from + * incoming segments. Continue processing, but + * remember to ack. Otherwise, drop segment + * and ack. + */ + if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { + tp->t_flags |= TF_ACKNOW; +// TCPSTAT_INC(tcps_rcvwinprobe); + } else + goto dropafterack; + }/* else + TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);*/ +#if 0 // Again, we don't need to trim an mbuf + m_adj(m, -todrop); +#endif + tlen -= todrop; + thflags &= ~(TH_PUSH|TH_FIN); + } + + /* + * If last ACK falls within this segment's sequence numbers, + * record its timestamp. + * NOTE: + * 1) That the test incorporates suggestions from the latest + * proposal of the tcplw@cray.com list (Braden 1993/04/26). + * 2) That updating only on newer timestamps interferes with + * our earlier PAWS tests, so this check should be solely + * predicated on the sequence space of this segment. + * 3) That we modify the segment boundary check to be + * Last.ACK.Sent <= SEG.SEQ + SEG.Len + * instead of RFC1323's + * Last.ACK.Sent < SEG.SEQ + SEG.Len, + * This modified check allows us to overcome RFC1323's + * limitations as described in Stevens TCP/IP Illustrated + * Vol. 2 p.869. In such cases, we can still calculate the + * RTT correctly when RCV.NXT == Last.ACK.Sent. + */ + + if ((to.to_flags & TOF_TS) != 0 && + SEQ_LEQ(th->th_seq, tp->last_ack_sent) && + SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen + + ((thflags & (TH_SYN|TH_FIN)) != 0))) { + tp->ts_recent_age = tcp_ts_getticks(); + tp->ts_recent = to.to_tsval; + } + + /* + * If the ACK bit is off: if in SYN-RECEIVED state or SENDSYN + * flag is on (half-synchronized state), then queue data for + * later processing; else drop segment and return. 
+ */
+	if ((thflags & TH_ACK) == 0) {
+		if (tp->t_state == TCPS_SYN_RECEIVED ||
+		    (tp->t_flags & TF_NEEDSYN))
+			goto step6;
+		else if (tp->t_flags & TF_ACKNOW)
+			goto dropafterack;
+		else
+			goto drop;
+	}
+
+	DEBUG("Processing ACK\n");
+
+	/*
+	 * Ack processing.
+	 */
+	switch (tp->t_state) {
+
+	/*
+	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
+	 * ESTABLISHED state and continue processing.
+	 * The ACK was checked above.
+	 */
+	case TCPS_SYN_RECEIVED:
+
+//		TCPSTAT_INC(tcps_connects);
+//		soisconnected(so);
+		/* Do window scaling? */
+		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+		    (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+			tp->rcv_scale = tp->request_r_scale;
+			tp->snd_wnd = tiwin;
+		}
+		/*
+		 * Make transitions:
+		 *      SYN-RECEIVED  -> ESTABLISHED
+		 *      SYN-RECEIVED* -> FIN-WAIT-1
+		 */
+		tp->t_starttime = ticks;
+		if (tp->t_flags & TF_NEEDFIN) {
+			tcp_state_change(tp, TCPS_FIN_WAIT_1);
+			tp->t_flags &= ~TF_NEEDFIN;
+		} else {
+			tcp_state_change(tp, TCPS_ESTABLISHED);
+			*signals |= SIG_CONN_ESTABLISHED;
+//			TCP_PROBE5(accept__established, NULL, tp,
+//			    mtod(m, const char *), tp, th);
+			cc_conn_init(tp);
+			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
+			if (!tpispassiveopen(tp)) { // Added by Sam: Accounts for simultaneous open
+				// If this socket was opened actively, then the fact that we are
+				// in SYN-RECEIVED indicates a simultaneous open
+				// Don't ACK the SYN-ACK in that case (unless it contains data or
+				// something, which will be processed later)
+				tp->t_flags &= ~TF_ACKNOW;
+			} else {
+				bool accepted = accepted_connection(tp->accepted_from, tp, &ip6->ip6_src, th->th_sport);
+				if (!accepted) {
+					// Maybe I want to just silently drop the packet?
+					rstreason = ECONNREFUSED;
+					goto dropwithreset;
+				}
+			}
+		}
+		/*
+		 * If segment contains data or ACK, will call tcp_reass()
+		 * later; if not, do so now to pass queued data to user.
+		 */
+		if (tlen == 0 && (thflags & TH_FIN) == 0)
+			(void) tcp_reass(tp, (struct tcphdr *)0, 0,
+			    (/*struct mbuf **/ uint8_t*)0, signals);
+
+		tp->snd_wl1 = th->th_seq - 1;
+		/* FALLTHROUGH */
+
+	/*
+	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+	 * ACKs.  If the ack is in the range
+	 *	tp->snd_una < th->th_ack <= tp->snd_max
+	 * then advance tp->snd_una to th->th_ack and drop
+	 * data from the retransmission queue.  If this ACK reflects
+	 * more up to date window information we update our window information.
+	 */
+	case TCPS_ESTABLISHED:
+	case TCPS_FIN_WAIT_1:
+	case TCPS_FIN_WAIT_2:
+	case TCPS_CLOSE_WAIT:
+	case TCPS_CLOSING:
+	case TCPS_LAST_ACK:
+		if (SEQ_GT(th->th_ack, tp->snd_max)) {
+//			TCPSTAT_INC(tcps_rcvacktoomuch);
+			goto dropafterack;
+		}
+
+		if ((tp->t_flags & TF_SACK_PERMIT) &&
+		    ((to.to_flags & TOF_SACK) ||
+		    !TAILQ_EMPTY(&tp->snd_holes)))
+			tcp_sack_doack(tp, &to, th->th_ack);
+
+		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
+//		hhook_run_tcp_est_in(tp, th, &to);
+
+		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
+			if (tlen == 0 && tiwin == tp->snd_wnd) {
+				/*
+				 * If this is the first time we've seen a
+				 * FIN from the remote, this is not a
+				 * duplicate and it needs to be processed
+				 * normally.  This happens during a
+				 * simultaneous close.
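+				 * Otherwise the dup-ack counting below
+				 * implements fast retransmit: e.g. if
+				 * segment 3 of 10 in flight is lost, each
+				 * later arrival makes the receiver re-ACK
+				 * the same th_ack, and the third duplicate
+				 * (tcprexmtthresh, normally 3) triggers an
+				 * immediate retransmit from th_ack with
+				 * cwnd pulled down to one t_maxseg, rather
+				 * than waiting out the retransmit timer.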
+ */ + if ((thflags & TH_FIN) && + (TCPS_HAVERCVDFIN(tp->t_state) == 0)) { + tp->t_dupacks = 0; + break; + } +// TCPSTAT_INC(tcps_rcvdupack); + /* + * If we have outstanding data (other than + * a window probe), this is a completely + * duplicate ack (ie, window info didn't + * change and FIN isn't set), + * the ack is the biggest we've + * seen and we've seen exactly our rexmt + * threshhold of them, assume a packet + * has been dropped and retransmit it. + * Kludge snd_nxt & the congestion + * window so we send only this one + * packet. + * + * We know we're losing at the current + * window size so do congestion avoidance + * (set ssthresh to half the current window + * and pull our congestion window back to + * the new ssthresh). + * + * Dup acks mean that packets have left the + * network (they're now cached at the receiver) + * so bump cwnd by the amount in the receiver + * to keep a constant cwnd packets in the + * network. + * + * When using TCP ECN, notify the peer that + * we reduced the cwnd. + */ + if (!tcp_timer_active(tp, TT_REXMT) || + th->th_ack != tp->snd_una) + tp->t_dupacks = 0; + else if (++tp->t_dupacks > tcprexmtthresh || + IN_FASTRECOVERY(tp->t_flags)) { + cc_ack_received(tp, th, CC_DUPACK); + if ((tp->t_flags & TF_SACK_PERMIT) && + IN_FASTRECOVERY(tp->t_flags)) { + int awnd; + + /* + * Compute the amount of data in flight first. + * We can inject new data into the pipe iff + * we have less than 1/2 the original window's + * worth of data in flight. + */ + awnd = (tp->snd_nxt - tp->snd_fack) + + tp->sackhint.sack_bytes_rexmit; + if (awnd < tp->snd_ssthresh) { + tp->snd_cwnd += tp->t_maxseg; + if (tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + } + } else + tp->snd_cwnd += tp->t_maxseg; + (void) tcp_output(tp); + goto drop; + } else if (tp->t_dupacks == tcprexmtthresh) { + tcp_seq onxt = tp->snd_nxt; + + /* + * If we're doing sack, check to + * see if we're already in sack + * recovery. If we're not doing sack, + * check to see if we're in newreno + * recovery. + */ + if (tp->t_flags & TF_SACK_PERMIT) { + if (IN_FASTRECOVERY(tp->t_flags)) { + tp->t_dupacks = 0; + break; + } + } else { + if (SEQ_LEQ(th->th_ack, + tp->snd_recover)) { + tp->t_dupacks = 0; + break; + } + } + /* Congestion signal before ack. */ + cc_cong_signal(tp, th, CC_NDUPACK); + cc_ack_received(tp, th, CC_DUPACK); + tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_rtttime = 0; + + if (tp->t_flags & TF_SACK_PERMIT) { +// TCPSTAT_INC( +// tcps_sack_recovery_episode); + tp->sack_newdata = tp->snd_nxt; + tp->snd_cwnd = tp->t_maxseg; + (void) tcp_output(tp); + goto drop; + } + + tp->snd_nxt = th->th_ack; + tp->snd_cwnd = tp->t_maxseg; + (void) tcp_output(tp); +// KASSERT(tp->snd_limited <= 2, +// ("%s: tp->snd_limited too big", +// __func__)); + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * + (tp->t_dupacks - tp->snd_limited); + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (V_tcp_do_rfc3042) { + /* + * Process first and second duplicate + * ACKs. Each indicates a segment + * leaving the network, creating room + * for more. Make sure we can send a + * packet on reception of each duplicate + * ACK by increasing snd_cwnd by one + * segment. Restore the original + * snd_cwnd after packet transmission. 
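+			 * Concretely: with t_maxseg = 536, 1072 bytes in
+			 * flight (snd_nxt - snd_una) and one dup ack seen,
+			 * snd_cwnd is set to 1072 + 1*536, so at most one new
+			 * segment goes out; oldcwnd is restored right after
+			 * the send.  This keeps ACKs flowing so a loss can be
+			 * detected by dup acks instead of a timeout.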
+ */ + u_long oldcwnd; + tcp_seq oldsndmax; + u_int sent; + int avail; + cc_ack_received(tp, th, CC_DUPACK); + oldcwnd = tp->snd_cwnd; + oldsndmax = tp->snd_max; + + KASSERT(tp->t_dupacks == 1 || + tp->t_dupacks == 2, + ("%s: dupacks not 1 or 2", + __func__)); + if (tp->t_dupacks == 1) + tp->snd_limited = 0; + tp->snd_cwnd = + (tp->snd_nxt - tp->snd_una) + + (tp->t_dupacks - tp->snd_limited) * + tp->t_maxseg; + /* + * Only call tcp_output when there + * is new data available to be sent. + * Otherwise we would send pure ACKs. + */ +// SOCKBUF_LOCK(&so->so_snd); +// avail = sbavail(&so->so_snd) - +// (tp->snd_nxt - tp->snd_una); + avail = lbuf_used_space(&tp->sendbuf) - (tp->snd_nxt - tp->snd_una); +// SOCKBUF_UNLOCK(&so->so_snd); + if (avail > 0) + (void) tcp_output(tp); + sent = tp->snd_max - oldsndmax; + if (sent > tp->t_maxseg) { + KASSERT((tp->t_dupacks == 2 && + tp->snd_limited == 0) || + (sent == tp->t_maxseg + 1 && + tp->t_flags & TF_SENTFIN), + ("%s: sent too much", + __func__)); + tp->snd_limited = 2; + } else if (sent > 0) + ++tp->snd_limited; + tp->snd_cwnd = oldcwnd; + goto drop; + } + } else + tp->t_dupacks = 0; + break; + } + + KASSERT(SEQ_GT(th->th_ack, tp->snd_una), + ("%s: th_ack <= snd_una", __func__)); + + /* + * If the congestion window was inflated to account + * for the other side's cached packets, retract it. + */ + if (IN_FASTRECOVERY(tp->t_flags)) { + if (SEQ_LT(th->th_ack, tp->snd_recover)) { + if (tp->t_flags & TF_SACK_PERMIT) + tcp_sack_partialack(tp, th); + else + tcp_newreno_partial_ack(tp, th); + } else + cc_post_recovery(tp, th); + } + + tp->t_dupacks = 0; + /* + * If we reach this point, ACK is not a duplicate, + * i.e., it ACKs something we sent. + */ + if (tp->t_flags & TF_NEEDSYN) { + /* + * T/TCP: Connection was half-synchronized, and our + * SYN has been ACK'd (so connection is now fully + * synchronized). Go to non-starred state, + * increment snd_una for ACK of SYN, and check if + * we can do window scaling. + */ + tp->t_flags &= ~TF_NEEDSYN; + tp->snd_una++; + /* Do window scaling? */ + if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == + (TF_RCVD_SCALE|TF_REQ_SCALE)) { + tp->rcv_scale = tp->request_r_scale; + /* Send window already scaled. */ + } + } + +process_ACK: +// INP_WLOCK_ASSERT(tp->t_inpcb); + + acked = BYTES_THIS_ACK(tp, th); +// TCPSTAT_INC(tcps_rcvackpack); +// TCPSTAT_ADD(tcps_rcvackbyte, acked); + + DEBUG("Bytes acked: %d\n", acked); + /* + * If we just performed our first retransmit, and the ACK + * arrives within our recovery window, then it was a mistake + * to do the retransmit in the first place. Recover our + * original cwnd and ssthresh, and proceed to transmit where + * we left off. + */ + if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && + (int)(ticks - tp->t_badrxtwin) < 0) + cc_cong_signal(tp, th, CC_RTO_ERR); + + /* + * If we have a timestamp reply, update smoothed + * round trip time. If no timestamp is present but + * transmit timer is running and timed sequence + * number was acked, update smoothed round trip time. + * Since we now have an rtt measurement, cancel the + * timer backoff (cf., Phil Karn's retransmit alg.). + * Recompute the initial retransmit timer. + * + * Some boxes send broken timestamp replies + * during the SYN+ACK phase, ignore + * timestamps of 0 or we could calculate a + * huge RTT and blow up the retransmit timer. 
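+     *
+     * Illustrative arithmetic (hypothetical values): if our tick
+     * counter reads 1000 when this ACK arrives and the echoed
+     * tsecr is 990, the RTT sample is t = 1000 - 990 = 10 ticks;
+     * a bogus tsecr of 0 would instead yield t = 1000 and inflate
+     * the retransmit timer, hence the check on to_tsecr below.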
+ */ + + if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) { + u_int t; + + t = tcp_ts_getticks() - to.to_tsecr; + if (!tp->t_rttlow || tp->t_rttlow > t) + tp->t_rttlow = t; + tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1); + } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { + if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime) + tp->t_rttlow = ticks - tp->t_rtttime; + tcp_xmit_timer(tp, ticks - tp->t_rtttime); + } + + /* + * If all outstanding data is acked, stop retransmit + * timer and remember to restart (more output or persist). + * If there is more data to be acked, restart retransmit + * timer, using current (possibly backed-off) value. + */ + if (th->th_ack == tp->snd_max) { + tcp_timer_activate(tp, TT_REXMT, 0); + needoutput = 1; + } else if (!tcp_timer_active(tp, TT_PERSIST)) { + tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); + } + + /* + * If no data (only SYN) was ACK'd, + * skip rest of ACK processing. + */ + if (acked == 0) + goto step6; + + /* + * Let the congestion control algorithm update congestion + * control related information. This typically means increasing + * the congestion window. + */ + cc_ack_received(tp, th, CC_ACK); + +// SOCKBUF_LOCK(&so->so_snd); + if (acked > /*sbavail(&so->so_snd)*/lbuf_used_space(&tp->sendbuf)) { + uint32_t poppedbytes; + int ntraversed = 0; + uint32_t usedspace = lbuf_used_space(&tp->sendbuf); + //tp->snd_wnd -= sbavail(&so->so_snd); + tp->snd_wnd -= usedspace; +// mfree = sbcut_locked(&so->so_snd, +// (int)sbavail(&so->so_snd)); + poppedbytes = lbuf_pop(&tp->sendbuf, usedspace, &ntraversed); + KASSERT(poppedbytes == usedspace, ("Could not fully empty send buffer")); + *freedentries += ntraversed; + ourfinisacked = 1; + } else { +// mfree = sbcut_locked(&so->so_snd, acked); + int ntraversed = 0; + uint32_t poppedbytes = lbuf_pop(&tp->sendbuf, acked, &ntraversed); + KASSERT(poppedbytes == acked, ("Could not remove acked bytes from send buffer")); + *freedentries += ntraversed; + tp->snd_wnd -= acked; + ourfinisacked = 0; + } + /* NB: sowwakeup_locked() does an implicit unlock. */ +// sowwakeup_locked(so); +// m_freem(mfree); + /* Detect una wraparound. */ + if (!IN_RECOVERY(tp->t_flags) && + SEQ_GT(tp->snd_una, tp->snd_recover) && + SEQ_LEQ(th->th_ack, tp->snd_recover)) + tp->snd_recover = th->th_ack - 1; + /* XXXLAS: Can this be moved up into cc_post_recovery? */ + if (IN_RECOVERY(tp->t_flags) && + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + EXIT_RECOVERY(tp->t_flags); + } + tp->snd_una = th->th_ack; + if (tp->t_flags & TF_SACK_PERMIT) { + if (SEQ_GT(tp->snd_una, tp->snd_recover)) + tp->snd_recover = tp->snd_una; + } + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + switch (tp->t_state) { + + /* + * In FIN_WAIT_1 STATE in addition to the processing + * for the ESTABLISHED state if our FIN is now acknowledged + * then enter FIN_WAIT_2. + */ + case TCPS_FIN_WAIT_1: + if (ourfinisacked) { + /* + * If we can't receive any more + * data, then closing user can proceed. + * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. + * + * XXXjl: + * we should release the tp also, and use a + * compressed state. + */ + if (/*so->so_rcv.sb_state & SBS_CANTRCVMORE*/ + tpiscantrcv(tp)) { +// soisdisconnected(so); + tcp_timer_activate(tp, TT_2MSL, + (tcp_fast_finwait2_recycle ? 
+ tcp_finwait2_timeout : + TP_MAXIDLE(tp))); + } + tcp_state_change(tp, TCPS_FIN_WAIT_2); + } + break; + + /* + * In CLOSING STATE in addition to the processing for + * the ESTABLISHED state if the ACK acknowledges our FIN + * then enter the TIME-WAIT state, otherwise ignore + * the segment. + */ + case TCPS_CLOSING: + if (ourfinisacked) { +// INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + tp->t_flags &= ~TF_ACKNOW; // Added by Sam: Don't send an ACK in the Time-wait state, since we don't want to ACK ACKs. + tcp_twstart(tp); +// INP_INFO_RUNLOCK(&V_tcbinfo); +// m_freem(m); + return; + } + break; + + /* + * In LAST_ACK, we may still be waiting for data to drain + * and/or to be acked, as well as for the ack of our FIN. + * If our FIN is now acknowledged, delete the TCB, + * enter the closed state and return. + */ + case TCPS_LAST_ACK: + if (ourfinisacked) { +// INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + tp = tcp_close(tp); + connection_lost(tp, CONN_LOST_NORMAL); + goto drop; + } + break; + } + } + +step6: +// INP_WLOCK_ASSERT(tp->t_inpcb); + + /* + * Update window information. + * Don't look at window if no ACK: TAC's send garbage on first SYN. + */ + if ((thflags & TH_ACK) && + (SEQ_LT(tp->snd_wl1, th->th_seq) || + (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || + (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { + /* keep track of pure window updates */ +// if (tlen == 0 && +// tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) +// TCPSTAT_INC(tcps_rcvwinupd); + tp->snd_wnd = tiwin; + tp->snd_wl1 = th->th_seq; + tp->snd_wl2 = th->th_ack; + if (tp->snd_wnd > tp->max_sndwnd) + tp->max_sndwnd = tp->snd_wnd; + needoutput = 1; + } + + /* + * Process segments with URG. + */ +#if 0 // IGNORE THE URG FOR NOW + if ((thflags & TH_URG) && th->th_urp && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * This is a kludge, but if we receive and accept + * random urgent pointers, we'll crash in + * soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ +// SOCKBUF_LOCK(&so->so_rcv); + if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { + th->th_urp = 0; /* XXX */ + thflags &= ~TH_URG; /* XXX */ +// SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ + goto dodata; /* XXX */ + } + /* + * If this segment advances the known urgent pointer, + * then mark the data stream. This should not happen + * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since + * a FIN has been received from the remote side. + * In these states we ignore the URG. + * + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section as the original + * spec states (in one of two places). + */ + if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { + tp->rcv_up = th->th_seq + th->th_urp; + so->so_oobmark = sbavail(&so->so_rcv) + + (tp->rcv_up - tp->rcv_nxt) - 1; + if (so->so_oobmark == 0) + so->so_rcv.sb_state |= SBS_RCVATMARK; + sohasoutofband(so); + tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); + } +// SOCKBUF_UNLOCK(&so->so_rcv); + /* + * Remove out of band data so doesn't get presented to user. + * This can happen independent of advancing the URG pointer, + * but if two URG's are pending at once, some out-of-band + * data may creep in... ick. 
+ */
+        if (th->th_urp <= (u_long)tlen &&
+            !(so->so_options & SO_OOBINLINE)) {
+            /* hdr drop is delayed */
+            tcp_pulloutofband(so, th, m, drop_hdrlen);
+        }
+    } else
+#endif
+    {
+        /*
+         * If no out of band data is expected,
+         * pull receive urgent pointer along
+         * with the receive window.
+         */
+        if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+            tp->rcv_up = tp->rcv_nxt;
+    }
+//dodata:                            /* XXX */
+//    INP_WLOCK_ASSERT(tp->t_inpcb);
+
+    /*
+     * Process the segment text, merging it into the TCP sequencing queue,
+     * and arranging for acknowledgment of receipt if necessary.
+     * This process logically involves adjusting tp->rcv_wnd as data
+     * is presented to the user (this happens in tcp_usrreq.c,
+     * case PRU_RCVD).  If a FIN has already been received on this
+     * connection then we just ignore the text.
+     */
+    if ((tlen || (thflags & TH_FIN)) &&
+        TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+        tcp_seq save_start = th->th_seq;
+#if 0 /* Sam: We just add the offset when copying into the receive buffer,
+       * rather than adding it to the th pointer (which wouldn't work). */
+        m_adj(m, drop_hdrlen);    /* delayed header drop */
+#endif
+        /*
+         * Insert segment which includes th into TCP reassembly queue
+         * with control block tp.  Set thflags to whether reassembly now
+         * includes a segment with FIN.  This handles the common case
+         * inline (segment is the next to be received on an established
+         * connection, and the queue is empty), avoiding linkage into
+         * and removal from the queue and repetition of various
+         * conversions.
+         * Set DELACK for segments received in order, but ack
+         * immediately when segments are out of order (so
+         * fast retransmit can work).
+         */
+        if (th->th_seq == tp->rcv_nxt &&
+            /*LIST_EMPTY(&tp->t_segq) &&*/
+            (tpiscantrcv(tp) || bmp_isempty(tp->reassbmp, REASSBMP_SIZE(tp))) && // Added by Sam
+            TCPS_HAVEESTABLISHED(tp->t_state)) {
+            if (DELAY_ACK(tp, tlen))
+                tp->t_flags |= TF_DELACK;
+            else
+                tp->t_flags |= TF_ACKNOW;
+            tp->rcv_nxt += tlen;
+            thflags = th->th_flags & TH_FIN;
+//            TCPSTAT_INC(tcps_rcvpack);
+//            TCPSTAT_ADD(tcps_rcvbyte, tlen);
+//            SOCKBUF_LOCK(&so->so_rcv);
+/*
+            if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+                m_freem(m);
+            else
+*/
+            //sbappendstream_locked(&so->so_rcv, m, 0);
+            if (!tpiscantrcv(tp)) {
+                size_t usedbefore = cbuf_used_space(&tp->recvbuf);
+                cbuf_write(&tp->recvbuf, ((uint8_t*) th) + drop_hdrlen, tlen);
+                if (usedbefore == 0 && tlen > 0) {
+                    *signals |= SIG_RECVBUF_NOTEMPTY;
+                }
+            } else if (tlen > 0) {
+                /* Sam: We already know tlen != 0, so if we got here, then it means
+                   that we got data after we called SHUT_RD, or after receiving a FIN.
+                   I'm going to drop the connection in this case. */
+                tcp_drop(tp, ECONNABORTED);
+                goto drop;
+            }
+            /* NB: sorwakeup_locked() does an implicit unlock. */
+//            sorwakeup_locked(so);
+        } else if (tpiscantrcv(tp)) {
+            /* Sam: We will reach this point if we get out-of-order data on a socket which was
+               shut down with SHUT_RD, or where we already received a FIN. My response here is
+               to drop the segment and send an RST. */
+            tcp_drop(tp, ECONNABORTED);
+            goto drop;
+        } else {
+            /*
+             * XXX: Due to the header drop above "th" is
+             * theoretically invalid by now.  Fortunately
+             * m_adj() doesn't actually free any mbufs
+             * when trimming from the head.
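+             * In this port there is no m_adj() call at all:
+             * the payload is located by adding drop_hdrlen
+             * to the th pointer instead, so th stays valid
+             * when it is handed to tcp_reass() below.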
+ */ + thflags = tcp_reass(tp, th, &tlen, ((uint8_t*) th) + drop_hdrlen, signals); + tp->t_flags |= TF_ACKNOW; + } + // Only place tlen is used after the call to tcp_reass is below + if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT)) + tcp_update_sack_list(tp, save_start, save_start + tlen); +#if 0 // This was originally there, not me commenting out things + /* + * Note the amount of data that peer has sent into + * our window, in order to estimate the sender's + * buffer size. + * XXX: Unused. + */ + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) + len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt); + else + len = so->so_rcv.sb_hiwat; +#endif + } else { +// m_freem(m); + thflags &= ~TH_FIN; + } + + /* + * If FIN is received ACK the FIN and let the user know + * that the connection is closing. + */ + if (thflags & TH_FIN) { + DEBUG("FIN Processing start\n"); + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { +// socantrcvmore(so); + tpcantrcvmore(tp); + /* + * If connection is half-synchronized + * (ie NEEDSYN flag on) then delay ACK, + * so it may be piggybacked when SYN is sent. + * Otherwise, since we received a FIN then no + * more input can be expected, send ACK now. + */ + if (tp->t_flags & TF_NEEDSYN) + tp->t_flags |= TF_DELACK; + else + tp->t_flags |= TF_ACKNOW; + tp->rcv_nxt++; + } + if (tp->reass_fin_index != -2) { + *signals |= SIG_RCVD_FIN; + tp->reass_fin_index = -2; // Added by Sam: make sure not to consider any more FINs in reassembly + } + switch (tp->t_state) { + + /* + * In SYN_RECEIVED and ESTABLISHED STATES + * enter the CLOSE_WAIT state. + */ + case TCPS_SYN_RECEIVED: + tp->t_starttime = ticks; + /* FALLTHROUGH */ + case TCPS_ESTABLISHED: + tcp_state_change(tp, TCPS_CLOSE_WAIT); + break; + + /* + * If still in FIN_WAIT_1 STATE FIN has not been acked so + * enter the CLOSING state. + */ + case TCPS_FIN_WAIT_1: + tcp_state_change(tp, TCPS_CLOSING); + break; + + /* + * In FIN_WAIT_2 state enter the TIME_WAIT state, + * starting the time-wait timer, turning off the other + * standard timers. + */ + case TCPS_FIN_WAIT_2: +/* + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata " + "TCP_FIN_WAIT_2 ti_locked: %d", __func__, + ti_locked)); +*/ + tcp_twstart(tp); +// INP_INFO_RUNLOCK(&V_tcbinfo); + return; + } + } +#if 0 + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen, + &tcp_savetcp, 0); +#endif + TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); +#endif + /* + * Return any desired output. + */ + if (needoutput || (tp->t_flags & TF_ACKNOW)) + (void) tcp_output(tp); + +check_delack: +#if 0 + KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", + __func__, ti_locked)); + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(tp->t_inpcb); +#endif + if (tp->t_flags & TF_DELACK) { + tp->t_flags &= ~TF_DELACK; + tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); + } +// INP_WUNLOCK(tp->t_inpcb); + return; + +dropafterack: + /* + * Generate an ACK dropping incoming segment if it occupies + * sequence space, where the ACK reflects our state. + * + * We can now skip the test for the RST flag since all + * paths to this code happen after packets containing + * RST have been dropped. + * + * In the SYN-RECEIVED state, don't send an ACK unless the + * segment we received passes the SYN-RECEIVED ACK test. + * If it fails send a RST. 
This breaks the loop in the + * "LAND" DoS attack, and also prevents an ACK storm + * between two listening ports that have been sent forged + * SYN segments, each with the source address of the other. + */ + if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && + (SEQ_GT(tp->snd_una, th->th_ack) || + SEQ_GT(th->th_ack, tp->snd_max)) ) { + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } +#if 0 +#ifdef TCPDEBUG + if (so->so_options & SO_DEBUG) + tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, + &tcp_savetcp, 0); +#endif +#endif +// TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); +// if (ti_locked == TI_RLOCKED) +// INP_INFO_RUNLOCK(&V_tcbinfo); +// ti_locked = TI_UNLOCKED; + + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); +// INP_WUNLOCK(tp->t_inpcb); +// m_freem(m); + return; + +dropwithreset: +/* + if (ti_locked == TI_RLOCKED) + INP_INFO_RUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; +*/ + if (tp != NULL) { + tcp_dropwithreset(ip6, th, tp, tlen, rstreason); +// INP_WUNLOCK(tp->t_inpcb); + } else + tcp_dropwithreset(ip6, th, NULL, tlen, rstreason); + return; + +drop: +#if 0 + if (ti_locked == TI_RLOCKED) { + INP_INFO_RUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } +#ifdef INVARIANTS + else + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); +#endif + + /* + * Drop space held by incoming segment and return. + */ +#ifdef TCPDEBUG + if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, + &tcp_savetcp, 0); +#endif + TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); + if (tp != NULL) + INP_WUNLOCK(tp->t_inpcb); + m_freem(m); +#endif + return; +} + +/* + * Parse TCP options and place in tcpopt. + */ +static void +tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags) +{ + int opt, optlen; + + to->to_flags = 0; + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + if (cnt < 2) + break; + optlen = cp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_MAXSEG: + if (optlen != TCPOLEN_MAXSEG) + continue; + if (!(flags & TO_SYN)) + continue; + to->to_flags |= TOF_MSS; + bcopy((char *)cp + 2, + (char *)&to->to_mss, sizeof(to->to_mss)); + to->to_mss = ntohs(to->to_mss); + break; + case TCPOPT_WINDOW: + if (optlen != TCPOLEN_WINDOW) + continue; + if (!(flags & TO_SYN)) + continue; + to->to_flags |= TOF_SCALE; + to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT); + break; + case TCPOPT_TIMESTAMP: + if (optlen != TCPOLEN_TIMESTAMP) + continue; + to->to_flags |= TOF_TS; + bcopy((char *)cp + 2, + (char *)&to->to_tsval, sizeof(to->to_tsval)); + to->to_tsval = ntohl(to->to_tsval); + bcopy((char *)cp + 6, + (char *)&to->to_tsecr, sizeof(to->to_tsecr)); + to->to_tsecr = ntohl(to->to_tsecr); + break; +#ifdef TCP_SIGNATURE + /* + * XXX In order to reply to a host which has set the + * TCP_SIGNATURE option in its initial SYN, we have to + * record the fact that the option was observed here + * for the syncache code to perform the correct response. 
+ */
+        case TCPOPT_SIGNATURE:
+            if (optlen != TCPOLEN_SIGNATURE)
+                continue;
+            to->to_flags |= TOF_SIGNATURE;
+            to->to_signature = cp + 2;
+            break;
+#endif
+        case TCPOPT_SACK_PERMITTED:
+            if (optlen != TCPOLEN_SACK_PERMITTED)
+                continue;
+            if (!(flags & TO_SYN))
+                continue;
+            if (!V_tcp_do_sack)
+                continue;
+            to->to_flags |= TOF_SACKPERM;
+            break;
+        case TCPOPT_SACK:
+            if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
+                continue;
+            if (flags & TO_SYN)
+                continue;
+            to->to_flags |= TOF_SACK;
+            to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
+            to->to_sacks = cp + 2;
+//            TCPSTAT_INC(tcps_sack_rcv_blocks);
+            break;
+        default:
+            continue;
+        }
+    }
+}
+
+
+/*
+ * Collect new round-trip time estimate
+ * and update averages and current timeout.
+ */
+static void
+tcp_xmit_timer(struct tcpcb *tp, int rtt)
+{
+    int delta;
+
+//    INP_WLOCK_ASSERT(tp->t_inpcb);
+
+//    TCPSTAT_INC(tcps_rttupdated);
+    tp->t_rttupdated++;
+    if (tp->t_srtt != 0) {
+        /*
+         * srtt is stored as fixed point with 5 bits after the
+         * binary point (i.e., scaled by 32).  The following magic
+         * is equivalent to the smoothing algorithm in rfc793 with
+         * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+         * point).  Adjust rtt to origin 0.
+         */
+        delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+            - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
+
+        if ((tp->t_srtt += delta) <= 0)
+            tp->t_srtt = 1;
+
+        /*
+         * We accumulate a smoothed rtt variance (actually, a
+         * smoothed mean difference), then set the retransmit
+         * timer to smoothed rtt + 4 times the smoothed variance.
+         * rttvar is stored as fixed point with 4 bits after the
+         * binary point (scaled by 16).  The following is
+         * equivalent to rfc793 smoothing with an alpha of .75
+         * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
+         * rfc793's wired-in beta.
+         */
+        if (delta < 0)
+            delta = -delta;
+        delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
+        if ((tp->t_rttvar += delta) <= 0)
+            tp->t_rttvar = 1;
+        if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
+            tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
+    } else {
+        /*
+         * No rtt measurement yet - use the unsmoothed rtt.
+         * Set the variance to half the rtt (so our first
+         * retransmit happens at 3*rtt).
+         */
+        tp->t_srtt = rtt << TCP_RTT_SHIFT;
+        tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+        tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
+    }
+    tp->t_rtttime = 0;
+    tp->t_rxtshift = 0;
+
+    /*
+     * the retransmit should happen at rtt + 4 * rttvar.
+     * Because of the way we do the smoothing, srtt and rttvar
+     * will each average +1/2 tick of bias.  When we compute
+     * the retransmit timer, we want 1/2 tick of rounding and
+     * 1 extra tick because of +-1/2 tick uncertainty in the
+     * firing of the timer.  The bias will give us exactly the
+     * 1.5 tick we need.  But, because the bias is
+     * statistical, we have to test that we don't drop below
+     * the minimum feasible timer (which is 2 ticks).
+     */
+    TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+              max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+
+    /*
+     * We received an ack for a packet that wasn't retransmitted;
+     * it is probably safe to discard any error indications we've
+     * received recently.  This isn't quite right, but close enough
+     * for now (a route might have failed after we sent a segment,
+     * and the return path might not be symmetrical).
+     */
+    tp->t_softerror = 0;
+}
+
+/* Taken from netinet6/in6.c.
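+ * In this port only the unambiguous cases are kept: loopback and
+ * link-local addresses are reported as local, and the walk over
+ * configured interface addresses is compiled out (the #if 0 below).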
*/ +int +in6_localaddr(struct in6_addr *in6) +{ + //struct rm_priotracker in6_ifa_tracker; + //struct in6_ifaddr *ia; + + if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6)) + return 1; +#if 0 + IN6_IFADDR_RLOCK(&in6_ifa_tracker); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr, + &ia->ia_prefixmask.sin6_addr)) { + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + return 1; + } + } + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); +#endif + return (0); +} + +/* + * Determine a reasonable value for maxseg size. + * If the route is known, check route for mtu. + * If none, use an mss that can be handled on the outgoing interface + * without forcing IP to fragment. If no route is found, route has no mtu, + * or the destination isn't local, use a default, hopefully conservative + * size (usually 512 or the default IP max size, but no more than the mtu + * of the interface), as we can't discover anything about intervening + * gateways or networks. We also initialize the congestion/slow start + * window to be a single segment if the destination isn't local. + * While looking at the routing entry, we also initialize other path-dependent + * parameters from pre-set or cached values in the routing entry. + * + * Also take into account the space needed for options that we + * send regularly. Make maxseg shorter by that amount to assure + * that we can send maxseg amount of data even when the options + * are present. Store the upper limit of the length of options plus + * data in maxopd. + * + * NOTE that this routine is only called when we process an incoming + * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS + * settings are handled in tcp_mssopt(). + */ +void +tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, + struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap) +{ + int mss = 0; + u_long maxmtu = 0; +// struct inpcb *inp = tp->t_inpcb; + struct hc_metrics_lite metrics; + int origoffer; +//#ifdef INET6 +// int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; + size_t min_protoh = /*isipv6 ?*/ + IP6HDR_SIZE + sizeof (struct tcphdr)/* : + sizeof (struct tcpiphdr)*/; +//#else +// const size_t min_protoh = sizeof(struct tcpiphdr); +//#endif + +// INP_WLOCK_ASSERT(tp->t_inpcb); + + if (mtuoffer != -1) { + KASSERT(offer == -1, ("%s: conflict", __func__)); + offer = mtuoffer - min_protoh; + } + origoffer = offer; + + /* Initialize. */ +//#ifdef INET6 +// if (isipv6) { + maxmtu = tcp_maxmtu6(/*&inp->inp_inc*/tp, cap); + tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt; +// } +//#endif +#if 0 // We're IPv6 +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + { + maxmtu = tcp_maxmtu(&inp->inp_inc, cap); + tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt; + } +#endif +#endif + /* + * No route to sender, stay with default mss and return. + */ + if (maxmtu == 0) { + /* + * In case we return early we need to initialize metrics + * to a defined state as tcp_hc_get() would do for us + * if there was no cache hit. + */ + if (metricptr != NULL) + bzero(metricptr, sizeof(struct hc_metrics_lite)); + return; + } + + /* What have we got? */ + switch (offer) { + case 0: + /* + * Offer == 0 means that there was no MSS on the SYN + * segment, in this case we use tcp_mssdflt as + * already assigned to t_maxopd above. + */ + offer = tp->t_maxopd; + break; + + case -1: + /* + * Offer == -1 means that we didn't receive SYN yet. + */ + /* FALLTHROUGH */ + + default: + /* + * Prevent DoS attack with too small MSS. 
Round up + * to at least minmss. + */ + offer = max(offer, V_tcp_minmss); + } + + /* + * rmx information is now retrieved from tcp_hostcache. + */ + tcp_hc_get(/*&inp->inp_inc*/tp, &metrics); + if (metricptr != NULL) + bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite)); + + /* + * If there's a discovered mtu in tcp hostcache, use it. + * Else, use the link mtu. + */ + if (metrics.rmx_mtu) + mss = min(metrics.rmx_mtu, maxmtu) - min_protoh; + else { +//#ifdef INET6 +// if (isipv6) { + mss = maxmtu - min_protoh; + if (!V_path_mtu_discovery && + !in6_localaddr(/*&inp->in6p_faddr*/ &tp->faddr)) + mss = min(mss, V_tcp_v6mssdflt); +// } +//#endif +#if 0 +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + { + mss = maxmtu - min_protoh; + if (!V_path_mtu_discovery && + !in_localaddr(inp->inp_faddr)) + mss = min(mss, V_tcp_mssdflt); + } +#endif +#endif + /* + * XXX - The above conditional (mss = maxmtu - min_protoh) + * probably violates the TCP spec. + * The problem is that, since we don't know the + * other end's MSS, we are supposed to use a conservative + * default. But, if we do that, then MTU discovery will + * never actually take place, because the conservative + * default is much less than the MTUs typically seen + * on the Internet today. For the moment, we'll sweep + * this under the carpet. + * + * The conservative default might not actually be a problem + * if the only case this occurs is when sending an initial + * SYN with options and data to a host we've never talked + * to before. Then, they will reply with an MSS value which + * will get recorded and the new parameters should get + * recomputed. For Further Study. + */ + } + mss = min(mss, offer); + + /* + * Sanity check: make sure that maxopd will be large + * enough to allow some data on segments even if the + * all the option space is used (40bytes). Otherwise + * funny things may happen in tcp_output. + */ + mss = max(mss, /*64*/TCP_MAXOLEN + 1); + + /* + * maxopd stores the maximum length of data AND options + * in a segment; maxseg is the amount of data in a normal + * segment. We need to store this value (maxopd) apart + * from maxseg, because now every segment carries options + * and thus we normally have somewhat less data in segments. + */ + tp->t_maxopd = mss; + + /* + * origoffer==-1 indicates that no segments were received yet. + * In this case we just guess. + */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP && + (origoffer == -1 || + (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) + mss -= TCPOLEN_TSTAMP_APPA; + + tp->t_maxseg = mss; +} + +void +tcp_mss(struct tcpcb *tp, int offer) +{ + //int mss; + //u_long bufsize; +// struct inpcb *inp; +// struct socket *so; + struct hc_metrics_lite metrics; + struct tcp_ifcap cap; + + KASSERT(tp != NULL, ("%s: tp == NULL", __func__)); + + bzero(&cap, sizeof(cap)); + tcp_mss_update(tp, offer, -1, &metrics, &cap); + +// mss = tp->t_maxseg; NOT NEEDED, SINCE we removed all of the code that deals with mss. So we would just do tp->t_maxseg = mss; +// inp = tp->t_inpcb; + + /* + * If there's a pipesize, change the socket buffer to that size, + * don't change if sb_hiwat is different than default (then it + * has been changed on purpose with setsockopt). + * Make the socket buffers an integral number of mss units; + * if the mss is larger than the socket buffer, decrease the mss. 
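+     * Worked example of the upstream logic (hypothetical sizes,
+     * compiled out in this port because both buffers are fixed):
+     * a 4380-byte buffer with mss = 1460 is left alone (it is
+     * already 3 * mss), whereas an mss of 8192 against the same
+     * buffer would be cut down to 4380.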
+ */
+    // Sam: the socket buffer is statically allocated, so I can't change its size
+//    so = inp->inp_socket;
+//    SOCKBUF_LOCK(&so->so_snd);
+
+#if 0 // It doesn't make sense to limit the mss to the size of the send buffer, since there isn't a hard limit on its size
+//    if ((/*so->so_snd.sb_hiwat*/sendbufsize == V_tcp_sendspace) && metrics.rmx_sendpipe)
+//        bufsize = metrics.rmx_sendpipe;
+//    else
+        bufsize = /*so->so_snd.sb_hiwat*/ sendbufsize;
+    if (bufsize < mss)
+        mss = bufsize;
+#endif
+#if 0 // The send buffer is statically allocated, so I can't change its size...
+    else {
+        bufsize = roundup(bufsize, mss);
+        if (bufsize > sb_max)
+            bufsize = sb_max;
+        if (bufsize > so->so_snd.sb_hiwat)
+            (void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
+    }
+#endif
+//    SOCKBUF_UNLOCK(&so->so_snd);
+//    tp->t_maxseg = mss; NOT NEEDED, since we removed the code that modifies mss after assigning it to tp->t_maxseg
+
+//    SOCKBUF_LOCK(&so->so_rcv);
+#if 0 // The receive buffer is statically allocated, so I can't change its size...
+    if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
+        bufsize = metrics.rmx_recvpipe;
+    else
+        bufsize = so->so_rcv.sb_hiwat;
+    if (bufsize > mss) {
+        bufsize = roundup(bufsize, mss);
+        if (bufsize > sb_max)
+            bufsize = sb_max;
+        if (bufsize > so->so_rcv.sb_hiwat)
+            (void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
+    }
+#endif
+//    SOCKBUF_UNLOCK(&so->so_rcv);
+
+#if 0 // No support for TCP Segment Offloading
+    /* Check the interface for TSO capabilities. */
+    if (cap.ifcap & CSUM_TSO) {
+        tp->t_flags |= TF_TSO;
+        tp->t_tsomax = cap.tsomax;
+        tp->t_tsomaxsegcount = cap.tsomaxsegcount;
+        tp->t_tsomaxsegsize = cap.tsomaxsegsize;
+    }
+#endif
+}
+
+
+// TODO Translate MSS Option
+/*
+ * Determine the MSS option to send on an outgoing SYN.
+ */
+int
+tcp_mssopt(/*struct in_conninfo *inc*/struct tcpcb* tp)
+{
+    int mss = 0;
+    u_long maxmtu = 0;
+    u_long thcmtu = 0;
+    size_t min_protoh;
+
+//    KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));
+    KASSERT(tp != NULL, ("tcp_mssopt with NULL tcpcb pointer"));
+
+//#ifdef INET6
+//    if (inc->inc_flags & INC_ISIPV6) {
+        mss = V_tcp_v6mssdflt;
+        maxmtu = tcp_maxmtu6(/*inc*/tp, NULL);
+        min_protoh = IP6HDR_SIZE + sizeof(struct tcphdr);
+//    }
+//#endif
+#if 0
+#if defined(INET) && defined(INET6)
+    else
+#endif
+#ifdef INET
+    {
+        mss = V_tcp_mssdflt;
+        maxmtu = tcp_maxmtu(inc, NULL);
+        min_protoh = sizeof(struct tcpiphdr);
+    }
+#endif
+#endif
+//#if defined(INET6) || defined(INET)
+    thcmtu = tcp_hc_getmtu(/*inc*/tp); /* IPv4 and IPv6 */
+//#endif
+
+    if (maxmtu && thcmtu)
+        mss = min(maxmtu, thcmtu) - min_protoh;
+    else if (maxmtu || thcmtu)
+        mss = max(maxmtu, thcmtu) - min_protoh;
+
+    return (mss);
+}
+
+/*
+ * When a partial ack arrives, force the retransmission of the
+ * next unacknowledged segment.  Do not clear tp->t_dupacks.
+ * By setting snd_nxt to th_ack, this forces the retransmission timer
+ * to be started again.
+ */
+static void
+tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
+{
+    tcp_seq onxt = tp->snd_nxt;
+    u_long ocwnd = tp->snd_cwnd;
+
+//    INP_WLOCK_ASSERT(tp->t_inpcb);
+
+    tcp_timer_activate(tp, TT_REXMT, 0);
+    tp->t_rtttime = 0;
+    tp->snd_nxt = th->th_ack;
+    /*
+     * Set snd_cwnd to one segment beyond acknowledged offset.
+     * (tp->snd_una has not yet been updated when this function is called.)
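+     *
+     * Sketch of the effect (hypothetical numbers): if the partial
+     * ACK covers 1000 bytes and t_maxseg is 536, snd_cwnd is set
+     * to 536 + 1000 just long enough for tcp_output() below to
+     * emit the next unacknowledged segment, then restored and
+     * deflated by the 1000 acked bytes.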
+ */
+    tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
+    tp->t_flags |= TF_ACKNOW;
+    (void) tcp_output(tp);
+    tp->snd_cwnd = ocwnd;
+    if (SEQ_GT(onxt, tp->snd_nxt))
+        tp->snd_nxt = onxt;
+    /*
+     * Partial window deflation.  Relies on the fact that tp->snd_una
+     * is not updated yet.
+     */
+    if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th))
+        tp->snd_cwnd -= BYTES_THIS_ACK(tp, th);
+    else
+        tp->snd_cwnd = 0;
+    tp->snd_cwnd += tp->t_maxseg;
+}
diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_output.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_output.c
new file mode 100644
index 000000000000..b5adaa6dbf10
--- /dev/null
+++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_output.c
@@ -0,0 +1,1860 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ *    The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *    @(#)tcp_output.c    8.4 (Berkeley) 5/24/95
+ */
+
+// #include
+// #include
+#include
+#include "../gnrc_tcp_freebsd_internal.h"
+#include "tcp.h"
+#include "tcp_fsm.h"
+#include "tcp_var.h"
+#include "tcp_seq.h"
+#include "tcp_timer.h"
+#include "../lib/cbuf.h"
+
+#include "tcp_const.h"
+
+#include "net/gnrc/pktbuf.h"
+
+#include "net/gnrc/ipv6/hdr.h"
+
+#include "debug.h"
+
+// From ip_compat.h
+#define bcopy(a,b,c) memmove(b,a,c)
+
+static void inline
+cc_after_idle(struct tcpcb *tp)
+{
+//    INP_WLOCK_ASSERT(tp->t_inpcb);
+
+    if (CC_ALGO(tp)->after_idle != NULL)
+        CC_ALGO(tp)->after_idle(tp->ccv);
+}
+
+long min(long a, long b) {
+    if (a < b) {
+        return a;
+    } else {
+        return b;
+    }
+}
+
+unsigned long ulmin(unsigned long a, unsigned long b) {
+    if (a < b) {
+        return a;
+    } else {
+        return b;
+    }
+}
+
+#define lmin(a, b) min(a, b)
+
+void
+tcp_setpersist(struct tcpcb *tp)
+{
+    int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+    int tt;
+
+    tp->t_flags &= ~TF_PREVVALID;
+    if (tcp_timer_active(tp, TT_REXMT))
+        printf("PANIC: tcp_setpersist: retransmit pending\n");
+    /*
+     * Start/restart persistence timer.
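+     * The timeout is the smoothed RTT estimate t scaled by
+     * tcp_backoff[t_rxtshift] and clamped to the range
+     * [TCPTV_PERSMIN, TCPTV_PERSMAX], so successive zero-window
+     * probes back off roughly exponentially until they hit the cap.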
+ */ + TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], + TCPTV_PERSMIN, TCPTV_PERSMAX); + tcp_timer_activate(tp, TT_PERSIST, tt); + if (tp->t_rxtshift < TCP_MAXRXTSHIFT) + tp->t_rxtshift++; +} + + /* + * Tcp output routine: figure out what should be sent and send it. + */ +int +tcp_output(struct tcpcb *tp) +{ +#if 0 // I'M GOING TO TRY SIMPLIFYING THIS AS FAR AS POSSIBLE + struct socket *so = tp->t_inpcb->inp_socket; + long len, recwin, sendwin; + int off, flags, error = 0; /* Keep compiler happy */ + struct mbuf *m; + struct ip *ip = NULL; + struct ipovly *ipov = NULL; + struct tcphdr *th; + u_char opt[TCP_MAXOLEN]; + unsigned ipoptlen, optlen, hdrlen; +#ifdef IPSEC + unsigned ipsec_optlen = 0; +#endif + int idle, sendalot; + int sack_rxmit, sack_bytes_rxmt; + struct sackhole *p; + int tso, mtu; + struct tcpopt to; +#if 0 + int maxburst = TCP_MAXBURST; +#endif +#ifdef INET6 + struct ip6_hdr *ip6 = NULL; + int isipv6; + + isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; +#endif + + INP_WLOCK_ASSERT(tp->t_inpcb); +#endif + + struct ip6_hdr* ip6 = NULL; + struct tcphdr* th = NULL; + int idle; + long len, recwin, sendwin; + int off, flags, error = 0; /* Keep compiler happy */ + int sendalot, mtu; + int sack_rxmit, sack_bytes_rxmt; + struct sackhole* p; + unsigned ipoptlen, optlen, hdrlen; + //int alen; + //char* buf, * bufreal; // Added by Sam + //struct ip6_packet* msg; + //struct ip_iovec* iov; + struct tcpopt to; + //struct ip_iovec startvec; // Added by Sam (this and the next few fields) + //struct ip_iovec endvec; + //struct lbufent* startptr = NULL; + //struct lbufent* endptr = NULL; + //uint32_t startoffset; + //uint32_t endextra; + u_char opt[TCP_MAXOLEN]; + uint32_t ticks = get_ticks(); +#if 0 +#ifdef TCP_OFFLOAD + if (tp->t_flags & TF_TOE) + return (tcp_offload_output(tp)); +#endif +#endif + /* + * Determine length of data that should be transmitted, + * and flags that will be used. + * If there is some data or critical controls (SYN, RST) + * to send, then transmit; otherwise, investigate further. + */ + idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); + if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) + cc_after_idle(tp); + + tp->t_flags &= ~TF_LASTIDLE; + if (idle) { + if (tp->t_flags & TF_MORETOCOME) { + tp->t_flags |= TF_LASTIDLE; + idle = 0; + } + } +again: + /* + * If we've recently taken a timeout, snd_max will be greater than + * snd_nxt. There may be SACK information that allows us to avoid + * resending already delivered data. Adjust snd_nxt accordingly. + */ + if ((tp->t_flags & TF_SACK_PERMIT) && + SEQ_LT(tp->snd_nxt, tp->snd_max)) + tcp_sack_adjust(tp); + sendalot = 0; +#if 0 + tso = 0; +#endif + mtu = 0; + off = tp->snd_nxt - tp->snd_una; + sendwin = min(tp->snd_wnd, tp->snd_cwnd); + + flags = tcp_outflags[tp->t_state]; + /* + * Send any SACK-generated retransmissions. If we're explicitly trying + * to send out new data (when sendalot is 1), bypass this function. + * If we retransmit in fast recovery mode, decrement snd_cwnd, since + * we're replacing a (future) new transmission with a retransmission + * now, and we previously incremented snd_cwnd in tcp_input(). + */ + /* + * Still in sack recovery , reset rxmit flag to zero. 
+ */ + sack_rxmit = 0; + sack_bytes_rxmt = 0; + len = 0; + p = NULL; + if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) && + (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { + long cwin; + + cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt; + if (cwin < 0) + cwin = 0; + /* Do not retransmit SACK segments beyond snd_recover */ + if (SEQ_GT(p->end, tp->snd_recover)) { + /* + * (At least) part of sack hole extends beyond + * snd_recover. Check to see if we can rexmit data + * for this hole. + */ + if (SEQ_GEQ(p->rxmit, tp->snd_recover)) { + /* + * Can't rexmit any more data for this hole. + * That data will be rexmitted in the next + * sack recovery episode, when snd_recover + * moves past p->rxmit. + */ + p = NULL; + goto after_sack_rexmit; + } else + /* Can rexmit part of the current hole */ + len = ((long)ulmin(cwin, + tp->snd_recover - p->rxmit)); + } else + len = ((long)ulmin(cwin, p->end - p->rxmit)); + off = p->rxmit - tp->snd_una; + KASSERT(off >= 0,("%s: sack block to the left of una : %d", + __func__, off)); + if (len > 0) { + sack_rxmit = 1; + sendalot = 1; +// TCPSTAT_INC(tcps_sack_rexmits); +// TCPSTAT_ADD(tcps_sack_rexmit_bytes, +// min(len, tp->t_maxseg)); + } + } +after_sack_rexmit: + /* + * Get standard flags, and add SYN or FIN if requested by 'hidden' + * state flags. + */ + if (tp->t_flags & TF_NEEDFIN) + flags |= TH_FIN; + if (tp->t_flags & TF_NEEDSYN) + flags |= TH_SYN; + +#if 0 // REMOVE SYNCHRONIZATION + SOCKBUF_LOCK(&so->so_snd); +#endif + /* + * If in persist timeout with window of 0, send 1 byte. + * Otherwise, if window is small but nonzero + * and timer expired, we will send what we can + * and go to transmit state. + */ + if (tp->t_flags & TF_FORCEDATA) { + if (sendwin == 0) { + /* + * If we still have some data to send, then + * clear the FIN bit. Usually this would + * happen below when it realizes that we + * aren't sending all the data. However, + * if we have exactly 1 byte of unsent data, + * then it won't clear the FIN bit below, + * and if we are in persist state, we wind + * up sending the packet without recording + * that we sent the FIN bit. + * + * We can't just blindly clear the FIN bit, + * because if we don't have any more data + * to send then the probe will be the FIN + * itself. + */ +// if (off < sbused(&so->so_snd)) + if (off < lbuf_used_space(&tp->sendbuf)) + flags &= ~TH_FIN; + sendwin = 1; + } else { + tcp_timer_activate(tp, TT_PERSIST, 0); + tp->t_rxtshift = 0; + } + } + + /* + * If snd_nxt == snd_max and we have transmitted a FIN, the + * offset will be > 0 even if so_snd.sb_cc is 0, resulting in + * a negative length. This can also occur when TCP opens up + * its congestion window while receiving additional duplicate + * acks after fast-retransmit because TCP will reset snd_nxt + * to snd_max after the fast-retransmit. + * + * In the normal retransmit-FIN-only case, however, snd_nxt will + * be set to snd_una, the offset will be 0, and the length may + * wind up 0. + * + * If sack_rxmit is true we are retransmitting from the scoreboard + * in which case len is already set. + */ + if (sack_rxmit == 0) { + if (sack_bytes_rxmt == 0) +// len = ((long)ulmin(sbavail(&so->so_snd), sendwin) - + len = ((long) ulmin(lbuf_used_space(&tp->sendbuf), sendwin) - + off); + else { + long cwin; + + /* + * We are inside of a SACK recovery episode and are + * sending new data, having retransmitted all the + * data possible in the scoreboard. 
+ */
+            len = ((long)ulmin(/*sbavail(&so->so_snd)*/lbuf_used_space(&tp->sendbuf), tp->snd_wnd) -
+                off);
+            /*
+             * Don't remove this (len > 0) check !
+             * We explicitly check for len > 0 here (although it
+             * isn't really necessary), to work around a gcc
+             * optimization issue - to force gcc to compute
+             * len above. Without this check, the computation
+             * of len is bungled by the optimizer.
+             */
+            if (len > 0) {
+                cwin = tp->snd_cwnd -
+                    (tp->snd_nxt - tp->sack_newdata) -
+                    sack_bytes_rxmt;
+                if (cwin < 0)
+                    cwin = 0;
+                len = lmin(len, cwin);
+            }
+        }
+    }
+
+    /*
+     * Lop off SYN bit if it has already been sent.  However, if this
+     * is SYN-SENT state and if segment contains data and if we don't
+     * know that foreign host supports TAO, suppress sending segment.
+     */
+    if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
+        if (tp->t_state != TCPS_SYN_RECEIVED)
+            flags &= ~TH_SYN;
+        off--, len++;
+    }
+
+    /*
+     * Be careful not to send data and/or FIN on SYN segments.
+     * This measure is needed to prevent interoperability problems
+     * with not fully conformant TCP implementations.
+     */
+    if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
+        len = 0;
+        flags &= ~TH_FIN;
+    }
+
+    if (len <= 0) {
+        /*
+         * If FIN has been sent but not acked,
+         * but we haven't been called to retransmit,
+         * len will be < 0.  Otherwise, window shrank
+         * after we sent into it.  If window shrank to 0,
+         * cancel pending retransmit, pull snd_nxt back
+         * to (closed) window, and set the persist timer
+         * if it isn't already going.  If the window didn't
+         * close completely, just wait for an ACK.
+         *
+         * We also do a general check here to ensure that
+         * we will set the persist timer when we have data
+         * to send, but a 0-byte window. This makes sure
+         * the persist timer is set even if the packet
+         * hits one of the "goto send" lines below.
+         */
+        len = 0;
+        if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
+            (off < (int) /*sbavail(&so->so_snd)*/lbuf_used_space(&tp->sendbuf))) {
+            tcp_timer_activate(tp, TT_REXMT, 0);
+            tp->t_rxtshift = 0;
+            tp->snd_nxt = tp->snd_una;
+            if (!tcp_timer_active(tp, TT_PERSIST)) {
+                tcp_setpersist(tp);
+            }
+        }
+    }
+
+
+    /* len will be >= 0 after this point. */
+    KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
+
+    /*
+     * Automatic sizing of send socket buffer.  Often the send buffer
+     * size is not optimally adjusted to the actual network conditions
+     * at hand (delay bandwidth product).  Setting the buffer size too
+     * small limits throughput on links with high bandwidth and high
+     * delay (e.g. trans-continental/oceanic links).  Setting the
+     * buffer size too big consumes too much real kernel memory,
+     * especially with many connections on busy servers.
+     *
+     * The criteria to step up the send buffer one notch are:
+     *  1. receive window of remote host is larger than send buffer
+     *     (with a fudge factor of 5/4th);
+     *  2. send buffer is filled to 7/8th with data (so we actually
+     *     have data to make use of it);
+     *  3. send buffer fill has not hit maximal automatic size;
+     *  4. our send window (slow start and congestion controlled) is
+     *     larger than sent but unacknowledged data in send buffer.
+     *
+     * The remote host receive window scaling factor may limit the
+     * growing of the send buffer before it reaches its allowed
+     * maximum.
+     *
+     * It scales directly with slow start or congestion window
+     * and does at most one step per received ACK.
This fast
+     * scaling has the drawback of growing the send buffer beyond
+     * what is strictly necessary to make full use of a given
+     * delay*bandwidth product.  However testing has shown this not
+     * to be much of a problem.  At worst we are trading wasting
+     * of available bandwidth (the non-use of it) for wasting some
+     * socket buffer memory.
+     *
+     * TODO: Shrink send buffer during idle periods together
+     * with congestion window.  Requires another timer.  Has to
+     * wait for upcoming tcp timer rewrite.
+     *
+     * XXXGL: should sbused() or sbavail() be used here?
+     */
+    #if 0 // THE SEND BUFFER WILL BE SMALL, SO ITS SIZE CAN BE FIXED
+    if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
+        if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
+            sbused(&so->so_snd) >= (so->so_snd.sb_hiwat / 8 * 7) &&
+            sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
+            sendwin >= (sbused(&so->so_snd) -
+            (tp->snd_nxt - tp->snd_una))) {
+            if (!sbreserve_locked(&so->so_snd,
+                min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
+                 V_tcp_autosndbuf_max), so, curthread))
+                so->so_snd.sb_flags &= ~SB_AUTOSIZE;
+        }
+    }
+    #endif
+
+#if 0 // DON'T DO TCP SEGMENTATION OFFLOADING
+    /*
+     * Decide if we can use TCP Segmentation Offloading (if supported by
+     * hardware).
+     *
+     * TSO may only be used if we are in a pure bulk sending state.  The
+     * presence of TCP-MD5, SACK retransmits, SACK advertisements and
+     * IP options prevent using TSO.  With TSO the TCP header is the same
+     * (except for the sequence number) for all generated packets.  This
+     * makes it impossible to transmit any options which vary per generated
+     * segment or packet.
+     */
+#ifdef IPSEC
+    /*
+     * Pre-calculate here as we save another lookup into the darknesses
+     * of IPsec that way and can actually decide if TSO is ok.
+     */
+    ipsec_optlen = ipsec_hdrsiz_tcp(tp);
+#endif
+    if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
+        ((tp->t_flags & TF_SIGNATURE) == 0) &&
+        tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
+#ifdef IPSEC
+        ipsec_optlen == 0 &&
+#endif
+        tp->t_inpcb->inp_options == NULL &&
+        tp->t_inpcb->in6p_options == NULL)
+        tso = 1;
+#endif
+
+    if (sack_rxmit) {
+//        if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd)))
+        if (SEQ_LT(p->rxmit + len, tp->snd_una + lbuf_used_space(&tp->sendbuf)))
+            flags &= ~TH_FIN;
+    } else {
+        if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
+//            sbused(&so->so_snd)))
+            lbuf_used_space(&tp->sendbuf)))
+            flags &= ~TH_FIN;
+    }
+
+//    recwin = sbspace(&so->so_rcv);
+    recwin = cbuf_free_space(&tp->recvbuf);
+
+    /*
+     * Sender silly window avoidance.  We transmit under the following
+     * conditions when len is non-zero:
+     *
+     *    - We have a full segment (or more with TSO)
+     *    - This is the last buffer in a write()/send() and we are
+     *      either idle or running NODELAY
+     *    - we've timed out (e.g. persist timer)
+     *    - we have more than 1/2 the maximum send window's worth of
+     *      data (receiver may be limiting the window size)
+     *    - we need to retransmit
+     */
+    if (len) {
+        if (len >= tp->t_maxseg)
+            goto send;
+        /*
+         * NOTE! on localhost connections an 'ack' from the remote
+         * end may occur synchronously with the output and cause
+         * us to flush a buffer queued with moretocome.  XXX
+         *
+         * note: the len + off check is almost certainly unnecessary.
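+         *
+         * Illustration of the Nagle-style test below (hypothetical
+         * numbers): with 100 bytes queued, t_maxseg = 536, earlier
+         * data still unacknowledged (so idle == 0), TF_NODELAY
+         * clear and a multi-kilobyte peer window, none of the
+         * "goto send" cases fire and the small segment waits for
+         * the outstanding data to be ACKed.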
+ */
+        if (!(tp->t_flags & TF_MORETOCOME) &&    /* normal case */
+            (idle || (tp->t_flags & TF_NODELAY)) &&
+//            len + off >= sbavail(&so->so_snd) &&
+            len + off >= lbuf_used_space(&tp->sendbuf) &&
+            (tp->t_flags & TF_NOPUSH) == 0) {
+            goto send;
+        }
+        if (tp->t_flags & TF_FORCEDATA)        /* typ. timeout case */
+            goto send;
+        if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
+            goto send;
+        if (SEQ_LT(tp->snd_nxt, tp->snd_max))    /* retransmit case */
+            goto send;
+        if (sack_rxmit)
+            goto send;
+    }
+
+    /*
+     * Sending of standalone window updates.
+     *
+     * Window updates are important when we close our window due to a
+     * full socket buffer and are opening it again after the application
+     * reads data from it. Once the window has opened again and the
+     * remote end starts to send again the ACK clock takes over and
+     * provides the most current window information.
+     *
+     * We must avoid the silly window syndrome whereby every read
+     * from the receive buffer, no matter how small, causes a window
+     * update to be sent. We also should avoid sending a flurry of
+     * window updates when the socket buffer had queued a lot of data
+     * and the application is doing small reads.
+     *
+     * Prevent a flurry of pointless window updates by only sending
+     * an update when we can increase the advertised window by more
+     * than 1/4th of the socket buffer capacity. When the buffer is
+     * getting full or is very small be more aggressive and send an
+     * update whenever we can increase by two mss sized segments.
+     * In all other situations the ACK's to new incoming data will
+     * carry further window increases.
+     *
+     * Don't send an independent window update if a delayed
+     * ACK is pending (it will get piggy-backed on it) or the
+     * remote side already has done a half-close and won't send
+     * more data.  Skip this if the connection is in T/TCP
+     * half-open state.
+     */
+    if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
+        !(tp->t_flags & TF_DELACK) &&
+        !TCPS_HAVERCVDFIN(tp->t_state)) {
+        /*
+         * "adv" is the amount we could increase the window,
+         * taking into account that we are limited by
+         * TCP_MAXWIN << tp->rcv_scale.
+         */
+        long adv;
+        int oldwin;
+
+        adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale);
+        if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
+            oldwin = (tp->rcv_adv - tp->rcv_nxt);
+            adv -= oldwin;
+        } else
+            oldwin = 0;
+
+        /*
+         * If the new window size ends up being the same as the old
+         * size when it is scaled, then don't force a window update.
+         */
+        if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale)
+            goto dontupdate;
+
+#if 0 // My window size and max seg size are on different orders of magnitude than what is expected
+        if (adv >= (long)(2 * tp->t_maxseg) &&
+            (adv >= (long)(/*so->so_rcv.sb_hiwat*/cbuf_size(&tp->recvbuf) / 4) ||
+             recwin <= (long)(/*so->so_rcv.sb_hiwat*/cbuf_size(&tp->recvbuf) / 8) ||
+             /*so->so_rcv.sb_hiwat*/cbuf_size(&tp->recvbuf) <= 8 * tp->t_maxseg))
+            goto send;
+#endif
+        if (adv >= (long) cbuf_size(&tp->recvbuf) / 4)
+            goto send;
+    }
+dontupdate:
+
+    /*
+     * Send if we owe the peer an ACK, RST, SYN, or urgent data.  ACKNOW
+     * is also a catch-all for the retransmit timer timeout case.
+     */
+    if (tp->t_flags & TF_ACKNOW) {
+        goto send;
+    }
+    if ((flags & TH_RST) ||
+        ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
+        goto send;
+    if (SEQ_GT(tp->snd_up, tp->snd_una))
+        goto send;
+    /*
+     * If our state indicates that FIN should be sent
+     * and we have not yet done so, then we need to send.
+ */
+    if (flags & TH_FIN &&
+        ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+        goto send;
+    /*
+     * In SACK, it is possible for tcp_output to fail to send a segment
+     * after the retransmission timer has been turned off.  Make sure
+     * that the retransmission timer is set.
+     */
+    if ((tp->t_flags & TF_SACK_PERMIT) &&
+        SEQ_GT(tp->snd_max, tp->snd_una) &&
+        !tcp_timer_active(tp, TT_REXMT) &&
+        !tcp_timer_active(tp, TT_PERSIST)) {
+        tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+        goto just_return;
+    }
+
+    /*
+     * TCP window updates are not reliable, rather a polling protocol
+     * using ``persist'' packets is used to ensure receipt of window
+     * updates.  The three ``states'' for the output side are:
+     *    idle                not doing retransmits or persists
+     *    persisting          to move a small or zero window
+     *    (re)transmitting    and thereby not persisting
+     *
+     * tcp_timer_active(tp, TT_PERSIST)
+     *    is true when we are in persist state.
+     * (tp->t_flags & TF_FORCEDATA)
+     *    is set when we are called to send a persist packet.
+     * tcp_timer_active(tp, TT_REXMT)
+     *    is set when we are retransmitting
+     * The output side is idle when both timers are zero.
+     *
+     * If send window is too small, there is data to transmit, and no
+     * retransmit or persist is pending, then go to persist state.
+     * If nothing happens soon, send when timer expires:
+     * if window is nonzero, transmit what we can,
+     * otherwise force out a byte.
+     */
+    if (/*sbavail(&so->so_snd)*/lbuf_used_space(&tp->sendbuf) && !tcp_timer_active(tp, TT_REXMT) &&
+        !tcp_timer_active(tp, TT_PERSIST)) {
+        tp->t_rxtshift = 0;
+        tcp_setpersist(tp);
+    }
+
+    /*
+     * No reason to send a segment, just return.
+     */
+just_return:
+    //SOCKBUF_UNLOCK(&so->so_snd); OMIT SYNCHRONIZATION
+    return (0);
+
+send:
+    /* What happens after the send label may have to change significantly, since I need
+       to send a packet the TinyOS way, which is different from the BSD UNIX way (e.g.,
+       I'm not going to use mbufs). */
+    //SOCKBUF_LOCK_ASSERT(&so->so_snd);
+    if (len > 0) {
+        if (len >= tp->t_maxseg)
+            tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
+        else
+            tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
+    }
+    /*
+     * Before ESTABLISHED, force sending of initial options
+     * unless TCP set not to do any options.
+     * NOTE: we assume that the IP/TCP header plus TCP options
+     * always fit in a single mbuf, leaving room for a maximum
+     * link header, i.e.
+     *    max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
+     */
+    optlen = 0;
+//#ifdef INET6
+//    if (isipv6)
+        hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+//    else
+//#endif
+//        hdrlen = sizeof (struct tcpiphdr);
+
+    /*
+     * Compute options for segment.
+     * We only have to care about SYN and established connection
+     * segments.  Options for SYN-ACK segments are handled in TCP
+     * syncache.
+     * Sam: I've done away with the syncache. However, it seems that
+     * the existing logic works fine for SYN-ACK as well.
+     */
+    if ((tp->t_flags & TF_NOOPT) == 0) {
+        to.to_flags = 0;
+        /* Maximum segment size. */
+        if (flags & TH_SYN) {
+            tp->snd_nxt = tp->iss;
+            to.to_mss = tcp_mssopt(/*&tp->t_inpcb->inp_inc*/tp);
+            to.to_flags |= TOF_MSS;
+        }
+        /* Window scaling. */
+        if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
+            to.to_wscale = tp->request_r_scale;
+            to.to_flags |= TOF_SCALE;
+        }
+        /* Timestamps.
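+         * (RFC 1323) Sent when the peer has sent us a timestamp,
+         * or on our own SYN when we request the option: to_tsval
+         * carries our tick count plus ts_offset, and to_tsecr
+         * echoes ts_recent.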
*/ + if ((tp->t_flags & TF_RCVD_TSTMP) || + ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { + to.to_tsval = tcp_ts_getticks() + tp->ts_offset; + to.to_tsecr = tp->ts_recent; + to.to_flags |= TOF_TS; +#if 0 + /* Set receive buffer autosizing timestamp. */ + if (tp->rfbuf_ts == 0 && + (so->so_rcv.sb_flags & SB_AUTOSIZE)) + tp->rfbuf_ts = tcp_ts_getticks(); +#endif + } + + /* Selective ACK's. */ + if (tp->t_flags & TF_SACK_PERMIT) { + if (flags & TH_SYN) + to.to_flags |= TOF_SACKPERM; + else if (TCPS_HAVEESTABLISHED(tp->t_state) && + (tp->t_flags & TF_SACK_PERMIT) && + tp->rcv_numsacks > 0) { + to.to_flags |= TOF_SACK; + to.to_nsacks = tp->rcv_numsacks; + to.to_sacks = (u_char *)tp->sackblks; + } + } + +#if 0 +#ifdef TCP_SIGNATURE + /* TCP-MD5 (RFC2385). */ + if (tp->t_flags & TF_SIGNATURE) + to.to_flags |= TOF_SIGNATURE; +#endif /* TCP_SIGNATURE */ +#endif + + /* Processing the options. */ + hdrlen += optlen = tcp_addoptions(&to, opt); + } +//#ifdef INET6 +// if (isipv6) +// ipoptlen = ip6_optlen(tp->t_inpcb); + ipoptlen = 0; // FOR NOW. MAYBE I'LL PUT THIS BACK IN LATER +// else +//#endif +// if (tp->t_inpcb->inp_options) +// ipoptlen = tp->t_inpcb->inp_options->m_len - +// offsetof(struct ipoption, ipopt_list); +// else +// ipoptlen = 0; +//#ifdef IPSEC +// ipoptlen += ipsec_optlen; +//#endif + + /* + * Adjust data length if insertion of options will + * bump the packet length beyond the t_maxopd length. + * Clear the FIN bit because we cut off the tail of + * the segment. + */ + if (len + optlen + ipoptlen > tp->t_maxopd) { + flags &= ~TH_FIN; +#if 0 + if (tso) { + u_int if_hw_tsomax; + u_int if_hw_tsomaxsegcount; + u_int if_hw_tsomaxsegsize; + struct mbuf *mb; + u_int moff; + int max_len; + + /* extract TSO information */ + if_hw_tsomax = tp->t_tsomax; + if_hw_tsomaxsegcount = tp->t_tsomaxsegcount; + if_hw_tsomaxsegsize = tp->t_tsomaxsegsize; + + /* + * Limit a TSO burst to prevent it from + * overflowing or exceeding the maximum length + * allowed by the network interface: + */ + KASSERT(ipoptlen == 0, + ("%s: TSO can't do IP options", __func__)); + + /* + * Check if we should limit by maximum payload + * length: + */ + if (if_hw_tsomax != 0) { + /* compute maximum TSO length */ + max_len = (if_hw_tsomax - hdrlen - + max_linkhdr); + if (max_len <= 0) { + len = 0; + } else if (len > max_len) { + sendalot = 1; + len = max_len; + } + } + + /* + * Check if we should limit by maximum segment + * size and count: + */ + if (if_hw_tsomaxsegcount != 0 && + if_hw_tsomaxsegsize != 0) { + /* + * Subtract one segment for the LINK + * and TCP/IP headers mbuf that will + * be prepended to this mbuf chain + * after the code in this section + * limits the number of mbufs in the + * chain to if_hw_tsomaxsegcount. 
+ */ + if_hw_tsomaxsegcount -= 1; + max_len = 0; + mb = sbsndmbuf(&so->so_snd, off, &moff); + + while (mb != NULL && max_len < len) { + u_int mlen; + u_int frags; + + /* + * Get length of mbuf fragment + * and how many hardware frags, + * rounded up, it would use: + */ + mlen = (mb->m_len - moff); + frags = howmany(mlen, + if_hw_tsomaxsegsize); + + /* Handle special case: Zero Length Mbuf */ + if (frags == 0) + frags = 1; + + /* + * Check if the fragment limit + * will be reached or exceeded: + */ + if (frags >= if_hw_tsomaxsegcount) { + max_len += min(mlen, + if_hw_tsomaxsegcount * + if_hw_tsomaxsegsize); + break; + } + max_len += mlen; + if_hw_tsomaxsegcount -= frags; + moff = 0; + mb = mb->m_next; + } + if (max_len <= 0) { + len = 0; + } else if (len > max_len) { + sendalot = 1; + len = max_len; + } + } + + /* + * Prevent the last segment from being + * fractional unless the send sockbuf can be + * emptied: + */ + max_len = (tp->t_maxopd - optlen); + if ((off + len) < sbavail(&so->so_snd)) { + moff = len % max_len; + if (moff != 0) { + len -= moff; + sendalot = 1; + } + } + + /* + * In case there are too many small fragments + * don't use TSO: + */ + if (len <= max_len) { + len = max_len; + sendalot = 1; + tso = 0; + } + + /* + * Send the FIN in a separate segment + * We don't trust the TSO implementations + * to clear the FIN flag on all but the + * last segment. + */ + if (tp->t_flags & TF_NEEDFIN) + sendalot = 1; + + } else { +#endif + len = tp->t_maxopd - optlen - ipoptlen; + sendalot = 1; +#if 0 + } +#endif + }/* else + tso = 0; +*/ + KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET, + ("%s: len > IP_MAXPACKET", __func__)); + +#if 0 // WE AREN'T USING MBUFS, SO THERE'S NO NEED TO CHECK IF IT FITS IN ONE +/*#ifdef DIAGNOSTIC*/ +//#ifdef INET6 + if (max_linkhdr + hdrlen > MCLBYTES) +//#else +// if (max_linkhdr + hdrlen > MHLEN) +//#endif + printf("PANIC: tcphdr too big\n"); +/*#endif*/ +#endif + /* + * This KASSERT is here to catch edge cases at a well defined place. + * Before, those had triggered (random) panic conditions further down. + */ + KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__)); + + /* + * Grab a header mbuf, attaching a copy of data to + * be transmitted, and initialize the header from + * the template for sends on this connection. + */ +#if 0 + if (len) { + struct mbuf *mb; + u_int moff; + + if ((tp->t_flags & TF_FORCEDATA) && len == 1) + TCPSTAT_INC(tcps_sndprobe); + else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { + tp->t_sndrexmitpack++; + TCPSTAT_INC(tcps_sndrexmitpack); + TCPSTAT_ADD(tcps_sndrexmitbyte, len); + } else { + TCPSTAT_INC(tcps_sndpack); + TCPSTAT_ADD(tcps_sndbyte, len); + } +#ifdef INET6 + if (MHLEN < hdrlen + max_linkhdr) + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + else +#endif + m = m_gethdr(M_NOWAIT, MT_DATA); + + if (m == NULL) { + SOCKBUF_UNLOCK(&so->so_snd); + error = ENOBUFS; + sack_rxmit = 0; + goto out; + } + + m->m_data += max_linkhdr; + m->m_len = hdrlen; + + /* + * Start the m_copy functions from the closest mbuf + * to the offset in the socket buffer chain. + */ + mb = sbsndptr(&so->so_snd, off, len, &moff); + + if (len <= MHLEN - hdrlen - max_linkhdr) { + m_copydata(mb, moff, (int)len, + mtod(m, caddr_t) + hdrlen); + m->m_len += len; + } else { + m->m_next = m_copy(mb, moff, (int)len); + if (m->m_next == NULL) { + SOCKBUF_UNLOCK(&so->so_snd); + (void) m_free(m); + error = ENOBUFS; + sack_rxmit = 0; + goto out; + } + } + + /* + * If we're sending everything we've got, set PUSH. 
+ * (This will keep happy those implementations which only + * give data to the user when a buffer fills or + * a PUSH comes in.) + */ + if (off + len == sbused(&so->so_snd)) + flags |= TH_PUSH; + SOCKBUF_UNLOCK(&so->so_snd); + } else { + SOCKBUF_UNLOCK(&so->so_snd); + if (tp->t_flags & TF_ACKNOW) + TCPSTAT_INC(tcps_sndacks); + else if (flags & (TH_SYN|TH_FIN|TH_RST)) + TCPSTAT_INC(tcps_sndctrl); + else if (SEQ_GT(tp->snd_up, tp->snd_una)) + TCPSTAT_INC(tcps_sndurg); + else + TCPSTAT_INC(tcps_sndwinup); + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + error = ENOBUFS; + sack_rxmit = 0; + goto out; + } +#ifdef INET6 + if (isipv6 && (MHLEN < hdrlen + max_linkhdr) && + MHLEN >= hdrlen) { + M_ALIGN(m, hdrlen); + } else +#endif + m->m_data += max_linkhdr; + m->m_len = hdrlen; + } +#endif + /* Instead of the previous code that "grabs an mbuf", we need to do this the + RIOT OS way, where we allocate a gnrc_pktsnip_t. */ + /* There was a change made upstream to the SPI driver which causes RIOT to + * kernel panic if it sees an iovec of length 0. This means we can't attach + * a gnrc_pktsnip_t of length 0. So if the length of the body is zero, we + * need to make sure that the payload is NULL, not an empty gnrc_pktsnip_t. + */ + gnrc_pktsnip_t* payload; + if (len == 0) { + payload = NULL; + } else { + payload = gnrc_pktbuf_add(NULL, NULL, len, GNRC_NETTYPE_UNDEF); + if (payload == NULL) { + goto memsendfail; + } + } + gnrc_pktsnip_t* tcpsnip = gnrc_pktbuf_add(payload, NULL, sizeof(struct tcphdr) + optlen, GNRC_NETTYPE_TCP); + if (tcpsnip == NULL) { + gnrc_pktbuf_release(payload); + goto memsendfail; + } + assert(ipoptlen == 0); // For now. Otherwise we need to handle IPv6 extensions... + // The destination address is copied into the header in tcpip_fillheaders + gnrc_pktsnip_t* ip6snip = gnrc_ipv6_hdr_build(tcpsnip, NULL, NULL); + if (ip6snip == NULL) { + gnrc_pktbuf_release(tcpsnip); +memsendfail: + error = ENOBUFS; + sack_rxmit = 0; + goto out; + } + if (len) { + uint32_t used_space = lbuf_used_space(&tp->sendbuf); + + /* + * The TinyOS version has a way to avoid the copying we have to do here. + * Because it is possible to send iovecs directly in the BLIP stack, and + * an lbuf is made of iovecs, we could just "save" the starting and ending + * iovecs, modify them to get exactly the slice we want, call "send" on + * the resulting chain, and then restore the starting and ending iovecs + * once "send" returns. + * + * In RIOT, pktsnips have additional behavior regarding memory management + * that precludes this optimization. + */ + int written = iov_read(lbuf_to_iovec(&tp->sendbuf), off, len, payload->data); + KASSERT(written == len, ("Reading send buffer out of range!\n")); + + /* + * If we're sending everything we've got, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or + * a PUSH comes in.) 
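+	 * Note that "everything we've got" is measured here against
+	 * lbuf_used_space() rather than sbused(), since this port replaces
+	 * the socket send buffer with the lbuf send buffer.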
+ */ + if (off + len == /*sbused(&so->so_snd)*/used_space) + flags |= TH_PUSH; + } + + ip6 = (struct ip6_hdr*) ip6snip->data; + th = (struct tcphdr*) tcpsnip->data; + + ip6->ip6_nxt = IANA_TCP; + ip6->ip6_plen = htons(sizeof(struct tcphdr) + optlen + len); + +#if 0 // The TinyOS code + alen = sizeof(struct ip6_packet) + sizeof(struct tcphdr) + optlen + ipoptlen + sizeof(struct ip_iovec); + bufreal = ip_malloc(alen + 3); + if (bufreal == NULL) { + error = ENOBUFS; + sack_rxmit = 0; + goto out; + } + buf = (char*) (((uint32_t) (bufreal + 3)) & 0xFFFFFFFCu); + memset(buf, 0, alen); // For safe measure + msg = (struct ip6_packet*) buf; + iov = (struct ip_iovec*) (buf + alen - sizeof(struct ip_iovec)); + iov->iov_next = NULL; // if len > 0, this will be reassigned + iov->iov_len = sizeof(struct tcphdr) + optlen; + iov->iov_base = (void*) ((char*) (msg + 1) + ipoptlen); + msg->ip6_hdr.ip6_nxt = IANA_TCP; + msg->ip6_hdr.ip6_plen = htons(sizeof(struct tcphdr) + optlen + len); + + msg->ip6_data = iov; + if (len) { + uint32_t used_space = lbuf_used_space(&tp->sendbuf); + int rv = lbuf_getrange(&tp->sendbuf, off, len, &startptr, &startoffset, &endptr, &endextra); + KASSERT(!rv, ("Reading send buffer out of range!\n")); + // Temporarily modify the iovecs in the send buffer so we don't have to copy anything. + // But first, store the original iovecs so we can restore them after sending the message. + memcpy(&startvec, &startptr->iov, sizeof(struct ip_iovec)); + memcpy(&endvec, &endptr->iov, sizeof(struct ip_iovec)); + startptr->iov.iov_base += startoffset; + startptr->iov.iov_len -= startoffset; + endptr->iov.iov_len -= endextra; + endptr->iov.iov_next = NULL; // end of the chain + + iov->iov_next = &startptr->iov; // connect to our chain + + /* + * If we're sending everything we've got, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or + * a PUSH comes in.) + */ + if (off + len == /*sbused(&so->so_snd)*/used_space) + flags |= TH_PUSH; + } + + ip6 = (struct ip6_hdr*) &msg->ip6_hdr; + th = (struct tcphdr*) ((char*) (ip6 + 1) + ipoptlen); +#endif + tcpip_fillheaders(tp, ip6, th); + + //SOCKBUF_UNLOCK_ASSERT(&so->so_snd); +// m->m_pkthdr.rcvif = (struct ifnet *)0; +//#ifdef MAC +// mac_inpcb_create_mbuf(tp->t_inpcb, m); +//#endif +#if 0 // ALREADY HANDLED ABOVE +#ifdef INET6 + if (isipv6) { + ip6 = mtod(m, struct ip6_hdr *); + th = (struct tcphdr *)(ip6 + 1); + tcpip_fillheaders(tp->t_inpcb, ip6, th); + } else +#endif /* INET6 */ + { + ip = mtod(m, struct ip *); + ipov = (struct ipovly *)ip; + th = (struct tcphdr *)(ip + 1); + tcpip_fillheaders(tp->t_inpcb, ip, th); + } +#endif + + /* + * Fill in fields, remembering maximum advertised + * window for use in delaying messages about window sizes. + * If resending a FIN, be sure not to use a new sequence number. + */ + if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && + tp->snd_nxt == tp->snd_max) + tp->snd_nxt--; + /* + * If we are starting a connection, send ECN setup + * SYN packet. If we are on a retransmit, we may + * resend those bits a number of times as per + * RFC 3168. + */ +#if 0 // FOR NOW, SKIP ECN, SINCE IT ISN'T CRITICAL. I MAY ADD THIS BACK LATER. 
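+	/*
+	 * For reference, the disabled block below is the RFC 3168 ECN
+	 * machinery: a SYN carries ECE|CWR to negotiate ECN, data segments
+	 * are marked ECT(0) in the IP header, and CWR/ECE are echoed in
+	 * response to congestion signals.
+	 */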
+	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+		if (tp->t_rxtshift >= 1) {
+			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+				flags |= TH_ECE|TH_CWR;
+		} else
+			flags |= TH_ECE|TH_CWR;
+	}
+
+	if (tp->t_state == TCPS_ESTABLISHED &&
+	    (tp->t_flags & TF_ECN_PERMIT)) {
+		/*
+		 * If the peer has ECN, mark data packets with
+		 * ECN capable transmission (ECT).
+		 * Ignore pure ack packets, retransmissions and window probes.
+		 */
+		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+		    !((tp->t_flags & TF_FORCEDATA) && len == 1)) {
+#ifdef INET6
+			if (isipv6)
+				ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+			else
+#endif
+				ip->ip_tos |= IPTOS_ECN_ECT0;
+			TCPSTAT_INC(tcps_ecn_ect0);
+		}
+
+		/*
+		 * Reply with proper ECN notifications.
+		 */
+		if (tp->t_flags & TF_ECN_SND_CWR) {
+			flags |= TH_CWR;
+			tp->t_flags &= ~TF_ECN_SND_CWR;
+		}
+		if (tp->t_flags & TF_ECN_SND_ECE)
+			flags |= TH_ECE;
+	}
+#endif
+	/*
+	 * If we are doing retransmissions, then snd_nxt will
+	 * not reflect the first unsent octet.  For ACK only
+	 * packets, we do not want the sequence number of the
+	 * retransmitted packet, we want the sequence number
+	 * of the next unsent octet.  So, if there is no data
+	 * (and no SYN or FIN), use snd_max instead of snd_nxt
+	 * when filling in ti_seq.  But if we are in persist
+	 * state, snd_max might reflect one byte beyond the
+	 * right edge of the window, so use snd_nxt in that
+	 * case, since we know we aren't doing a retransmission.
+	 * (retransmit and persist are mutually exclusive...)
+	 */
+	if (sack_rxmit == 0) {
+		if (len || (flags & (TH_SYN|TH_FIN)) ||
+		    tcp_timer_active(tp, TT_PERSIST))
+			th->th_seq = htonl(tp->snd_nxt);
+		else
+			th->th_seq = htonl(tp->snd_max);
+	} else {
+		th->th_seq = htonl(p->rxmit);
+		p->rxmit += len;
+		tp->sackhint.sack_bytes_rexmit += len;
+	}
+
+	th->th_ack = htonl(tp->rcv_nxt);
+	if (optlen) {
+		bcopy(opt, th + 1, optlen);
+		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
+	}
+	th->th_flags = flags;
+	/*
+	 * Calculate receive window.  Don't shrink window,
+	 * but avoid silly window syndrome.
+	 */
+	if (recwin < (long)(/*so->so_rcv.sb_hiwat*/cbuf_size(&tp->recvbuf) / 4) &&
+	    recwin < (long)tp->t_maxseg)
+		recwin = 0;
+	if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
+	    recwin < (long)(tp->rcv_adv - tp->rcv_nxt))
+		recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
+	if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
+		recwin = (long)TCP_MAXWIN << tp->rcv_scale;
+
+	/*
+	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+	 * or <SYN,ACK>) segment itself is never scaled.  The <SYN,ACK>
+	 * case is handled in syncache.
+	 */
+	if (flags & TH_SYN)
+		th->th_win = htons((u_short)
+		    (min(cbuf_size(&tp->recvbuf), TCP_MAXWIN)));
+	else
+		th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
+
+	/*
+	 * Adjust the RXWIN0SENT flag - indicate that we have advertised
+	 * a 0 window.  This may cause the remote transmitter to stall.  This
+	 * flag tells soreceive() to disable delayed acknowledgements when
+	 * draining the buffer.  This can occur if the receiver is attempting
+	 * to read more data than can be buffered prior to transmitting on
+	 * the connection.
+ */ + if (th->th_win == 0) { +// tp->t_sndzerowin++; + tp->t_flags |= TF_RXWIN0SENT; + } else + tp->t_flags &= ~TF_RXWIN0SENT; + if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { + th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); + th->th_flags |= TH_URG; + } else + /* + * If no urgent pointer to send, then we pull + * the urgent pointer to the left edge of the send window + * so that it doesn't drift into the send window on sequence + * number wraparound. + */ + tp->snd_up = tp->snd_una; /* drag it along */ +#if 0 +#ifdef TCP_SIGNATURE + if (tp->t_flags & TF_SIGNATURE) { + int sigoff = to.to_signature - opt; + tcp_signature_compute(m, 0, len, optlen, + (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND); + } +#endif +#endif + /* + * Put TCP length in extended header, and then + * checksum extended header and data. + */ + //m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ + //m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); +//#ifdef INET6 +// if (isipv6) { + /* + * ip6_plen is not need to be filled now, and will be filled + * in ip6_output. + */ + //m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; + //th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + + //optlen + len, IPPROTO_TCP, 0); +// } +//#endif +#if 0 // THIS IS IPv6! +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + { + m->m_pkthdr.csum_flags = CSUM_TCP; + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); + + /* IP version must be set here for ipv4/ipv6 checking later */ + KASSERT(ip->ip_v == IPVERSION, + ("%s: IP version incorrect: %d", __func__, ip->ip_v)); + } +#endif +#endif +#if 0 // No TCP Segment Offloading + /* + * Enable TSO and specify the size of the segments. + * The TCP pseudo header checksum is always provided. + */ + if (tso) { + KASSERT(len > tp->t_maxopd - optlen, + ("%s: len <= tso_segsz", __func__)); + m->m_pkthdr.csum_flags |= CSUM_TSO; + m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen; + } +#endif +#if 0 // THIS CHECK IS IRRELEVANT SINCE WE AREN'T USING MBUFS +#ifdef IPSEC + KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), + ("%s: mbuf chain shorter than expected: %ld + %u + %u - %u != %u", + __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); +#else + KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL), + ("%s: mbuf chain shorter than expected: %ld + %u + %u != %u", + __func__, len, hdrlen, ipoptlen, m_length(m, NULL))); +#endif +#endif +#if 0 + /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */ + hhook_run_tcp_est_out(tp, th, &to, len, tso); +#endif +#if 0 +#ifdef TCPDEBUG + /* + * Trace. + */ + if (so->so_options & SO_DEBUG) { + u_short save = 0; +#ifdef INET6 + if (!isipv6) +#endif + { + save = ipov->ih_len; + ipov->ih_len = htons(m->m_pkthdr.len /* - hdrlen + (th->th_off << 2) */); + } + tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0); +#ifdef INET6 + if (!isipv6) +#endif + ipov->ih_len = save; + } +#endif /* TCPDEBUG */ +#endif +/* REMOVING ALL SDT PROBES + TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); +*/ + /* + * Fill in IP length and desired time to live and + * send to IP level. There should be a better way + * to handle ttl and tos; we could keep them in + * the template, but need a way to checksum without them. + */ + /* + * m->m_pkthdr.len should have been set before checksum calculation, + * because in6_cksum() need it. 
+ */ +//#ifdef INET6 +// if (isipv6) { +#if 0 + struct route_in6 ro; + + bzero(&ro, sizeof(ro)); + /* + * we separately set hoplimit for every segment, since the + * user might want to change the value via setsockopt. + * Also, desired default hop limit might be changed via + * Neighbor Discovery. + */ + ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL); +#endif + /* + * Set the packet size here for the benefit of DTrace probes. + * ip6_output() will set it properly; it's supposed to include + * the option header lengths as well. + */ + ip6->ip6_plen = htons(len + optlen + sizeof(struct tcphdr)); +#if 0 // THIS SEEMS OPTIONAL, SO I'M GETTING RID OF IT + if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) + tp->t_flags2 |= TF2_PLPMTU_PMTUD; + else + tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; +#endif + +// if (tp->t_state == TCPS_SYN_SENT) +// TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th); + +// TCP_PROBE5(send, NULL, tp, ip6, tp, th); +#if 0 +#ifdef TCPPCAP + /* Save packet, if requested. */ + tcp_pcap_add(th, m, &(tp->t_outpkts)); +#endif +#endif +#if 0 + /* TODO: IPv6 IP6TOS_ECT bit on */ + error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro, + ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), + NULL, NULL, tp->t_inpcb); + + if (error == EMSGSIZE && ro.ro_rt != NULL) + mtu = ro.ro_rt->rt_mtu; + RO_RTFREE(&ro); +#endif + send_message(ip6snip); +#if 0 // The TinyOS code + // Send packet the TinyOS way + send_message(tp, msg, th, len + optlen + sizeof(struct tcphdr)); + ip_free(bufreal); + + if (len) { + // Restore the iovecs + memcpy(&startptr->iov, &startvec, sizeof(struct ip_iovec)); + memcpy(&endptr->iov, &endvec, sizeof(struct ip_iovec)); + } +#endif +// } +//#endif /* INET6 */ +#if 0 +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + { + struct route ro; + + bzero(&ro, sizeof(ro)); + ip->ip_len = htons(m->m_pkthdr.len); +#ifdef INET6 + if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO) + ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL); +#endif /* INET6 */ + /* + * If we do path MTU discovery, then we set DF on every packet. + * This might not be the best thing to do according to RFC3390 + * Section 2. However the tcp hostcache migitates the problem + * so it affects only the first tcp connection with a host. + * + * NB: Don't set DF on small MTU/MSS to have a safe fallback. + */ + if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) { + ip->ip_off |= htons(IP_DF); + tp->t_flags2 |= TF2_PLPMTU_PMTUD; + } else { + tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; + } + + if (tp->t_state == TCPS_SYN_SENT) + TCP_PROBE5(connect__request, NULL, tp, ip, tp, th); + + TCP_PROBE5(send, NULL, tp, ip, tp, th); + +#if 0 +#ifdef TCPPCAP + /* Save packet, if requested. */ + tcp_pcap_add(th, m, &(tp->t_outpkts)); +#endif +#endif + error = ip_output(m, tp->t_inpcb->inp_options, &ro, + ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, + tp->t_inpcb); + + if (error == EMSGSIZE && ro.ro_rt != NULL) + mtu = ro.ro_rt->rt_mtu; + RO_RTFREE(&ro); + } +#endif /* INET */ +#endif +out: + /* + * In transmit state, time the transmission and arrange for + * the retransmit. In persist state, just set snd_max. + */ + if ((tp->t_flags & TF_FORCEDATA) == 0 || + !tcp_timer_active(tp, TT_PERSIST)) { + tcp_seq startseq = tp->snd_nxt; + + /* + * Advance snd_nxt over sequence space of this segment. 
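+	 * SYN and FIN each occupy one unit of sequence space, so e.g. a
+	 * pure SYN still advances snd_nxt by one even though len == 0.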
+ */ + if (flags & (TH_SYN|TH_FIN)) { + if (flags & TH_SYN) + tp->snd_nxt++; + if (flags & TH_FIN) { + tp->snd_nxt++; + tp->t_flags |= TF_SENTFIN; + } + } + if (sack_rxmit) + goto timer; + tp->snd_nxt += len; + if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { + tp->snd_max = tp->snd_nxt; + /* + * Time this transmission if not a retransmission and + * not currently timing anything. + */ + if (tp->t_rtttime == 0) { + tp->t_rtttime = ticks; + tp->t_rtseq = startseq; +// TCPSTAT_INC(tcps_segstimed); + } + } + + /* + * Set retransmit timer if not currently set, + * and not doing a pure ack or a keep-alive probe. + * Initial value for retransmit timer is smoothed + * round-trip time + 2 * round-trip time variance. + * Initialize shift counter which is used for backoff + * of retransmit time. + */ +timer: + if (!tcp_timer_active(tp, TT_REXMT) && + ((sack_rxmit && tp->snd_nxt != tp->snd_max) || + (tp->snd_nxt != tp->snd_una))) { + if (tcp_timer_active(tp, TT_PERSIST)) { + tcp_timer_activate(tp, TT_PERSIST, 0); + tp->t_rxtshift = 0; + } + tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); + } else if (len == 0 && /*sbavail(&so->so_snd)*/lbuf_used_space(&tp->sendbuf) && + !tcp_timer_active(tp, TT_REXMT) && + !tcp_timer_active(tp, TT_PERSIST)) { + /* + * Avoid a situation where we do not set persist timer + * after a zero window condition. For example: + * 1) A -> B: packet with enough data to fill the window + * 2) B -> A: ACK for #1 + new data (0 window + * advertisement) + * 3) A -> B: ACK for #2, 0 len packet + * + * In this case, A will not activate the persist timer, + * because it chose to send a packet. Unless tcp_output + * is called for some other reason (delayed ack timer, + * another input packet from B, socket syscall), A will + * not send zero window probes. + * + * So, if you send a 0-length packet, but there is data + * in the socket buffer, and neither the rexmt or + * persist timer is already set, then activate the + * persist timer. + */ + tp->t_rxtshift = 0; + tcp_setpersist(tp); + } + } else { + /* + * Persist case, update snd_max but since we are in + * persist mode (no window) we do not update snd_nxt. + */ + int xlen = len; + if (flags & TH_SYN) + ++xlen; + if (flags & TH_FIN) { + ++xlen; + tp->t_flags |= TF_SENTFIN; + } + if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) + tp->snd_max = tp->snd_nxt + len; + } + + if (error) { + + /* + * We know that the packet was lost, so back out the + * sequence number advance, if any. + * + * If the error is EPERM the packet got blocked by the + * local firewall. Normally we should terminate the + * connection but the blocking may have been spurious + * due to a firewall reconfiguration cycle. So we treat + * it like a packet loss and let the retransmit timer and + * timeouts do their work over time. + * XXX: It is a POLA question whether calling tcp_drop right + * away would be the really correct behavior instead. + */ + if (((tp->t_flags & TF_FORCEDATA) == 0 || + !tcp_timer_active(tp, TT_PERSIST)) && + ((flags & TH_SYN) == 0) && + (error != EPERM)) { + if (sack_rxmit) { + p->rxmit -= len; + tp->sackhint.sack_bytes_rexmit -= len; + KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, + ("sackhint bytes rtx >= 0")); + } else + tp->snd_nxt -= len; + } + //SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. 
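+	 * A quick map of the cases below: EPERM is recorded as a soft
+	 * error, ENOBUFS re-arms the retransmit timer and collapses
+	 * snd_cwnd to a single segment, and EMSGSIZE retries with an
+	 * updated MSS when a new path MTU is known.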
*/ + switch (error) { + case EPERM: + tp->t_softerror = error; + return (error); + case ENOBUFS: + if (!tcp_timer_active(tp, TT_REXMT) && + !tcp_timer_active(tp, TT_PERSIST)) + tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); + tp->snd_cwnd = tp->t_maxseg; + return (0); + case EMSGSIZE: + /* + * For some reason the interface we used initially + * to send segments changed to another or lowered + * its MTU. + * If TSO was active we either got an interface + * without TSO capabilits or TSO was turned off. + * If we obtained mtu from ip_output() then update + * it and try again. + */ +#if 0 + if (tso) + tp->t_flags &= ~TF_TSO; +#endif + if (mtu != 0) { + tcp_mss_update(tp, -1, mtu, NULL, NULL); + goto again; + } + return (error); + case EHOSTDOWN: + case EHOSTUNREACH: + case ENETDOWN: + case ENETUNREACH: + if (TCPS_HAVERCVDSYN(tp->t_state)) { + tp->t_softerror = error; + return (0); + } + /* FALLTHROUGH */ + default: + return (error); + } + } + //TCPSTAT_INC(tcps_sndtotal); + + /* + * Data sent (as far as we can tell). + * If this advertises a larger window than any other segment, + * then remember the size of the advertised window. + * Any pending ACK has now been sent. + */ + if (recwin >= 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + recwin; + tp->last_ack_sent = tp->rcv_nxt; + tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); + if (tcp_timer_active(tp, TT_DELACK)) + tcp_timer_activate(tp, TT_DELACK, 0); +#if 0 + /* + * This completely breaks TCP if newreno is turned on. What happens + * is that if delayed-acks are turned on on the receiver, this code + * on the transmitter effectively destroys the TCP window, forcing + * it to four packets (1.5Kx4 = 6K window). + */ + if (sendalot && --maxburst) + goto again; +#endif + if (sendalot) + goto again; + return (0); +} + +/* + * Insert TCP options according to the supplied parameters to the place + * optp in a consistent way. Can handle unaligned destinations. + * + * The order of the option processing is crucial for optimal packing and + * alignment for the scarce option space. + * + * The optimal order for a SYN/SYN-ACK segment is: + * MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) + + * Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40. + * + * The SACK options should be last. SACK blocks consume 8*n+2 bytes. + * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks). + * At minimum we need 10 bytes (to generate 1 SACK block). If both + * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present, + * we only have 10 bytes for SACK options (40 - (12 + 18)). 
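+ *
+ * As a concrete example, an established segment using timestamps but no
+ * signature packs as NOP(1) + NOP(1) + Timestamp(10) = 12 bytes, which
+ * leaves room for exactly three SACK blocks:
+ * 12 + NOP(2) + SACK header(2) + 3*8 = 40 bytes.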
+ */ +int +tcp_addoptions(struct tcpopt *to, u_char *optp) +{ + u_int mask, optlen = 0; + + for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) { + if ((to->to_flags & mask) != mask) + continue; + if (optlen == TCP_MAXOLEN) + break; + switch (to->to_flags & mask) { + case TOF_MSS: + while (optlen % 4) { + optlen += TCPOLEN_NOP; + *optp++ = TCPOPT_NOP; + } + if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG) + continue; + optlen += TCPOLEN_MAXSEG; + *optp++ = TCPOPT_MAXSEG; + *optp++ = TCPOLEN_MAXSEG; + to->to_mss = htons(to->to_mss); + bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss)); + optp += sizeof(to->to_mss); + break; + case TOF_SCALE: + while (!optlen || optlen % 2 != 1) { + optlen += TCPOLEN_NOP; + *optp++ = TCPOPT_NOP; + } + if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW) + continue; + optlen += TCPOLEN_WINDOW; + *optp++ = TCPOPT_WINDOW; + *optp++ = TCPOLEN_WINDOW; + *optp++ = to->to_wscale; + break; + case TOF_SACKPERM: + while (optlen % 2) { + optlen += TCPOLEN_NOP; + *optp++ = TCPOPT_NOP; + } + if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED) + continue; + optlen += TCPOLEN_SACK_PERMITTED; + *optp++ = TCPOPT_SACK_PERMITTED; + *optp++ = TCPOLEN_SACK_PERMITTED; + break; + case TOF_TS: + while (!optlen || optlen % 4 != 2) { + optlen += TCPOLEN_NOP; + *optp++ = TCPOPT_NOP; + } + if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP) + continue; + optlen += TCPOLEN_TIMESTAMP; + *optp++ = TCPOPT_TIMESTAMP; + *optp++ = TCPOLEN_TIMESTAMP; + to->to_tsval = htonl(to->to_tsval); + to->to_tsecr = htonl(to->to_tsecr); + bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval)); + optp += sizeof(to->to_tsval); + bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr)); + optp += sizeof(to->to_tsecr); + break; + case TOF_SIGNATURE: + { + int siglen = TCPOLEN_SIGNATURE - 2; + + while (!optlen || optlen % 4 != 2) { + optlen += TCPOLEN_NOP; + *optp++ = TCPOPT_NOP; + } + if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) + continue; + optlen += TCPOLEN_SIGNATURE; + *optp++ = TCPOPT_SIGNATURE; + *optp++ = TCPOLEN_SIGNATURE; + to->to_signature = optp; + while (siglen--) + *optp++ = 0; + break; + } + case TOF_SACK: + { + int sackblks = 0; + struct sackblk *sack = (struct sackblk *)to->to_sacks; + tcp_seq sack_seq; + + while (!optlen || optlen % 4 != 2) { + optlen += TCPOLEN_NOP; + *optp++ = TCPOPT_NOP; + } + if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK) + continue; + optlen += TCPOLEN_SACKHDR; + *optp++ = TCPOPT_SACK; + sackblks = min(to->to_nsacks, + (TCP_MAXOLEN - optlen) / TCPOLEN_SACK); + *optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK; + while (sackblks--) { + sack_seq = htonl(sack->start); + bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq)); + optp += sizeof(sack_seq); + sack_seq = htonl(sack->end); + bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq)); + optp += sizeof(sack_seq); + optlen += TCPOLEN_SACK; + sack++; + } +// TCPSTAT_INC(tcps_sack_send_blocks); + break; + } + default: + /*panic(*/printf("PANIC: %s: unknown TCP option type", __func__); + break; + } + } + + /* Terminate and pad TCP options to a 4 byte boundary. */ + if (optlen % 4) { + optlen += TCPOLEN_EOL; + *optp++ = TCPOPT_EOL; + } + /* + * According to RFC 793 (STD0007): + * "The content of the header beyond the End-of-Option option + * must be header padding (i.e., zero)." + * and later: "The padding is composed of zeros." 
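+	 *
+	 * For example, options ending at 37 bytes are terminated with one
+	 * EOL byte and two zero pad bytes to reach a 40-byte (4-aligned)
+	 * length.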
+ */ + while (optlen % 4) { + optlen += TCPOLEN_PAD; + *optp++ = TCPOPT_PAD; + } + + KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__)); + return (optlen); +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_reass.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_reass.c new file mode 100644 index 000000000000..6e6333e9b558 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_reass.c @@ -0,0 +1,318 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 + */ + +#include "../gnrc_tcp_freebsd_internal.h" +#include "../lib/bitmap.h" +#include "../lib/cbuf.h" +#include "tcp.h" +#include "tcp_fsm.h" +#include "tcp_seq.h" +#include "tcp_var.h" + +/* Sam: Segments are only reassembled within the window; data outside the window + is thrown away. So, the total amount of reassembly data cannot exceed the + size of the receive window. + The receive window is just the amount of empty space in the receive buffer, and + the receive buffer is statically allocated and is of known size. So, I am just + going to use the empty space in the receive buffer for segment reassembly. A + bitmap keeps track of which bytes represent partial segments, and which ones are + free space. + + I've kept the original function for reference, but I rewrote it to use my data + structure for the reassembly buffer. + + Looking at the usage of this function in tcp_input, this just has to set *tlenp + to 0 if the received segment is already completely buffered; it does not need + to update it if only part of the segment is trimmed off. 
*/ + +int +tcp_reass(struct tcpcb* tp, struct tcphdr* th, int* tlenp, uint8_t* data, uint8_t* signals) +{ + size_t mergeable, written; + size_t offset; + size_t start_index; + //int added_fin; + size_t usedbefore; + int tlen = *tlenp; + size_t merged = 0; + int flags = 0; + + /* + * Call with th==NULL after become established to + * force pre-ESTABLISHED data up to user socket. + */ + if (th == NULL) + goto present; + + /* Insert the new segment queue entry into place. */ + KASSERT(SEQ_GEQ(th->th_seq, tp->rcv_nxt), ("Adding past segment to the reassembly queue\n")); + offset = (size_t) (th->th_seq - tp->rcv_nxt); + + if (cbuf_reass_count_set(&tp->recvbuf, (size_t) offset, tp->reassbmp, tlen) >= tlen) { + *tlenp = 0; + goto present; + } + written = cbuf_reass_write(&tp->recvbuf, (size_t) offset, data, tlen, tp->reassbmp, &start_index); + + if ((th->th_flags & TH_FIN) && (tp->reass_fin_index == -1)) { + tp->reass_fin_index = (int16_t) (start_index + tlen); + } + KASSERT(written == tlen, ("Reassembly write out of bounds: tried to write %d, but wrote %d\n", tlen, (int) written)); + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. + */ + mergeable = cbuf_reass_count_set(&tp->recvbuf, 0, tp->reassbmp, (size_t) 0xFFFFFFFF); + usedbefore = cbuf_used_space(&tp->recvbuf); + if (!tpiscantrcv(tp) || usedbefore == 0) { + /* If usedbefore == 0, but we can't receive more, then we still need to move the buffer + along by merging and then popping, in case we receive a FIN later on. */ + if (tp->reass_fin_index >= 0 && cbuf_reass_within_offset(&tp->recvbuf, mergeable, (size_t) tp->reass_fin_index)) { + tp->reass_fin_index = -2; // So we won't consider any more FINs + flags = TH_FIN; + } + merged = cbuf_reass_merge(&tp->recvbuf, mergeable, tp->reassbmp); + KASSERT(merged == mergeable, ("Reassembly merge out of bounds: tried to merge %d, but merged %d\n", (int) mergeable, (int) merged)); + if (tpiscantrcv(tp)) { + cbuf_pop(&tp->recvbuf, merged); // So no data really enters the buffer + } else if (usedbefore == 0 && merged > 0) { + *signals |= SIG_RECVBUF_NOTEMPTY; + } + } else { + /* If there is data in the buffer AND we can't receive more, then that must be because we received a FIN, + but the user hasn't yet emptied the buffer of its contents. */ + KASSERT (tp->reass_fin_index == -2, ("Can't receive more, and data in buffer, but haven't received a FIN\n")); + } + + tp->rcv_nxt += mergeable; + + return flags; +} +#if 0 +int +tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m) +{ + struct tseg_qent *q; + struct tseg_qent *p = NULL; + struct tseg_qent *nq; + struct tseg_qent *te = NULL; + struct socket *so = tp->t_inpcb->inp_socket; + char *s = NULL; + int flags; + struct tseg_qent tqs; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + /* + * XXX: tcp_reass() is rather inefficient with its data structures + * and should be rewritten (see NetBSD for optimizations). + */ + + /* + * Call with th==NULL after become established to + * force pre-ESTABLISHED data up to user socket. + */ + if (th == NULL) + goto present; + + /* + * Limit the number of segments that can be queued to reduce the + * potential for mbuf exhaustion. For best performance, we want to be + * able to queue a full window's worth of segments. The size of the + * socket receive buffer determines our advertised window and grows + * automatically when socket buffer autotuning is enabled. Use it as the + * basis for our queue limit. 
+ * Always let the missing segment through which caused this queue. + * NB: Access to the socket buffer is left intentionally unlocked as we + * can tolerate stale information here. + * + * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat + * should work but causes packets to be dropped when they shouldn't. + * Investigate why and re-evaluate the below limit after the behaviour + * is understood. + */ + if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) && + tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) { + TCPSTAT_INC(tcps_rcvreassfull); + *tlenp = 0; + if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: queue limit reached, " + "segment dropped\n", s, __func__); + free(s, M_TCPLOG); + } + m_freem(m); + return (0); + } + + /* + * Allocate a new queue entry. If we can't, or hit the zone limit + * just drop the pkt. + * + * Use a temporary structure on the stack for the missing segment + * when the zone is exhausted. Otherwise we may get stuck. + */ + te = uma_zalloc(tcp_reass_zone, M_NOWAIT); + if (te == NULL) { + if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) { + TCPSTAT_INC(tcps_rcvmemdrop); + m_freem(m); + *tlenp = 0; + if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, + NULL))) { + log(LOG_DEBUG, "%s; %s: global zone limit " + "reached, segment dropped\n", s, __func__); + free(s, M_TCPLOG); + } + return (0); + } else { + bzero(&tqs, sizeof(struct tseg_qent)); + te = &tqs; + if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, + NULL))) { + log(LOG_DEBUG, + "%s; %s: global zone limit reached, using " + "stack for missing segment\n", s, __func__); + free(s, M_TCPLOG); + } + } + } + tp->t_segqlen++; + + /* + * Find a segment which begins after this one does. + */ + LIST_FOREACH(q, &tp->t_segq, tqe_q) { + if (SEQ_GT(q->tqe_th->th_seq, th->th_seq)) + break; + p = q; + } + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if (p != NULL) { + int i; + /* conversion to int (in i) handles seq wraparound */ + i = p->tqe_th->th_seq + p->tqe_len - th->th_seq; + if (i > 0) { + if (i >= *tlenp) { + TCPSTAT_INC(tcps_rcvduppack); + TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp); + m_freem(m); + if (te != &tqs) + uma_zfree(tcp_reass_zone, te); + tp->t_segqlen--; + /* + * Try to present any queued data + * at the left window edge to the user. + * This is needed after the 3-WHS + * completes. + */ + goto present; /* ??? */ + } + m_adj(m, i); + *tlenp -= i; + th->th_seq += i; + } + } + tp->t_rcvoopack++; + TCPSTAT_INC(tcps_rcvoopack); + TCPSTAT_ADD(tcps_rcvoobyte, *tlenp); + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (q) { + int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq; + if (i <= 0) + break; + if (i < q->tqe_len) { + q->tqe_th->th_seq += i; + q->tqe_len -= i; + m_adj(q->tqe_m, i); + break; + } + + nq = LIST_NEXT(q, tqe_q); + LIST_REMOVE(q, tqe_q); + m_freem(q->tqe_m); + uma_zfree(tcp_reass_zone, q); + tp->t_segqlen--; + q = nq; + } + + /* Insert the new segment queue entry into place. 
*/ + te->tqe_m = m; + te->tqe_th = th; + te->tqe_len = *tlenp; + + if (p == NULL) { + LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q); + } else { + KASSERT(te != &tqs, ("%s: temporary stack based entry not " + "first element in queue", __func__)); + LIST_INSERT_AFTER(p, te, tqe_q); + } + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. + */ + if (!TCPS_HAVEESTABLISHED(tp->t_state)) + return (0); + q = LIST_FIRST(&tp->t_segq); + if (!q || q->tqe_th->th_seq != tp->rcv_nxt) + return (0); + SOCKBUF_LOCK(&so->so_rcv); + do { + tp->rcv_nxt += q->tqe_len; + flags = q->tqe_th->th_flags & TH_FIN; + nq = LIST_NEXT(q, tqe_q); + LIST_REMOVE(q, tqe_q); + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) + m_freem(q->tqe_m); + else + sbappendstream_locked(&so->so_rcv, q->tqe_m, 0); + if (q != &tqs) + uma_zfree(tcp_reass_zone, q); + tp->t_segqlen--; + q = nq; + } while (q && q->tqe_th->th_seq == tp->rcv_nxt); + sorwakeup_locked(so); + return (flags); +} +#endif diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_sack.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_sack.c new file mode 100644 index 000000000000..55687b760b9c --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_sack.c @@ -0,0 +1,733 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95 + */ + +/*- + * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995 + * + * NRL grants permission for redistribution and use in source and binary + * forms, with or without modification, of the software and documentation + * created at NRL provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgements: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * This product includes software developed at the Information + * Technology Division, US Naval Research Laboratory. + * 4. Neither the name of the NRL nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS + * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation + * are those of the authors and should not be interpreted as representing + * official policies, either expressed or implied, of the US Naval + * Research Laboratory (NRL). 
+ */ +#if 0 +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_tcpdebug.h" + +#include +#include +#include +#include +#include +#include +#include /* for proc0 declaration */ +#include +#include +#include +#include +#include + +#include /* before tcp_seq.h, for tcp_random18() */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef TCPDEBUG +#include +#endif /* TCPDEBUG */ + +#include + +VNET_DECLARE(struct uma_zone *, sack_hole_zone); +#define V_sack_hole_zone VNET(sack_hole_zone) + +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK"); +VNET_DEFINE(int, tcp_do_sack) = 1; +#define V_tcp_do_sack VNET(tcp_do_sack) +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(tcp_do_sack), 0, "Enable/Disable TCP SACK support"); + +VNET_DEFINE(int, tcp_sack_maxholes) = 128; +#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(tcp_sack_maxholes), 0, + "Maximum number of TCP SACK holes allowed per connection"); + +VNET_DEFINE(int, tcp_sack_globalmaxholes) = 65536; +#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes) +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(tcp_sack_globalmaxholes), 0, + "Global maximum number of TCP SACK holes"); + +VNET_DEFINE(int, tcp_sack_globalholes) = 0; +#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_VNET | CTLFLAG_RD, + &VNET_NAME(tcp_sack_globalholes), 0, + "Global number of TCP SACK holes currently allocated"); +#endif + +#include "tcp.h" +#include "tcp_fsm.h" +#include "tcp_seq.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "sys/queue.h" + +enum tcp_sack_consts { + V_tcp_sack_maxholes = MAX_SACKHOLES +}; +/* Don't need these constants, since I have an explicit pool. */ +#if 0 +const int V_tcp_sack_globalmaxholes = 65536; +int V_tcp_sack_globalholes = 0; +#endif + +/* Pool of SACK holes, used for allocation. */ +struct sackhole sackhole_pool[SACKHOLE_POOL_SIZE]; +uint8_t sackhole_bmp[SACKHOLE_BMP_SIZE]; + +/* Initialize the pool of SACK holes. */ +void +tcp_sack_init(void) +{ + bmp_init(sackhole_bmp, SACKHOLE_BMP_SIZE); +} + +struct sackhole* sackhole_alloc(void) { + size_t freeindex = bmp_countset(sackhole_bmp, SACKHOLE_BMP_SIZE, 0, SACKHOLE_BMP_SIZE); + if (freeindex >= SACKHOLE_BMP_SIZE) { + return NULL; // all sackholes are allocated already! + } + bmp_setrange(sackhole_bmp, freeindex, 1); + return &sackhole_pool[freeindex]; +} + +void sackhole_free(struct sackhole* tofree) { + size_t freeindex = (size_t) (tofree - &sackhole_pool[0]); + KASSERT(tofree == &sackhole_pool[freeindex], ("sackhole pool unaligned\n")); + bmp_clrrange(sackhole_bmp, freeindex, 1); +} + + +/* + * This function is called upon receipt of new valid data (while not in + * header prediction mode), and it updates the ordered list of sacks. + */ +void +tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end) +{ + /* + * First reported block MUST be the most recent one. Subsequent + * blocks SHOULD be in the order in which they arrived at the + * receiver. These two conditions make the implementation fully + * compliant with RFC 2018. 
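+	 *
+	 * Example: if [5, 10) and [15, 20) are already on the list and a
+	 * segment covering [8, 16) arrives, all three merge into a single
+	 * head block [5, 20) below, while blocks not touching the new
+	 * data are carried over unchanged in saved_blks[].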
+ */ + struct sackblk head_blk, saved_blks[MAX_SACK_BLKS]; + int num_head, num_saved, i; + +// INP_WLOCK_ASSERT(tp->t_inpcb); + + /* Check arguments. */ + KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end")); + + /* SACK block for the received segment. */ + head_blk.start = rcv_start; + head_blk.end = rcv_end; + + /* + * Merge updated SACK blocks into head_blk, and save unchanged SACK + * blocks into saved_blks[]. num_saved will have the number of the + * saved SACK blocks. + */ + num_saved = 0; + for (i = 0; i < tp->rcv_numsacks; i++) { + tcp_seq start = tp->sackblks[i].start; + tcp_seq end = tp->sackblks[i].end; + if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) { + /* + * Discard this SACK block. + */ + } else if (SEQ_LEQ(head_blk.start, end) && + SEQ_GEQ(head_blk.end, start)) { + /* + * Merge this SACK block into head_blk. This SACK + * block itself will be discarded. + */ + if (SEQ_GT(head_blk.start, start)) + head_blk.start = start; + if (SEQ_LT(head_blk.end, end)) + head_blk.end = end; + } else { + /* + * Save this SACK block. + */ + saved_blks[num_saved].start = start; + saved_blks[num_saved].end = end; + num_saved++; + } + } + + /* + * Update SACK list in tp->sackblks[]. + */ + num_head = 0; + if (SEQ_GT(head_blk.start, tp->rcv_nxt)) { + /* + * The received data segment is an out-of-order segment. Put + * head_blk at the top of SACK list. + */ + tp->sackblks[0] = head_blk; + num_head = 1; + /* + * If the number of saved SACK blocks exceeds its limit, + * discard the last SACK block. + */ + if (num_saved >= MAX_SACK_BLKS) + num_saved--; + } + if (num_saved > 0) { + /* + * Copy the saved SACK blocks back. + */ + bcopy(saved_blks, &tp->sackblks[num_head], + sizeof(struct sackblk) * num_saved); + } + + /* Save the number of SACK blocks. */ + tp->rcv_numsacks = num_head + num_saved; +} + +/* + * Delete all receiver-side SACK information. + */ +void +tcp_clean_sackreport(struct tcpcb *tp) +{ + int i; + +// INP_WLOCK_ASSERT(tp->t_inpcb); + tp->rcv_numsacks = 0; + for (i = 0; i < MAX_SACK_BLKS; i++) + tp->sackblks[i].start = tp->sackblks[i].end=0; +} + +/* + * Allocate struct sackhole. + */ +static struct sackhole * +tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end) +{ + struct sackhole *hole; + + if (tp->snd_numholes >= V_tcp_sack_maxholes/* || + V_tcp_sack_globalholes >= V_tcp_sack_globalmaxholes*/) { +// TCPSTAT_INC(tcps_sack_sboverflow); + return NULL; + } + +// hole = (struct sackhole *)uma_zalloc(V_sack_hole_zone, M_NOWAIT); + hole = sackhole_alloc(); + if (hole == NULL) + return NULL; + + hole->start = start; + hole->end = end; + hole->rxmit = start; + + tp->snd_numholes++; +// atomic_add_int(&V_tcp_sack_globalholes, 1); + + return hole; +} + +/* + * Free struct sackhole. + */ +static void +tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole) +{ + +// uma_zfree(V_sack_hole_zone, hole); + sackhole_free(hole); + + tp->snd_numholes--; +// atomic_subtract_int(&V_tcp_sack_globalholes, 1); + + KASSERT(tp->snd_numholes >= 0, ("tp->snd_numholes >= 0")); +// KASSERT(V_tcp_sack_globalholes >= 0, ("tcp_sack_globalholes >= 0")); +} + +/* + * Insert new SACK hole into scoreboard. + */ +static struct sackhole * +tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end, + struct sackhole *after) +{ + struct sackhole *hole; + + /* Allocate a new SACK hole. */ + hole = tcp_sackhole_alloc(tp, start, end); + if (hole == NULL) + return NULL; + + /* Insert the new SACK hole into scoreboard. 
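+	 * The new hole either follows "after" or goes to the tail of the
+	 * scoreboard TAILQ; sackhint.nexthole is primed here so later
+	 * output passes can resume without rescanning the list.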
*/ + if (after != NULL) + TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink); + else + TAILQ_INSERT_TAIL(&tp->snd_holes, hole, scblink); + + /* Update SACK hint. */ + if (tp->sackhint.nexthole == NULL) + tp->sackhint.nexthole = hole; + + return hole; +} + +/* + * Remove SACK hole from scoreboard. + */ +static void +tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole) +{ + + /* Update SACK hint. */ + if (tp->sackhint.nexthole == hole) + tp->sackhint.nexthole = TAILQ_NEXT(hole, scblink); + + /* Remove this SACK hole. */ + TAILQ_REMOVE(&tp->snd_holes, hole, scblink); + + /* Free this SACK hole. */ + tcp_sackhole_free(tp, hole); +} + +/* + * Process cumulative ACK and the TCP SACK option to update the scoreboard. + * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of + * the sequence space). + */ +void +tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) +{ + struct sackhole *cur, *temp; + struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; + int i, j, num_sack_blks; + +// INP_WLOCK_ASSERT(tp->t_inpcb); + + num_sack_blks = 0; + /* + * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, + * treat [SND.UNA, SEG.ACK) as if it is a SACK block. + */ + if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) { + sack_blocks[num_sack_blks].start = tp->snd_una; + sack_blocks[num_sack_blks++].end = th_ack; + } + /* + * Append received valid SACK blocks to sack_blocks[], but only if we + * received new blocks from the other side. + */ + if (to->to_flags & TOF_SACK) { + for (i = 0; i < to->to_nsacks; i++) { + bcopy((to->to_sacks + i * TCPOLEN_SACK), + &sack, sizeof(sack)); + sack.start = ntohl(sack.start); + sack.end = ntohl(sack.end); + if (SEQ_GT(sack.end, sack.start) && + SEQ_GT(sack.start, tp->snd_una) && + SEQ_GT(sack.start, th_ack) && + SEQ_LT(sack.start, tp->snd_max) && + SEQ_GT(sack.end, tp->snd_una) && + SEQ_LEQ(sack.end, tp->snd_max)) + sack_blocks[num_sack_blks++] = sack; + } + } + /* + * Return if SND.UNA is not advanced and no valid SACK block is + * received. + */ + if (num_sack_blks == 0) + return; + + /* + * Sort the SACK blocks so we can update the scoreboard with just one + * pass. The overhead of sorting upto 4+1 elements is less than + * making upto 4+1 passes over the scoreboard. + */ + for (i = 0; i < num_sack_blks; i++) { + for (j = i + 1; j < num_sack_blks; j++) { + if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { + sack = sack_blocks[i]; + sack_blocks[i] = sack_blocks[j]; + sack_blocks[j] = sack; + } + } + } + if (TAILQ_EMPTY(&tp->snd_holes)) + /* + * Empty scoreboard. Need to initialize snd_fack (it may be + * uninitialized or have a bogus value). Scoreboard holes + * (from the sack blocks received) are created later below + * (in the logic that adds holes to the tail of the + * scoreboard). + */ + tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); + /* + * In the while-loop below, incoming SACK blocks (sack_blocks[]) and + * SACK holes (snd_holes) are traversed from their tails with just + * one pass in order to reduce the number of compares especially when + * the bandwidth-delay product is large. + * + * Note: Typically, in the first RTT of SACK recovery, the highest + * three or four SACK blocks with the same ack number are received. + * In the second RTT, if retransmitted data segments are not lost, + * the highest three or four SACK blocks with ack number advancing + * are received. 
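+	 *
+	 * In effect the sorted blocks and the ordered hole list are
+	 * consumed like a merge step: each iteration retires either one
+	 * block or one hole, so the sweep is linear in their total count.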
+ */ + sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */ + tp->sackhint.last_sack_ack = sblkp->end; + if (SEQ_LT(tp->snd_fack, sblkp->start)) { + /* + * The highest SACK block is beyond fack. Append new SACK + * hole at the tail. If the second or later highest SACK + * blocks are also beyond the current fack, they will be + * inserted by way of hole splitting in the while-loop below. + */ + temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); + if (temp != NULL) { + tp->snd_fack = sblkp->end; + /* Go to the previous sack block. */ + sblkp--; + } else { + /* + * We failed to add a new hole based on the current + * sack block. Skip over all the sack blocks that + * fall completely to the right of snd_fack and + * proceed to trim the scoreboard based on the + * remaining sack blocks. This also trims the + * scoreboard for th_ack (which is sack_blocks[0]). + */ + while (sblkp >= sack_blocks && + SEQ_LT(tp->snd_fack, sblkp->start)) + sblkp--; + if (sblkp >= sack_blocks && + SEQ_LT(tp->snd_fack, sblkp->end)) + tp->snd_fack = sblkp->end; + } + } else if (SEQ_LT(tp->snd_fack, sblkp->end)) + /* fack is advanced. */ + tp->snd_fack = sblkp->end; + /* We must have at least one SACK hole in scoreboard. */ + KASSERT(!TAILQ_EMPTY(&tp->snd_holes), + ("SACK scoreboard must not be empty")); + cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */ + /* + * Since the incoming sack blocks are sorted, we can process them + * making one sweep of the scoreboard. + */ + while (sblkp >= sack_blocks && cur != NULL) { + if (SEQ_GEQ(sblkp->start, cur->end)) { + /* + * SACKs data beyond the current hole. Go to the + * previous sack block. + */ + sblkp--; + continue; + } + if (SEQ_LEQ(sblkp->end, cur->start)) { + /* + * SACKs data before the current hole. Go to the + * previous hole. + */ + cur = TAILQ_PREV(cur, sackhole_head, scblink); + continue; + } + tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start); + KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, + ("sackhint bytes rtx >= 0")); + if (SEQ_LEQ(sblkp->start, cur->start)) { + /* Data acks at least the beginning of hole. */ + if (SEQ_GEQ(sblkp->end, cur->end)) { + /* Acks entire hole, so delete hole. */ + temp = cur; + cur = TAILQ_PREV(cur, sackhole_head, scblink); + tcp_sackhole_remove(tp, temp); + /* + * The sack block may ack all or part of the + * next hole too, so continue onto the next + * hole. + */ + continue; + } else { + /* Move start of hole forward. */ + cur->start = sblkp->end; + cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); + } + } else { + /* Data acks at least the end of hole. */ + if (SEQ_GEQ(sblkp->end, cur->end)) { + /* Move end of hole backward. */ + cur->end = sblkp->start; + cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); + } else { + /* + * ACKs some data in middle of a hole; need + * to split current hole + */ + temp = tcp_sackhole_insert(tp, sblkp->end, + cur->end, cur); + if (temp != NULL) { + if (SEQ_GT(cur->rxmit, temp->rxmit)) { + temp->rxmit = cur->rxmit; + tp->sackhint.sack_bytes_rexmit + += (temp->rxmit + - temp->start); + } + cur->end = sblkp->start; + cur->rxmit = SEQ_MIN(cur->rxmit, + cur->end); + } + } + } + tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start); + /* + * Testing sblkp->start against cur->start tells us whether + * we're done with the sack block or the sack hole. + * Accordingly, we advance one or the other. 
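+	 * For instance (hypothetical values): with cur = [100,120) and
+	 * sblkp = [120,140), sblkp->start > cur->start, so we are done with
+	 * the sack block and sblkp is decremented while cur stays put.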
+ */ + if (SEQ_LEQ(sblkp->start, cur->start)) + cur = TAILQ_PREV(cur, sackhole_head, scblink); + else + sblkp--; + } +} + +/* + * Free all SACK holes to clear the scoreboard. + */ +void +tcp_free_sackholes(struct tcpcb *tp) +{ + struct sackhole *q; + +// INP_WLOCK_ASSERT(tp->t_inpcb); + while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL) + tcp_sackhole_remove(tp, q); + tp->sackhint.sack_bytes_rexmit = 0; + + KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes == 0")); + KASSERT(tp->sackhint.nexthole == NULL, + ("tp->sackhint.nexthole == NULL")); +} + +/* + * Partial ack handling within a sack recovery episode. Keeping this very + * simple for now. When a partial ack is received, force snd_cwnd to a value + * that will allow the sender to transmit no more than 2 segments. If + * necessary, a better scheme can be adopted at a later point, but for now, + * the goal is to prevent the sender from bursting a large amount of data in + * the midst of sack recovery. + */ +void +tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th) +{ + int num_segs = 1; + +// INP_WLOCK_ASSERT(tp->t_inpcb); + tcp_timer_activate(tp, TT_REXMT, 0); + tp->t_rtttime = 0; + /* Send one or 2 segments based on how much new data was acked. */ + if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2) + num_segs = 2; + tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + + (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg); + if (tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); +} + +#if 0 +/* + * Debug version of tcp_sack_output() that walks the scoreboard. Used for + * now to sanity check the hint. + */ +static struct sackhole * +tcp_sack_output_debug(struct tcpcb *tp, int *sack_bytes_rexmt) +{ + struct sackhole *p; + + INP_WLOCK_ASSERT(tp->t_inpcb); + *sack_bytes_rexmt = 0; + TAILQ_FOREACH(p, &tp->snd_holes, scblink) { + if (SEQ_LT(p->rxmit, p->end)) { + if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */ + continue; + } + *sack_bytes_rexmt += (p->rxmit - p->start); + break; + } + *sack_bytes_rexmt += (p->rxmit - p->start); + } + return (p); +} +#endif + +/* + * Returns the next hole to retransmit and the number of retransmitted bytes + * from the scoreboard. We store both the next hole and the number of + * retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK + * reception). This avoids scoreboard traversals completely. + * + * The loop here will traverse *at most* one link. Here's the argument. For + * the loop to traverse more than 1 link before finding the next hole to + * retransmit, we would need to have at least 1 node following the current + * hint with (rxmit == end). But, for all holes following the current hint, + * (start == rxmit), since we have not yet retransmitted from them. + * Therefore, in order to traverse more 1 link in the loop below, we need to + * have at least one node following the current hint with (start == rxmit == + * end). But that can't happen, (start == end) means that all the data in + * that hole has been sacked, in which case, the hole would have been removed + * from the scoreboard. 
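+ *
+ * Put differently: for the loop to cross two links, some hole after the
+ * hint would need (rxmit == end) while also having (start == rxmit),
+ * i.e. a zero-length hole with start == end, and such a hole is deleted
+ * from the scoreboard as soon as all of its data is sacked.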
+ */ +struct sackhole * +tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) +{ + struct sackhole *hole = NULL; + +// INP_WLOCK_ASSERT(tp->t_inpcb); + *sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit; + hole = tp->sackhint.nexthole; + if (hole == NULL || SEQ_LT(hole->rxmit, hole->end)) + goto out; + while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) { + if (SEQ_LT(hole->rxmit, hole->end)) { + tp->sackhint.nexthole = hole; + break; + } + } +out: + return (hole); +} + +/* + * After a timeout, the SACK list may be rebuilt. This SACK information + * should be used to avoid retransmitting SACKed data. This function + * traverses the SACK list to see if snd_nxt should be moved forward. + */ +void +tcp_sack_adjust(struct tcpcb *tp) +{ + struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes); + +// INP_WLOCK_ASSERT(tp->t_inpcb); + if (cur == NULL) + return; /* No holes */ + if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) + return; /* We're already beyond any SACKed blocks */ + /*- + * Two cases for which we want to advance snd_nxt: + * i) snd_nxt lies between end of one hole and beginning of another + * ii) snd_nxt lies between end of last hole and snd_fack + */ + while ((p = TAILQ_NEXT(cur, scblink)) != NULL) { + if (SEQ_LT(tp->snd_nxt, cur->end)) + return; + if (SEQ_GEQ(tp->snd_nxt, p->start)) + cur = p; + else { + tp->snd_nxt = p->start; + return; + } + } + if (SEQ_LT(tp->snd_nxt, cur->end)) + return; + tp->snd_nxt = tp->snd_fack; +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_seq.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_seq.h new file mode 100644 index 000000000000..7b4aee06e452 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_seq.h @@ -0,0 +1,101 @@ +/*- + * Copyright (c) 1982, 1986, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)tcp_seq.h 8.3 (Berkeley) 6/21/95 + * $FreeBSD$ + */ + +#ifndef _NETINET_TCP_SEQ_H_ +#define _NETINET_TCP_SEQ_H_ + +#include "../gnrc_tcp_freebsd_internal.h" + +/* + * TCP sequence numbers are 32 bit integers operated + * on with modular arithmetic. These macros can be + * used to compare such integers. + */ +#define SEQ_LT(a,b) ((int)((a)-(b)) < 0) +#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) +#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0) + +#define SEQ_MIN(a, b) ((SEQ_LT(a, b)) ? (a) : (b)) +#define SEQ_MAX(a, b) ((SEQ_GT(a, b)) ? (a) : (b)) + +/* for modulo comparisons of timestamps */ +#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) +#define TSTMP_GT(a,b) ((int)((a)-(b)) > 0) +#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Macros to initialize tcp sequence numbers for + * send and receive from initial send and receive + * sequence numbers. + */ +#define tcp_rcvseqinit(tp) \ + (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 + +#define tcp_sendseqinit(tp) \ + (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \ + (tp)->snd_recover = (tp)->iss + +//#ifdef _KERNEL +/* + * Clock macros for RFC 1323 timestamps. + */ +#define TCP_TS_TO_TICKS(_t) ((_t) * hz / 1000) + +/* Timestamp wrap-around time, 24 days. */ +#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * 1000) + +/* + * tcp_ts_getticks() in ms, should be 1ms < x < 1000ms according to RFC 1323. + * We always use 1ms granularity independent of hz. + */ +static __inline u_int +tcp_ts_getticks(void) +{ +#if 0 // I don't have "getmicrouptime" + struct timeval tv; + u_long ms; + + /* + * getmicrouptime() should be good enough for any 1-1000ms granularity. + * Do not use getmicrotime() here as it might break nfsroot/tcp. + */ + getmicrouptime(&tv); + ms = tv.tv_sec * 1000 + tv.tv_usec / 1000; + + return (ms); +#endif + return get_millis(); +} +//#endif /* _KERNEL */ + +#endif /* _NETINET_TCP_SEQ_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_subr.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_subr.c new file mode 100644 index 000000000000..96696cc4aa78 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_subr.c @@ -0,0 +1,933 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 + */ + +#include + +#include "../gnrc_tcp_freebsd_internal.h" +#include "ip.h" +#include "ip6.h" +#include "tcp.h" +#include "tcp_fsm.h" +#include "tcp_var.h" +#include "tcp_seq.h" +#include "tcp_timer.h" +#include "../lib/bitmap.h" +#include "../lib/cbuf.h" +#include "cc.h" +#include "../lib/lbuf.h" + +#include "tcp_const.h" + +#include "net/gnrc/pktbuf.h" + +#define ENABLE_DEBUG (0) +#include "debug.h" + +/* EXTERN DECLARATIONS FROM TCP_TIMER.H */ +#if 0 // I put these in the enum below +int tcp_keepinit; /* time to establish connection */ +int tcp_keepidle; /* time before keepalive probes begin */ +int tcp_keepintvl; /* time between keepalive probes */ +//int tcp_keepcnt; /* number of keepalives */ +int tcp_delacktime; /* time before sending a delayed ACK */ +int tcp_maxpersistidle; +int tcp_rexmit_slop; +int tcp_msl; +//int tcp_ttl; /* time to live for TCP segs */ +int tcp_finwait2_timeout; +#endif +int tcp_rexmit_min; + +// A simple linear congruential number generator +tcp_seq seed = (tcp_seq) 0xbeaddeed; +tcp_seq tcp_new_isn(struct tcpcb* tp) { + seed = (((tcp_seq) 0xfaded011) * seed) + (tcp_seq) 0x1ead1eaf; + return seed; +} + +/* This is based on tcp_init in tcp_subr.c. */ +void tcp_init(void) { + // Added by Sam: Need to initialize the sackhole pool. + tcp_sack_init(); +#if 0 // I'M NOT USING A HASH TABLE TO STORE TCBS. + const char *tcbhash_tuneable; + int hashsize; + + tcbhash_tuneable = "net.inet.tcp.tcbhashsize"; + + if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, + &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) + printf("%s: WARNING: unable to register helper hook\n", __func__); + if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, + &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) + printf("%s: WARNING: unable to register helper hook\n", __func__); + + hashsize = TCBHASHSIZE; + TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize); + if (hashsize == 0) { + /* + * Auto tune the hash size based on maxsockets. + * A perfect hash would have a 1:1 mapping + * (hashsize = maxsockets) however it's been + * suggested that O(2) average is better. + */ + hashsize = maketcp_hashsize(maxsockets / 4); + /* + * Our historical default is 512, + * do not autotune lower than this. + */ + if (hashsize < 512) + hashsize = 512; + if (bootverbose) + printf("%s: %s auto tuned to %d\n", __func__, + tcbhash_tuneable, hashsize); + } + /* + * We require a hashsize to be a power of two. + * Previously if it was not a power of two we would just reset it + * back to 512, which could be a nasty surprise if you did not notice + * the error message. + * Instead what we do is clip it to the closest power of two lower + * than the specified hash value. 
+ */ + if (!powerof2(hashsize)) { + int oldhashsize = hashsize; + + hashsize = maketcp_hashsize(hashsize); + /* prevent absurdly low value */ + if (hashsize < 16) + hashsize = 16; + printf("%s: WARNING: TCB hash size not a power of 2, " + "clipped from %d to %d.\n", __func__, oldhashsize, + hashsize); + } + in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, + "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_4TUPLE); + + /* + * These have to be type stable for the benefit of the timers. + */ + V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(V_tcpcb_zone, maxsockets); + uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached"); + + tcp_tw_init(); + syncache_init(); + tcp_hc_init(); + + TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack); + V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + + /* Skip initialization of globals for non-default instances. */ + if (!IS_DEFAULT_VNET(curvnet)) + return; + + tcp_reass_global_init(); +#endif + /* XXX virtualize those bellow? */ + +#if 0 // To save memory, I put these in an enum, defined above + tcp_delacktime = TCPTV_DELACK; + tcp_keepinit = TCPTV_KEEP_INIT; + tcp_keepidle = TCPTV_KEEP_IDLE; + tcp_keepintvl = TCPTV_KEEPINTVL; + tcp_maxpersistidle = TCPTV_KEEP_IDLE; + tcp_msl = TCPTV_MSL; +#endif + tcp_rexmit_min = TCPTV_MIN; + if (tcp_rexmit_min < 1) + tcp_rexmit_min = 1; +#if 0 + tcp_rexmit_slop = TCPTV_CPU_VAR; + tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT; +#endif + //tcp_tcbhashsize = hashsize; + +#if 0 // Ignoring this for now (may bring it back later if necessary) + if (tcp_soreceive_stream) { +#ifdef INET + tcp_usrreqs.pru_soreceive = soreceive_stream; +#endif +#ifdef INET6 + tcp6_usrreqs.pru_soreceive = soreceive_stream; +#endif /* INET6 */ + } + +#ifdef INET6 +#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) +#else /* INET6 */ +#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) +#endif /* INET6 */ + if (max_protohdr < TCP_MINPROTOHDR) + max_protohdr = TCP_MINPROTOHDR; + if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) + panic("tcp_init"); +#undef TCP_MINPROTOHDR + + ISN_LOCK_INIT(); + EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, + SHUTDOWN_PRI_DEFAULT); + EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL, + EVENTHANDLER_PRI_ANY); +#ifdef TCPPCAP + tcp_pcap_init(); +#endif +#endif +} + +/* + * A subroutine which makes it easy to track TCP state changes with DTrace. + * This function shouldn't be called for t_state initializations that don't + * correspond to actual TCP state transitions. + */ +void +tcp_state_change(struct tcpcb *tp, int newstate) +{ +#if 0 +#if defined(KDTRACE_HOOKS) + int pstate = tp->t_state; +#endif +#endif + DEBUG("Socket %d: %s --> %s\n", tp->index, tcpstates[tp->t_state], tcpstates[newstate]); + tp->t_state = newstate; +#if 0 + TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate); +#endif +} + + /* This is based on tcp_newtcb in tcp_subr.c, and tcp_usr_attach in tcp_usrreq.c. + The length of the reassembly bitmap is fixed at ceil(0.125 * recvbuflen). 
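+ * For example, recvbuflen = 256 needs a 32-byte bitmap (one bit per
+ * receive-buffer byte), and recvbuflen = 100 needs ceil(100/8) = 13
+ * bytes; BITS_TO_BYTES() below is assumed to perform this rounding-up
+ * conversion.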
*/ +__attribute__((used)) void initialize_tcb(struct tcpcb* tp, uint16_t lport, uint8_t* recvbuf, size_t recvbuflen, uint8_t* reassbmp) { + uint32_t ticks = get_ticks(); + int initindex = tp->index; + + memset(tp, 0x00, sizeof(struct tcpcb)); + tp->reass_fin_index = -1; + tp->lport = lport; + tp->index = initindex; + // Congestion control algorithm. + + // I only implement New Reno, so I'm not going to waste memory in each socket describing what the congestion algorithm is; it's always New Reno +// CC_ALGO(tp) = CC_DEFAULT(); +// tp->ccv->type = IPPROTO_TCP; + tp->ccv->ccvc.tcp = tp; + + tp->t_maxseg = tp->t_maxopd = +//#ifdef INET6 + /*isipv6 ? */V_tcp_v6mssdflt /*:*/ +//#endif /* INET6 */ + /*V_tcp_mssdflt*/; + + if (V_tcp_do_rfc1323) + tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); + if (V_tcp_do_sack) + tp->t_flags |= TF_SACK_PERMIT; + TAILQ_INIT(&tp->snd_holes); + + /* + * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no + * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives + * reasonable initial retransmit time. + */ + tp->t_srtt = TCPTV_SRTTBASE; + tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; + tp->t_rttmin = tcp_rexmit_min; + tp->t_rxtcur = TCPTV_RTOBASE; + tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; + tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; + tp->t_rcvtime = ticks; + + /* From tcp_usr_attach in tcp_usrreq.c. */ + tp->t_state = TCP6S_CLOSED; + + lbuf_init(&tp->sendbuf); + if (recvbuf) { + cbuf_init(&tp->recvbuf, recvbuf, recvbuflen); + tp->reassbmp = reassbmp; + bmp_init(tp->reassbmp, BITS_TO_BYTES(recvbuflen)); + } +} + +void +tcp_discardcb(struct tcpcb *tp) +{ + tcp_cancel_timers(tp); + + /* Allow the CC algorithm to clean up after itself. */ + if (CC_ALGO(tp)->cb_destroy != NULL) + CC_ALGO(tp)->cb_destroy(tp->ccv); + +// khelp_destroy_osd(tp->osd); + +// CC_ALGO(tp) = NULL; + + tcp_free_sackholes(tp); +#if 0 // Most of this is not applicable anymore. Above, I've copied the relevant parts. + struct inpcb *inp = tp->t_inpcb; + struct socket *so = inp->inp_socket; +#ifdef INET6 + int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; +#endif /* INET6 */ + int released; + + INP_WLOCK_ASSERT(inp); + + /* + * Make sure that all of our timers are stopped before we delete the + * PCB. + * + * If stopping a timer fails, we schedule a discard function in same + * callout, and the last discard function called will take care of + * deleting the tcpcb. + */ + tcp_timer_stop(tp, TT_REXMT); + tcp_timer_stop(tp, TT_PERSIST); + tcp_timer_stop(tp, TT_KEEP); + tcp_timer_stop(tp, TT_2MSL); + tcp_timer_stop(tp, TT_DELACK); + + /* + * If we got enough samples through the srtt filter, + * save the rtt and rttvar in the routing entry. + * 'Enough' is arbitrarily defined as 4 rtt samples. + * 4 samples is enough for the srtt filter to converge + * to within enough % of the correct value; fewer samples + * and we could save a bogus rtt. The danger is not high + * as tcp quickly recovers from everything. + * XXX: Works very well but needs some more statistics! + */ + if (tp->t_rttupdated >= 4) { + struct hc_metrics_lite metrics; + u_long ssthresh; + + bzero(&metrics, sizeof(metrics)); + /* + * Update the ssthresh always when the conditions below + * are satisfied. This gives us better new start value + * for the congestion avoidance for new connections. + * ssthresh is only set if packet loss occured on a session. + * + * XXXRW: 'so' may be NULL here, and/or socket buffer may be + * being torn down. Ideally this code would not use 'so'. 
+ */ + ssthresh = tp->snd_ssthresh; + if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { + /* + * convert the limit from user data bytes to + * packets then to packet data bytes. + */ + ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; + if (ssthresh < 2) + ssthresh = 2; + ssthresh *= (u_long)(tp->t_maxseg + +#ifdef INET6 + (isipv6 ? sizeof (struct ip6_hdr) + + sizeof (struct tcphdr) : +#endif + sizeof (struct tcpiphdr) +#ifdef INET6 + ) +#endif + ); + } else + ssthresh = 0; + metrics.rmx_ssthresh = ssthresh; + + metrics.rmx_rtt = tp->t_srtt; + metrics.rmx_rttvar = tp->t_rttvar; + metrics.rmx_cwnd = tp->snd_cwnd; + metrics.rmx_sendpipe = 0; + metrics.rmx_recvpipe = 0; + + tcp_hc_update(&inp->inp_inc, &metrics); + } + + /* free the reassembly queue, if any */ + tcp_reass_flush(tp); + +#ifdef TCP_OFFLOAD + /* Disconnect offload device, if any. */ + if (tp->t_flags & TF_TOE) + tcp_offload_detach(tp); +#endif + + tcp_free_sackholes(tp); + +#ifdef TCPPCAP + /* Free the TCP PCAP queues. */ + tcp_pcap_drain(&(tp->t_inpkts)); + tcp_pcap_drain(&(tp->t_outpkts)); +#endif + + /* Allow the CC algorithm to clean up after itself. */ + if (CC_ALGO(tp)->cb_destroy != NULL) + CC_ALGO(tp)->cb_destroy(tp->ccv); + + khelp_destroy_osd(tp->osd); + + CC_ALGO(tp) = NULL; + inp->inp_ppcb = NULL; + if ((tp->t_timers->tt_flags & TT_MASK) == 0) { + /* We own the last reference on tcpcb, let's free it. */ + tp->t_inpcb = NULL; + uma_zfree(V_tcpcb_zone, tp); + released = in_pcbrele_wlocked(inp); + KASSERT(!released, ("%s: inp %p should not have been released " + "here", __func__, inp)); + } +#endif +} + + + /* + * Attempt to close a TCP control block, marking it as dropped, and freeing + * the socket if we hold the only reference. + */ +struct tcpcb * +tcp_close(struct tcpcb *tp) +{ + // Seriously, it looks like this is all this function does, that I'm concerned with + tcp_state_change(tp, TCP6S_CLOSED); // for the print statement + tcp_discardcb(tp); + // Don't reset the TCB by calling initialize_tcb, since that overwrites the buffer contents. + return tp; +#if 0 + struct inpcb *inp = tp->t_inpcb; + struct socket *so; + + INP_INFO_LOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(inp); + +#ifdef TCP_OFFLOAD + if (tp->t_state == TCPS_LISTEN) + tcp_offload_listen_stop(tp); +#endif + in_pcbdrop(inp); + TCPSTAT_INC(tcps_closed); + KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); + so = inp->inp_socket; + soisdisconnected(so); + if (inp->inp_flags & INP_SOCKREF) { + KASSERT(so->so_state & SS_PROTOREF, + ("tcp_close: !SS_PROTOREF")); + inp->inp_flags &= ~INP_SOCKREF; + INP_WUNLOCK(inp); + ACCEPT_LOCK(); + SOCK_LOCK(so); + so->so_state &= ~SS_PROTOREF; + sofree(so); + return (NULL); + } + return (tp); +#endif +} + +/* + * Create template to be used to send tcp packets on a connection. + * Allocates an mbuf and fills in a skeletal tcp/ip header. The only + * use for this function is in keepalives, which use tcp_respond. + */ + // NOTE: I CHANGED THE SIGNATURE OF THIS FUNCTION +void +tcpip_maketemplate(struct tcpcb* tp, struct tcptemp* t) +{ + //struct tcptemp *t; +#if 0 + t = malloc(sizeof(*t), M_TEMP, M_NOWAIT); +#endif + //t = ip_malloc(sizeof(struct tcptemp)); + //if (t == NULL) + // return (NULL); + tcpip_fillheaders(tp, (void *)&t->tt_ipgen, (void *)&t->tt_t); + //return (t); +} + +/* + * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. + * tcp_template used to store this data in mbufs, but we now recopy it out + * of the tcpcb each time to conserve mbufs. 
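+ *
+ * The expected call pattern in this port is the keepalive path, e.g.
+ *   struct tcptemp t;
+ *   tcpip_maketemplate(tp, &t);  // fills t.tt_ipgen and t.tt_t here
+ *   tcp_respond(tp, (struct ip6_hdr*) t.tt_ipgen, &t.tt_t, ...);
+ * as done in tcp_timer_keep() in tcp_timer.c.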
+ */ + // NOTE: HAS A DIFFERENT SIGNATURE FROM THE ORIGINAL FUNCTION IN tcp_subr.c +void +tcpip_fillheaders(struct tcpcb* tp, void *ip_ptr, void *tcp_ptr) +{ + struct ip6_hdr* ip6 = (struct ip6_hdr*) ip_ptr; + struct tcphdr *th = (struct tcphdr *)tcp_ptr; + +// INP_WLOCK_ASSERT(inp); + +/* I fill in the IP header elsewhere. In send_message in BsdTcpP.nc, to be exact. */ +#if 0 +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0) { + struct ip6_hdr *ip6; + + ip6 = (struct ip6_hdr *)ip_ptr; + ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | + (inp->inp_flow & IPV6_FLOWINFO_MASK); + ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | + (IPV6_VERSION & IPV6_VERSION_MASK); + ip6->ip6_nxt = IPPROTO_TCP; + ip6->ip6_plen = htons(sizeof(struct tcphdr)); + ip6->ip6_src = inp->in6p_laddr; + ip6->ip6_dst = inp->in6p_faddr; + } +#endif /* INET6 */ +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + { + struct ip *ip; + + ip = (struct ip *)ip_ptr; + ip->ip_v = IPVERSION; + ip->ip_hl = 5; + ip->ip_tos = inp->inp_ip_tos; + ip->ip_len = 0; + ip->ip_id = 0; + ip->ip_off = 0; + ip->ip_ttl = inp->inp_ip_ttl; + ip->ip_sum = 0; + ip->ip_p = IPPROTO_TCP; + ip->ip_src = inp->inp_laddr; + ip->ip_dst = inp->inp_faddr; + } +#endif /* INET */ +#endif + /* Fill in the IP header */ + // The source address is copied here in send_message. + ip6->ip6_vfc = 0x60; + memset(&ip6->ip6_src, 0x00, sizeof(ip6->ip6_src)); + ip6->ip6_dst = tp->faddr; + /* Fill in the TCP header */ + //th->th_sport = inp->inp_lport; + //th->th_dport = inp->inp_fport; + th->th_sport = tp->lport; + th->th_dport = tp->fport; + th->th_seq = 0; + th->th_ack = 0; + th->th_x2 = 0; + th->th_off = 5; + th->th_flags = 0; + th->th_win = 0; + th->th_urp = 0; + th->th_sum = 0; /* in_pseudo() is called later for ipv4 */ +} + +/* + * Send a single message to the TCP at address specified by + * the given TCP/IP header. If m == NULL, then we make a copy + * of the tcpiphdr at th and send directly to the addressed host. + * This is used to force keep alive messages out using the TCP + * template for a connection. If flags are given then we send + * a message back to the TCP which originated the segment th, + * and discard the mbuf containing it and any other attached mbufs. + * + * In any case the ack and sequence number of the transmitted + * segment are as specified by the parameters. + * + * NOTE: If m != NULL, then th must point to *inside* the mbuf. + */ +/* Original signature was +void +tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, + tcp_seq ack, tcp_seq seq, int flags) +*/ +void +tcp_respond(struct tcpcb *tp, struct ip6_hdr* ip6gen, struct tcphdr *thgen, + tcp_seq ack, tcp_seq seq, int flags) +{ + /* Again, I rewrote this function for the RIOT port of the code. 
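+	 * The reply segment is built as a two-snip GNRC packet: the TCP
+	 * header snip is allocated first and the IPv6 header snip is
+	 * prepended to it (gnrc_pktbuf_add() with tcpsnip as the next
+	 * pointer), so send_message() receives the chain ip6 -> tcp. All
+	 * header fields come from the *received* segment, with the
+	 * addresses and ports swapped.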
+ */
+    gnrc_pktsnip_t* tcpsnip = gnrc_pktbuf_add(NULL, NULL, sizeof(struct tcphdr), GNRC_NETTYPE_TCP);
+    if (tcpsnip == NULL) {
+        return; // drop the message
+    }
+    gnrc_pktsnip_t* ip6snip = gnrc_pktbuf_add(tcpsnip, NULL, sizeof(struct ip6_hdr), GNRC_NETTYPE_IPV6);
+    if (ip6snip == NULL) {
+        gnrc_pktbuf_release(tcpsnip); // don't leak the TCP header snip
+        return; // drop the message
+    }
+    struct tcphdr* nth = tcpsnip->data;
+    struct ip6_hdr* ip6 = ip6snip->data;
+    int win = 0;
+    if (tp != NULL) {
+        if (!(flags & TH_RST)) {
+            win = cbuf_free_space(&tp->recvbuf);
+            if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+                win = (long)TCP_MAXWIN << tp->rcv_scale;
+        }
+    }
+    ip6->ip6_vfc = 0x60;
+    ip6->ip6_nxt = IANA_TCP;
+    ip6->ip6_plen = htons(sizeof(struct tcphdr));
+    ip6->ip6_src = ip6gen->ip6_dst;
+    ip6->ip6_dst = ip6gen->ip6_src;
+    nth->th_sport = thgen->th_dport;
+    nth->th_dport = thgen->th_sport;
+    nth->th_seq = htonl(seq);
+    nth->th_ack = htonl(ack);
+    nth->th_x2 = 0;
+    nth->th_off = sizeof(struct tcphdr) >> 2;
+    nth->th_flags = flags;
+    if (tp != NULL)
+        nth->th_win = htons((u_short) (win >> tp->rcv_scale));
+    else
+        nth->th_win = htons((u_short)win);
+    nth->th_urp = 0;
+
+    send_message(ip6snip);
+#if 0
+    /* Essentially all the code needs to be discarded because I need to send packets the TinyOS way.
+       There are some parts that I copied; I didn't want to comment out everything except the few
+       lines I needed since I felt that this would be cleaner. */
+    struct ip6_packet* msg;
+    struct ip6_hdr* ip6;
+    struct tcphdr* nth;
+    struct ip_iovec* iov;
+    int alen = sizeof(struct ip6_packet) + sizeof(struct tcphdr) + sizeof(struct ip_iovec);
+    char* bufreal = ip_malloc(alen + 3);
+    int win = 0;
+    char* buf;
+    if (bufreal == NULL) {
+        return; // drop the message
+    }
+    if (tp != NULL) {
+        if (!(flags & TH_RST)) {
+            win = cbuf_free_space(&tp->recvbuf);
+            if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+                win = (long)TCP_MAXWIN << tp->rcv_scale;
+        }
+    }
+    buf = (char*) (((uint32_t) (bufreal + 3)) & 0xFFFFFFFCu);
+    memset(buf, 0, alen); // for good measure
+    msg = (struct ip6_packet*) buf;
+    iov = (struct ip_iovec*) (buf + alen - sizeof(struct ip_iovec));
+    iov->iov_next = NULL;
+    iov->iov_len = sizeof(struct tcphdr);
+    iov->iov_base = (void*) (msg + 1);
+    msg->ip6_data = iov;
+    ip6 = &msg->ip6_hdr;
+    ip6->ip6_nxt = IANA_TCP;
+    ip6->ip6_plen = htons(sizeof(struct tcphdr));
+    ip6->ip6_src = ip6gen->ip6_dst;
+    ip6->ip6_dst = ip6gen->ip6_src;
+    nth = (struct tcphdr*) (ip6 + 1);
+    nth->th_sport = thgen->th_dport;
+    nth->th_dport = thgen->th_sport;
+    nth->th_seq = htonl(seq);
+    nth->th_ack = htonl(ack);
+    nth->th_x2 = 0;
+    nth->th_off = sizeof (struct tcphdr) >> 2;
+    nth->th_flags = flags;
+    if (tp != NULL)
+        nth->th_win = htons((u_short) (win >> tp->rcv_scale));
+    else
+        nth->th_win = htons((u_short)win);
+    nth->th_urp = 0;
+    send_message(tp, msg, nth, sizeof(struct tcphdr));
+    ip_free(bufreal);
+#endif
+#if 0
+    int tlen;
+    int win = 0;
+    struct ip *ip;
+    struct tcphdr *nth;
+#ifdef INET6
+    struct ip6_hdr *ip6;
+    int isipv6;
+#endif /* INET6 */
+    int ipflags = 0;
+    struct inpcb *inp;
+#if 0
+    KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
+
+#ifdef INET6
+    isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4);
+    ip6 = ipgen;
+#endif /* INET6 */
+    ip = ipgen;
+#endif
+
+    if (tp != NULL) {
+        inp = tp->t_inpcb;
+        KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
+        INP_WLOCK_ASSERT(inp);
+    } else
+        inp = NULL;
+
+    if (tp != NULL) {
+        if (!(flags & TH_RST)) {
+            win = sbspace(&inp->inp_socket->so_rcv);
+            if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+                win
= (long)TCP_MAXWIN << tp->rcv_scale; + } + } + if (m == NULL) { + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) + return; + tlen = 0; + m->m_data += max_linkhdr; +#ifdef INET6 + if (isipv6) { + bcopy((caddr_t)ip6, mtod(m, caddr_t), + sizeof(struct ip6_hdr)); + ip6 = mtod(m, struct ip6_hdr *); + nth = (struct tcphdr *)(ip6 + 1); + } else +#endif /* INET6 */ + { + bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); + ip = mtod(m, struct ip *); + nth = (struct tcphdr *)(ip + 1); + } + bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); + flags = TH_ACK; + } else { + /* + * reuse the mbuf. + * XXX MRT We inherrit the FIB, which is lucky. + */ + m_freem(m->m_next); + m->m_next = NULL; + m->m_data = (caddr_t)ipgen; + /* m_len is set later */ + tlen = 0; +#define xchg(a,b,type) { type t; t=a; a=b; b=t; } +#ifdef INET6 + if (isipv6) { + xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); + nth = (struct tcphdr *)(ip6 + 1); + } else +#endif /* INET6 */ + { + xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); + nth = (struct tcphdr *)(ip + 1); + } + if (th != nth) { + /* + * this is usually a case when an extension header + * exists between the IPv6 header and the + * TCP header. + */ + nth->th_sport = th->th_sport; + nth->th_dport = th->th_dport; + } + xchg(nth->th_dport, nth->th_sport, uint16_t); +#undef xchg + } +#ifdef INET6 + if (isipv6) { + ip6->ip6_flow = 0; + ip6->ip6_vfc = IPV6_VERSION; + ip6->ip6_nxt = IPPROTO_TCP; + tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); + ip6->ip6_plen = htons(tlen - sizeof(*ip6)); + } +#endif +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + { + tlen += sizeof (struct tcpiphdr); + ip->ip_len = htons(tlen); + ip->ip_ttl = V_ip_defttl; + if (V_path_mtu_discovery) + ip->ip_off |= htons(IP_DF); + } +#endif + m->m_len = tlen; + m->m_pkthdr.len = tlen; + m->m_pkthdr.rcvif = NULL; +#ifdef MAC + if (inp != NULL) { + /* + * Packet is associated with a socket, so allow the + * label of the response to reflect the socket label. + */ + INP_WLOCK_ASSERT(inp); + mac_inpcb_create_mbuf(inp, m); + } else { + /* + * Packet is not associated with a socket, so possibly + * update the label in place. + */ + mac_netinet_tcp_reply(m); + } +#endif + nth->th_seq = htonl(seq); + nth->th_ack = htonl(ack); + nth->th_x2 = 0; + nth->th_off = sizeof (struct tcphdr) >> 2; + nth->th_flags = flags; + if (tp != NULL) + nth->th_win = htons((u_short) (win >> tp->rcv_scale)); + else + nth->th_win = htons((u_short)win); + nth->th_urp = 0; + + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); +//#ifdef INET6 +// if (isipv6) { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; + nth->th_sum = in6_cksum_pseudo(ip6, + tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); + ip6->ip6_hlim = in6_selecthlim(tp != NULL ? 
tp->t_inpcb : + NULL, NULL); +// } +//#endif /* INET6 */ +#if 0 +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + { + m->m_pkthdr.csum_flags = CSUM_TCP; + nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); + } +#endif /* INET */ +#ifdef TCPDEBUG + if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); +#endif + TCP_PROBE3(debug__input, tp, th, mtod(m, const char *)); + if (flags & TH_RST) + TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *), + tp, nth); +#endif +// TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth); +//#ifdef INET6 + if (isipv6) + (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp); +//#endif /* INET6 */ +#if 0 +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + (void) ip_output(m, NULL, NULL, ipflags, NULL, inp); +#endif +#endif +#endif +} + +/* + * Drop a TCP connection, reporting + * the specified error. If connection is synchronized, + * then send a RST to peer. + */ +/* Sam: I changed the parameter "errno" to "errnum" since it caused + * problems during compilation. + */ +struct tcpcb * +tcp_drop(struct tcpcb *tp, int errnum) +{ +// struct socket *so = tp->t_inpcb->inp_socket; + +// INP_INFO_LOCK_ASSERT(&V_tcbinfo); +// INP_WLOCK_ASSERT(tp->t_inpcb); + + if (TCPS_HAVERCVDSYN(tp->t_state)) { + tcp_state_change(tp, TCPS_CLOSED); + (void) tcp_output(tp); +// TCPSTAT_INC(tcps_drops); + }// else +// TCPSTAT_INC(tcps_conndrops); + if (errnum == ETIMEDOUT && tp->t_softerror) + errnum = tp->t_softerror; +// so->so_error = errnum; +// return (tcp_close(tp)); + tp = tcp_close(tp); + connection_lost(tp, errnum); + return tp; +} + +/* + * Look-up the routing entry to the peer of this inpcb. If no route + * is found and it cannot be allocated, then return 0. This routine + * is called by TCP routines that access the rmx structure and by + * tcp_mss_update to get the peer/interface MTU. + */ +u_long +tcp_maxmtu6(/*struct in_conninfo *inc,*/struct tcpcb* tp, struct tcp_ifcap *cap) +{ + u_long maxmtu = 0; + + KASSERT (tp != NULL, ("tcp_maxmtu6 with NULL tcpcb pointer")); + if (!IN6_IS_ADDR_UNSPECIFIED(&tp->faddr)) { + maxmtu = FRAMES_PER_SEG * FRAMECAP_6LOWPAN; + } + + return (maxmtu); + +#if 0 // I rewrote this function above + struct route_in6 sro6; + struct ifnet *ifp; + u_long maxmtu = 0; + + KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer")); + + bzero(&sro6, sizeof(sro6)); + if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { + sro6.ro_dst.sin6_family = AF_INET6; + sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); + sro6.ro_dst.sin6_addr = inc->inc6_faddr; + in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum); + } + if (sro6.ro_rt != NULL) { + ifp = sro6.ro_rt->rt_ifp; + if (sro6.ro_rt->rt_mtu == 0) + maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp); + else + maxmtu = min(sro6.ro_rt->rt_mtu, + IN6_LINKMTU(sro6.ro_rt->rt_ifp)); + + /* Report additional interface capabilities. 
*/ + if (cap != NULL) { + if (ifp->if_capenable & IFCAP_TSO6 && + ifp->if_hwassist & CSUM_TSO) { + cap->ifcap |= CSUM_TSO; + cap->tsomax = ifp->if_hw_tsomax; + cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; + cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; + } + } + RTFREE(sro6.ro_rt); + } + + return (maxmtu); +#endif +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timer.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timer.c new file mode 100644 index 000000000000..ae14b4ce8e21 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timer.c @@ -0,0 +1,722 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#include +#include + +#include "../gnrc_tcp_freebsd_internal.h" +#include "../lib/lbuf.h" +#include "tcp_fsm.h" +#include "tcp_timer.h" +#include "tcp_var.h" + +#include "tcp_const.h" + +#if 0 +int V_tcp_pmtud_blackhole_detect = 0; +int V_tcp_pmtud_blackhole_failed = 0; +int V_tcp_pmtud_blackhole_activated = 0; +int V_tcp_pmtud_blackhole_activated_min_mss = 0; +#endif + +/* + * TCP timer processing. 
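+ *
+ * Each handler below is invoked by the port's timer scheduler with the
+ * tcpcb as its argument (rather than by a FreeBSD callout); every
+ * handler first clears its TT_* "active" mark via tpcleartimeractive()
+ * so the bookkeeping stays consistent if it re-arms itself through
+ * set_timer().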
+ */ + +void +tcp_timer_delack(/*void *xtp*/struct tcpcb* tp) +{ + KASSERT(tpistimeractive(tp, TT_DELACK), ("Delack timer running, but unmarked\n")); + tpcleartimeractive(tp, TT_DELACK); +#if 0 + struct tcpcb *tp = xtp; + struct inpcb *inp; + CURVNET_SET(tp->t_vnet); + + inp = tp->t_inpcb; + KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); + INP_WLOCK(inp); + if (callout_pending(&tp->t_timers->tt_delack) || + !callout_active(&tp->t_timers->tt_delack)) { + INP_WUNLOCK(inp); + CURVNET_RESTORE(); + return; + } + callout_deactivate(&tp->t_timers->tt_delack); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + CURVNET_RESTORE(); + return; + } + KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, + ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); + KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0, + ("%s: tp %p delack callout should be running", __func__, tp)); +#endif + tp->t_flags |= TF_ACKNOW; +// TCPSTAT_INC(tcps_delack); + (void) tcp_output(tp); +// INP_WUNLOCK(inp); +// CURVNET_RESTORE(); +} + +void +tcp_timer_keep(struct tcpcb* tp) +{ + uint32_t ticks = get_ticks(); + /*struct tcptemp *t_template;*/ + struct tcptemp t_template; + KASSERT(tpistimeractive(tp, TT_KEEP), ("Keep timer running, but unmarked\n")); + tpcleartimeractive(tp, TT_KEEP); // for our own internal bookkeeping +#if 0 // I already cancel this invocation if it was rescheduled meanwhile + struct inpcb *inp; + CURVNET_SET(tp->t_vnet); +#ifdef TCPDEBUG + int ostate; + + ostate = tp->t_state; +#endif + INP_INFO_RLOCK(&V_tcbinfo); + inp = tp->t_inpcb; + KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); + INP_WLOCK(inp); + if (callout_pending(&tp->t_timers->tt_keep) || + !callout_active(&tp->t_timers->tt_keep)) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } + callout_deactivate(&tp->t_timers->tt_keep); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } + KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, + ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); + KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0, + ("%s: tp %p keep callout should be running", __func__, tp)); +#endif + /* + * Keep-alive timer went off; send something + * or drop connection if idle for too long. + */ +// TCPSTAT_INC(tcps_keeptimeo); + if (tp->t_state < TCPS_ESTABLISHED) + goto dropit; + if ((always_keepalive/* || inp->inp_socket->so_options & SO_KEEPALIVE*/) && + tp->t_state <= TCPS_CLOSING) { + if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) + goto dropit; + /* + * Send a packet designed to force a response + * if the peer is up and reachable: + * either an ACK if the connection is still alive, + * or an RST if the peer has closed the connection + * due to timeout or reboot. + * Using sequence number tp->snd_una-1 + * causes the transmitted zero-length segment + * to lie outside the receive window; + * by the protocol spec, this requires the + * correspondent TCP to respond. 
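+	 *
+	 * Concretely (hypothetical numbers): with snd_una = 1000, the probe
+	 * carries sequence number 999 and no payload; a live peer must
+	 * answer with an ACK, while a peer that no longer has state for the
+	 * connection answers with an RST.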
+ */ +// TCPSTAT_INC(tcps_keepprobe); + tcpip_maketemplate(/*inp*/tp, &t_template); + //if (t_template) { + tcp_respond(tp, (struct ip6_hdr*) t_template.tt_ipgen, + &t_template.tt_t,/* (struct mbuf *)NULL,*/ + tp->rcv_nxt, tp->snd_una - 1, 0); + //free(t_template, M_TEMP); + //ip_free(t_template); + //} +#if 0 + if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), + tcp_timer_keep, tp)) { + tp->t_timers->tt_flags &= ~TT_KEEP_RST; + } +#endif + set_timer(tp, TOS_KEEP, TP_KEEPINTVL(tp)); + } else /*if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), + tcp_timer_keep, tp)) { + tp->t_timers->tt_flags &= ~TT_KEEP_RST; + }*/ + { + set_timer(tp, TOS_KEEP, TP_KEEPIDLE(tp)); + } +#if 0 +#ifdef TCPDEBUG + if (inp->inp_socket->so_options & SO_DEBUG) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, + PRU_SLOWTIMO); +#endif + TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); +#endif + return; + +dropit: +// TCPSTAT_INC(tcps_keepdrops); + tp = tcp_drop(tp, ETIMEDOUT); +#if 0 +#ifdef TCPDEBUG + if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, + PRU_SLOWTIMO); +#endif + TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); + if (tp != NULL) + INP_WUNLOCK(tp->t_inpcb); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); +#endif +} + +void +tcp_timer_persist(struct tcpcb* tp) +{ + uint32_t ticks = get_ticks(); + KASSERT(tpistimeractive(tp, TT_PERSIST), ("Persist timer running, but unmarked\n")); + tpcleartimeractive(tp, TT_PERSIST); // mark that this timer is no longer active +#if 0 // I already cancel if a timer was scheduled meanwhile + struct inpcb *inp; + CURVNET_SET(tp->t_vnet); +#ifdef TCPDEBUG + int ostate; + + ostate = tp->t_state; +#endif + INP_INFO_RLOCK(&V_tcbinfo); + inp = tp->t_inpcb; + KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); + INP_WLOCK(inp); + if (callout_pending(&tp->t_timers->tt_persist) || + !callout_active(&tp->t_timers->tt_persist)) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } + callout_deactivate(&tp->t_timers->tt_persist); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } + KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, + ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); + KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0, + ("%s: tp %p persist callout should be running", __func__, tp)); +#endif + /* + * Persistance timer into zero window. + * Force a byte to be output, if possible. + */ +// TCPSTAT_INC(tcps_persisttimeo); + /* + * Hack: if the peer is dead/unreachable, we do not + * time out if the window is closed. After a full + * backoff, drop the connection if the idle time + * (no responses to probes) reaches the maximum + * backoff that we would use if retransmitting. + */ + + if (tp->t_rxtshift == TCP_MAXRXTSHIFT && + (ticks - tp->t_rcvtime >= tcp_maxpersistidle || + ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { +// TCPSTAT_INC(tcps_persistdrop); + tp = tcp_drop(tp, ETIMEDOUT); + goto out; + } + + /* + * If the user has closed the socket then drop a persisting + * connection after a much reduced timeout. 
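+	 * Concretely: once t_state is past CLOSE_WAIT, the connection is
+	 * dropped as soon as (ticks - t_rcvtime) reaches TCPTV_PERSMAX,
+	 * instead of waiting out the full TCP_MAXRXTSHIFT backoff used
+	 * above.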
+ */ + if (tp->t_state > TCPS_CLOSE_WAIT && + (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { +// TCPSTAT_INC(tcps_persistdrop); + tp = tcp_drop(tp, ETIMEDOUT); + goto out; + } + + tcp_setpersist(tp); + tp->t_flags |= TF_FORCEDATA; + printf("Persist output: %lu bytes in sendbuf\n", lbuf_used_space(&tp->sendbuf)); + (void) tcp_output(tp); + tp->t_flags &= ~TF_FORCEDATA; + +out: +#if 0 +#ifdef TCPDEBUG + if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) + tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); +#endif + TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); + if (tp != NULL) + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); +#endif + return; +} + +void +tcp_timer_2msl(struct tcpcb* tp) +{ + uint32_t ticks = get_ticks(); + KASSERT(tpistimeractive(tp, TT_2MSL), ("2MSL timer running, but unmarked\n")); + tpcleartimeractive(tp, TT_2MSL); // for our own bookkeeping +#if 0 + struct inpcb *inp; + CURVNET_SET(tp->t_vnet); +#ifdef TCPDEBUG + int ostate; + + ostate = tp->t_state; +#endif + INP_INFO_RLOCK(&V_tcbinfo); + inp = tp->t_inpcb; + KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); + INP_WLOCK(inp); + tcp_free_sackholes(tp); + if (callout_pending(&tp->t_timers->tt_2msl) || + !callout_active(&tp->t_timers->tt_2msl)) { + INP_WUNLOCK(tp->t_inpcb); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } + callout_deactivate(&tp->t_timers->tt_2msl); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } + KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, + ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); + KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0, + ("%s: tp %p 2msl callout should be running", __func__, tp)); +#endif + /* + * 2 MSL timeout in shutdown went off. If we're closed but + * still waiting for peer to close and connection has been idle + * too long delete connection control block. Otherwise, check + * again in a bit. + * + * If in TIME_WAIT state just ignore as this timeout is handled in + * tcp_tw_2msl_scan(). (Sam: not anymore) + * + * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, + * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. + * Ignore fact that there were recent incoming segments. + */ +#if 0 + if ((inp->inp_flags & INP_TIMEWAIT) != 0) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } +#endif + if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2/* && + tp->t_inpcb && tp->t_inpcb->inp_socket && + (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)*/) { +// TCPSTAT_INC(tcps_finwait2_drops); + tp = tcp_close(tp); + connection_lost(tp, CONN_LOST_NORMAL); + } else if (tp->t_state == TCP6S_TIME_WAIT) { // Added by Sam + /* Normally, this timer isn't used for sockets in the Time-wait state; instead the + tcp_tw_2msl_scan method is called periodically on the slow timer, and expired + tcbtw structs are closed and freed. + + Instead, I keep the socket around, so I just use this timer to do it. 
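+	 * In other words, a socket in TIME-WAIT is torn down right here when
+	 * its 2MSL timer fires: tcp_close() below discards the control
+	 * block's auxiliary state and connection_lost(..., CONN_LOST_NORMAL)
+	 * notifies the socket's owner.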
*/ + tp = tcp_close(tp); + connection_lost(tp, CONN_LOST_NORMAL); + } else { + if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) { + /* + if (!callout_reset(&tp->t_timers->tt_2msl, + TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) { + tp->t_timers->tt_flags &= ~TT_2MSL_RST; + } + */ + set_timer(tp, TOS_2MSL, TP_KEEPINTVL(tp)); + } else { + tp = tcp_close(tp); + connection_lost(tp, CONN_LOST_NORMAL); + } + } +#if 0 +#ifdef TCPDEBUG + if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, + PRU_SLOWTIMO); +#endif + TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); + + if (tp != NULL) + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); +#endif +} + +void +tcp_timer_rexmt(struct tcpcb *tp) +{ +// CURVNET_SET(tp->t_vnet); + int rexmt; + //int headlocked; + uint32_t ticks = get_ticks(); + KASSERT(tpistimeractive(tp, TT_REXMT), ("Rexmt timer running, but unmarked\n")); + tpcleartimeractive(tp, TT_REXMT); // for our own bookkeeping of active timers +// struct inpcb *inp; +#if 0 +#ifdef TCPDEBUG + int ostate; + + ostate = tp->t_state; +#endif +#endif +// INP_INFO_RLOCK(&V_tcbinfo); +// inp = tp->t_inpcb; +// KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); +// INP_WLOCK(inp); +#if 0 // I already handle this edge case in the Timer.fired function in BsdTcpP.nc + if (callout_pending(&tp->t_timers->tt_rexmt) || + !callout_active(&tp->t_timers->tt_rexmt)) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } + callout_deactivate(&tp->t_timers->tt_rexmt); + if ((inp->inp_flags & INP_DROPPED) != 0) { + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + return; + } +#endif +// KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, +// ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); +// KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0, +// ("%s: tp %p rexmt callout should be running", __func__, tp)); +// tcp_free_sackholes(tp); + /* + * Retransmission timer went off. Message has not + * been acked within retransmit interval. Back off + * to a longer retransmit interval and retransmit one segment. + */ + if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { + tp->t_rxtshift = TCP_MAXRXTSHIFT; +// TCPSTAT_INC(tcps_timeoutdrop); + + tp = tcp_drop(tp, tp->t_softerror ? + tp->t_softerror : ETIMEDOUT); + //headlocked = 1; + goto out; + } +// INP_INFO_RUNLOCK(&V_tcbinfo); + //headlocked = 0; + if (tp->t_state == TCPS_SYN_SENT) { + /* + * If the SYN was retransmitted, indicate CWND to be + * limited to 1 segment in cc_conn_init(). + */ + tp->snd_cwnd = 1; + } else if (tp->t_rxtshift == 1) { + /* + * first retransmit; record ssthresh and cwnd so they can + * be recovered if this turns out to be a "bad" retransmit. + * A retransmit is considered "bad" if an ACK for this + * segment is received within RTT/2 interval; the assumption + * here is that the ACK was already in flight. See + * "On Estimating End-to-End Network Path Properties" by + * Allman and Paxson for more details. 
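+	 * Here that interval is t_badrxtwin = ticks + (t_srtt >>
+	 * (TCP_RTT_SHIFT + 1)), i.e. about half the smoothed RTT (t_srtt is
+	 * stored left-shifted by TCP_RTT_SHIFT); an ACK that arrives before
+	 * it expires lets the values saved below be restored.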
+ */ + tp->snd_cwnd_prev = tp->snd_cwnd; + tp->snd_ssthresh_prev = tp->snd_ssthresh; + tp->snd_recover_prev = tp->snd_recover; + if (IN_FASTRECOVERY(tp->t_flags)) + tp->t_flags |= TF_WASFRECOVERY; + else + tp->t_flags &= ~TF_WASFRECOVERY; + if (IN_CONGRECOVERY(tp->t_flags)) + tp->t_flags |= TF_WASCRECOVERY; + else + tp->t_flags &= ~TF_WASCRECOVERY; + tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); + tp->t_flags |= TF_PREVVALID; + } else + tp->t_flags &= ~TF_PREVVALID; +// TCPSTAT_INC(tcps_rexmttimeo); + if (tp->t_state == TCPS_SYN_SENT) + rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift]; + else + rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; + TCPT_RANGESET(tp->t_rxtcur, rexmt, + tp->t_rttmin, TCPTV_REXMTMAX); + +# if 0 // DON'T ATTEMPT BLACKHOLE DETECTION. OUR MTU SHOULD BE SMALL ENOUGH THAT ANY ROUTER CAN ROUTE IT + /* + * We enter the path for PLMTUD if connection is established or, if + * connection is FIN_WAIT_1 status, reason for the last is that if + * amount of data we send is very small, we could send it in couple of + * packets and process straight to FIN. In that case we won't catch + * ESTABLISHED state. + */ + if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED)) + || (tp->t_state == TCPS_FIN_WAIT_1))) { + int optlen; +//#ifdef INET6 + int isipv6; +//#endif + + /* + * Idea here is that at each stage of mtu probe (usually, 1448 + * -> 1188 -> 524) should be given 2 chances to recover before + * further clamping down. 'tp->t_rxtshift % 2 == 0' should + * take care of that. + */ + if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) == + (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) && + (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) { + /* + * Enter Path MTU Black-hole Detection mechanism: + * - Disable Path MTU Discovery (IP "DF" bit). + * - Reduce MTU to lower value than what we + * negotiated with peer. + */ + /* Record that we may have found a black hole. */ + tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE; + + /* Keep track of previous MSS. */ + optlen = tp->t_maxopd - tp->t_maxseg; + tp->t_pmtud_saved_maxopd = tp->t_maxopd; + + /* + * Reduce the MSS to blackhole value or to the default + * in an attempt to retransmit. + */ +//#ifdef INET6 + //isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0; + isipv6 = 1; + if (isipv6 && + tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) { + /* Use the sysctl tuneable blackhole MSS. */ + tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss; + V_tcp_pmtud_blackhole_activated++; + } else if (isipv6) { + /* Use the default MSS. */ + tp->t_maxopd = V_tcp_v6mssdflt; + /* + * Disable Path MTU Discovery when we switch to + * minmss. + */ + tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; + V_tcp_pmtud_blackhole_activated_min_mss++; + } +//#endif +#if 0 +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) { + /* Use the sysctl tuneable blackhole MSS. */ + tp->t_maxopd = V_tcp_pmtud_blackhole_mss; + V_tcp_pmtud_blackhole_activated++; + } else { + /* Use the default MSS. */ + tp->t_maxopd = V_tcp_mssdflt; + /* + * Disable Path MTU Discovery when we switch to + * minmss. + */ + tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; + V_tcp_pmtud_blackhole_activated_min_mss++; + } +#endif +#endif + tp->t_maxseg = tp->t_maxopd - optlen; + /* + * Reset the slow-start flight size + * as it may depend on the new MSS. 
+ */ + if (CC_ALGO(tp)->conn_init != NULL) + CC_ALGO(tp)->conn_init(tp->ccv); + } else { + /* + * If further retransmissions are still unsuccessful + * with a lowered MTU, maybe this isn't a blackhole and + * we restore the previous MSS and blackhole detection + * flags. + * The limit '6' is determined by giving each probe + * stage (1448, 1188, 524) 2 chances to recover. + */ + if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) && + (tp->t_rxtshift > 6)) { + tp->t_flags2 |= TF2_PLPMTU_PMTUD; + tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; + optlen = tp->t_maxopd - tp->t_maxseg; + tp->t_maxopd = tp->t_pmtud_saved_maxopd; + tp->t_maxseg = tp->t_maxopd - optlen; + V_tcp_pmtud_blackhole_failed++; + /* + * Reset the slow-start flight size as it + * may depend on the new MSS. + */ + if (CC_ALGO(tp)->conn_init != NULL) + CC_ALGO(tp)->conn_init(tp->ccv); + } + } + } +#endif + + /* + * Disable RFC1323 and SACK if we haven't got any response to + * our third SYN to work-around some broken terminal servers + * (most of which have hopefully been retired) that have bad VJ + * header compression code which trashes TCP segments containing + * unknown-to-them TCP options. + */ + if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && + (tp->t_rxtshift == 3)) + tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); + /* + * If we backed off this far, our srtt estimate is probably bogus. + * Clobber it so we'll take the next rtt measurement as our srtt; + * move the current srtt into rttvar to keep the current + * retransmit times until then. + */ + if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { +//#ifdef INET6 +// if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) +// in6_losing(tp->t_inpcb); +//#endif + tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); + tp->t_srtt = 0; + } + tp->snd_nxt = tp->snd_una; + tp->snd_recover = tp->snd_max; + /* + * Force a segment to be sent. + */ + tp->t_flags |= TF_ACKNOW; + /* + * If timing a segment in this window, stop the timer. 
+ */ + tp->t_rtttime = 0; + + cc_cong_signal(tp, NULL, CC_RTO); + + (void) tcp_output(tp); + +out: +#if 0 +#ifdef TCPDEBUG + if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, + PRU_SLOWTIMO); +#endif + TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); + if (tp != NULL) + INP_WUNLOCK(inp); + if (headlocked) + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); +#endif + return; +} + +int +tcp_timer_active(struct tcpcb *tp, uint32_t timer_type) +{ + return tpistimeractive(tp, timer_type); +} + +void +tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta) { + uint8_t tos_timer; + switch (timer_type) { + case TT_DELACK: + tos_timer = TOS_DELACK; + break; + case TT_REXMT: + tos_timer = TOS_REXMT; + break; + case TT_PERSIST: + tos_timer = TOS_PERSIST; + break; + case TT_KEEP: + tos_timer = TOS_KEEP; + break; + case TT_2MSL: + tos_timer = TOS_2MSL; + break; + default: + printf("Invalid timer 0x%lx: skipping\n", timer_type); + return; + } + if (delta) { + tpmarktimeractive(tp, timer_type); + if (tpistimeractive(tp, TT_REXMT) && tpistimeractive(tp, TT_PERSIST)) { + char* msg = "TCP CRITICAL FAILURE: Retransmit and Persist timers are simultaneously running!\n"; + printf("%s\n", msg); + } + set_timer(tp, tos_timer, (uint32_t) delta); + } else { + tpcleartimeractive(tp, timer_type); + stop_timer(tp, tos_timer); + } +} + +void +tcp_cancel_timers(struct tcpcb* tp) { + tpcleartimeractive(tp, TOS_DELACK); + stop_timer(tp, TOS_DELACK); + tpcleartimeractive(tp, TOS_REXMT); + stop_timer(tp, TOS_REXMT); + tpcleartimeractive(tp, TOS_PERSIST); + stop_timer(tp, TOS_PERSIST); + tpcleartimeractive(tp, TOS_KEEP); + stop_timer(tp, TOS_KEEP); + tpcleartimeractive(tp, TOS_2MSL); + stop_timer(tp, TOS_2MSL); +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timer.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timer.h new file mode 100644 index 000000000000..8c1cdd2ca77a --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timer.h @@ -0,0 +1,249 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD$ + */ + +#ifndef _NETINET_TCP_TIMER_H_ +#define _NETINET_TCP_TIMER_H_ + +#include "tcp_var.h" + +/* + * The TCPT_REXMT timer is used to force retransmissions. + * The TCP has the TCPT_REXMT timer set whenever segments + * have been sent for which ACKs are expected but not yet + * received. If an ACK is received which advances tp->snd_una, + * then the retransmit timer is cleared (if there are no more + * outstanding segments) or reset to the base value (if there + * are more ACKs expected). Whenever the retransmit timer goes off, + * we retransmit one unacknowledged segment, and do a backoff + * on the retransmit timer. + * + * The TCPT_PERSIST timer is used to keep window size information + * flowing even if the window goes shut. If all previous transmissions + * have been acknowledged (so that there are no retransmissions in progress), + * and the window is too small to bother sending anything, then we start + * the TCPT_PERSIST timer. When it expires, if the window is nonzero, + * we go to transmit state. Otherwise, at intervals send a single byte + * into the peer's window to force him to update our window information. + * We do this at most as often as TCPT_PERSMIN time intervals, + * but no more frequently than the current estimate of round-trip + * packet time. The TCPT_PERSIST timer is cleared whenever we receive + * a window update from the peer. + * + * The TCPT_KEEP timer is used to keep connections alive. If an + * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, + * but not yet established, then we drop the connection. Once the connection + * is established, if the connection is idle for TCPTV_KEEP_IDLE time + * (and keepalives have been enabled on the socket), we begin to probe + * the connection. We force the peer to send us a segment by sending: + * + * This segment is (deliberately) outside the window, and should elicit + * an ack segment in response from the peer. If, despite the TCPT_KEEP + * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE + * amount of time probing, then we drop the connection. + */ + +#define TT_DELACK 0x0001 +#define TT_REXMT 0x0002 +#define TT_PERSIST 0x0004 +#define TT_KEEP 0x0008 +#define TT_2MSL 0x0010 + +// To interface with TinyOS, each timer must take up only 2 bits +#define TOS_DELACK 0x0 +#define TOS_REXMT 0x1 +#define TOS_PERSIST 0x1 // The same timer is used for Persist and Retransmit, since both can't be running simultaneously +#define TOS_KEEP 0x2 +#define TOS_2MSL 0x3 + +/* + * Time constants. + */ +#define TCPTV_MSL ( 30*hz) /* max seg lifetime (hah!) 
*/ +#define TCPTV_SRTTBASE 0 /* base roundtrip time; + if 0, no idea yet */ +#define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */ + +#define TCPTV_PERSMIN ( 5*hz) /* retransmit persistence */ +#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */ + +#define TCPTV_KEEP_INIT ( 75*hz) /* initial connect keepalive */ +#define TCPTV_KEEP_IDLE (120*60*hz) /* dflt time before probing */ +#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */ +#define TCPTV_KEEPCNT 8 /* max probes before drop */ + +#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */ + +/* + * Minimum retransmit timer is 3 ticks, for algorithmic stability. + * TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with + * the expected worst-case processing variances by the kernels + * representing the end points. Such variances do not always show + * up in the srtt because the timestamp is often calculated at + * the interface rather then at the TCP layer. This value is + * typically 50ms. However, it is also possible that delayed + * acks (typically 100ms) could create issues so we set the slop + * to 200ms to try to cover it. Note that, properly speaking, + * delayed-acks should not create a major issue for interactive + * environments which 'P'ush the last segment, at least as + * long as implementations do the required 'at least one ack + * for every two packets' for the non-interactive streaming case. + * (maybe the RTO calculation should use 2*RTT instead of RTT + * to handle the ack-every-other-packet case). + * + * The prior minimum of 1*hz (1 second) badly breaks throughput on any + * networks faster then a modem that has minor (e.g. 1%) packet loss. + */ +#define TCPTV_MIN ( hz/33 ) /* minimum allowable value */ +#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */ +#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */ + +#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ + +#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ + +#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ + +#define TCPTV_DELACK ( hz/10 ) /* 100ms timeout */ + +#ifdef TCPTIMERS +static const char *tcptimers[] = + { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" }; +#endif + +int tcp_timer_active(struct tcpcb *tp, uint32_t timer_type); +void tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta); +void tcp_cancel_timers(struct tcpcb* tp); + +/* I moved the definition of TCPT_RANGESET to tcp_const.h. */ + +static const int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = + { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; + +static const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = + { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; + +static const int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ + +void tcp_timer_delack(struct tcpcb* tp); +void tcp_timer_keep(struct tcpcb* tp); +void tcp_timer_persist(struct tcpcb* tp); +void tcp_timer_2msl(struct tcpcb* tp); +void tcp_timer_rexmt(struct tcpcb *tp); +int tcp_timer_active(struct tcpcb *tp, uint32_t timer_type); + +/* Copied from below, with modifications. */ +#define TP_KEEPINIT(tp) (/*(tp)->t_keepinit ? (tp)->t_keepinit :*/ tcp_keepinit) +#define TP_KEEPIDLE(tp) (/*(tp)->t_keepidle ? (tp)->t_keepidle :*/ tcp_keepidle) +#define TP_KEEPINTVL(tp) (/*(tp)->t_keepintvl ? (tp)->t_keepintvl :*/ tcp_keepintvl) +#define TP_KEEPCNT(tp) (/*(tp)->t_keepcnt ? 
(tp)->t_keepcnt :*/ tcp_keepcnt) +#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp)) + +//extern int tcp_keepcnt; /* number of keepalives */ + +// MOVED NECESSARY EXTERN DECLARATIONS TO TCP_SUBR.C +#if 0 // I'M IMPLEMENTING TIMERS MY OWN WAY IN TINYOS, SO I DON'T NEED THIS + +#ifdef _KERNEL + +struct xtcp_timer; + +struct tcp_timer { + struct callout tt_rexmt; /* retransmit timer */ + struct callout tt_persist; /* retransmit persistence */ + struct callout tt_keep; /* keepalive */ + struct callout tt_2msl; /* 2*msl TIME_WAIT timer */ + struct callout tt_delack; /* delayed ACK timer */ + uint32_t tt_flags; /* Timers flags */ + uint32_t tt_spare; /* TDB */ +}; + +/* + * Flags for the tt_flags field. + */ +#define TT_DELACK 0x0001 +#define TT_REXMT 0x0002 +#define TT_PERSIST 0x0004 +#define TT_KEEP 0x0008 +#define TT_2MSL 0x0010 +#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL) + +#define TT_DELACK_RST 0x0100 +#define TT_REXMT_RST 0x0200 +#define TT_PERSIST_RST 0x0400 +#define TT_KEEP_RST 0x0800 +#define TT_2MSL_RST 0x1000 + +#define TT_STOPPED 0x00010000 + +#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit) +#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle) +#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl) +#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt) +#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp)) + +extern int tcp_keepinit; /* time to establish connection */ +extern int tcp_keepidle; /* time before keepalive probes begin */ +extern int tcp_keepintvl; /* time between keepalive probes */ +extern int tcp_keepcnt; /* number of keepalives */ +extern int tcp_delacktime; /* time before sending a delayed ACK */ +extern int tcp_maxpersistidle; +extern int tcp_rexmit_min; +extern int tcp_rexmit_slop; +extern int tcp_msl; +extern int tcp_ttl; /* time to live for TCP segs */ +extern int tcp_backoff[]; +extern int tcp_syn_backoff[]; + +extern int tcp_finwait2_timeout; +extern int tcp_fast_finwait2_recycle; + +void tcp_timer_init(void); +void tcp_timer_2msl(void *xtp); +struct tcptw * + tcp_tw_2msl_scan(int reuse); /* XXX temporary? */ +void tcp_timer_keep(void *xtp); +void tcp_timer_persist(void *xtp); +void tcp_timer_rexmt(void *xtp); +void tcp_timer_delack(void *xtp); +void tcp_timer_2msl_discard(void *xtp); +void tcp_timer_keep_discard(void *xtp); +void tcp_timer_persist_discard(void *xtp); +void tcp_timer_rexmt_discard(void *xtp); +void tcp_timer_delack_discard(void *xtp); +void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, + struct xtcp_timer *xtimer); + +#endif /* _KERNEL */ + +#endif + +#endif /* !_NETINET_TCP_TIMER_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timewait.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timewait.c new file mode 100644 index 000000000000..c6c455fd13fb --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_timewait.c @@ -0,0 +1,503 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 + */ + +#include "tcp.h" +#include "tcp_fsm.h" +#include "tcp_seq.h" +#include "tcp_timer.h" +#include "tcp_var.h" + +#include "tcp_const.h" +#include "net/gnrc/pktbuf.h" + +enum tcp_timewait_consts { + V_nolocaltimewait = 0 // For now, to keep things simple +}; + +static void +tcp_tw_2msl_reset(struct tcpcb* tp, int rearm) +{ + +// INP_INFO_RLOCK_ASSERT(&V_tcbinfo); +// INP_WLOCK_ASSERT(tw->tw_inpcb); + +// TW_WLOCK(V_tw_lock); +// if (rearm) +// TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl); +// /*tw*/tp->tw_time = get_ticks()/*ticks*/ + 2 * tcp_msl; + tcp_timer_activate(tp, TT_2MSL, 2 * tcp_msl); +// TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl); +// TW_WUNLOCK(V_tw_lock); +} + +static int +tcp_twrespond(struct tcpcb* tp, int flags) +{ + /* Essentially all the code needs to be discarded because I need to send packets the TinyOS way. + There are some parts that I copied; I didn't want to comment out everything except the few + lines I needed since I felt that this would be cleaner. + + Update: I just made inline updates for the RIOT OS version. */ + //struct ip6_packet* msg; + struct ip6_hdr* ip6; + struct tcphdr* nth; + //struct ip_iovec* iov; + struct tcpopt to; + u_int optlen = 0; + u_char opt[TCP_MAXOLEN]; + //int alen; + //char* bufreal; + int win = 0; + //char* buf; + + to.to_flags = 0; + + /* + * Send a timestamp and echo-reply if both our side and our peer + * have sent timestamps in our SYN's and this is not a RST. 
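+ *
+ * Port note: the response below is built the RIOT/GNRC way. The TCP
+ * header plus options go into a GNRC_NETTYPE_TCP pktsnip, which is
+ * chained under a GNRC_NETTYPE_IPV6 snip holding the ip6_hdr, and the
+ * whole packet is handed to send_message(). The source address is
+ * zeroed here and presumably filled in by the IPv6 layer on the way out.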
+ */ + if (/*tw->t_recent*/(tp->t_flags & TF_RCVD_TSTMP) && flags == TH_ACK) { + to.to_flags |= TOF_TS; + to.to_tsval = tcp_ts_getticks() + /*tw->ts_offset*/tp->ts_offset; + to.to_tsecr = /*tw->t_recent*/tp->ts_recent; + } + optlen = tcp_addoptions(&to, opt); + + gnrc_pktsnip_t* tcpsnip = gnrc_pktbuf_add(NULL, NULL, sizeof(struct tcphdr) + optlen, GNRC_NETTYPE_TCP); + if (tcpsnip == NULL) { + return 0; // drop the message; + } + gnrc_pktsnip_t* ip6snip = gnrc_pktbuf_add(tcpsnip, NULL, sizeof(struct ip6_hdr), GNRC_NETTYPE_IPV6); + if (ip6snip == NULL) { + gnrc_pktbuf_release(tcpsnip); + return 0; // drop the message; + } + + nth = tcpsnip->data; + ip6 = ip6snip->data; + + #if 0 + alen = sizeof(struct ip6_packet) + sizeof(struct tcphdr) + optlen + sizeof(struct ip_iovec); + bufreal = ip_malloc(alen + 3); + if (bufreal == NULL) { + return 0; // drop the message + } + #endif + if (tp != NULL) { + if (!(flags & TH_RST)) { + win = cbuf_free_space(&tp->recvbuf); + if (win > (long)TCP_MAXWIN << tp->rcv_scale) + win = (long)TCP_MAXWIN << tp->rcv_scale; + } + } + #if 0 + buf = (char*) (((uint32_t) (bufreal + 3)) & 0xFFFFFFFCu); + memset(buf, 0, alen); // for safe measure + msg = (struct ip6_packet*) buf; + iov = (struct ip_iovec*) (buf + alen - sizeof(struct ip_iovec)); + iov->iov_next = NULL; + iov->iov_len = sizeof(struct tcphdr) + optlen; + iov->iov_base = (void*) (msg + 1); + msg->ip6_data = iov; + ip6 = &msg->ip6_hdr; + #endif + ip6->ip6_vfc = 0x60; + ip6->ip6_nxt = IANA_TCP; + ip6->ip6_plen = htons(sizeof(struct tcphdr) + optlen); + memset(&ip6->ip6_src, 0x00, sizeof(ip6->ip6_src)); + ip6->ip6_dst = tp->faddr; + nth->th_sport = tp->lport; + nth->th_dport = tp->fport; + nth->th_seq = htonl(tp->snd_nxt); + nth->th_ack = htonl(tp->rcv_nxt); + nth->th_x2 = 0; + nth->th_off = (sizeof(struct tcphdr) + optlen) >> 2; + nth->th_flags = flags; + nth->th_win = htons(tp->tw_last_win); + nth->th_urp = 0; + + memcpy(nth + 1, opt, optlen); + + send_message(ip6snip); + + return 0; +#if 0 +// struct inpcb *inp = tw->tw_inpcb; +#if defined(INET6) || defined(INET) + struct tcphdr *th = NULL; +#endif + struct mbuf *m; +#ifdef INET + struct ip *ip = NULL; +#endif + u_int hdrlen, optlen; + int error = 0; /* Keep compiler happy */ + struct tcpopt to; +#ifdef INET6 + struct ip6_hdr *ip6 = NULL; + int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6; +#endif + hdrlen = 0; /* Keep compiler happy */ + + INP_WLOCK_ASSERT(inp); + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) + return (ENOBUFS); + m->m_data += max_linkhdr; + +#ifdef MAC + mac_inpcb_create_mbuf(inp, m); +#endif + +#ifdef INET6 + if (isipv6) { + hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + ip6 = mtod(m, struct ip6_hdr *); + th = (struct tcphdr *)(ip6 + 1); + tcpip_fillheaders(inp, ip6, th); + } +#endif +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + { + hdrlen = sizeof(struct tcpiphdr); + ip = mtod(m, struct ip *); + th = (struct tcphdr *)(ip + 1); + tcpip_fillheaders(inp, ip, th); + } +#endif + to.to_flags = 0; + + /* + * Send a timestamp and echo-reply if both our side and our peer + * have sent timestamps in our SYN's and this is not a RST. 
+ */ + if (tw->t_recent && flags == TH_ACK) { + to.to_flags |= TOF_TS; + to.to_tsval = tcp_ts_getticks() + tw->ts_offset; + to.to_tsecr = tw->t_recent; + } + optlen = tcp_addoptions(&to, (u_char *)(th + 1)); + + m->m_len = hdrlen + optlen; + m->m_pkthdr.len = m->m_len; + + KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small")); + + th->th_seq = htonl(tw->snd_nxt); + th->th_ack = htonl(tw->rcv_nxt); + th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; + th->th_flags = flags; + th->th_win = htons(tw->last_win); + + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); +#ifdef INET6 + if (isipv6) { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; + th->th_sum = in6_cksum_pseudo(ip6, + sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0); + ip6->ip6_hlim = in6_selecthlim(inp, NULL); + error = ip6_output(m, inp->in6p_outputopts, NULL, + (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp); + } +#endif +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + { + m->m_pkthdr.csum_flags = CSUM_TCP; + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); + ip->ip_len = htons(m->m_pkthdr.len); + if (V_path_mtu_discovery) + ip->ip_off |= htons(IP_DF); + error = ip_output(m, inp->inp_options, NULL, + ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), + NULL, inp); + } +#endif + if (flags & TH_ACK) + TCPSTAT_INC(tcps_sndacks); + else + TCPSTAT_INC(tcps_sndctrl); + TCPSTAT_INC(tcps_sndtotal); + return (error); +#endif +} + +/* + * Move a TCP connection into TIME_WAIT state. + * tcbinfo is locked. + * inp is locked, and is unlocked before returning. + */ +void +tcp_twstart(struct tcpcb *tp) +{ +#if 0 + struct tcptw *tw; + struct inpcb *inp = tp->t_inpcb; +#endif + int acknow; +#if 0 + struct socket *so; +#ifdef INET6 + int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6; +#endif + + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + INP_WLOCK_ASSERT(inp); +#endif +// if (V_nolocaltimewait) { +// int error = 0; +//#ifdef INET6 +// if (isipv6) +// error = in6_localaddr(&inp->in6p_faddr); +//#endif +//#if defined(INET6) && defined(INET) +// else +//#endif +//#ifdef INET +// error = in_localip(inp->inp_faddr); +//#endif +// if (error) { +// tp = tcp_close(tp); +// if (tp != NULL) +// INP_WUNLOCK(inp); +// return; +// } +// } + + /* + * For use only by DTrace. We do not reference the state + * after this point so modifying it in place is not a problem. + * Sam: Not true anymore. I use this state, since I don't associate every struct tcpcb with a struct inpcb. + */ + tcp_state_change(tp, TCPS_TIME_WAIT); + +#if 0 //RATHER THAN CLOSING THE SOCKET AND KEEPING TRACK OF TIMEWAIT USING THE struct tcptw, I'M JUST GOING TO KEEP AROUND THE struct tcpcb + tw = uma_zalloc(V_tcptw_zone, M_NOWAIT); + if (tw == NULL) { + /* + * Reached limit on total number of TIMEWAIT connections + * allowed. Remove a connection from TIMEWAIT queue in LRU + * fashion to make room for this connection. + * + * XXX: Check if it possible to always have enough room + * in advance based on guarantees provided by uma_zalloc(). + */ + tw = tcp_tw_2msl_scan(1); + if (tw == NULL) { + tp = tcp_close(tp); + if (tp != NULL) + INP_WUNLOCK(inp); + return; + } + } + /* + * The tcptw will hold a reference on its inpcb until tcp_twclose + * is called + */ + tw->tw_inpcb = inp; + in_pcbref(inp); /* Reference from tw */ +#endif + /* + * Recover last window size sent. 
+ */ + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) + tp->tw_last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; + else + tp->tw_last_win = 0; + + /* + * Set t_recent if timestamps are used on the connection. + */ + if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == + (TF_REQ_TSTMP|TF_RCVD_TSTMP)) { +// tw->t_recent = tp->ts_recent; +// tw->ts_offset = tp->ts_offset; + } else { + tp->/*t_recent*/ts_recent = 0; + tp->ts_offset = 0; + } + +// tw->snd_nxt = tp->snd_nxt; +// tw->rcv_nxt = tp->rcv_nxt; +// tw->iss = tp->iss; +// tw->irs = tp->irs; +// tw->t_starttime = tp->t_starttime; + /*tw*/tp->tw_time = 0; + +/* XXX + * If this code will + * be used for fin-wait-2 state also, then we may need + * a ts_recent from the last segment. + */ + acknow = tp->t_flags & TF_ACKNOW; + + /* + * First, discard tcpcb state, which includes stopping its timers and + * freeing it. tcp_discardcb() used to also release the inpcb, but + * that work is now done in the caller. + * + * Note: soisdisconnected() call used to be made in tcp_discardcb(), + * and might not be needed here any longer. + */ + tcp_cancel_timers(tp); /*tcp_discardcb(tp);*/ // The discardcb() call needs to be moved to tcp_close() +// so = inp->inp_socket; +// soisdisconnected(so); +// tw->tw_cred = crhold(so->so_cred); +// SOCK_LOCK(so); +// tw->tw_so_options = so->so_options; +// SOCK_UNLOCK(so); + if (acknow) + tcp_twrespond(/*tw*/tp, TH_ACK); +// inp->inp_ppcb = tw; +// inp->inp_flags |= INP_TIMEWAIT; + tcp_tw_2msl_reset(/*tw*/tp, 0); +#if 0 + /* + * If the inpcb owns the sole reference to the socket, then we can + * detach and free the socket as it is not needed in time wait. + */ + if (inp->inp_flags & INP_SOCKREF) { + KASSERT(so->so_state & SS_PROTOREF, + ("tcp_twstart: !SS_PROTOREF")); + inp->inp_flags &= ~INP_SOCKREF; + INP_WUNLOCK(inp); + ACCEPT_LOCK(); + SOCK_LOCK(so); + so->so_state &= ~SS_PROTOREF; + sofree(so); + } else + INP_WUNLOCK(inp); +#endif +} + +/* + * Returns 1 if the TIME_WAIT state was killed and we should start over, + * looking for a pcb in the listen state. Returns 0 otherwise. + */ +int +tcp_twcheck(struct tcpcb* tp,/*struct inpcb *inp, struct tcpopt *to __unused, */struct tcphdr *th, + /*struct mbuf *m, */int tlen) +{ +// struct tcptw *tw; + int thflags; + tcp_seq seq; + +// INP_INFO_RLOCK_ASSERT(&V_tcbinfo); +// INP_WLOCK_ASSERT(inp); + + /* + * XXXRW: Time wait state for inpcb has been recycled, but inpcb is + * still present. This is undesirable, but temporarily necessary + * until we work out how to handle inpcb's who's timewait state has + * been removed. + */ +// tw = intotw(inp); +// if (tw == NULL) +// goto drop; + + thflags = th->th_flags; + + /* + * NOTE: for FIN_WAIT_2 (to be added later), + * must validate sequence number before accepting RST + */ + + /* + * If the segment contains RST: + * Drop the segment - see Stevens, vol. 2, p. 964 and + * RFC 1337. + */ + if (thflags & TH_RST) + goto drop; + +#if 0 +/* PAWS not needed at the moment */ + /* + * RFC 1323 PAWS: If we have a timestamp reply on this segment + * and it's less than ts_recent, drop it. + */ + if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && + TSTMP_LT(to.to_tsval, tp->ts_recent)) { + if ((thflags & TH_ACK) == 0) + goto drop; + goto ack; + } + /* + * ts_recent is never updated because we never accept new segments. + */ +#endif + + /* + * If a new connection request is received + * while in TIME_WAIT, drop the old connection + * and start over if the sequence numbers + * are above the previous ones. 
+ */ + if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, /*tw*/tp->rcv_nxt)) { + //tcp_twclose(tw, 0); + tcp_close(tp); + connection_lost(tp, CONN_LOST_NORMAL); + return (1); + } + + /* + * Drop the segment if it does not contain an ACK. + */ + if ((thflags & TH_ACK) == 0) + goto drop; + + /* + * Reset the 2MSL timer if this is a duplicate FIN. + */ + if (thflags & TH_FIN) { + seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0); + if (seq + 1 == /*tw*/tp->rcv_nxt) + tcp_tw_2msl_reset(/*tw*/tp, 1); + } + + /* + * Acknowledge the segment if it has data or is not a duplicate ACK. + */ + if (thflags != TH_ACK || tlen != 0 || + th->th_seq != /*tw*/tp->rcv_nxt || th->th_ack != /*tw*/tp->snd_nxt) + tcp_twrespond(/*tw*/tp, TH_ACK); +drop: +// INP_WUNLOCK(inp); +// m_freem(m); + return (0); +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_usrreq.c b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_usrreq.c new file mode 100644 index 000000000000..61f08b1b7380 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_usrreq.c @@ -0,0 +1,753 @@ +/*- + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. + * Copyright (c) 2006-2007 Robert N. M. Watson + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ *    From: @(#)tcp_usrreq.c    8.2 (Berkeley) 1/3/94
+ */
+
+#include <errno.h>
+
+#include "../gnrc_tcp_freebsd_internal.h"
+#include "tcp.h"
+#include "tcp_fsm.h"
+#include "tcp_seq.h"
+#include "tcp_var.h"
+#include "tcp_timer.h"
+#include
+#include "ip6.h"
+#include "../lib/lbuf.h"
+
+#include "tcp_const.h"
+
+#include "debug.h"
+
+//static void tcp_disconnect(struct tcpcb *);
+static void tcp_usrclosed(struct tcpcb *);
+
+#if 0
+static int
+tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+    int error = 0;
+    struct inpcb *inp;
+    struct tcpcb *tp = NULL;
+    struct sockaddr_in6 *sin6p;
+
+    sin6p = (struct sockaddr_in6 *)nam;
+    if (nam->sa_len != sizeof (*sin6p))
+        return (EINVAL);
+    /*
+     * Must check for multicast addresses and disallow binding
+     * to them.
+     */
+    if (sin6p->sin6_family == AF_INET6 &&
+        IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
+        return (EAFNOSUPPORT);
+
+    TCPDEBUG0;
+    inp = sotoinpcb(so);
+    KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
+    INP_WLOCK(inp);
+    if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+        error = EINVAL;
+        goto out;
+    }
+    tp = intotcpcb(inp);
+    TCPDEBUG1();
+    INP_HASH_WLOCK(&V_tcbinfo);
+    inp->inp_vflag &= ~INP_IPV4;
+    inp->inp_vflag |= INP_IPV6;
+#ifdef INET
+    if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
+        if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
+            inp->inp_vflag |= INP_IPV4;
+        else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+            struct sockaddr_in sin;
+
+            in6_sin6_2_sin(&sin, sin6p);
+            inp->inp_vflag |= INP_IPV4;
+            inp->inp_vflag &= ~INP_IPV6;
+            error = in_pcbbind(inp, (struct sockaddr *)&sin,
+                td->td_ucred);
+            INP_HASH_WUNLOCK(&V_tcbinfo);
+            goto out;
+        }
+    }
+#endif
+    error = in6_pcbbind(inp, nam, td->td_ucred);
+    INP_HASH_WUNLOCK(&V_tcbinfo);
+out:
+    TCPDEBUG2(PRU_BIND);
+    TCP_PROBE2(debug__user, tp, PRU_BIND);
+    INP_WUNLOCK(inp);
+    return (error);
+}
+#endif
+
+/* Based on a function in in6_pcb.c. */
+static int in6_pcbconnect(struct tcpcb* tp, struct sockaddr_in6* nam) {
+    register struct sockaddr_in6 *sin6 = nam;
+    tp->faddr = sin6->sin6_addr;
+    tp->fport = sin6->sin6_port;
+    return 0;
+}
+
+/*
+ * Initiate connection to peer.
+ * Create a template for use in transmissions on this connection.
+ * Enter SYN_SENT state, and mark socket as connecting.
+ * Start keep-alive timer, and seed output sequence space.
+ * Send initial segment on connection.
+ */
+/* Signature used to be
+static int
+tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
+*/
+static int
+tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *nam)
+{
+//    struct inpcb *inp = tp->t_inpcb;
+    int error;
+
+    int sb_max = cbuf_free_space(&tp->recvbuf); // same as sendbuf
+//    INP_WLOCK_ASSERT(inp);
+//    INP_HASH_WLOCK(&V_tcbinfo);
+    if (/*inp->inp_lport == 0*/tp->lport == 0) {
+        /*error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
+        if (error)
+            goto out;*/
+        error = EINVAL; // First, the socket must be bound
+        goto out;
+    }
+    error = in6_pcbconnect(/*inp*/tp, nam/*, td->td_ucred*/);
+    if (error != 0)
+        goto out;
+//    INP_HASH_WUNLOCK(&V_tcbinfo);
+
+    /* Compute window scaling to request.
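+     * The loop below picks the smallest shift for which
+     * (TCP_MAXWIN << request_r_scale) covers the receive buffer. With
+     * the small buffers typical of this port (well under 64 KiB),
+     * request_r_scale stays 0; scaling only matters for larger buffers.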
*/ + while (tp->request_r_scale < TCP_MAX_WINSHIFT && + (TCP_MAXWIN << tp->request_r_scale) < sb_max) + tp->request_r_scale++; + +// soisconnecting(inp->inp_socket); +// TCPSTAT_INC(tcps_connattempt); + tcp_state_change(tp, TCPS_SYN_SENT); + tp->iss = tcp_new_isn(tp); + tcp_sendseqinit(tp); + + return 0; + +out: +// INP_HASH_WUNLOCK(&V_tcbinfo); + return error; +} + +/* +The signature used to be +static int +tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) +*/ +int +tcp6_usr_connect(struct tcpcb* tp, struct sockaddr_in6* sin6p) +{ + int error = 0; + + if (tp->t_state != TCPS_CLOSED) { // This is a check that I added + return (EISCONN); + } +// struct inpcb *inp; +// struct tcpcb *tp = NULL; +// struct sockaddr_in6 *sin6p; + +// TCPDEBUG0; + +// sin6p = (struct sockaddr_in6 *)nam; +// if (nam->sa_len != sizeof (*sin6p)) +// return (EINVAL); + /* + * Must disallow TCP ``connections'' to multicast addresses. + */ + if (/*sin6p->sin6_family == AF_INET6 + && */IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) + return (EAFNOSUPPORT); +#if 0 // We already have the TCB + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_flags & INP_TIMEWAIT) { + error = EADDRINUSE; + goto out; + } + if (inp->inp_flags & INP_DROPPED) { + error = ECONNREFUSED; + goto out; + } + tp = intotcpcb(inp); +#endif +// TCPDEBUG1(); +//#ifdef INET + /* + * XXXRW: Some confusion: V4/V6 flags relate to binding, and + * therefore probably require the hash lock, which isn't held here. + * Is this a significant problem? + */ + if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { +// struct sockaddr_in sin; + + DEBUG("V4-Mapped Address!\n"); + + if (/*(inp->inp_flags & IN6P_IPV6_V6ONLY) != 0*/1) { + error = EINVAL; + goto out; + } +#if 0 // Not needed since we'll take the if branch anyway + in6_sin6_2_sin(&sin, sin6p); + inp->inp_vflag |= INP_IPV4; + inp->inp_vflag &= ~INP_IPV6; + if ((error = prison_remote_ip4(td->td_ucred, + &sin.sin_addr)) != 0) + goto out; + if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) + goto out; +#endif +#if 0 +#ifdef TCP_OFFLOAD + if (registered_toedevs > 0 && + (so->so_options & SO_NO_OFFLOAD) == 0 && + (error = tcp_offload_connect(so, nam)) == 0) + goto out; +#endif +#endif + error = tcp_output(tp); + goto out; + } +//#endif +// inp->inp_vflag &= ~INP_IPV4; +// inp->inp_vflag |= INP_IPV6; +// inp->inp_inc.inc_flags |= INC_ISIPV6; +// if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0) +// goto out; + if ((error = tcp6_connect(tp, sin6p/*, td*/)) != 0) + goto out; +#if 0 +#ifdef TCP_OFFLOAD + if (registered_toedevs > 0 && + (so->so_options & SO_NO_OFFLOAD) == 0 && + (error = tcp_offload_connect(so, nam)) == 0) + goto out; +#endif +#endif + tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); + error = tcp_output(tp); + +out: +#if 0 + TCPDEBUG2(PRU_CONNECT); + TCP_PROBE2(debug__user, tp, PRU_CONNECT); + INP_WUNLOCK(inp); +#endif + return (error); +} + +/* + * Do a send by putting data in output queue and updating urgent + * marker if URG set. Possibly send more data. Unlike the other + * pru_*() routines, the mbuf chains are our responsibility. We + * must either enqueue them or free them. The other pru_* routines + * generally are caller-frees. + */ +/* I changed the signature of this function. 
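+ * It now takes the TCB directly, a "more to come" hint, and an lbuf
+ * entry instead of an mbuf chain. A hypothetical caller (illustration
+ * only; names assumed) would do:
+ *
+ *     int status;
+ *     int e = tcp_usr_send(tp, 0, entry, &status);
+ *
+ * On success the lbufent has been appended to tp->sendbuf, so the
+ * caller must not reuse or free it while the TCP layer still owns it.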
*/ +/*static int +tcp_usr_send(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *nam, struct mbuf *control, struct thread *td)*/ +/* Returns error condition, and stores bytes sent into SENT. */ +int tcp_usr_send(struct tcpcb* tp, int moretocome, struct lbufent* data, int* status) +{ + int error = 0; + *status = 0; +// struct inpcb *inp; +// struct tcpcb *tp = NULL; +#if 0 +#ifdef INET6 + int isipv6; +#endif + TCPDEBUG0; +#endif + if (tp->t_state < TCPS_ESTABLISHED) { // This if statement and the next are checks that I added + error = ENOTCONN; + goto out; + } + + /* For the TinyOS version I used ESHUTDOWN, but apparently it doesn't + * come by default when you include errno.h: you need to also #define + * __LINUX_ERRNO_EXTENSIONS__. So I switched to EPIPE. + */ + if (tpiscantsend(tp)) { + //error = ESHUTDOWN; + error = EPIPE; + goto out; + } + + if ((tp->t_state == TCPS_TIME_WAIT) || (tp->t_state == TCPS_CLOSED)) { // copied from the commented-out code from below + error = ECONNRESET; + goto out; + } + + /* + * We require the pcbinfo lock if we will close the socket as part of + * this call. + */ +#if 0 + if (flags & PRUS_EOF) + INP_INFO_RLOCK(&V_tcbinfo); + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + if (control) + m_freem(control); + /* + * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible + * for freeing memory. + */ + if (m && (flags & PRUS_NOTREADY) == 0) + m_freem(m); + error = ECONNRESET; + goto out; + } +#ifdef INET6 + isipv6 = nam && nam->sa_family == AF_INET6; +#endif /* INET6 */ + tp = intotcpcb(inp); + TCPDEBUG1(); + if (control) { + /* TCP doesn't do control messages (rights, creds, etc) */ + if (control->m_len) { + m_freem(control); + if (m) + m_freem(m); + error = EINVAL; + goto out; + } + m_freem(control); /* empty control, just free it */ + } + if (!(flags & PRUS_OOB)) { +#endif // DON'T SUPPORT URGENT DATA + /*sbappendstream(&so->so_snd, m, flags);*/ + *status = lbuf_append(&tp->sendbuf, data); +#if 0 // DON'T SUPPORT IMPLIED CONNECTION + if (nam && tp->t_state < TCPS_SYN_SENT) { + /* + * Do implied connect if not yet connected, + * initialize window to default value, and + * initialize maxseg/maxopd using peer's cached + * MSS. + */ +#ifdef INET6 + if (isipv6) + error = tcp6_connect(tp, nam, td); +#endif /* INET6 */ +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + error = tcp_connect(tp, nam, td); +#endif + if (error) + goto out; + tp->snd_wnd = TTCP_CLIENT_SND_WND; + tcp_mss(tp, -1); + } +#endif +#if 0 + if (flags & PRUS_EOF) { + /* + * Close the send side of the connection after + * the data is sent. + */ + INP_INFO_RLOCK_ASSERT(&V_tcbinfo); + socantsendmore(so); + tcp_usrclosed(tp); + } +#endif +// if (!(inp->inp_flags & INP_DROPPED) && +// !(flags & PRUS_NOTREADY)) { + if (/*flags & PRUS_MORETOCOME*/ moretocome) + tp->t_flags |= TF_MORETOCOME; + error = tcp_output(tp); + if (/*flags & PRUS_MORETOCOME*/ moretocome) + tp->t_flags &= ~TF_MORETOCOME; +// } +#if 0 // DON'T SUPPORT OUT-OF-BAND DATA (URGENT POINTER IN TCP CASE) + } else { + /* + * XXXRW: PRUS_EOF not implemented with PRUS_OOB? + */ + SOCKBUF_LOCK(&so->so_snd); + if (sbspace(&so->so_snd) < -512) { + SOCKBUF_UNLOCK(&so->so_snd); + m_freem(m); + error = ENOBUFS; + goto out; + } + /* + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. 
We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section. + * Otherwise, snd_up should be one lower. + */ + sbappendstream_locked(&so->so_snd, m, flags); + SOCKBUF_UNLOCK(&so->so_snd); + if (nam && tp->t_state < TCPS_SYN_SENT) { + /* + * Do implied connect if not yet connected, + * initialize window to default value, and + * initialize maxseg/maxopd using peer's cached + * MSS. + */ +#ifdef INET6 + if (isipv6) + error = tcp6_connect(tp, nam, td); +#endif /* INET6 */ +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + error = tcp_connect(tp, nam, td); +#endif + if (error) + goto out; + tp->snd_wnd = TTCP_CLIENT_SND_WND; + tcp_mss(tp, -1); + } + tp->snd_up = tp->snd_una + sbavail(&so->so_snd); + if (!(flags & PRUS_NOTREADY)) { + tp->t_flags |= TF_FORCEDATA; + error = tcp_output(tp); + tp->t_flags &= ~TF_FORCEDATA; + } + } +#endif +out: +#if 0 // REMOVE THEIR SYNCHRONIZATION + TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : + ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); + TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB : + ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); + INP_WUNLOCK(inp); + if (flags & PRUS_EOF) + INP_INFO_RUNLOCK(&V_tcbinfo); +#endif + return (error); +} + +/* + * After a receive, possibly send window update to peer. + */ +int +tcp_usr_rcvd(struct tcpcb* tp/*, int flags*/) +{ +// struct inpcb *inp; +// struct tcpcb *tp = NULL; + int error = 0; + if ((tp->t_state == TCPS_TIME_WAIT) || (tp->t_state == TCPS_CLOSED)) { + error = ECONNRESET; + goto out; + } +#if 0 + TCPDEBUG0; + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + error = ECONNRESET; + goto out; + } + tp = intotcpcb(inp); + TCPDEBUG1(); +#ifdef TCP_OFFLOAD + if (tp->t_flags & TF_TOE) + tcp_offload_rcvd(tp); + else +#endif +#endif + tcp_output(tp); + +out: +// TCPDEBUG2(PRU_RCVD); +// TCP_PROBE2(debug__user, tp, PRU_RCVD); +// INP_WUNLOCK(inp); + return (error); +} + +#if 0 + +/* + * Initiate (or continue) disconnect. + * If embryonic state, just send reset (once). + * If in ``let data drain'' option and linger null, just drop. + * Otherwise (hard), mark socket disconnecting and drop + * current input data; switch states based on user close, and + * send segment to peer (with FIN). + */ +static void +tcp_disconnect(struct tcpcb *tp) +{ +// struct inpcb *inp = tp->t_inpcb; +// struct socket *so = inp->inp_socket; + +// INP_INFO_RLOCK_ASSERT(&V_tcbinfo); +// INP_WLOCK_ASSERT(inp); + + /* + * Neither tcp_close() nor tcp_drop() should return NULL, as the + * socket is still open. + */ + if (tp->t_state < TCPS_ESTABLISHED) { + tp = tcp_close(tp); + connection_lost(tp, CONN_LOST_NORMAL); + KASSERT(tp != NULL, + ("tcp_disconnect: tcp_close() returned NULL")); + }/* else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { + tp = tcp_drop(tp, 0); + KASSERT(tp != NULL, + ("tcp_disconnect: tcp_drop() returned NULL")); + }*/ else { +// soisdisconnecting(so); +// sbflush(&so->so_rcv); + tcp_usrclosed(tp); + if (/*!(inp->inp_flags & INP_DROPPED)*/tp->t_state != TCPS_CLOSED) + tcp_output(tp); + } +} + +#endif + +/* + * Mark the connection as being incapable of further output. 
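+ * (This is the shutdown(SHUT_WR) half-close: tpcantsendmore() latches
+ * TCB_CANTSENDMORE and tcp_usrclosed() advances the FIN state machine,
+ * while receiving stays possible until the peer sends its own FIN.)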
+ */ +int +tcp_usr_shutdown(struct tcpcb* tp) +{ + int error = 0; +#if 0 + struct inpcb *inp; + struct tcpcb *tp = NULL; + + TCPDEBUG0; + INP_INFO_RLOCK(&V_tcbinfo); + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("inp == NULL")); + INP_WLOCK(inp); +#endif + if (/*inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)*/ + (tp->t_state == TCPS_TIME_WAIT) || (tp->t_state == TCPS_CLOSED)) { + error = ECONNRESET; + goto out; + } +#if 0 + tp = intotcpcb(inp); + TCPDEBUG1(); +#endif +// socantsendmore(so); + tpcantsendmore(tp); + tcp_usrclosed(tp); + if (/*!(inp->inp_flags & INP_DROPPED)*/tp->t_state != TCPS_CLOSED) + error = tcp_output(tp); + +out: +#if 0 + TCPDEBUG2(PRU_SHUTDOWN); + TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN); + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); +#endif + return (error); +} + + +/* + * User issued close, and wish to trail through shutdown states: + * if never received SYN, just forget it. If got a SYN from peer, + * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. + * If already got a FIN from peer, then almost done; go to LAST_ACK + * state. In all other cases, have already sent FIN to peer (e.g. + * after PRU_SHUTDOWN), and just have to play tedious game waiting + * for peer to send FIN or not respond to keep-alives, etc. + * We can let the user exit from the close as soon as the FIN is acked. + */ +static void +tcp_usrclosed(struct tcpcb *tp) +{ + +// INP_INFO_RLOCK_ASSERT(&V_tcbinfo); +// INP_WLOCK_ASSERT(tp->t_inpcb); + + switch (tp->t_state) { + case TCPS_LISTEN: +//#ifdef TCP_OFFLOAD +// tcp_offload_listen_stop(tp); +//#endif + tcp_state_change(tp, TCPS_CLOSED); + /* FALLTHROUGH */ + case TCPS_CLOSED: + tp = tcp_close(tp); + connection_lost(tp, CONN_LOST_NORMAL); + /* + * tcp_close() should never return NULL here as the socket is + * still open. + */ + KASSERT(tp != NULL, + ("tcp_usrclosed: tcp_close() returned NULL")); + break; + + case TCPS_SYN_SENT: + case TCPS_SYN_RECEIVED: + tp->t_flags |= TF_NEEDFIN; + break; + + case TCPS_ESTABLISHED: + tcp_state_change(tp, TCPS_FIN_WAIT_1); + break; + + case TCPS_CLOSE_WAIT: + tcp_state_change(tp, TCPS_LAST_ACK); + break; + } + if (tp->t_state >= TCPS_FIN_WAIT_2) { +// soisdisconnected(tp->t_inpcb->inp_socket); + /* Prevent the connection hanging in FIN_WAIT_2 forever. */ + if (tp->t_state == TCPS_FIN_WAIT_2) { + int timeout; + + timeout = (tcp_fast_finwait2_recycle) ? + tcp_finwait2_timeout : TP_MAXIDLE(tp); + tcp_timer_activate(tp, TT_2MSL, timeout); + } + } +} + +#if 0 +/* + * TCP socket is closed. Start friendly disconnect. + */ +static void +tcp_usr_close(struct tcpcb* tp/*struct socket *so*/) +{ +// struct inpcb *inp; +// struct tcpcb *tp = NULL; +// TCPDEBUG0; + +// inp = sotoinpcb(so); +// KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); + +// INP_INFO_RLOCK(&V_tcbinfo); +// INP_WLOCK(inp); +// KASSERT(inp->inp_socket != NULL, +// ("tcp_usr_close: inp_socket == NULL")); + + /* + * If we still have full TCP state, and we're not dropped, initiate + * a disconnect. + */ + if ((tp->t_state != TCP6S_TIME_WAIT) && (tp->t_state != TCPS_CLOSED)/*!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED)*/) { +// tp = intotcpcb(inp); +// TCPDEBUG1(); + tpcantsendmore(tp); + tpcantrcvmore(tp); /* Added by Sam: This would be probably be done at the socket layer. 
*/ + tcp_disconnect(tp); +// TCPDEBUG2(PRU_CLOSE); +// TCP_PROBE2(debug__user, tp, PRU_CLOSE); + } +#if 0 + if (!(inp->inp_flags & INP_DROPPED)) { + SOCK_LOCK(so); + so->so_state |= SS_PROTOREF; + SOCK_UNLOCK(so); + inp->inp_flags |= INP_SOCKREF; + } +#endif +// INP_WUNLOCK(inp); +// INP_INFO_RUNLOCK(&V_tcbinfo); +} +#endif + +/* + * Abort the TCP. Drop the connection abruptly. + */ +void +tcp_usr_abort(/*struct socket *so*/struct tcpcb* tp) +{ +#if 0 + struct inpcb *inp; + struct tcpcb *tp = NULL; + TCPDEBUG0; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); + + INP_INFO_RLOCK(&V_tcbinfo); + INP_WLOCK(inp); + KASSERT(inp->inp_socket != NULL, + ("tcp_usr_abort: inp_socket == NULL")); +#endif + /* + * If we still have full TCP state, and we're not dropped, drop. + */ + if (/*!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED)*/ + tp->t_state != TCP6S_TIME_WAIT && + tp->t_state != TCP6S_CLOSED) { +// tp = intotcpcb(inp); +// TCPDEBUG1(); + tcp_drop(tp, ECONNABORTED); +// TCPDEBUG2(PRU_ABORT); +// TCP_PROBE2(debug__user, tp, PRU_ABORT); + } else if (tp->t_state == TCPS_TIME_WAIT) { // This clause added by Sam + tp = tcp_close(tp); + connection_lost(tp, CONN_LOST_NORMAL); + } +#if 0 + if (!(inp->inp_flags & INP_DROPPED)) { + SOCK_LOCK(so); + so->so_state |= SS_PROTOREF; + SOCK_UNLOCK(so); + inp->inp_flags |= INP_SOCKREF; + } + INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); +#endif +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_var.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_var.h new file mode 100644 index 000000000000..267a0367e0a9 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/tcp_var.h @@ -0,0 +1,917 @@ +/*- + * Copyright (c) 1982, 1986, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ *    @(#)tcp_var.h    8.4 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_VAR_H_
+#define _NETINET_TCP_VAR_H_
+
+
+#include "../lib/bitmap.h"
+#include "../lib/cbuf.h"
+#include "cc.h"
+#include "../lib/lbuf.h"
+#include "tcp.h"
+#include "types.h"
+#include "netinet/in.h"
+#include "ip6.h"
+
+#include "sys/queue.h"
+
+#if 0 //#ifdef _KERNEL
+#include
+#include
+
+/*
+ * Kernel variables for tcp.
+ */
+VNET_DECLARE(int, tcp_do_rfc1323);
+#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323)
+
+#endif /* _KERNEL */
+
+#if 0 // I have a segment queue, but it is not implemented like this
+/* TCP segment queue entry */
+struct tseg_qent {
+    LIST_ENTRY(tseg_qent) tqe_q;
+    int    tqe_len;           /* TCP segment data length */
+    struct tcphdr *tqe_th;    /* a pointer to tcp header */
+    struct mbuf   *tqe_m;     /* mbuf contains packet */
+};
+LIST_HEAD(tsegqe_head, tseg_qent);
+#endif
+
+struct sackblk {
+    tcp_seq start;        /* start seq no. of sack block */
+    tcp_seq end;          /* end seq no. */
+};
+
+struct sackhole {
+    tcp_seq start;        /* start seq no. of hole */
+    tcp_seq end;          /* end seq no. */
+    tcp_seq rxmit;        /* next seq. no in hole to be retransmitted */
+    TAILQ_ENTRY(sackhole) scblink;    /* scoreboard linkage */
+};
+
+struct sackhint {
+    struct sackhole *nexthole;
+    int             sack_bytes_rexmit;
+    tcp_seq         last_sack_ack;    /* Most recent/largest sacked ack */
+
+//    int        ispare;        /* explicit pad for 64bit alignment */
+//    uint64_t   _pad[2];       /* 1 sacked_bytes, 1 TBD */
+};
+
+struct tcptemp {
+    u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
+    struct tcphdr tt_t;
+};
+
+#define tcp6cb    tcpcb  /* for KAME src sync over BSD*'s */
+
+/* Abridged TCB for passive sockets. */
+struct tcpcb_listen {
+    int t_state;    /* Always CLOSED or LISTEN. */
+    int index;
+    uint16_t lport;
+};
+
+#define TCB_CANTRCVMORE 0x20
+#define TCB_CANTSENDMORE 0x40
+
+#define TCB_PASSIVE 0x80
+
+#define tpcantrcvmore(tp) (tp)->miscflags |= TCB_CANTRCVMORE
+#define tpcantsendmore(tp) (tp)->miscflags |= TCB_CANTSENDMORE
+#define tpiscantrcv(tp) (((tp)->miscflags & TCB_CANTRCVMORE) != 0)
+#define tpiscantsend(tp) (((tp)->miscflags & TCB_CANTSENDMORE) != 0)
+#define tpmarktimeractive(tp, timer) (tp)->miscflags |= (timer)
+#define tpistimeractive(tp, timer) (((tp)->miscflags & (timer)) != 0)
+#define tpcleartimeractive(tp, timer) (tp)->miscflags &= ~(timer)
+#define tpmarkpassiveopen(tp) (tp)->miscflags |= TCB_PASSIVE
+#define tpispassiveopen(tp) (((tp)->miscflags & TCB_PASSIVE) != 0)
+
+#define REASSBMP_SIZE(tp) BITS_TO_BYTES((tp)->recvbuf.size)
+
+/* These estimates are used to allocate sackholes (see tcp_sack.c). */
+#define AVG_SACKHOLES 2 // per TCB
+#define MAX_SACKHOLES 5 // per TCB
+#define SACKHOLE_POOL_SIZE (AVG_SACKHOLES * GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS)
+#define SACKHOLE_BMP_SIZE BITS_TO_BYTES(SACKHOLE_POOL_SIZE)
+
+// You can set the maximum number of SACK blocks in tcp.h
+
+/*
+ * Tcp control block, one per tcp; fields:
+ * Organized for 16 byte cacheline efficiency.
+ */
+struct tcpcb {
+    uint8_t t_state;        /* state of this connection */
+
+    /* Extra fields that I added.
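+     * sendbuf is the linked send buffer (../lib/lbuf.h), recvbuf the
+     * circular receive buffer (../lib/cbuf.h), and reassbmp a bitmap
+     * with one bit per receive-buffer byte (see REASSBMP_SIZE above)
+     * marking out-of-order bytes that have arrived but cannot be
+     * delivered yet.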
*/ + uint8_t index; /* Index/ID of this TCB */ + uint8_t miscflags; + + struct tcpcb_listen* accepted_from; + + struct lbufhead sendbuf; + struct cbufhead recvbuf; + uint8_t* reassbmp; + int16_t reass_fin_index; + + uint16_t lport; // local port, network byte order + uint16_t fport; // foreign port, network byte order + + struct in6_addr faddr; // foreign IP address + +#if 0 // I used unused space in the receive buffer for the reassembly queue + struct tsegqe_head t_segq; /* segment reassembly queue */ + void *t_pspare[2]; /* new reassembly queue */ + int t_segqlen; /* segment reassembly queue length */ +#endif + int t_dupacks; /* consecutive dup acks recd */ +#if 0 + struct tcp_timer *t_timers; /* All the TCP timers in one struct */ + + struct inpcb *t_inpcb; /* back pointer to internet pcb */ +#endif + + u_short tw_last_win; /* For time wait */ + int tw_time; /* For time wait */ + + u_int t_flags; + +// struct vnet *t_vnet; /* back pointer to parent vnet */ + + tcp_seq snd_una; /* sent but unacknowledged */ + tcp_seq snd_max; /* highest sequence number sent; + * used to recognize retransmits + */ + tcp_seq snd_nxt; /* send next */ + tcp_seq snd_up; /* send urgent pointer */ + + tcp_seq snd_wl1; /* window update seg seq number */ + tcp_seq snd_wl2; /* window update seg ack number */ + tcp_seq iss; /* initial send sequence number */ + tcp_seq irs; /* initial receive sequence number */ + + tcp_seq rcv_nxt; /* receive next */ + tcp_seq rcv_adv; /* advertised window */ + u_long rcv_wnd; /* receive window */ + tcp_seq rcv_up; /* receive urgent pointer */ + + u_long snd_wnd; /* send window */ + u_long snd_cwnd; /* congestion-controlled window */ +// u_long snd_spare1; /* unused */ + u_long snd_ssthresh; /* snd_cwnd size threshold for + * for slow start exponential to + * linear switch + */ +// u_long snd_spare2; /* unused */ + tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ + + u_int t_maxopd; /* mss plus options */ + + u_int t_rcvtime; /* inactivity time */ + u_int t_starttime; /* time connection was established */ + u_int t_rtttime; /* RTT measurement start time */ + tcp_seq t_rtseq; /* sequence number being timed */ + +// u_int t_bw_spare1; /* unused */ +// tcp_seq t_bw_spare2; /* unused */ + + int t_rxtcur; /* current retransmit value (ticks) */ + u_int t_maxseg; /* maximum segment size */ + int t_srtt; /* smoothed round-trip time */ + int t_rttvar; /* variance in round-trip time */ + + int t_rxtshift; /* log(2) of rexmt exp. 
backoff */ + u_int t_rttmin; /* minimum rtt allowed */ + u_int t_rttbest; /* best rtt we've seen */ + u_long t_rttupdated; /* number of times rtt sampled */ + u_long max_sndwnd; /* largest window peer has offered */ + + int t_softerror; /* possible error not yet reported */ +/* out-of-band data */ +// char t_oobflags; /* have some */ +// char t_iobc; /* input character */ +/* RFC 1323 variables */ + u_char snd_scale; /* window scaling for send window */ + u_char rcv_scale; /* window scaling for recv window */ + u_char request_r_scale; /* pending window scaling */ + u_int32_t ts_recent; /* timestamp echo data */ + u_int ts_recent_age; /* when last updated */ + u_int32_t ts_offset; /* our timestamp offset */ + + tcp_seq last_ack_sent; +/* experimental */ + u_long snd_cwnd_prev; /* cwnd prior to retransmit */ + u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */ + tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ +// int t_sndzerowin; /* zero-window updates sent */ + u_int t_badrxtwin; /* window for retransmit recovery */ + u_char snd_limited; /* segments limited transmitted */ + +/* SACK related state */ + int snd_numholes; /* number of holes seen by sender */ + TAILQ_HEAD(sackhole_head, sackhole) snd_holes; + /* SACK scoreboard (sorted) */ + tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ + int rcv_numsacks; /* # distinct sack blks present */ + struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ + tcp_seq sack_newdata; /* New data xmitted in this recovery + episode starts at this seq number */ + struct sackhint sackhint; /* SACK scoreboard hint */ + + int t_rttlow; /* smallest observed RTT */ +#if 0 + u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ + int rfbuf_cnt; /* recv buffer autoscaling byte count */ + struct toedev *tod; /* toedev handling this connection */ +#endif +// int t_sndrexmitpack; /* retransmit packets sent */ +// int t_rcvoopack; /* out-of-order packets received */ +// void *t_toe; /* TOE pcb pointer */ + int t_bytes_acked; /* # bytes acked during current RTT */ +// struct cc_algo *cc_algo; /* congestion control algorithm */ + struct cc_var ccv[1]; /* congestion control specific vars */ +#if 0 + struct osd *osd; /* storage for Khelp module data */ +#endif +#if 0 // Just use the default values for the KEEP constants (see tcp_timer.h) + u_int t_keepinit; /* time to establish connection */ + u_int t_keepidle; /* time before keepalive probes begin */ + u_int t_keepintvl; /* interval between keepalives */ + u_int t_keepcnt; /* number of keepalives before close */ +#endif +#if 0 // Don't support TCP Segment Offloading + u_int t_tsomax; /* TSO total burst length limit in bytes */ + u_int t_tsomaxsegcount; /* TSO maximum segment count */ + u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */ +#endif +// u_int t_pmtud_saved_maxopd; /* pre-blackhole MSS */ + u_int t_flags2; /* More tcpcb flags storage */ + +// uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ +// void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */ +#if 0 +#if defined(_KERNEL) && defined(TCPPCAP) + struct mbufq t_inpkts; /* List of saved input packets. */ + struct mbufq t_outpkts; /* List of saved output packets. */ +#ifdef _LP64 + uint64_t _pad[0]; /* all used! */ +#else + uint64_t _pad[2]; /* 2 are available */ +#endif /* _LP64 */ +#else + uint64_t _pad[6]; +#endif /* defined(_KERNEL) && defined(TCPPCAP) */ +#endif +}; + +/* Defined in tcp_subr.c. 
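+ * initialize_tcb() wires caller-provided storage (the receive buffer
+ * and reassembly bitmap) plus the local port into a fresh TCB; since
+ * the TCB keeps raw pointers to that storage, it must outlive the
+ * connection.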
*/ +void initialize_tcb(struct tcpcb* tp, uint16_t lport, uint8_t* recvbuf, size_t recvbuflen, uint8_t* reassbmp); + +/* Copied from the "dead" portions below. */ + +void tcp_init(void); +void tcp_state_change(struct tcpcb *, int); +tcp_seq tcp_new_isn(struct tcpcb *); +struct tcpcb *tcp_close(struct tcpcb *); +struct tcpcb *tcp_drop(struct tcpcb *, int); +void +tcp_respond(struct tcpcb *tp, struct ip6_hdr* ip6gen, struct tcphdr *thgen, + tcp_seq ack, tcp_seq seq, int flags); +void tcp_setpersist(struct tcpcb *); +void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type); + +/* Added, since there is no header file for tcp_usrreq.c. */ +int tcp6_usr_connect(struct tcpcb* tp, struct sockaddr_in6* sinp6); +int tcp_usr_send(struct tcpcb* tp, int moretocome, struct lbufent* data, int* status); +int tcp_usr_rcvd(struct tcpcb* tp); +int tcp_usr_shutdown(struct tcpcb* tp); +void tcp_usr_abort(struct tcpcb* tp); + +/* + * Flags and utility macros for the t_flags field. + */ +#define TF_ACKNOW 0x000001 /* ack peer immediately */ +#define TF_DELACK 0x000002 /* ack, but try to delay it */ +#define TF_NODELAY 0x000004 /* don't delay packets to coalesce */ +#define TF_NOOPT 0x000008 /* don't use tcp options */ +#define TF_SENTFIN 0x000010 /* have sent FIN */ +#define TF_REQ_SCALE 0x000020 /* have/will request window scaling */ +#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */ +#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */ +#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */ +#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */ +#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */ +#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */ +#define TF_NOPUSH 0x001000 /* don't push */ +#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */ +#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */ +#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */ +#define TF_LASTIDLE 0x040000 /* connection was previously idle */ +#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */ +#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */ +#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */ +#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */ +#define TF_FORCEDATA 0x800000 /* force out a byte */ +#define TF_TSO 0x1000000 /* TSO enabled on this connection */ +#define TF_TOE 0x2000000 /* this connection is offloaded */ +#define TF_ECN_PERMIT 0x4000000 /* connection ECN-ready */ +#define TF_ECN_SND_CWR 0x8000000 /* ECN CWR in queue */ +#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */ +#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ +#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ + +#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY) +#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY +#define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY + +#define IN_CONGRECOVERY(t_flags) (t_flags & TF_CONGRECOVERY) +#define ENTER_CONGRECOVERY(t_flags) t_flags |= TF_CONGRECOVERY +#define EXIT_CONGRECOVERY(t_flags) t_flags &= ~TF_CONGRECOVERY + +#define IN_RECOVERY(t_flags) (t_flags & (TF_CONGRECOVERY | TF_FASTRECOVERY)) +#define ENTER_RECOVERY(t_flags) t_flags |= (TF_CONGRECOVERY | TF_FASTRECOVERY) +#define EXIT_RECOVERY(t_flags) t_flags &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY) + +#define BYTES_THIS_ACK(tp, th) (th->th_ack - tp->snd_una) + +/* + * Flags for the t_oobflags field. 
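+ * (The t_oobflags field itself is commented out of this port's tcpcb,
+ * so these values are currently unused.)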
+ */ +#define TCPOOB_HAVEDATA 0x01 +#define TCPOOB_HADDATA 0x02 + +#ifdef TCP_SIGNATURE +/* + * Defines which are needed by the xform_tcp module and tcp_[in|out]put + * for SADB verification and lookup. + */ +#define TCP_SIGLEN 16 /* length of computed digest in bytes */ +#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */ +#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */ +/* + * Only a single SA per host may be specified at this time. An SPI is + * needed in order for the KEY_ALLOCSA() lookup to work. + */ +#define TCP_SIG_SPI 0x1000 +#endif /* TCP_SIGNATURE */ + +/* + * Flags for PLPMTU handling, t_flags2 + */ +#define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */ +#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */ +#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */ + +/* + * Structure to hold TCP options that are only used during segment + * processing (in tcp_input), but not held in the tcpcb. + * It's basically used to reduce the number of parameters + * to tcp_dooptions and tcp_addoptions. + * The binary order of the to_flags is relevant for packing of the + * options in tcp_addoptions. + */ +struct tcpopt { + u_int64_t to_flags; /* which options are present */ +#define TOF_MSS 0x0001 /* maximum segment size */ +#define TOF_SCALE 0x0002 /* window scaling */ +#define TOF_SACKPERM 0x0004 /* SACK permitted */ +#define TOF_TS 0x0010 /* timestamp */ +#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */ +#define TOF_SACK 0x0080 /* Peer sent SACK option */ +#define TOF_MAXOPT 0x0100 + u_int32_t to_tsval; /* new timestamp */ + u_int32_t to_tsecr; /* reflected timestamp */ + u_char *to_sacks; /* pointer to the first SACK blocks */ + u_char *to_signature; /* pointer to the TCP-MD5 signature */ + u_int16_t to_mss; /* maximum segment size */ + u_int8_t to_wscale; /* window scaling */ + u_int8_t to_nsacks; /* number of SACK blocks */ + u_int32_t to_spare; /* UTO */ +}; + +/* + * Flags for tcp_dooptions. + */ +#define TO_SYN 0x01 /* parse SYN-only options */ + +struct hc_metrics_lite { /* must stay in sync with hc_metrics */ + u_long rmx_mtu; /* MTU for this path */ + u_long rmx_ssthresh; /* outbound gateway buffer limit */ + u_long rmx_rtt; /* estimated round trip time */ + u_long rmx_rttvar; /* estimated rtt variance */ + u_long rmx_bandwidth; /* estimated bandwidth */ + u_long rmx_cwnd; /* congestion window */ + u_long rmx_sendpipe; /* outbound delay-bandwidth product */ + u_long rmx_recvpipe; /* inbound delay-bandwidth product */ +}; + +/* + * Used by tcp_maxmtu() to communicate interface specific features + * and limits at the time of connection setup. 
+ */ +struct tcp_ifcap { + int ifcap; + u_int tsomax; + u_int tsomaxsegcount; + u_int tsomaxsegsize; +}; + +void tcp_mss(struct tcpcb *, int); +void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, + struct tcp_ifcap *); + +#ifndef _NETINET_IN_PCB_H_ +struct in_conninfo; +#endif /* _NETINET_IN_PCB_H_ */ + +#if 0 // we won't need these declarations (I think) +struct tcptw { + struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */ + tcp_seq snd_nxt; + tcp_seq rcv_nxt; + tcp_seq iss; + tcp_seq irs; + u_short last_win; /* cached window value */ + u_short tw_so_options; /* copy of so_options */ + struct ucred *tw_cred; /* user credentials */ + u_int32_t t_recent; + u_int32_t ts_offset; /* our timestamp offset */ + u_int t_starttime; + int tw_time; + TAILQ_ENTRY(tcptw) tw_2msl; + void *tw_pspare; /* TCP_SIGNATURE */ + u_int *tw_spare; /* TCP_SIGNATURE */ +}; + +#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) +#define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb) +#define sototcpcb(so) (intotcpcb(sotoinpcb(so))) + +#endif + +/* + * The smoothed round-trip time and estimated variance + * are stored as fixed point numbers scaled by the values below. + * For convenience, these scales are also used in smoothing the average + * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). + * With these scales, srtt has 3 bits to the right of the binary point, + * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the + * binary point, and is smoothed with an ALPHA of 0.75. + */ +#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ +#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */ +#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */ +#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */ +#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ + +/* My definition of the max macro */ +#define max(x, y) ((x) > (y) ? (x) : (y)) + +/* + * The initial retransmission should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + * This version of the macro adapted from a paper by Lawrence + * Brakmo and Larry Peterson which outlines a problem caused + * by insufficient precision in the original implementation, + * which results in inappropriately large RTO values for very + * fast networks. + */ +#define TCP_REXMTVAL(tp) \ + max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ + + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) + +/* + * TCP statistics. + * Many of these should be kept per connection, + * but that's inconvenient at the moment. + */ +struct tcpstat { + uint64_t tcps_connattempt; /* connections initiated */ + uint64_t tcps_accepts; /* connections accepted */ + uint64_t tcps_connects; /* connections established */ + uint64_t tcps_drops; /* connections dropped */ + uint64_t tcps_conndrops; /* embryonic connections dropped */ + uint64_t tcps_minmssdrops; /* average minmss too low drops */ + uint64_t tcps_closed; /* conn. 
closed (includes drops) */ + uint64_t tcps_segstimed; /* segs where we tried to get rtt */ + uint64_t tcps_rttupdated; /* times we succeeded */ + uint64_t tcps_delack; /* delayed acks sent */ + uint64_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */ + uint64_t tcps_rexmttimeo; /* retransmit timeouts */ + uint64_t tcps_persisttimeo; /* persist timeouts */ + uint64_t tcps_keeptimeo; /* keepalive timeouts */ + uint64_t tcps_keepprobe; /* keepalive probes sent */ + uint64_t tcps_keepdrops; /* connections dropped in keepalive */ + + uint64_t tcps_sndtotal; /* total packets sent */ + uint64_t tcps_sndpack; /* data packets sent */ + uint64_t tcps_sndbyte; /* data bytes sent */ + uint64_t tcps_sndrexmitpack; /* data packets retransmitted */ + uint64_t tcps_sndrexmitbyte; /* data bytes retransmitted */ + uint64_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */ + uint64_t tcps_sndacks; /* ack-only packets sent */ + uint64_t tcps_sndprobe; /* window probes sent */ + uint64_t tcps_sndurg; /* packets sent with URG only */ + uint64_t tcps_sndwinup; /* window update-only packets sent */ + uint64_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ + + uint64_t tcps_rcvtotal; /* total packets received */ + uint64_t tcps_rcvpack; /* packets received in sequence */ + uint64_t tcps_rcvbyte; /* bytes received in sequence */ + uint64_t tcps_rcvbadsum; /* packets received with ccksum errs */ + uint64_t tcps_rcvbadoff; /* packets received with bad offset */ + uint64_t tcps_rcvreassfull; /* packets dropped for no reass space */ + uint64_t tcps_rcvshort; /* packets received too short */ + uint64_t tcps_rcvduppack; /* duplicate-only packets received */ + uint64_t tcps_rcvdupbyte; /* duplicate-only bytes received */ + uint64_t tcps_rcvpartduppack; /* packets with some duplicate data */ + uint64_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */ + uint64_t tcps_rcvoopack; /* out-of-order packets received */ + uint64_t tcps_rcvoobyte; /* out-of-order bytes received */ + uint64_t tcps_rcvpackafterwin; /* packets with data after window */ + uint64_t tcps_rcvbyteafterwin; /* bytes rcvd after window */ + uint64_t tcps_rcvafterclose; /* packets rcvd after "close" */ + uint64_t tcps_rcvwinprobe; /* rcvd window probe packets */ + uint64_t tcps_rcvdupack; /* rcvd duplicate acks */ + uint64_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */ + uint64_t tcps_rcvackpack; /* rcvd ack packets */ + uint64_t tcps_rcvackbyte; /* bytes acked by rcvd acks */ + uint64_t tcps_rcvwinupd; /* rcvd window update packets */ + uint64_t tcps_pawsdrop; /* segments dropped due to PAWS */ + uint64_t tcps_predack; /* times hdr predict ok for acks */ + uint64_t tcps_preddat; /* times hdr predict ok for data pkts */ + uint64_t tcps_pcbcachemiss; + uint64_t tcps_cachedrtt; /* times cached RTT in route updated */ + uint64_t tcps_cachedrttvar; /* times cached rttvar updated */ + uint64_t tcps_cachedssthresh; /* times cached ssthresh updated */ + uint64_t tcps_usedrtt; /* times RTT initialized from route */ + uint64_t tcps_usedrttvar; /* times RTTVAR initialized from rt */ + uint64_t tcps_usedssthresh; /* times ssthresh initialized from rt*/ + uint64_t tcps_persistdrop; /* timeout in persist state */ + uint64_t tcps_badsyn; /* bogus SYN, e.g. 
premature ACK */ + uint64_t tcps_mturesent; /* resends due to MTU discovery */ + uint64_t tcps_listendrop; /* listen queue overflows */ + uint64_t tcps_badrst; /* ignored RSTs in the window */ + + uint64_t tcps_sc_added; /* entry added to syncache */ + uint64_t tcps_sc_retransmitted; /* syncache entry was retransmitted */ + uint64_t tcps_sc_dupsyn; /* duplicate SYN packet */ + uint64_t tcps_sc_dropped; /* could not reply to packet */ + uint64_t tcps_sc_completed; /* successful extraction of entry */ + uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */ + uint64_t tcps_sc_cacheoverflow; /* syncache cache limit hit */ + uint64_t tcps_sc_reset; /* RST removed entry from syncache */ + uint64_t tcps_sc_stale; /* timed out or listen socket gone */ + uint64_t tcps_sc_aborted; /* syncache entry aborted */ + uint64_t tcps_sc_badack; /* removed due to bad ACK */ + uint64_t tcps_sc_unreach; /* ICMP unreachable received */ + uint64_t tcps_sc_zonefail; /* zalloc() failed */ + uint64_t tcps_sc_sendcookie; /* SYN cookie sent */ + uint64_t tcps_sc_recvcookie; /* SYN cookie received */ + + uint64_t tcps_hc_added; /* entry added to hostcache */ + uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */ + + uint64_t tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */ + + /* SACK related stats */ + uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */ + uint64_t tcps_sack_rexmits; /* SACK rexmit segments */ + uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */ + uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */ + uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */ + uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */ + + /* ECN related stats */ + uint64_t tcps_ecn_ce; /* ECN Congestion Experienced */ + uint64_t tcps_ecn_ect0; /* ECN Capable Transport */ + uint64_t tcps_ecn_ect1; /* ECN Capable Transport */ + uint64_t tcps_ecn_shs; /* ECN successful handshakes */ + uint64_t tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */ + + /* TCP_SIGNATURE related stats */ + uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */ + uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */ + uint64_t tcps_sig_err_buildsig; /* Mismatching signature received */ + uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */ + uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */ + + uint64_t _pad[12]; /* 6 UTO, 6 TBD */ +}; + +/* Copied from below. 
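+ * tcp_fields_to_host() converts the sequence, ack, window, and
+ * urgent-pointer fields of a received header to host byte order in place;
+ * _receive() calls it once, after checksum verification, before handing
+ * the segment to tcp_input().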
*/ +static inline void +tcp_fields_to_host(struct tcphdr *th) +{ + + th->th_seq = ntohl(th->th_seq); + th->th_ack = ntohl(th->th_ack); + th->th_win = ntohs(th->th_win); + th->th_urp = ntohs(th->th_urp); +} + +void tcp_twstart(struct tcpcb*); +void tcp_twclose(struct /*tcptw*/tcpcb*, int); +int tcp_twcheck(struct tcpcb*,/*struct inpcb *, struct tcpopt *,*/ struct tcphdr *, + /*struct mbuf *,*/ int); +void tcp_dropwithreset(struct ip6_hdr* ip6, struct tcphdr *th, struct tcpcb *tp, + int tlen, int rstreason); +int tcp_input(struct ip6_hdr* ip6, struct tcphdr* th, struct tcpcb* tp, struct tcpcb_listen* tpl, + uint8_t* signals, uint32_t* freedentries); +int tcp_output(struct tcpcb *); +void tcpip_maketemplate(struct /*inp*/tcpcb *, struct tcptemp*); +void tcpip_fillheaders(struct /*inp*/tcpcb *, void *, void *); +u_long tcp_maxmtu6(/*struct in_conninfo **/ struct tcpcb*, struct tcp_ifcap *); +int tcp_addoptions(struct tcpopt *, u_char *); +int tcp_mssopt(/*struct in_conninfo **/ struct tcpcb*); +int tcp_reass(struct tcpcb *, struct tcphdr *, int *, /*struct mbuf*/uint8_t *, uint8_t*); +void tcp_sack_init(void); // Sam: new function that I added +void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); +void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); +void tcp_clean_sackreport(struct tcpcb *tp); +void tcp_sack_adjust(struct tcpcb *tp); +struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); +void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); +void tcp_free_sackholes(struct tcpcb *tp); + +#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */0 + +#ifdef _KERNEL +#include + +VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */ +/* + * In-kernel consumers can use these accessor macros directly to update + * stats. + */ +#define TCPSTAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val)) +#define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1) + +/* + * Kernel module consumers must use this accessor macro. + */ +void kmod_tcpstat_inc(int statnum); +#define KMOD_TCPSTAT_INC(name) \ + kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(uint64_t)) + +/* + * TCP specific helper hook point identifiers. + */ +#define HHOOK_TCP_EST_IN 0 +#define HHOOK_TCP_EST_OUT 1 +#define HHOOK_TCP_LAST HHOOK_TCP_EST_OUT + +struct tcp_hhook_data { + struct tcpcb *tp; + struct tcphdr *th; + struct tcpopt *to; + long len; + int tso; + tcp_seq curack; +}; +#endif + +/* + * TCB structure exported to user-land via sysctl(3). + * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been + * included. Not all of our clients do. 
+ */ +#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_) +struct xtcp_timer { + int tt_rexmt; /* retransmit timer */ + int tt_persist; /* retransmit persistence */ + int tt_keep; /* keepalive */ + int tt_2msl; /* 2*msl TIME_WAIT timer */ + int tt_delack; /* delayed ACK timer */ + int t_rcvtime; /* Time since last packet received */ +}; +struct xtcpcb { + size_t xt_len; + struct inpcb xt_inp; + struct tcpcb xt_tp; + struct xsocket xt_socket; + struct xtcp_timer xt_timer; + u_quad_t xt_alignment_hack; +}; +#endif + +/* + * Identifiers for TCP sysctl nodes + */ +#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */ +#define TCPCTL_MSSDFLT 3 /* MSS default */ +#define TCPCTL_STATS 4 /* statistics (read-only) */ +#define TCPCTL_RTTDFLT 5 /* default RTT estimate */ +#define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */ +#define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */ +#define TCPCTL_SENDSPACE 8 /* send buffer space */ +#define TCPCTL_RECVSPACE 9 /* receive buffer space */ +#define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */ +#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ +#define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */ +#define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ +#define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */ +#define TCPCTL_DROP 15 /* drop tcp connection */ + +#ifdef _KERNEL +#ifdef SYSCTL_DECL +SYSCTL_DECL(_net_inet_tcp); +SYSCTL_DECL(_net_inet_tcp_sack); +MALLOC_DECLARE(M_TCPLOG); +#endif + +VNET_DECLARE(struct inpcbhead, tcb); /* queue of active tcpcb's */ +VNET_DECLARE(struct inpcbinfo, tcbinfo); +extern int tcp_log_in_vain; +VNET_DECLARE(int, tcp_mssdflt); /* XXX */ +VNET_DECLARE(int, tcp_minmss); +VNET_DECLARE(int, tcp_delack_enabled); +VNET_DECLARE(int, tcp_do_rfc3390); +VNET_DECLARE(int, tcp_initcwnd_segments); +VNET_DECLARE(int, tcp_sendspace); +VNET_DECLARE(int, tcp_recvspace); +VNET_DECLARE(int, path_mtu_discovery); +VNET_DECLARE(int, tcp_do_rfc3465); +VNET_DECLARE(int, tcp_abc_l_var); +#define V_tcb VNET(tcb) +#define V_tcbinfo VNET(tcbinfo) +#define V_tcp_mssdflt VNET(tcp_mssdflt) +#define V_tcp_minmss VNET(tcp_minmss) +#define V_tcp_delack_enabled VNET(tcp_delack_enabled) +#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390) +#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) +#define V_tcp_sendspace VNET(tcp_sendspace) +#define V_tcp_recvspace VNET(tcp_recvspace) +#define V_path_mtu_discovery VNET(path_mtu_discovery) +#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465) +#define V_tcp_abc_l_var VNET(tcp_abc_l_var) + +VNET_DECLARE(int, tcp_do_sack); /* SACK enabled/disabled */ +VNET_DECLARE(int, tcp_sc_rst_sock_fail); /* RST on sock alloc failure */ +#define V_tcp_do_sack VNET(tcp_do_sack) +#define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) + +VNET_DECLARE(int, tcp_do_ecn); /* TCP ECN enabled/disabled */ +VNET_DECLARE(int, tcp_ecn_maxretries); +#define V_tcp_do_ecn VNET(tcp_do_ecn) +#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) + +VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]); +#define V_tcp_hhh VNET(tcp_hhh) + +int tcp_addoptions(struct tcpopt *, u_char *); +int tcp_ccalgounload(struct cc_algo *unload_algo); +struct tcpcb * + tcp_close(struct tcpcb *); +void tcp_discardcb(struct tcpcb *); +void tcp_twstart(struct tcpcb *); +void tcp_twclose(struct tcptw *, int); +void tcp_ctlinput(int, struct sockaddr *, void *); +int tcp_ctloutput(struct socket *, struct sockopt *); +struct tcpcb * + tcp_drop(struct tcpcb *, int); +void tcp_drain(void); +#ifdef VIMAGE 
+void tcp_destroy(void); +#endif +void tcp_fini(void *); +char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *, + const void *); +char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *, + const void *); +int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *); +void tcp_reass_global_init(void); +void tcp_reass_flush(struct tcpcb *); +int tcp_input(struct mbuf **, int *, int); +u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *); +u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *); +void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, + struct tcp_ifcap *); +void tcp_mss(struct tcpcb *, int); +int tcp_mssopt(struct in_conninfo *); +struct inpcb * + tcp_drop_syn_sent(struct inpcb *, int); +struct tcpcb * + tcp_newtcpcb(struct inpcb *); +int tcp_output(struct tcpcb *); +void tcp_state_change(struct tcpcb *, int); +void tcp_respond(struct tcpcb *, void *, + struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int); +void tcp_tw_init(void); +#ifdef VIMAGE +void tcp_tw_destroy(void); +#endif +void tcp_tw_zone_change(void); +int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *, + struct mbuf *, int); +void tcp_setpersist(struct tcpcb *); +#ifdef TCP_SIGNATURE +struct secasvar; +struct secasvar *tcp_get_sav(struct mbuf *, u_int); +int tcp_signature_do_compute(struct mbuf *, int, int, u_char *, + struct secasvar *); +int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int); +int tcp_signature_verify(struct mbuf *, int, int, int, struct tcpopt *, + struct tcphdr *, u_int); +int tcp_signature_check(struct mbuf *m, int off0, int tlen, int optlen, + struct tcpopt *to, struct tcphdr *th, u_int tcpbflag); +#endif +void tcp_slowtimo(void); +struct tcptemp * + tcpip_maketemplate(struct inpcb *); +void tcpip_fillheaders(struct inpcb *, void *, void *); +void tcp_timer_activate(struct tcpcb *, uint32_t, u_int); +int tcp_timer_active(struct tcpcb *, uint32_t); +void tcp_timer_stop(struct tcpcb *, uint32_t); +void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int); +/* + * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo) + */ +void tcp_hc_init(void); +#ifdef VIMAGE +void tcp_hc_destroy(void); +#endif +void tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *); +u_long tcp_hc_getmtu(struct in_conninfo *); +void tcp_hc_updatemtu(struct in_conninfo *, u_long); +void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *); + +extern struct pr_usrreqs tcp_usrreqs; +tcp_seq tcp_new_isn(struct tcpcb *); + +void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); +void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); +void tcp_clean_sackreport(struct tcpcb *tp); +void tcp_sack_adjust(struct tcpcb *tp); +struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); +void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); +void tcp_free_sackholes(struct tcpcb *tp); +int tcp_newreno(struct tcpcb *, struct tcphdr *); +u_long tcp_seq_subtract(u_long, u_long ); + +void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type); + +static inline void +tcp_fields_to_host(struct tcphdr *th) +{ + + th->th_seq = ntohl(th->th_seq); + th->th_ack = ntohl(th->th_ack); + th->th_win = ntohs(th->th_win); + th->th_urp = ntohs(th->th_urp); +} + +#ifdef TCP_SIGNATURE +static inline void +tcp_fields_to_net(struct tcphdr *th) +{ + + th->th_seq = htonl(th->th_seq); + th->th_ack = htonl(th->th_ack); + th->th_win = htons(th->th_win); + 
th->th_urp = htons(th->th_urp); +} +#endif +#endif /* _KERNEL */ + +#endif /* _NETINET_TCP_VAR_H_ */ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/types.h b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/types.h new file mode 100644 index 000000000000..7cb8cc51fac5 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/bsdtcp/types.h @@ -0,0 +1,79 @@ +/*- + * Copyright (c) 1982, 1986, 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * @(#)types.h 8.6 (Berkeley) 2/19/95
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TYPES_H_
+#define _SYS_TYPES_H_
+
+//#include "_types.h"
+#include <stdint.h>
+
+// I'm omitting these since they seem to come predefined in Linux, sometimes differently
+#if 0
+typedef uint8_t u_char;
+typedef uint16_t u_short;
+typedef uint32_t u_int;
+typedef uint64_t u_long;
+
+typedef uint16_t ushort;
+typedef uint32_t uint;
+#endif
+
+// I added this one
+typedef unsigned int u_int;
+
+typedef uint8_t __uint8_t;
+typedef uint16_t __uint16_t;
+typedef uint32_t __uint32_t;
+typedef uint64_t __uint64_t;
+typedef int8_t __int8_t;
+typedef int16_t __int16_t;
+typedef int32_t __int32_t;
+typedef int64_t __int64_t;
+
+typedef __uint8_t u_int8_t; /* unsigned integrals (deprecated) */
+typedef __uint16_t u_int16_t;
+typedef __uint32_t u_int32_t;
+typedef __uint64_t u_int64_t;
+
+typedef __uint64_t u_quad_t; /* quads (deprecated) */
+typedef __int64_t quad_t;
+typedef quad_t * qaddr_t;
+
+typedef char * caddr_t; /* core address */
+typedef const char * c_caddr_t; /* core address, pointer to const */
+
+#endif
diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/checksum.c b/sys/net/gnrc/transport_layer/tcp_freebsd/checksum.c
new file mode 100644
index 000000000000..6ea7ca95d196
--- /dev/null
+++ b/sys/net/gnrc/transport_layer/tcp_freebsd/checksum.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2016 University of California, Berkeley
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @ingroup net_gnrc_tcp_freebsd
+ * @{
+ *
+ * @file
+ * @brief TCP checksum calculation for GNRC
+ *
+ * @author Sam Kumar
+ *
+ * Unlike the other files in this directory, this is not taken from the
+ * FreeBSD TCP stack.
+ * @}
+ */
+#include "gnrc_tcp_freebsd_internal.h"
+#include "bsdtcp/tcp.h"
+
+#include <errno.h>
+#include <string.h>
+
+/* Reads a potentially unaligned half-word one byte at a time; static
+ * inline so each translation unit gets its own definition. */
+static inline uint16_t deref_safe(uint16_t* unaligned) {
+    return ((uint16_t) *((uint8_t*) unaligned))
+        | (((uint16_t) *(((uint8_t*) unaligned) + 1)) << 8);
+}
+
+int gnrc_tcp_calc_csum(const gnrc_pktsnip_t *hdr, const gnrc_pktsnip_t *pseudo_hdr)
+{
+    if (hdr == NULL || pseudo_hdr == NULL) {
+        return -EFAULT;
+    } else if (hdr->type != GNRC_NETTYPE_TCP) {
+        return -EBADMSG;
+    } else if (pseudo_hdr->type != GNRC_NETTYPE_IPV6) {
+        return -ENOENT;
+    }
+
+    struct tcphdr* th = hdr->data;
+    th->th_sum = 0;
+
+    const gnrc_pktsnip_t* snips[3];
+    snips[0] = hdr;
+    snips[1] = (hdr == NULL) ?
NULL : hdr->next; + snips[2] = NULL; + + uint32_t csum = get_tcp_checksum(pseudo_hdr, snips); + th->th_sum = csum; + + return 0; +} + +static uint16_t _calc_checksum(struct in6_addr* src, struct in6_addr* dest, + uint32_t ip6hdr_len, const gnrc_pktsnip_t** snips) { + uint32_t total; + uint16_t* current; + uint16_t* end; + uint32_t currlen; + int starthalf; // 1 if the end of the last iovec was not half-word aligned + struct { + struct in6_addr srcaddr; + struct in6_addr destaddr; + uint32_t tcplen; + uint8_t reserved0; + uint8_t reserved1; + uint8_t reserved2; + uint8_t protocol; + } __attribute__((packed, aligned)) pseudoheader; + memcpy(&pseudoheader.srcaddr, src, sizeof(struct in6_addr)); + memcpy(&pseudoheader.destaddr, dest, sizeof(struct in6_addr)); + pseudoheader.reserved0 = 0; + pseudoheader.reserved1 = 0; + pseudoheader.reserved2 = 0; + pseudoheader.protocol = 6; // TCP + pseudoheader.tcplen = (uint32_t) htonl(ip6hdr_len); + + total = 0; + for (current = (uint16_t*) &pseudoheader; + current < (uint16_t*) (&pseudoheader + 1); current++) { + total += (uint32_t) *current; + } + + starthalf = 0; + for (; *snips != NULL; snips++) { + current = (uint16_t*) (*snips)->data; + currlen = (uint32_t) (*snips)->size; + if (starthalf && currlen > 0) { + total += ((uint32_t) *((uint8_t*) current)) << 8; + current = (uint16_t*) (((uint8_t*) current) + 1); + currlen -= 1; + } + if (currlen & 0x1u) { + // This iovec does not end on a half-word boundary + end = (uint16_t*) (((uint8_t*) current) + currlen - 1); + total += *((uint8_t*) end); + starthalf = 1; + } else { + // This iovec ends on a half-word boundary + end = (uint16_t*) (((uint8_t*) current) + currlen); + starthalf = 0; + } + while (current != end) { + // read the memory byte by byte, in case iovec isn't word-aligned + total += deref_safe(current++); + } + } + + while (total >> 16) { + total = (total & 0xFFFF) + (total >> 16); + } + + return ~((uint16_t) total); +} + +uint16_t get_tcp_checksum(const gnrc_pktsnip_t *ip6snip, const gnrc_pktsnip_t** snips) +{ + struct ip6_hdr* ip6 = ip6snip->data; + return _calc_checksum(&ip6->ip6_src, &ip6->ip6_dst, + (uint32_t) htons(ip6->ip6_plen), snips); +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/gnrc_tcp_freebsd.c b/sys/net/gnrc/transport_layer/tcp_freebsd/gnrc_tcp_freebsd.c new file mode 100644 index 000000000000..2511ea47c94a --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/gnrc_tcp_freebsd.c @@ -0,0 +1,653 @@ +/* + * Copyright (C) 2016 University of California, Berkeley + * + * This file is subject to the terms and conditions of the GNU Lesser + * General Public License v2.1. See the file LICENSE in the top level + * directory for more details. + */ + +/** + * @ingroup net_gnrc_tcp_freebsd + * @{ + * + * @file + * @brief TCP interface to the GNRC + * + * @author Sam Kumar + * + * Based partially on sys/net/gnrc/transport_layer/udp/gnrc_udp.c. 
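+ *
+ * The module runs two threads: a packet loop that registers for
+ * GNRC_NETTYPE_TCP and feeds received segments to tcp_input(), and a
+ * task_sched thread that fires the four per-socket timers (delayed ACK,
+ * retransmit/persist, keepalive, 2MSL). All protocol state is guarded by
+ * a single mutex, tcp_lock.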
+ * @} + */ + +#include +#include +#include +#include + +#include "gnrc_sock_internal.h" +#include "gnrc_tcp_freebsd_internal.h" + +#include "msg.h" +#include "thread.h" +#include "net/gnrc/pkt.h" +#include "net/gnrc/tcp_freebsd.h" + +#include "bsdtcp/tcp.h" +#include "bsdtcp/tcp_fsm.h" +#include "bsdtcp/tcp_var.h" + +#include "task_sched.h" +#include "xtimer.h" + +#include "mutex.h" + +#define ENABLE_DEBUG (0) +#include "debug.h" + +#define SUCCESS 0 + /** + * @brief Save the TCP thread IDs for later reference (just like the UDP + * implementation) + */ +static kernel_pid_t _packet_pid = KERNEL_PID_UNDEF; +static kernel_pid_t _timer_pid = KERNEL_PID_UNDEF; + +static mutex_t tcp_lock = MUTEX_INIT; + +/** + * @brief Statically allocated pools of active and passive TCP sockets + */ +struct tcpcb tcbs[GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS]; +struct tcpcb_listen tcbls[GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS]; + +/** + * @brief Timers used for TCP. Each active socket requires four timers. + */ +static struct task_sched tcp_timer_sched; +struct task tcp_timers[GNRC_TCP_FREEBSD_NUM_TIMERS]; + +/** + * @brief Allocate memory for the TCP thread's stack + */ +#if ENABLE_DEBUG +static char _packet_stack[GNRC_TCP_FREEBSD_STACK_SIZE + THREAD_EXTRA_STACKSIZE_PRINTF]; +static char _timer_stack[GNRC_TCP_FREEBSD_STACK_SIZE + THREAD_EXTRA_STACKSIZE_PRINTF]; +#else +static char _packet_stack[GNRC_TCP_FREEBSD_STACK_SIZE]; +static char _timer_stack[GNRC_TCP_FREEBSD_STACK_SIZE]; +#endif + +static void _handle_timer(int timer_id) +{ + struct tcpcb* tp; + DEBUG("Timer %d fired!\n", timer_id); + assert((timer_id >> 2) < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS); + + tp = &tcbs[timer_id >> 2]; + timer_id &= 0x3; + + mutex_lock(&tcp_lock); + + switch (timer_id) { + case TOS_DELACK: + DEBUG("Delayed ACK\n"); + tcp_timer_delack(tp); + break; + case TOS_REXMT: // Also include persist case + if (tcp_timer_active(tp, TT_REXMT)) { + DEBUG("Retransmit\n"); + tcp_timer_rexmt(tp); + } else { + DEBUG("Persist\n"); + tcp_timer_persist(tp); + } + break; + case TOS_KEEP: + DEBUG("Keep\n"); + tcp_timer_keep(tp); + break; + case TOS_2MSL: + DEBUG("2MSL\n"); + tcp_timer_2msl(tp); + break; + } + + mutex_unlock(&tcp_lock); +} + +/** + * @brief Passes signals to the user of this module. + */ +void handle_signals(struct tcpcb* tp, uint8_t signals, uint32_t freedentries) +{ + struct sockaddr_in6 addrport; + + if (signals & SIG_CONN_ESTABLISHED && !tpispassiveopen(tp)) { + addrport.sin6_port = tp->fport; + memcpy(&addrport.sin6_addr, &tp->faddr, sizeof(addrport.sin6_addr)); + + event_connectDone((uint8_t) tp->index, &addrport); + } + + if (signals & SIG_RECVBUF_NOTEMPTY) { + event_receiveReady((uint8_t) tp->index, 0); + } + + if (signals & SIG_RCVD_FIN) { + event_receiveReady((uint8_t) tp->index, 1); + } + + if (freedentries > 0) { + event_sendDone((uint8_t) tp->index, freedentries); + } +} + +void _fill_acceptArgs_from_tcpcb(acceptArgs_t* args, struct tcpcb* tcb) { + args->asockid = tcb->index; + args->recvbuf = tcb->recvbuf.buf; + args->recvbuflen = tcb->recvbuf.size; + args->reassbmp = tcb->reassbmp; +} + +/** + * Called when an active socket loses a connection. + */ +void connection_lost(struct tcpcb* tcb, uint8_t errnum) +{ + acceptArgs_t args; + _fill_acceptArgs_from_tcpcb(&args, tcb); + mutex_unlock(&tcp_lock); + event_connectionLost(&args, errnum); + mutex_lock(&tcp_lock); +} + +/** + * Called when a passive socket is about to accept a connection, + * and needs an active socket to accept into. 
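+ * Returns NULL if the callback reports no free active socket (an asockid
+ * of -1); otherwise the chosen TCB is reinitialized with the buffers
+ * supplied by event_acceptReady() before being returned.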
+ */
+struct tcpcb* accept_ready(struct tcpcb_listen* tpl)
+{
+    acceptArgs_t args;
+    mutex_unlock(&tcp_lock);
+    args = event_acceptReady((uint8_t) tpl->index);
+    mutex_lock(&tcp_lock);
+    if (args.asockid == -1) {
+        return NULL;
+    }
+    assert(args.asockid >= 0 && args.asockid < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS);
+    struct tcpcb* asock = &tcbs[args.asockid];
+    initialize_tcb(asock, asock->lport, args.recvbuf, args.recvbuflen, args.reassbmp);
+    return asock;
+}
+
+/**
+ * Called when a passive socket accepts a connection.
+ */
+bool accepted_connection(struct tcpcb_listen* tpl, struct tcpcb* accepted, struct in6_addr* addr, uint16_t port)
+{
+    bool accepted_successfully;
+    struct sockaddr_in6 addrport;
+    acceptArgs_t acceptedArgs;
+    _fill_acceptArgs_from_tcpcb(&acceptedArgs, accepted);
+    mutex_unlock(&tcp_lock);
+    addrport.sin6_port = port;
+    memcpy(&addrport.sin6_addr, addr, sizeof(struct in6_addr));
+    accepted_successfully = event_acceptDone((uint8_t) tpl->index, &addrport, &acceptedArgs);
+    mutex_lock(&tcp_lock);
+
+    return accepted_successfully;
+}
+
+/**
+ * @brief Called when a TCP segment is received and passed up from the IPv6
+ * layer.
+ */
+static void _receive(gnrc_pktsnip_t* pkt)
+{
+    gnrc_pktsnip_t* tcp;
+    gnrc_pktsnip_t* ipv6;
+    struct tcphdr* th;
+    struct tcpcb* tcb;
+    struct tcpcb_listen* tcbl;
+    struct ip6_hdr* iph;
+
+    int i;
+    uint16_t sport;
+    uint16_t dport;
+    uint16_t packet_len;
+
+    uint16_t empirical_len;
+    gnrc_pktsnip_t* temp;
+
+    /* Bitmask of signals that need to be sent to the user of this module. */
+    uint8_t signals = 0;
+
+    /* Number of lbuf entries that the user of this module can free. */
+    uint32_t freedentries = 0;
+
+    tcp = gnrc_pktbuf_start_write(pkt);
+    if (tcp == NULL) {
+        DEBUG("tcp_freebsd: unable to get write access to packet\n");
+        goto done;
+    }
+    pkt = tcp;
+
+    ipv6 = gnrc_pktsnip_search_type(pkt, GNRC_NETTYPE_IPV6);
+    assert(ipv6 != NULL);
+
+    iph = (struct ip6_hdr*) ipv6->data;
+
+    /* I'm actually not going to mark the TCP section. The tcp_input function
+     * is written to consider both the TCP section and the payload together,
+     * and specifically marking it would just take up extra memory for the
+     * new pktsnip.
+     */
+#if 0
+    if ((pkt->next != NULL) && (pkt->next->type == GNRC_NETTYPE_TCP)) {
+        /* Someone already marked the TCP header, so we can just use it. */
+        tcp = pkt->next;
+    } else if (pkt->size >= sizeof(struct tcphdr)) {
+        /* The TCP header may include options, and therefore may have variable
+         * length. So we need to actually parse it first, in order to correctly
+         * mark it...
+         */
+        th = (struct tcphdr*) pkt->data;
+        if (th->th_off < 5 || th->th_off > 15) {
+            goto error;
+        }
+
+        /* This is the size of the TCP header, in bytes. */
+        size_t hdrlen = ((size_t) th->th_off) << 2;
+
+        tcp = gnrc_pktbuf_mark(pkt, hdrlen, GNRC_NETTYPE_TCP);
+        if (tcp == NULL) {
+            DEBUG("tcp_freebsd: error marking TCP header, dropping packet\n");
+            goto error;
+        }
+    } else {
+        goto error;
+    }
+
+    /* Mark payload as type UNDEF. */
+    pkt->type = GNRC_NETTYPE_UNDEF;
+#endif
+
+    /*
+     * If someone is splitting this for us, that's a problem, since the TCP
+     * header and the payload need to be contiguous, in one big snip. So I'm
+     * asserting that the topmost identified layer in the packet is indeed
+     * labelled as a TCP header.
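+     * The length check below therefore sums the sizes of every snip between
+     * the TCP snip and the IPv6 header, and the checksum is computed over
+     * the TCP snip as a single contiguous region.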
+ */ + assert(tcp->type == GNRC_NETTYPE_TCP); + + th = (struct tcphdr*) tcp->data; + + packet_len = htons(iph->ip6_plen); + empirical_len = 0; + for (temp = tcp; temp != ipv6; temp = temp->next) { + DEBUG("Size is %" PRIu16 "\n", temp->size); + empirical_len += temp->size; + } + + if (packet_len != empirical_len) { + DEBUG("Sizes don't add up: packet length is %" PRIu16 ", but got %" PRIu16 "\n", packet_len, empirical_len); + goto done; + } + if (th->th_off < 5 || th->th_off > 15 || (((size_t) th->th_off) << 2) > tcp->size) { + DEBUG("Too many options: header claims %" PRIu8 " words (pktsnip has %u bytes)\n", th->th_off, (unsigned int) tcp->size); + } + + const gnrc_pktsnip_t* snips[2] = { tcp, NULL }; + uint16_t csum = get_tcp_checksum(ipv6, snips); + if (csum != 0) { + DEBUG("Dropping packet: bad checksum (%" PRIu16 ")\n", csum); + goto done; + } + + sport = th->th_sport; // network byte order + dport = th->th_dport; // network byte order + tcp_fields_to_host(th); + + /* Actually do the work. */ + for (i = 0; i < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS; i++) { + tcb = &tcbs[i]; + if (tcb->t_state != TCP6S_CLOSED && dport == tcb->lport + && sport == tcb->fport + && !memcmp(&iph->ip6_src, &tcb->faddr, sizeof(iph->ip6_src))) { + int rv; + DEBUG("Matches active socket %d\n", i); + mutex_lock(&tcp_lock); + rv = tcp_input(iph, th, &tcbs[i], NULL, &signals, &freedentries); + mutex_unlock(&tcp_lock); + if (RELOOKUP_REQUIRED == rv) { + break; + } else { + handle_signals(&tcbs[i], signals, freedentries); + } + goto done; + } + } + + for (i = 0; i < GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS; i++) { + tcbl = &tcbls[i]; + if (tcbl->t_state == TCP6S_LISTEN && dport == tcbl->lport) { + DEBUG("Matches passive socket %d\n", i); + mutex_lock(&tcp_lock); + tcp_input(iph, th, NULL, &tcbls[i], NULL, NULL); + mutex_unlock(&tcp_lock); + goto done; + } + } + + DEBUG("Does not match any socket\n"); + tcp_dropwithreset(iph, th, NULL, tcp->size - (th->th_off << 2), ECONNREFUSED); + +done: + gnrc_pktbuf_release(pkt); + return; +} + +/** + * @brief Event loop for received TCP segments. + */ +static void* _packet_loop(void* arg) +{ + (void) arg; + msg_t msg; + msg_t setget_reply; + msg_t msg_queue[GNRC_TCP_FREEBSD_MSG_QUEUE_SIZE]; + + /* _packet_pid may not be assigned, if the scheduler switches to this thread + * before thread_create returns, or after thread_create returns but before + * the return value is stored. + */ + _packet_pid = thread_getpid(); + + gnrc_netreg_entry_t netreg = GNRC_NETREG_ENTRY_INIT_PID(GNRC_NETREG_DEMUX_CTX_ALL, + _packet_pid); + + setget_reply.type = GNRC_NETAPI_MSG_TYPE_ACK; + setget_reply.content.value = (uint32_t) -ENOTSUP; + + msg_init_queue(msg_queue, GNRC_TCP_FREEBSD_MSG_QUEUE_SIZE); + + if (gnrc_netreg_register(GNRC_NETTYPE_TCP, &netreg)) { + DEBUG("Error listening for packets\n"); + } + + for (;;) { + msg_receive(&msg); + switch (msg.type) { + case GNRC_NETAPI_MSG_TYPE_RCV: + DEBUG("tcp_freebsd: got RCV message: %p\n", msg.content.ptr); + _receive(msg.content.ptr); + break; + case GNRC_NETAPI_MSG_TYPE_SND: + /* Not sure what kind of protocol is going to pass a packet + * down to TCP, since the whole point of TCP is that protocols + * on top of it deal with _streams_ rather than _packets_. 
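+ * In any case, such a message is currently just logged and otherwise
+ * ignored.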
+ */ + DEBUG("tcp_freebsd: got SND message: %p\n", msg.content.ptr); + break; + case GNRC_NETAPI_MSG_TYPE_SET: + case GNRC_NETAPI_MSG_TYPE_GET: + msg_reply(&msg, &setget_reply); + case GNRC_NETAPI_MSG_TYPE_ACK: + DEBUG("tcp_freebsd: received SET, GET, or ACK\n"); + break; + default: + DEBUG("tcp_freebsd: received unidentified message\n"); + break; + } + } + + /* not reached */ + return NULL; +} + +int gnrc_tcp_freebsd_init(void) +{ + int i; + + gnrc_tcp_freebsd_allocator_init(); + + if (_packet_pid == KERNEL_PID_UNDEF) { + _packet_pid = thread_create(_packet_stack, sizeof(_packet_stack), + GNRC_TCP_FREEBSD_PRIO, THREAD_CREATE_STACKTEST, + _packet_loop, NULL, "tcp_freebsd"); + tcp_timer_sched.coalesce_shift = 64; + tcp_timer_sched.max_coalesce_time_delta = 0; // no coalescence for now + tcp_timer_sched.tasks = tcp_timers; + tcp_timer_sched.num_tasks = GNRC_TCP_FREEBSD_NUM_TIMERS; + tcp_timer_sched.thread_stack = _timer_stack; + tcp_timer_sched.thread_stack_size = sizeof(_timer_stack); + tcp_timer_sched.thread_priority = GNRC_TCP_FREEBSD_PRIO; + tcp_timer_sched.thread_name = "tcp_freebsd timers"; + tcp_timer_sched.task_handler = _handle_timer; + _timer_pid = start_task_sched(&tcp_timer_sched); + + /* Additional initialization work for TCP. */ + tcp_init(); + for (i = 0; i < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS; i++) { + tcbs[i].index = i; + initialize_tcb(&tcbs[i], 0, NULL, 0, NULL); + } + for (i = 0; i < GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS; i++) { + tcbls[i].t_state = TCPS_CLOSED; + tcbls[i].index = i; + tcbls[i].lport = 0; + } + } + return _packet_pid; +} + +/* A helper function. PORT is in network byte order. */ +bool gnrc_tcp_freebsd_portisfree(uint16_t port) +{ + int i; + for (i = 0; i < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS; i++) { + if (tcbs[i].lport == port) { + return false; + } + } + for (i = 0; i < GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS; i++) { + if (tcbls[i].lport == port) { + return false; + } + } + return true; +} + +/* The external API. 
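+ *
+ * psock_* calls operate on passive (listening) sockets and asock_* calls
+ * on active ones; the bind calls take ports in host byte order and convert
+ * them internally. A minimal sketch of an active open, assuming a
+ * caller-supplied addr, recvbuf, reassbmp, lbuf entry, and status variable
+ * (error handling elided):
+ *
+ *   asock_bind_impl(asockid, 0);  // 0 requests no particular local port
+ *   asock_connect_impl(asockid, &addr, recvbuf, sizeof(recvbuf), reassbmp);
+ *   asock_send_impl(asockid, &entry, 0, &status);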
*/ + +int psock_getID_impl(int psockid) +{ + return tcbls[psockid].index; +} + +int asock_getID_impl(int asockid) +{ + return tcbs[asockid].index; +} + +int asock_getState_impl(int asockid) +{ + return tcbs[asockid].t_state; +} + +void asock_getPeerInfo_impl(int asockid, struct in6_addr** addr, uint16_t** port) +{ + mutex_lock(&tcp_lock); + *addr = &tcbs[asockid].faddr; + *port = &tcbs[asockid].fport; + mutex_unlock(&tcp_lock); +} + +error_t asock_bind_impl(int asockid, uint16_t port) +{ + error_t rv; + uint16_t oldport; + mutex_lock(&tcp_lock); + oldport = tcbs[asockid].lport; + port = htons(port); + tcbs[asockid].lport = 0; + if (port == 0 || gnrc_tcp_freebsd_portisfree(port)) { + tcbs[asockid].lport = port; + rv = SUCCESS; + goto done; + } + tcbs[asockid].lport = oldport; + rv = EADDRINUSE; +done: + mutex_unlock(&tcp_lock); + return rv; +} + +error_t psock_bind_impl(int psockid, uint16_t port) +{ + error_t rv; + uint16_t oldport; + mutex_lock(&tcp_lock); + oldport = tcbls[psockid].lport; + port = htons(port); + tcbls[psockid].lport = 0; + if (port == 0 || gnrc_tcp_freebsd_portisfree(port)) { + tcbls[psockid].lport = port; + rv = SUCCESS; + goto done; + } + tcbls[psockid].lport = oldport; + rv = EADDRINUSE; +done: + mutex_unlock(&tcp_lock); + return rv; +} + +error_t psock_listen_impl(int psockid) +{ + mutex_lock(&tcp_lock); + tcbls[psockid].t_state = TCPS_LISTEN; + mutex_unlock(&tcp_lock); + return SUCCESS; +} + +error_t asock_connect_impl(int asockid, struct sockaddr_in6* addr, uint8_t* recvbuf, size_t recvbuflen, uint8_t* reassbmp) +{ + error_t rv; + struct tcpcb* tp = &tcbs[asockid]; + mutex_lock(&tcp_lock); + if (tp->t_state != TCPS_CLOSED) { // This is a check that I added + rv = EISCONN; + goto done; + } + initialize_tcb(tp, tp->lport, recvbuf, recvbuflen, reassbmp); + rv = (error_t) tcp6_usr_connect(tp, addr); + +done: + mutex_unlock(&tcp_lock); + return rv; +} + +error_t asock_send_impl(int asockid, struct lbufent* data, int moretocome, int* status) +{ + error_t rv; + struct tcpcb* tp = &tcbs[asockid]; + mutex_lock(&tcp_lock); + rv = (error_t) tcp_usr_send(tp, moretocome, data, status); + mutex_unlock(&tcp_lock); + return rv; +} + +error_t asock_receive_impl(int asockid, uint8_t* buffer, uint32_t len, size_t* bytessent) +{ + error_t rv; + struct tcpcb* tp = &tcbs[asockid]; + mutex_lock(&tcp_lock); + *bytessent = cbuf_read(&tp->recvbuf, buffer, len, 1); + rv = (error_t) tcp_usr_rcvd(tp); + mutex_unlock(&tcp_lock); + return rv; +} + +error_t asock_shutdown_impl(int asockid, bool shut_rd, bool shut_wr) +{ + int error = SUCCESS; + mutex_lock(&tcp_lock); + if (shut_rd) { + cbuf_pop(&tcbs[asockid].recvbuf, cbuf_used_space(&tcbs[asockid].recvbuf)); // remove all data from the cbuf + // TODO We need to deal with bytes received out-of-order + // Our strategy is to "pretend" that we got those extra bytes and ACK them. + tpcantrcvmore(&tcbs[asockid]); + } + if (shut_wr) { + error = tcp_usr_shutdown(&tcbs[asockid]); + } + mutex_unlock(&tcp_lock); + return error; +} + +error_t psock_close_impl(int psockid) +{ + mutex_lock(&tcp_lock); + tcbls[psockid].t_state = TCP6S_CLOSED; + mutex_unlock(&tcp_lock); + return SUCCESS; +} + +error_t asock_abort_impl(int asockid) +{ + mutex_lock(&tcp_lock); + tcp_usr_abort(&tcbs[asockid]); + mutex_unlock(&tcp_lock); + return SUCCESS; +} + +/* The internal API. */ + +void send_message(gnrc_pktsnip_t* pkt) +{ + DEBUG("Sending TCP message: %d, payload_size = %d\n", pkt->type, pkt->next->next == NULL ? 
0 : pkt->next->next->size); + if (!gnrc_netapi_dispatch_send(pkt->type, GNRC_NETREG_DEMUX_CTX_ALL, pkt)) { + DEBUG("tcp: cannot send packet: network layer not found\n"); + gnrc_pktbuf_release(pkt); + } +} + +uint32_t get_millis(void) +{ + uint64_t micros = xtimer_now_usec64(); + return micros / 1000; +} + +uint32_t get_ticks(void) +{ + return get_millis(); +} + + +/* + * The lock ordering for the timing code is that the TCP lock is always + * acquired first, and then the timer lock. + */ +void set_timer(struct tcpcb* tcb, uint8_t timer_id, uint32_t delay) +{ + int task_id = (((int) tcb->index) << 2) | (int) timer_id; + int64_t delay_micros = MICROS_PER_TICK * (int64_t) delay; + + DEBUG("Setting timer %d: %d\n", task_id, (int) (delay_micros / 1000)); + + if (sched_task(&tcp_timer_sched, task_id, delay_micros) != 0) { + DEBUG("sched_task failed!\n"); + } +} + +void stop_timer(struct tcpcb* tcb, uint8_t timer_id) +{ + int task_id = (((int) tcb->index) << 2) | (int) timer_id; + + DEBUG("Stopping timer %d\n", task_id); + + if (cancel_task(&tcp_timer_sched, task_id) != 0) { + DEBUG("cancel_task failed!\n"); + } +} diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/gnrc_tcp_freebsd_internal.h b/sys/net/gnrc/transport_layer/tcp_freebsd/gnrc_tcp_freebsd_internal.h new file mode 100644 index 000000000000..97341325fb86 --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/gnrc_tcp_freebsd_internal.h @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2016 University of California, Berkeley + * + * This file is subject to the terms and conditions of the GNU Lesser + * General Public License v2.1. See the file LICENSE in the top level + * directory for more details. + */ + +/** + * @ingroup net_gnrc_tcp_freebsd + * @{ + * + * @file + * @brief Internal API to the TCP frontend for GNRC + * + * @author Sam Kumar + * + * This file describes the API that the TCP frontend presents to the TCP + * protocol logic. The protocol logic interacts with other parts of the + * kernel (GNRC, xtimer, etc.) via this API. It also describes the API that + * the TCP frontend presents to the interface to the GNRC. + * @} + */ + +#ifndef GNRC_TCP_FREEBSD_INTERNAL_H_ +#define GNRC_TCP_FREEBSD_INTERNAL_H_ + +#include +#include +#include "bsdtcp/ip6.h" +#include "bsdtcp/tcp.h" +#include "bsdtcp/tcp_fsm.h" +#include "bsdtcp/tcp_timer.h" +#include "bsdtcp/tcp_var.h" +#include "net/gnrc/pkt.h" +#include "net/tcp_freebsd.h" + +#define GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS 1 +#define GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS 1 + +#define TIMERS_PER_ACTIVE_SOCKET 4 + +/* Possible return value from tcp_input. */ +#define RELOOKUP_REQUIRED -1 + +#define IANA_TCP PROTNUM_TCP + +#define hz 1000 // number of ticks per second +#define MICROS_PER_TICK 1000 // number of microseconds per tick + +#define FRAMES_PER_SEG 5 +#define FRAMECAP_6LOWPAN (124 - 23 - 5) // Fragmentation limit: maximum frame size of the IP and TCP headers + +#ifdef MODULE_GNRC_SIXLOWPAN_IPHC +#define IP6HDR_SIZE (2 + 1 + 1 + 16 + 8) // IPHC header (2) + Next header (1) + Hop count (1) + Dest. addr (16) + Src. 
addr (8) +#else +#define IP6HDR_SIZE 40 +#endif + +#define SIG_CONN_ESTABLISHED 0x01 +#define SIG_RECVBUF_NOTEMPTY 0x02 +#define SIG_RCVD_FIN 0x04 + +#define GNRC_TCP_FREEBSD_NUM_TIMERS (GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS * TIMERS_PER_ACTIVE_SOCKET) + +#define CONN_LOST_NORMAL 0 // errno of 0 means that the connection closed gracefully + +struct ip6_packet { + // Dummy for now + struct ip6_hdr ip6_hdr; + struct ip_iovec* ip6_data; +}; + +/* + * Functions that the TCP protocol logic can call to interact with the rest of + * the kernel. + */ +void send_message(gnrc_pktsnip_t* pkt); +uint32_t get_ticks(void); +uint32_t get_millis(void); +void set_timer(struct tcpcb* tcb, uint8_t timer_id, uint32_t delay); +void stop_timer(struct tcpcb* tcb, uint8_t timer_id); +struct tcpcb* accept_ready(struct tcpcb_listen* tpl); +bool accepted_connection(struct tcpcb_listen* tpl, struct tcpcb* accepted, struct in6_addr* addr, uint16_t port); +void connection_lost(struct tcpcb* tcb, uint8_t errnum); +uint16_t get_tcp_checksum(const gnrc_pktsnip_t *ip6snip, const gnrc_pktsnip_t** snips); + +/* + * Functions that the TCP API code can call to interact with the rest of the + * TCP stack. + */ +int psock_getID_impl(int psockid); +int asock_getID_impl(int asockid); +int asock_getState_impl(int asockid); +void asock_getPeerInfo_impl(int asockid, struct in6_addr** addr, uint16_t** port); +error_t asock_bind_impl(int asockid, uint16_t port); +error_t psock_bind_impl(int psockid, uint16_t port); +error_t psock_listen_impl(int psockid); +error_t asock_connect_impl(int asockid, struct sockaddr_in6* addr, uint8_t* recvbuf, size_t recvbuflen, uint8_t* reassbmp); +error_t asock_send_impl(int asockid, struct lbufent* data, int moretocome, int* status); +error_t asock_receive_impl(int asockid, uint8_t* buffer, uint32_t len, size_t* bytessent); +error_t asock_shutdown_impl(int asockid, bool shut_rd, bool shut_wr); +error_t psock_close_impl(int psockid); +error_t asock_abort_impl(int asockid); + +/* + * Functions that allow the TCP protocol logic to inform the user of TCP-related + * events. 
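+ * These event_* callbacks are implemented by the user of this module
+ * (e.g., the sock layer); the protocol side drops tcp_lock before invoking
+ * any of them, since a callback may call back into this module.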
+ */ +void gnrc_tcp_freebsd_allocator_init(void); +acceptArgs_t event_acceptReady(uint8_t pi); +bool event_acceptDone(uint8_t pi, struct sockaddr_in6* addr, acceptArgs_t* accepted); +void event_connectDone(uint8_t ai, struct sockaddr_in6* addr); +void event_receiveReady(uint8_t ai, int gotfin); +void event_sendDone(uint8_t ai, uint32_t numentries); +void event_connectionLost(acceptArgs_t* lost, uint8_t how); + +#endif // GNRC_TCP_FREEBSD_INTERNAL_H_ diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/lib/Makefile b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/Makefile new file mode 100644 index 000000000000..66c25b12e38a --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/Makefile @@ -0,0 +1,3 @@ +MODULE = gnrc_tcp_freebsd_internal + +include $(RIOTBASE)/Makefile.base diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/lib/bitmap.c b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/bitmap.c new file mode 100644 index 000000000000..f62b98079dfb --- /dev/null +++ b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/bitmap.c @@ -0,0 +1,131 @@ +/* BITMAP */ + +#include +#include +#include +#include + +#include "bitmap.h" + +void bmp_init(uint8_t* buf, size_t numbytes) { + memset(buf, 0x00, numbytes); +} + +#define _bmp_getrangeinfo(buf, start, len, first_bit_id, first_byte_ptr, last_bit_id, last_byte_ptr) \ + first_bit_id = (start & 0x7); \ + first_byte_ptr = buf + (start >> 3); \ + last_bit_id = (len & 0x7) + first_bit_id; \ + last_byte_ptr = first_byte_ptr + (len >> 3) + (last_bit_id >> 3); \ + last_bit_id &= 0x7; + +/* Sets the specified range of bits. START is the index + of the first bit to be set. LEN is the number of bits + to be set. */ +void bmp_setrange(uint8_t* buf, size_t start, size_t len) { + uint8_t first_bit_id; + uint8_t* first_byte_set; + uint8_t last_bit_id; + uint8_t* last_byte_set; + uint8_t first_byte_mask, last_byte_mask; + _bmp_getrangeinfo(buf, start, len, first_bit_id, first_byte_set, + last_bit_id, last_byte_set) + + first_byte_mask = (uint8_t) (0xFF >> first_bit_id); + last_byte_mask = (uint8_t) (0xFF << (8 - last_bit_id)); + + /* Set the bits. */ + if (first_byte_set == last_byte_set) { + *first_byte_set |= (first_byte_mask & last_byte_mask); + } else { + *first_byte_set |= first_byte_mask; + memset(first_byte_set + 1, 0xFF, last_byte_set - first_byte_set - 1); + *last_byte_set |= last_byte_mask; + } +} + +/* Clears the specified range of bits. START is the index + of the first bit to be cleared. LEN is the number of bits + to be cleared. */ +void bmp_clrrange(uint8_t* buf, size_t start, size_t len) { + uint8_t first_bit_id; + uint8_t* first_byte_clear; + uint8_t last_bit_id; + uint8_t* last_byte_clear; + uint8_t first_byte_mask, last_byte_mask; + _bmp_getrangeinfo(buf, start, len, first_bit_id, first_byte_clear, + last_bit_id, last_byte_clear) + + first_byte_mask = (uint8_t) (0xFF << (8 - first_bit_id)); + last_byte_mask = (uint8_t) (0xFF >> last_bit_id); + + /* Clear the bits. */ + if (first_byte_clear == last_byte_clear) { + *first_byte_clear &= (first_byte_mask | last_byte_mask); + } else { + *first_byte_clear &= first_byte_mask; + memset(first_byte_clear + 1, 0x00, last_byte_clear - first_byte_clear - 1); + *last_byte_clear &= last_byte_mask; + } +} + +/* Counts the number of set bits in BUF starting at START. BUF has length + BUFLEN, in bytes. 
+   Counts the number of set bits until it either (1) finds a bit that isn't
+   set, in which case it returns the number of set bits, (2) it has counted
+   at least LIMIT bits, in which case it returns a number greater than or
+   equal to LIMIT, or (3) reaches the end of the buffer, in which case it
+   returns exactly the number of set bits it found. */
+size_t bmp_countset(uint8_t* buf, size_t buflen, size_t start, size_t limit) {
+    uint8_t first_bit_id;
+    uint8_t first_byte;
+    uint8_t ideal_first_byte;
+    size_t numset;
+    uint8_t curr_byte;
+    size_t curr_index = start >> 3;
+    first_bit_id = start & 0x7;
+    first_byte = *(buf + curr_index);
+
+    numset = 8 - first_bit_id; // initialize optimistically, assuming that the first byte will have all 1's in the part we care about
+    ideal_first_byte = (uint8_t) (0xFF >> first_bit_id);
+    first_byte &= ideal_first_byte;
+    if (first_byte == ideal_first_byte) {
+        // All bits in the first byte starting at first_bit_id are set
+        for (curr_index = curr_index + 1; curr_index < buflen && numset < limit; curr_index++) {
+            curr_byte = buf[curr_index];
+            if (curr_byte == (uint8_t) 0xFF) {
+                numset += 8;
+            } else {
+                while (curr_byte & (uint8_t) 0x80) { // we could add a numset < limit check here, but it probably isn't worth it
+                    curr_byte <<= 1;
+                    numset++;
+                }
+                break;
+            }
+        }
+    } else {
+        // The streak ends within the first byte
+        do {
+            first_byte >>= 1;
+            ideal_first_byte >>= 1;
+            numset--;
+        } while (first_byte != ideal_first_byte);
+    }
+    return numset;
+}
+
+int bmp_isempty(uint8_t* buf, size_t buflen) {
+    uint8_t* bufend = buf + buflen;
+    while (buf < bufend) {
+        if (*(buf++)) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+void bmp_print(uint8_t* buf, size_t buflen) {
+    size_t i;
+    for (i = 0; i < buflen; i++) {
+        printf("%02X", buf[i]);
+    }
+    printf("\n");
+}
diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/lib/bitmap.h b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/bitmap.h
new file mode 100644
index 000000000000..7126db132932
--- /dev/null
+++ b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/bitmap.h
@@ -0,0 +1,16 @@
+#ifndef BITMAP_H_
+#define BITMAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define BITS_TO_BYTES(bits) (((bits) >> 3) + (((bits) & 0x7) ? 1 : 0))
+
+void bmp_init(uint8_t* buf, size_t numbytes);
+void bmp_setrange(uint8_t* buf, size_t start, size_t len);
+void bmp_clrrange(uint8_t* buf, size_t start, size_t len);
+size_t bmp_countset(uint8_t* buf, size_t buflen, size_t start, size_t limit);
+int bmp_isempty(uint8_t* buf, size_t buflen);
+void bmp_print(uint8_t* buf, size_t buflen);
+
+#endif
diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/lib/cbuf.c b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/cbuf.c
new file mode 100644
index 000000000000..c1c666624760
--- /dev/null
+++ b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/cbuf.c
@@ -0,0 +1,234 @@
+/* CIRCULAR BUFFER */
+#include "cbuf.h"
+#include "bitmap.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+void cbuf_init(struct cbufhead* chdr, uint8_t* buf, size_t len) {
+    chdr->r_index = 0;
+    chdr->w_index = 0;
+    chdr->size = len;
+    chdr->buf = buf;
+}
+
+size_t cbuf_used_space(struct cbufhead* chdr) {
+    if (chdr->w_index >= chdr->r_index) {
+        return chdr->w_index - chdr->r_index;
+    } else {
+        return chdr->size + chdr->w_index - chdr->r_index;
+    }
+}
+
+/* There's always one byte of lost space so I can distinguish between a full
+   buffer and an empty buffer.
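+   For example, with size == 8 the buffer holds at most 7 bytes:
+   r_index == w_index means empty, and (w_index + 1) % size == r_index
+   means full.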
*/ +size_t cbuf_free_space(struct cbufhead* chdr) { + return chdr->size - 1 - cbuf_used_space(chdr); +} + +size_t cbuf_size(struct cbufhead* chdr) { + return chdr->size - 1; +} + +size_t cbuf_write(struct cbufhead* chdr, uint8_t* data, size_t data_len) { + size_t free_space = cbuf_free_space(chdr); + uint8_t* buf_data; + size_t fw_index; + size_t bytes_to_end; + if (free_space < data_len) { + data_len = free_space; + } + buf_data = chdr->buf; + fw_index = (chdr->w_index + data_len) % chdr->size; + if (fw_index >= chdr->w_index) { + memcpy(buf_data + chdr->w_index, data, data_len); + } else { + bytes_to_end = chdr->size - chdr->w_index; + memcpy(buf_data + chdr->w_index, data, bytes_to_end); + memcpy(buf_data, data + bytes_to_end, data_len - bytes_to_end); + } + chdr->w_index = fw_index; + return data_len; +} + +void cbuf_read_unsafe(struct cbufhead* chdr, uint8_t* data, size_t numbytes, int pop) { + uint8_t* buf_data = chdr->buf; + size_t fr_index = (chdr->r_index + numbytes) % chdr->size; + size_t bytes_to_end; + if (fr_index >= chdr->r_index) { + memcpy(data, buf_data + chdr->r_index, numbytes); + } else { + bytes_to_end = chdr->size - chdr->r_index; + memcpy(data, buf_data + chdr->r_index, bytes_to_end); + memcpy(data + bytes_to_end, buf_data, numbytes - bytes_to_end); + } + if (pop) { + chdr->r_index = fr_index; + } +} + +size_t cbuf_read(struct cbufhead* chdr, uint8_t* data, size_t numbytes, int pop) { + size_t used_space = cbuf_used_space(chdr); + if (used_space < numbytes) { + numbytes = used_space; + } + cbuf_read_unsafe(chdr, data, numbytes, pop); + return numbytes; +} + +size_t cbuf_read_offset(struct cbufhead* chdr, uint8_t* data, size_t numbytes, size_t offset) { + size_t used_space = cbuf_used_space(chdr); + size_t oldpos; + if (used_space <= offset) { + return 0; + } else if (used_space < offset + numbytes) { + numbytes = used_space - offset; + } + oldpos = chdr->r_index; + chdr->r_index = (chdr->r_index + offset) % chdr->size; + cbuf_read_unsafe(chdr, data, numbytes, 0); + chdr->r_index = oldpos; + return numbytes; +} + +size_t cbuf_pop(struct cbufhead* chdr, size_t numbytes) { + size_t used_space = cbuf_used_space(chdr); + if (used_space < numbytes) { + numbytes = used_space; + } + chdr->r_index = (chdr->r_index + numbytes) % chdr->size; + return numbytes; +} + +/* Writes DATA to the unused portion of the buffer, at the position OFFSET past + the end of the buffer. BITMAP is updated by setting bits according to which + bytes now contain data. + The index of the first byte written is stored into FIRSTINDEX, if it is not + NULL. 
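+   A sketch of the intended out-of-order reassembly flow (illustrative only;
+   "seg", "seglen", "seg_offset", and "needed" are hypothetical names):
+       size_t firstindex;
+       cbuf_reass_write(chdr, seg_offset, seg, seglen, bitmap, &firstindex);
+       if (cbuf_reass_count_set(chdr, 0, bitmap, needed) >= needed) {
+           cbuf_reass_merge(chdr, needed, bitmap); // bytes become readable
+       }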
*/ +size_t cbuf_reass_write(struct cbufhead* chdr, size_t offset, uint8_t* data, size_t numbytes, uint8_t* bitmap, size_t* firstindex) { + uint8_t* buf_data = chdr->buf; + size_t free_space = cbuf_free_space(chdr); + size_t start_index; + size_t end_index; + size_t bytes_to_end; + if (offset > free_space) { + return 0; + } else if (offset + numbytes > free_space) { + numbytes = free_space - offset; + } + start_index = (chdr->w_index + offset) % chdr->size; + end_index = (start_index + numbytes) % chdr->size; + if (end_index >= start_index) { + memcpy(buf_data + start_index, data, numbytes); + if (bitmap) { + bmp_setrange(bitmap, start_index, numbytes); + } + } else { + bytes_to_end = chdr->size - start_index; + memcpy(buf_data + start_index, data, bytes_to_end); + memcpy(buf_data, data + bytes_to_end, numbytes - bytes_to_end); + if (bitmap) { + bmp_setrange(bitmap, start_index, bytes_to_end); + bmp_setrange(bitmap, 0, numbytes - bytes_to_end); + } + } + if (firstindex) { + *firstindex = start_index; + } + return numbytes; +} + +/* Writes NUMBYTES bytes to the buffer. The bytes are taken from the unused + space of the buffer, and can be set using cbuf_reass_write. */ +size_t cbuf_reass_merge(struct cbufhead* chdr, size_t numbytes, uint8_t* bitmap) { + size_t old_w = chdr->w_index; + size_t free_space = cbuf_free_space(chdr); + size_t bytes_to_end; + if (numbytes > free_space) { + numbytes = free_space; + } + chdr->w_index = (chdr->w_index + numbytes) % chdr->size; + if (bitmap) { + if (chdr->w_index >= old_w) { + bmp_clrrange(bitmap, old_w, numbytes); + } else { + bytes_to_end = chdr->size - old_w; + bmp_clrrange(bitmap, old_w, bytes_to_end); + bmp_clrrange(bitmap, 0, numbytes - bytes_to_end); + } + } + return numbytes; +} + +size_t cbuf_reass_count_set(struct cbufhead* chdr, size_t offset, uint8_t* bitmap, size_t limit) { + size_t bitmap_size = BITS_TO_BYTES(chdr->size); + size_t until_end; + offset = (chdr->w_index + offset) % chdr->size; + until_end = bmp_countset(bitmap, bitmap_size, offset, limit); + if (until_end >= limit || until_end < (chdr->size - offset)) { + // If we already hit the limit, or if the streak ended before wrapping, then stop here + return until_end; + } + limit -= until_end; // effectively, this is our limit when continuing + // Continue until either the new limit or until we have scanned OFFSET bits (if we scan more than OFFSET bits, we'll wrap and scan some parts twice) + return until_end + bmp_countset(bitmap, bitmap_size, 0, limit < offset ? limit : offset); +} + +/* Returns a true value iff INDEX is the index of a byte within OFFSET bytes + past the end of the buffer. */ +int cbuf_reass_within_offset(struct cbufhead* chdr, size_t offset, size_t index) { + size_t range_start = chdr->w_index; + size_t range_end = (range_start + offset) % chdr->size; + if (range_end >= range_start) { + return index >= range_start && index < range_end; + } else { + return index < range_end || (index >= range_start && index < chdr->size); + } +} + +#if 0 // The segment functionality doesn't look like it's going to be used + +/* Reads NBYTES bytes of the first segment into BUF. If there aren't NBYTES + to read in the buffer, does nothing and returns 0. Otherwise, returns + the number of bytes read. 
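+   (Each segment is stored in the buffer as a size_t length header followed
+   by the payload bytes; see cbuf_write_segment below.)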
+ */
+size_t cbuf_peek_segment(struct cbufhead* chdr, uint8_t* data, size_t numbytes) {
+    size_t used_space = cbuf_used_space(chdr);
+    size_t old_ridx;
+    if (used_space < numbytes + sizeof(size_t)) {
+        return 0;
+    }
+    old_ridx = chdr->r_index;
+    chdr->r_index = (chdr->r_index + sizeof(size_t)) % chdr->size;
+    cbuf_read_unsafe(chdr, data, numbytes, 0);
+    chdr->r_index = old_ridx;
+    return numbytes;
+}
+
+
+int cbuf_write_segment(struct cbufhead* chdr, uint8_t* segment, size_t seglen) {
+    if (cbuf_free_space(chdr) < seglen + sizeof(seglen)) {
+        return -1;
+    }
+    cbuf_write(chdr, (uint8_t*) &seglen, sizeof(seglen));
+    cbuf_write(chdr, segment, seglen);
+    return 0;
+}
+
+size_t cbuf_peek_segment_size(struct cbufhead* chdr) {
+    size_t segsize;
+    if (cbuf_read(chdr, (uint8_t*) &segsize,
+                  sizeof(size_t), 0) < sizeof(size_t)) {
+        return 0;
+    }
+    return segsize;
+}
+
+size_t cbuf_pop_segment(struct cbufhead* chdr, size_t segsize) {
+    if (!segsize) {
+        segsize = cbuf_peek_segment_size(chdr);
+    }
+    return cbuf_pop(chdr, segsize + sizeof(size_t));
+}
+
+#endif
diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/lib/cbuf.h b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/cbuf.h
new file mode 100644
index 000000000000..5232bba521ec
--- /dev/null
+++ b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/cbuf.h
@@ -0,0 +1,41 @@
+#ifndef CBUF_H_
+#define CBUF_H_
+
+/* CIRCULAR BUFFER
+   The circular buffer can be treated either as a buffer of bytes, or a buffer
+   of TCP segments. Don't mix and match the functions unless you know what
+   you're doing! */
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct cbufhead {
+    size_t r_index;
+    size_t w_index;
+    size_t size;
+    uint8_t* buf;
+};
+
+void cbuf_init(struct cbufhead* chdr, uint8_t* buf, size_t len);
+
+size_t cbuf_write(struct cbufhead* chdr, uint8_t* data, size_t data_len);
+size_t cbuf_read(struct cbufhead* chdr, uint8_t* data, size_t numbytes, int pop);
+size_t cbuf_read_offset(struct cbufhead* chdr, uint8_t* data, size_t numbytes, size_t offset);
+size_t cbuf_pop(struct cbufhead* chdr, size_t numbytes);
+size_t cbuf_used_space(struct cbufhead* chdr);
+size_t cbuf_free_space(struct cbufhead* chdr);
+size_t cbuf_size(struct cbufhead* chdr);
+
+size_t cbuf_reass_write(struct cbufhead* chdr, size_t offset, uint8_t* data, size_t numbytes, uint8_t* bitmap, size_t* firstindex);
+size_t cbuf_reass_merge(struct cbufhead* chdr, size_t numbytes, uint8_t* bitmap);
+size_t cbuf_reass_count_set(struct cbufhead* chdr, size_t offset, uint8_t* bitmap, size_t limit);
+int cbuf_reass_within_offset(struct cbufhead* chdr, size_t offset, size_t index);
+
+/*
+int cbuf_write_segment(struct cbufhead* chdr, uint8_t* segment, size_t seglen);
+size_t cbuf_pop_segment(struct cbufhead* chdr, size_t segsize);
+size_t cbuf_peek_segment_size(struct cbufhead* chdr);
+size_t cbuf_peek_segment(struct cbufhead* chdr, uint8_t* data, size_t numbytes);
+*/
+
+#endif
diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.c b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.c
new file mode 100644
index 000000000000..30ed121762bf
--- /dev/null
+++ b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.c
@@ -0,0 +1,90 @@
+/* LINKED BUFFER */
+
+#include "lbuf.h"
+#include <stddef.h>
+#include <string.h>
+
+void lbuf_init(struct lbufhead* buffer) {
+    memset(buffer, 0x00, sizeof(struct lbufhead));
+}
+
+struct ip_iovec* lbuf_to_iovec(struct lbufhead* buffer) {
+    if (buffer == NULL || buffer->head == NULL) {
+        return NULL;
+    } else {
+        return &buffer->head->iov;
+    }
+}
+
+int lbuf_append(struct lbufhead* buffer, struct lbufent* newentry) {
+    struct lbufent* tail = buffer->tail;
+    if (tail == NULL) {
+        buffer->head = newentry;
+        buffer->tail = newentry;
+        buffer->length = (uint32_t) newentry->iov.iov_len;
+        newentry->iov.iov_next = NULL;
+    } else if (newentry->iov.iov_len <= (uint32_t) tail->extraspace) {
+        memcpy(tail->iov.iov_base + tail->iov.iov_len,
+               newentry->iov.iov_base, newentry->iov.iov_len);
+        tail->extraspace -= newentry->iov.iov_len;
+        buffer->length += (uint32_t) newentry->iov.iov_len;
+        tail->iov.iov_len += newentry->iov.iov_len;
+        return 2;
+    } else {
+        tail->iov.iov_next = &newentry->iov;
+        buffer->tail = newentry;
+        buffer->length += (uint32_t) newentry->iov.iov_len;
+        newentry->iov.iov_next = NULL;
+    }
+    return 1;
+}
+
+uint32_t lbuf_pop(struct lbufhead* buffer, uint32_t numbytes, int* ntraversed) {
+    struct lbufent* curr = buffer->head;
+    uint32_t bytesleft = numbytes;
+    while (bytesleft >= curr->iov.iov_len) {
+        ++*ntraversed;
+        buffer->head = IOV_TO_LBUFENT(curr->iov.iov_next);
+        bytesleft -= curr->iov.iov_len;
+        buffer->length -= curr->iov.iov_len;
+        if (buffer->tail == curr) {
+            /* buffer->head should be NULL. */
+            buffer->tail = NULL;
+            return numbytes - bytesleft;
+        }
+        curr = buffer->head;
+    }
+    /* Handle the last entry. */
+    curr->iov.iov_base += bytesleft;
+    curr->iov.iov_len -= bytesleft;
+    buffer->length -= bytesleft;
+    return numbytes;
+}
+
+int lbuf_getrange(struct lbufhead* buffer, uint32_t offset, uint32_t numbytes,
+                  struct lbufent** first, uint32_t* firstoffset,
+                  struct lbufent** last, uint32_t* lastextra) {
+    struct lbufent* curr = buffer->head;
+    uint32_t offsetleft = offset;
+    uint32_t bytesleft = numbytes;
+    if (buffer->length < offset + numbytes) {
+        return 1; // out of range
+    }
+    while (offsetleft > 0 && offsetleft >= curr->iov.iov_len) {
+        offsetleft -= curr->iov.iov_len;
+        curr = IOV_TO_LBUFENT(curr->iov.iov_next);
+    }
+    *first = curr;
+    *firstoffset = offsetleft;
+    bytesleft += offsetleft;
+    while (bytesleft > 0 && bytesleft > curr->iov.iov_len) {
+        bytesleft -= curr->iov.iov_len;
+        curr = IOV_TO_LBUFENT(curr->iov.iov_next);
+    }
+    *last = curr;
+    *lastextra = curr->iov.iov_len - bytesleft;
+    return 0;
+}
+
+uint32_t lbuf_used_space(struct lbufhead* buffer) {
+    return buffer->length;
+}
diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.h b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.h
new file mode 100644
index 000000000000..3c271f1aad09
--- /dev/null
+++ b/sys/net/gnrc/transport_layer/tcp_freebsd/lib/lbuf.h
@@ -0,0 +1,65 @@
+#ifndef LBUF_H_
+#define LBUF_H_
+
+#include "../blip/iovec.h"
+
+/* LINKED BUFFER */
+
+struct lbufhead {
+    struct lbufent* head;
+    struct lbufent* tail;
+    uint32_t length;
+};
+
+struct lbufent {
+    struct ip_iovec iov;
+    uint16_t extraspace;
+};
+
+#define IOV_TO_LBUFENT(iovec) (iovec ? ((struct lbufent*) \
+                               (((uint8_t*) (iovec)) \
+                                - offsetof(struct lbufent, iov))) : NULL)
+
+/* Initializes a linked buffer. */
+void lbuf_init(struct lbufhead* buffer);
+
+/* Returns the contents of the buffer as an iovec, or NULL if the buffer has
+   no head. */
+struct ip_iovec* lbuf_to_iovec(struct lbufhead* buffer);
+
+/* Adds the contents of NEWENTRY to the buffer. This may happen in one of
+   two ways: (1) a reference to NEWENTRY is maintained by the buffer, or
+   (2) the contents of NEWENTRY are copied into the extra space of the
+   tail entry.
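+   (Case (2) applies when the new data fits in the tail entry's extraspace,
+   i.e., newentry->iov.iov_len <= tail->extraspace.)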
+   In the first case, this function returns a 1, and the user
+   must not reclaim the space allocated to the entry or the data until
+   it is no longer needed by the buffer (as is indicated with the NTRAVERSED
+   argument to lbuf_pop; the entries are guaranteed to be released in FIFO
+   order). In the second case, this function returns a 2, and the user can
+   immediately reclaim NEWENTRY. */
+int lbuf_append(struct lbufhead* buffer, struct lbufent* newentry);
+
+/* Removes the first NUMBYTES bytes from the buffer, and returns the number of
+   bytes removed (which is fewer than NUMBYTES if there were fewer than
+   NUMBYTES bytes in the buffer to begin with). *NTRAVERSED is incremented once
+   for each entry in the buffer that is no longer referenced and can be
+   reclaimed. */
+uint32_t lbuf_pop(struct lbufhead* buffer, uint32_t numbytes, int* ntraversed);
+
+/* Given a range of indices, specified by an OFFSET from the start and a
+   length NUMBYTES, this function locates that range of bytes inside the
+   buffer. A pointer to the first entry in the range is stored into FIRST,
+   and the number of bytes in the entry before the start of the range is
+   stored into FIRSTOFFSET. A pointer to the last entry in the range is
+   stored into LAST, and the number of bytes in that entry after the end of
+   the range is stored into LASTEXTRA.
+   Returns 0 on success and 1 on failure. On failure, FIRST, LAST, FIRSTOFFSET,
+   and LASTEXTRA are not set. The only failure condition is when there are not
+   enough bytes in the buffer to do the full traversal. */
+int lbuf_getrange(struct lbufhead* buffer, uint32_t offset, uint32_t numbytes,
+                  struct lbufent** first, uint32_t* firstoffset,
+                  struct lbufent** last, uint32_t* lastextra);
+
+/* Returns the total number of bytes stored in the buffer. */
+uint32_t lbuf_used_space(struct lbufhead* buffer);
+
+#endif
diff --git a/sys/net/gnrc/transport_layer/tcp_freebsd/socket_allocator.c b/sys/net/gnrc/transport_layer/tcp_freebsd/socket_allocator.c
new file mode 100644
index 000000000000..c5d224d3b3ba
--- /dev/null
+++ b/sys/net/gnrc/transport_layer/tcp_freebsd/socket_allocator.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (C) 2016 University of California, Berkeley
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @ingroup net_gnrc_tcp_freebsd
+ * @{
+ *
+ * @file
+ * @brief TCP socket allocator for GNRC
+ *
+ * @author Sam Kumar
+ *
+ * This file provides the "raw" API for using this TCP module; in other
+ * words, it is the frontend of the TCP module.
+ * @}
+ */
+
+#include <errno.h>
+#include <string.h>
+#include "gnrc_tcp_freebsd_internal.h"
+#include "lib/lbuf.h"
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+
+#define ENABLE_DEBUG (0)
+#include "debug.h"
+
+typedef struct asock {
+    connectDone_t connectDone;
+    sendDone_t sendDone;
+    receiveReady_t receiveReady;
+    connectionLost_t connectionLost;
+    void* context;
+} active_socket_t;
+
+typedef struct psock {
+    acceptReady_t acceptReady;
+    acceptDone_t acceptDone;
+    void* context;
+} passive_socket_t;
+
+active_socket_t activesockets[GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS];
+passive_socket_t passivesockets[GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS];
+
+/* Bitmasks to keep track of which sockets are allocated.
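+   Descriptor i maps to bit (i & 0x7) of byte (i >> 3), as in _is_allocated
+   below.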
+ */
+uint8_t activemask[1 + ((GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS - 1) >> 3)];
+uint8_t passivemask[1 + ((GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS - 1) >> 3)];
+
+void clear_activesocket(active_socket_t* asock)
+{
+    memset(asock, 0x00, sizeof(active_socket_t));
+}
+
+void clear_passivesocket(passive_socket_t* psock)
+{
+    memset(psock, 0x00, sizeof(passive_socket_t));
+}
+
+inline int _is_allocated(uint8_t* mask, int fd)
+{
+    return mask[fd >> 3] & (1 << (fd & 0x7));
+}
+
+inline void _force_alloc(uint8_t* mask, int fd)
+{
+    mask[fd >> 3] |= (1 << (fd & 0x7));
+}
+
+inline void _force_dealloc(uint8_t* mask, int fd)
+{
+    mask[fd >> 3] &= ~(1 << (fd & 0x7));
+}
+
+int alloc_fd(uint8_t* mask, int num_fds, bool (*isvalid)(int) ) {
+    int i;
+    for (i = 0; i < num_fds; i++) {
+        if (!_is_allocated(mask, i) && isvalid(i)) {
+            _force_alloc(mask, i);
+            return i;
+        }
+    }
+    return -1;
+}
+
+bool _always_true(int pi)
+{
+    return true;
+}
+
+bool _active_isclosed(int ai)
+{
+    return TCPS_CLOSED == asock_getState_impl(ai);
+}
+
+bool _active_istimewait(int ai)
+{
+    int state = asock_getState_impl(ai);
+    return TCPS_TIME_WAIT == state || TCPS_CLOSED == state;
+}
+
+int alloc_pfd(void)
+{
+    int pfd = alloc_fd(passivemask, GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS, _always_true);
+    if (pfd == -1) {
+        /* Don't shift the error sentinel into the range of valid descriptors. */
+        return -1;
+    }
+    return pfd + GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS;
+}
+
+int alloc_afd(void)
+{
+    int afd;
+    // First, try to get a socket that's closed.
+    afd = alloc_fd(activemask, GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS, _active_isclosed);
+    if (afd == -1) {
+        // If that failed, try to get a socket in TIME-WAIT, and end the TIME-WAIT early.
+        afd = alloc_fd(activemask, GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS, _active_istimewait);
+        if (afd != -1) {
+            asock_abort_impl(afd);
+        }
+    }
+    return afd;
+}
+
+void dealloc_fd(uint8_t* mask, int fd)
+{
+    assert(_is_allocated(mask, fd));
+    _force_dealloc(mask, fd);
+}
+
+void dealloc_afd(int afd)
+{
+    assert(afd < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS);
+    dealloc_fd(activemask, afd);
+    clear_activesocket(&activesockets[afd]);
+}
+
+void dealloc_pfd(int pfd)
+{
+    assert(pfd < GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS);
+    dealloc_fd(passivemask, pfd);
+    clear_passivesocket(&passivesockets[pfd]);
+}
+
+int decode_fd(int rawfd, bool* passive) {
+    if (rawfd < 0 || rawfd >= GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS + GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS) {
+        return -1;
+    }
+    if (rawfd >= GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS) {
+        *passive = true;
+        rawfd -= GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS;
+        if (!_is_allocated(passivemask, rawfd)) {
+            return -1;
+        }
+    } else {
+        *passive = false;
+        if (!_is_allocated(activemask, rawfd)) {
+            return -1;
+        }
+    }
+    return rawfd;
+}
+
+/* External API */
+
+int bsdtcp_active_socket(connectDone_t cd, sendDone_t sd, receiveReady_t rr, connectionLost_t cl, void* ctx)
+{
+    int fd = alloc_afd();
+    if (fd != -1) {
+        active_socket_t* asock = &activesockets[fd];
+        asock->connectDone = cd;
+        asock->sendDone = sd;
+        asock->receiveReady = rr;
+        asock->connectionLost = cl;
+        asock->context = ctx;
+    }
+    return fd;
+}
+
+int bsdtcp_passive_socket(acceptReady_t ar, acceptDone_t ad, void* ctx)
+{
+    int fd = alloc_pfd();
+    int decoded_fd = fd - GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS;
+    if (fd != -1) {
+        passive_socket_t* psock = &passivesockets[decoded_fd];
+        psock->acceptReady = ar;
+        psock->acceptDone = ad;
+        psock->context = ctx;
+    }
+    return fd;
+}
+
+int bsdtcp_set_ctx(int fd, void* newctx) {
+    bool passive;
+    fd = decode_fd(fd, &passive);
+    if (fd == -1) {
+        return EBADF;
+    }
+
+    if (passive) {
+        passive_socket_t* psock =
&passivesockets[fd]; + psock->context = newctx; + } else { + active_socket_t* asock = &activesockets[fd]; + asock->context = newctx; + } + + return 0; +} + +int bsdtcp_bind(int fd, uint16_t port) +{ + int rv; + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1) { + return EBADF; + } + if (passive) { + rv = psock_bind_impl(fd, port); + DEBUG("Bound passive socket to port %" PRIu16 "\n", port); + } else { + rv = asock_bind_impl(fd, port); + DEBUG("Bound active socket to port %" PRIu16 "\n", port); + } + return rv; +} + +int bsdtcp_connect(int fd, struct sockaddr_in6* faddrport, uint8_t* recvbuf, size_t recvbuflen, uint8_t* reassbmp) +{ + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1 || passive) { + return EBADF; + } + return asock_connect_impl(fd, faddrport, recvbuf, recvbuflen, reassbmp); +} + +int bsdtcp_listen(int fd) +{ + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1 || !passive) { + return EBADF; + } + return psock_listen_impl(fd); +} + +int bsdtcp_send(int fd, struct lbufent* data, int* status) +{ + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1 || passive) { + return EBADF; + } + return asock_send_impl(fd, data, 0, status); +} + +int bsdtcp_receive(int fd, uint8_t* buffer, size_t length, size_t* numbytes) +{ + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1 || passive) { + return EBADF; + } + return asock_receive_impl(fd, buffer, length, numbytes); +} + +int bsdtcp_shutdown(int fd, int how) +{ + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1 || passive) { + return EBADF; + } + return asock_shutdown_impl(fd, how == SHUT_RD || how == SHUT_RDWR, + how == SHUT_WR || how == SHUT_RDWR); +} + +int bsdtcp_close(int fd) +{ + bool passive; + int rv; + fd = decode_fd(fd, &passive); + if (fd == -1) { + return EBADF; + } + if (passive) { + rv = psock_close_impl(fd); + dealloc_pfd(fd); + } else { + rv = asock_shutdown_impl(fd, true, true); + dealloc_afd(fd); + } + return rv; +} + +int bsdtcp_abort(int fd) +{ + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1) { + return EBADF; + } + if (passive) { + return psock_close_impl(fd); + } else { + return asock_abort_impl(fd); + } +} + +int bsdtcp_isestablished(int fd) +{ + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1 || passive) { + return EBADF; + } + return TCPS_HAVEESTABLISHED(asock_getState_impl(fd)); +} + +int bsdtcp_hasrcvdfin(int fd) +{ + bool passive; + int state; + fd = decode_fd(fd, &passive); + if (fd == -1 || passive) { + return EBADF; + } + state = asock_getState_impl(fd); + return state == TCPS_TIME_WAIT || state == TCPS_CLOSE_WAIT || + state == TCPS_LAST_ACK || state == TCPS_CLOSING; +} + +int bsdtcp_peerinfo(int fd, struct in6_addr** addrptr, uint16_t** portptr) +{ + bool passive; + fd = decode_fd(fd, &passive); + if (fd == -1 || passive) { + return EBADF; + } + asock_getPeerInfo_impl(fd, addrptr, portptr); + return 0; +} + +/* API to the TCP frontend. 
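+   The TCP protocol logic calls into these event_* functions; each one looks
+   up the callback registered for the affected socket and forwards the event
+   (e.g., asock->sendDone(ai, numentries, asock->context)).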
+ */
+acceptArgs_t event_acceptReady(uint8_t pi)
+{
+    assert(pi >= 0 && pi < GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS);
+    assert(_is_allocated(passivemask, pi));
+
+    passive_socket_t* psock = &passivesockets[pi];
+
+    if (psock->acceptReady != NULL) {
+        return psock->acceptReady(pi, psock->context);
+    } else {
+        acceptArgs_t args = { -1, NULL, 0, NULL };
+        return args;
+    }
+}
+bool event_acceptDone(uint8_t pi, struct sockaddr_in6* addr, acceptArgs_t* accepted)
+{
+    assert(pi >= 0 && pi < GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS);
+    assert(_is_allocated(passivemask, pi));
+
+    passive_socket_t* psock = &passivesockets[pi];
+
+    if (psock->acceptDone != NULL) {
+        return psock->acceptDone(pi, addr, accepted, psock->context);
+    }
+
+    return false;
+}
+
+void event_connectDone(uint8_t ai, struct sockaddr_in6* addr)
+{
+    assert(ai >= 0 && ai < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS);
+    // Doesn't need to be allocated
+
+    active_socket_t* asock = &activesockets[ai];
+
+    if (asock->connectDone != NULL) {
+        asock->connectDone(ai, addr, asock->context);
+    }
+}
+
+void event_receiveReady(uint8_t ai, int gotfin)
+{
+    assert(ai >= 0 && ai < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS);
+    // Doesn't need to be allocated
+
+    active_socket_t* asock = &activesockets[ai];
+
+    if (asock->receiveReady != NULL) {
+        asock->receiveReady(ai, gotfin, asock->context);
+    }
+}
+
+void event_sendDone(uint8_t ai, uint32_t numentries)
+{
+    assert(ai >= 0 && ai < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS);
+    // Doesn't need to be allocated
+
+    active_socket_t* asock = &activesockets[ai];
+
+    if (asock->sendDone != NULL) {
+        asock->sendDone(ai, numentries, asock->context);
+    }
+}
+
+void event_connectionLost(acceptArgs_t* lost, uint8_t how)
+{
+    int ai = lost->asockid;
+    assert(ai >= 0 && ai < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS);
+    // Doesn't need to be allocated
+
+    active_socket_t* asock = &activesockets[ai];
+
+    if (asock->connectionLost != NULL) {
+        asock->connectionLost(lost, how, asock->context);
+    }
+}
+
+void gnrc_tcp_freebsd_allocator_init(void) {
+    int i;
+    for (i = 0; i < GNRC_TCP_FREEBSD_NUM_ACTIVE_SOCKETS; i++) {
+        clear_activesocket(&activesockets[i]);
+    }
+    for (i = 0; i < GNRC_TCP_FREEBSD_NUM_PASSIVE_SOCKETS; i++) {
+        clear_passivesocket(&passivesockets[i]);
+    }
+    memset(activemask, 0x00, sizeof(activemask));
+    memset(passivemask, 0x00, sizeof(passivemask));
+}
diff --git a/sys/pm_layered/pm.c b/sys/pm_layered/pm.c
index 29399e1f0739..ca626f439941 100644
--- a/sys/pm_layered/pm.c
+++ b/sys/pm_layered/pm.c
@@ -80,7 +80,7 @@ void pm_block(unsigned mode)
 void pm_unblock(unsigned mode)
 {
-    assert(pm_blocker.val_u8[mode] > 0);
+    //assert(pm_blocker.val_u8[mode] > 0);
 
     unsigned state = irq_disable();
     pm_blocker.val_u8[mode]--;
diff --git a/sys/posix/include/sys/socket.h b/sys/posix/include/sys/socket.h
index 1bd9ea8009b9..2907c1f92cca 100644
--- a/sys/posix/include/sys/socket.h
+++ b/sys/posix/include/sys/socket.h
@@ -117,6 +117,16 @@ extern "C" {
 #define SO_TYPE (15) /**< Socket type.
*/ /** @} */ +/** + * @name Socket shutdown options + * @brief Parameter to shutdown() + * @{ + */ +#define SHUT_RD (0) +#define SHUT_WR (1) +#define SHUT_RDWR (2) +/** @} */ + typedef unsigned short sa_family_t; /**< address family type */ /** diff --git a/sys/posix/sockets/posix_sockets.c b/sys/posix/sockets/posix_sockets.c index af85174c846a..97b0d779260e 100644 --- a/sys/posix/sockets/posix_sockets.c +++ b/sys/posix/sockets/posix_sockets.c @@ -35,6 +35,7 @@ #include "net/sock/ip.h" #include "net/sock/udp.h" #include "net/sock/tcp.h" +#include "net/sock/tcp_freebsd.h" /* enough to create sockets both with socket() and accept() */ #define _ACTUAL_SOCKET_POOL_SIZE (SOCKET_POOL_SIZE + \ @@ -61,6 +62,9 @@ typedef union { #ifdef MODULE_SOCK_UDP sock_udp_t udp; /**< UDP sock */ #endif /* MODULE_SOCK_UDP */ +#ifdef MODULE_SOCK_TCP_FREEBSD + sock_tcp_freebsd_t tcp_freebsd; +#endif } socket_sock_t; typedef struct { @@ -139,7 +143,7 @@ static int _get_sock_idx(socket_sock_t *sock) static inline int _choose_ipproto(int type, int protocol) { switch (type) { -#ifdef MODULE_SOCK_TCP +#if defined(MODULE_SOCK_TCP) || defined(MODULE_SOCK_TCP_FREEBSD) case SOCK_STREAM: if ((protocol == 0) || (protocol == IPPROTO_TCP)) { return protocol; @@ -272,6 +276,11 @@ static int socket_close(vfs_file_t *filp) sock_tcp_stop_listen(&s->sock->tcp.queue); } break; +#endif +#ifdef MODULE_SOCK_TCP_FREEBSD + case SOCK_STREAM: + sock_tcp_freebsd_close(&s->sock->tcp_freebsd); + break; #endif default: errno = EOPNOTSUPP; @@ -386,8 +395,14 @@ int socket(int domain, int type, int protocol) int accept(int socket, struct sockaddr *restrict address, socklen_t *restrict address_len) { +#ifdef MODULE_SOCK_TCP_FREEBSD + sock_tcp_freebsd_t* sock = NULL; +#endif #ifdef MODULE_SOCK_TCP sock_tcp_t *sock = NULL; +#endif + +#if defined(MODULE_SOCK_TCP) || defined(MODULE_SOCK_TCP_FREEBSD) socket_t *s, *new_s = NULL; int res = 0; @@ -418,6 +433,29 @@ int accept(int socket, struct sockaddr *restrict address, res = -1; break; } + new_s->sock = _get_free_sock(); + if (new_s->sock == NULL) { + new_s->domain = AF_UNSPEC; + errno = ENOMEM; + res = -1; + break; + } + +#ifdef MODULE_SOCK_TCP_FREEBSD + if (s->domain != AF_INET6) { + errno = EPROTO; + res = -1; + break; + } + sock = (sock_tcp_freebsd_t*) &new_s->sock->tcp_freebsd; + (void) recv_timeout; + if ((res = sock_tcp_freebsd_accept(&s->sock->tcp_freebsd, sock)) < 0) { + + errno = -res; + res = -1; + break; + } +#else sock = (sock_tcp_t *)new_s->sock; if ((res = sock_tcp_accept(&s->sock->tcp.queue, &sock, recv_timeout)) < 0) { @@ -425,17 +463,27 @@ int accept(int socket, struct sockaddr *restrict address, res = -1; break; } +#endif else { if ((address != NULL) && (address_len != NULL)) { sock_tcp_ep_t ep; struct sockaddr_storage sa; socklen_t sa_len; +#ifdef MODULE_SOCK_TCP if ((res = sock_tcp_get_remote(sock, &ep)) < 0) { errno = -res; res = -1; break; } +#endif +#ifdef MODULE_SOCK_TCP_FREEBSD + if ((res = sock_tcp_freebsd_getpeeraddr(sock, &ep.addr.ipv6, &ep.port)) < 0) { + errno = -res; + res = -1; + break; + } +#endif sa.ss_family = s->domain; sa_len = _ep_to_sockaddr(&ep, &sa); *address_len = _addr_truncate(address, *address_len, &sa, @@ -456,8 +504,10 @@ int accept(int socket, struct sockaddr *restrict address, new_s->type = s->type; new_s->protocol = s->protocol; new_s->bound = true; +#ifdef MODULE_SOCK_TCP new_s->queue_array = NULL; new_s->queue_array_len = 0; +#endif memset(&s->local, 0, sizeof(sock_tcp_ep_t)); } break; @@ -467,7 +517,12 @@ int accept(int socket, struct sockaddr 
*restrict address, break; } if ((res < 0) && (sock != NULL)) { +#ifdef MODULE_SOCK_TCP sock_tcp_disconnect(sock); +#endif +#ifdef MODULE_SOCK_TCP_FREEBSD + sock_tcp_freebsd_close(sock); +#endif } mutex_unlock(&_socket_pool_mutex); return res; @@ -510,6 +565,10 @@ int bind(int socket, const struct sockaddr *address, socklen_t address_len) case SOCK_STREAM: break; #endif +#ifdef MODULE_SOCK_TCP_FREEBSD + case SOCK_STREAM: + break; +#endif #ifdef MODULE_SOCK_UDP case SOCK_DGRAM: break; @@ -570,6 +629,19 @@ static int _bind_connect(socket_t *s, const struct sockaddr *address, (local == NULL) ? 0 : local->port, 0); break; #endif +#ifdef MODULE_SOCK_TCP_FREEBSD + case SOCK_STREAM: + if (remote == NULL) { + res = -EFAULT; + break; + } + if ((res = sock_tcp_freebsd_create(&sock->tcp_freebsd, &s->local.addr.ipv6, sizeof(s->local.addr.ipv6), s->domain, s->local.port)) < 0) { + errno = -res; + return -1; + } + res = sock_tcp_freebsd_connect(&sock->tcp_freebsd, &remote->addr.ipv6, sizeof(remote->addr.ipv6), remote->port); + break; +#endif #ifdef MODULE_SOCK_UDP case SOCK_DGRAM: /* TODO apply flags if possible */ @@ -645,6 +717,11 @@ static int _getpeername(socket_t *s, struct sockaddr *__restrict address, } break; #endif +#ifdef MODULE_SOCK_TCP_FREEBSD + case SOCK_STREAM: + res = sock_tcp_freebsd_getpeeraddr(&s->sock->tcp_freebsd, &ep.addr.ipv6, &ep.port); + break; +#endif #ifdef MODULE_SOCK_UDP case SOCK_DGRAM: res = sock_udp_get_remote(&s->sock->udp, &ep); @@ -719,6 +796,11 @@ int getsockname(int socket, struct sockaddr *__restrict address, } break; #endif +#ifdef MODULE_SOCK_TCP_FREEBSD + case SOCK_STREAM: + res = sock_tcp_freebsd_getlocaladdr(&s->sock->tcp_freebsd, &ep.addr.ipv6, &ep.port); + break; +#endif #ifdef MODULE_SOCK_UDP case SOCK_DGRAM: res = sock_udp_get_local(&s->sock->udp, &ep); @@ -743,7 +825,7 @@ int getsockname(int socket, struct sockaddr *__restrict address, int listen(int socket, int backlog) { -#ifdef MODULE_SOCK_TCP +#if defined(MODULE_SOCK_TCP) || defined(MODULE_SOCK_TCP_FREEBSD) socket_t *s; socket_sock_t *sock; int res = 0; @@ -756,11 +838,13 @@ int listen(int socket, int backlog) return -1; } if (s->sock != NULL) { +#ifdef MODULE_SOCK_TCP /* or this socket is already connected, this is an error */ if (s->queue_array == NULL) { errno = EINVAL; res = -1; } +#endif mutex_unlock(&_socket_pool_mutex); return res; } @@ -770,15 +854,26 @@ int listen(int socket, int backlog) errno = ENOMEM; return -1; } +#ifdef MODULE_SOCK_TCP s->queue_array = _tcp_sock_pool[_get_sock_idx(sock)]; s->queue_array_len = (backlog < SOCKET_TCP_QUEUE_SIZE) ? 
                                  backlog : SOCKET_TCP_QUEUE_SIZE;
+#endif
     switch (s->type) {
         case SOCK_STREAM:
             if (s->bound) {
+#ifdef MODULE_SOCK_TCP
                 /* TODO apply flags if possible */
                 res = sock_tcp_listen(&sock->tcp.queue, &s->local, s->queue_array, s->queue_array_len, 0);
+#endif
+#ifdef MODULE_SOCK_TCP_FREEBSD
+                if ((res = sock_tcp_freebsd_create(&sock->tcp_freebsd, &s->local.addr.ipv6, sizeof(s->local.addr.ipv6), s->domain, s->local.port)) < 0) {
+                    errno = -res;
+                    return -1;
+                }
+                res = sock_tcp_freebsd_listen(&sock->tcp_freebsd, backlog);
+#endif
             }
             else {
                 res = -EDESTADDRREQ;
@@ -820,7 +915,7 @@ static ssize_t socket_recvfrom(socket_t *s, void *restrict buffer,
         return -ENOTSOCK;
     }
     if (s->sock == NULL) { /* socket is not connected */
-#ifdef MODULE_SOCK_TCP
+#if defined(MODULE_SOCK_TCP) || defined(MODULE_SOCK_TCP_FREEBSD)
         if (s->type == SOCK_STREAM) {
             return -ENOTCONN;
         }
@@ -850,6 +945,11 @@ static ssize_t socket_recvfrom(socket_t *s, void *restrict buffer,
                                recv_timeout);
             break;
 #endif
+#ifdef MODULE_SOCK_TCP_FREEBSD
+        case SOCK_STREAM:
+            res = sock_tcp_freebsd_recv(&s->sock->tcp_freebsd, buffer, length);
+            break;
+#endif
 #ifdef MODULE_SOCK_UDP
         case SOCK_DGRAM:
             res = sock_udp_recv(&s->sock->udp, buffer, length, recv_timeout,
@@ -865,7 +965,7 @@ static ssize_t socket_recvfrom(socket_t *s, void *restrict buffer,
     }
     if ((res >= 0) && (address != NULL) && (address_len != NULL)) {
         switch (s->type) {
-#ifdef MODULE_SOCK_TCP
+#if defined(MODULE_SOCK_TCP) || defined(MODULE_SOCK_TCP_FREEBSD)
             case SOCK_STREAM:
                 res = _getpeername(s, address, address_len);
                 break;
@@ -917,7 +1017,7 @@ static ssize_t socket_sendto(socket_t *s, const void *buffer, size_t length,
         return -1;
     }
     if (s->sock == NULL) { /* socket is not connected */
-#ifdef MODULE_SOCK_TCP
+#if defined(MODULE_SOCK_TCP) || defined(MODULE_SOCK_TCP_FREEBSD)
         if (s->type == SOCK_STREAM) {
            errno = ENOTCONN;
            return -1;
@@ -942,11 +1042,24 @@ static ssize_t socket_sendto(socket_t *s, const void *buffer, size_t length,
             }
             break;
 #endif
-#ifdef MODULE_SOCK_TCP
+#if defined(MODULE_SOCK_TCP) || defined(MODULE_SOCK_TCP_FREEBSD)
         case SOCK_STREAM:
             if (address == NULL) {
                 (void)address_len;
-                if ((res = sock_tcp_write(&s->sock->tcp.sock, buffer, length)) < 0) {
+#ifdef MODULE_SOCK_TCP
+                res = sock_tcp_write(&s->sock->tcp.sock, buffer, length);
+#endif
+#ifdef MODULE_SOCK_TCP_FREEBSD
+                res = sock_tcp_freebsd_send(&s->sock->tcp_freebsd, buffer, length);
+                if (res >= 0) {
+                    /* a successful send queues the whole buffer */
+                    res = length;
+                }
+#endif
+                if (res < 0) {
                     errno = -res;
                     res = -1;
                 }
diff --git a/sys/task_sched/Makefile b/sys/task_sched/Makefile
new file mode 100644
index 000000000000..9f4465f423ba
--- /dev/null
+++ b/sys/task_sched/Makefile
@@ -0,0 +1,3 @@
+MODULE = task_sched
+
+include $(RIOTBASE)/Makefile.base
diff --git a/sys/task_sched/task_sched.c b/sys/task_sched/task_sched.c
new file mode 100644
index 000000000000..b8c9a54bd062
--- /dev/null
+++ b/sys/task_sched/task_sched.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2016 University of California, Berkeley
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @ingroup task_sched
+ * @{
+ *
+ * @file
+ * @brief TinyOS-style task scheduler
+ *
+ * @author Sam Kumar
+ *
+ * This module, built on top of the xtimer module, emulates a TinyOS-style task
+ * scheduler in a single thread.
+ *
+ * Tasks in TinyOS execute in an event loop.
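+ * A task is identified by its integer index into the scheduler's task array.
+ * A minimal usage sketch (illustrative only; the fields are those of struct
+ * task_sched used below):
+ *
+ *     static struct task tasks[1];
+ *     static struct task_sched sched; // set .tasks, .num_tasks,
+ *                                     // .task_handler, .thread_stack, etc.
+ *     start_task_sched(&sched);
+ *     sched_task(&sched, 0, 0); // run task 0 as soon as possible
+ *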
When a task is "posted", it is + * placed at the back of the event queue, unless that task is already on the + * event queue, in which case nothing happens. + * + * This is slightly different from messages in RIOT's IPC mechanism in that a + * task can only exist in the event queue in one place. This has the advantage + * that the memory needed for the event queue is bounded by the number of + * different tasks. + * @} + */ + +#include +#include +#include +#include +#include +#include + +#define ENABLE_DEBUG (0) + +#include "debug.h" + +msg_t expired = { 0, 0, { 0 } }; + +void* _task_sched(void* arg) +{ + struct task_sched* sched = arg; + + msg_t msg; + msg_t msg_queue[1]; + + msg_init_queue(msg_queue, 1); + + while (1) { + msg_receive(&msg); + DEBUG("Woke up\n"); + + mutex_lock(&sched->_lock); + sched->_in_process_loop = true; + + while (sched->_first != -1 + && 0 <= (int64_t) (xtimer_now_usec64() - sched->tasks[sched->_first]._min_exec_time)) { + int taskid = sched->_first; + struct task* t = &sched->tasks[sched->_first]; + + DEBUG("Setting first to %d\n", t->_next); + sched->_first = t->_next; + if (sched->_first != -1) { + assert(sched->tasks[sched->_first]._prev == taskid); + sched->tasks[sched->_first]._prev = -1; + } + + assert(t->_prev == -1); + t->_next = -1; + + /* Process the task. */ + mutex_unlock(&sched->_lock); + sched->task_handler(taskid); + mutex_lock(&sched->_lock); + } + + /* Schedule the next timer, if any. */ + xtimer_remove(&sched->_timer); + if (sched->_first != -1) { + uint64_t until_next = (uint64_t) + (sched->tasks[sched->_first]._req_exec_time - xtimer_now_usec64()); + xtimer_set_msg64(&sched->_timer, until_next, &expired, sched->_pid); + } + + sched->_in_process_loop = false; + mutex_unlock(&sched->_lock); + } + + /* Not reached */ + return NULL; +} + +kernel_pid_t start_task_sched(struct task_sched* args) +{ + int i; + for (i = 0; i < args->num_tasks; i++) { + args->tasks[i]._next = -1; + args->tasks[i]._prev = -1; + } + mutex_init(&args->_lock); + memset(&args->_timer, 0x00, sizeof(xtimer_t)); + args->_first = -1; + args->_in_process_loop = false; + args->_pid = thread_create(args->thread_stack, args->thread_stack_size, + args->thread_priority, THREAD_CREATE_STACKTEST, + _task_sched, args, args->thread_name); + return args->_pid; +} + +static int _sched_task(struct task_sched* sched, int taskid, bool cancel, + int64_t delay); + +int sched_task(struct task_sched* sched, int taskid, int64_t delay) +{ + return _sched_task(sched, taskid, false, delay); +} + +int cancel_task(struct task_sched* sched, int taskid) +{ + return _sched_task(sched, taskid, true, 0); +} + +static int _sched_task(struct task_sched* sched, int taskid, bool cancel, + int64_t delay) +{ + uint64_t now; + struct task* t; + int oldfirst = sched->_first; + + if (taskid < 0 || taskid > sched->num_tasks) { + return -1; + } + + t = &sched->tasks[taskid]; + + mutex_lock(&sched->_lock); + + /* Remove the task from the queue. */ + if (t->_prev != -1) { + sched->tasks[t->_prev]._next = t->_next; + } else if (sched->_first == taskid) { + sched->_first = t->_next; + } + if (t->_next != -1) { + sched->tasks[t->_next]._prev = t->_prev; + } + + now = xtimer_now_usec64(); + + if (cancel) { + + t->_prev = -1; + t->_next = -1; + + } else { + + int64_t coalesce_delta; + int curr; + int prev = -1; + + /* Find the correct place in the queue. */ + DEBUG("Finding the spot. 
_first is %d\n", oldfirst); + for (curr = sched->_first; curr != -1; curr = sched->tasks[curr]._next) { + DEBUG("Iterating: prev = %d, curr = %d\n", prev, curr); + if (delay < (int64_t) (sched->tasks[curr]._req_exec_time - now)) { + break; + } + prev = curr; + } + DEBUG("Found the spot. prev = %d, curr = %d\n", prev, curr); + + /* Put the task at the correct place in the queue. */ + t->_prev = prev; + t->_next = curr; + if (curr != -1) { + sched->tasks[curr]._prev = taskid; + } + if (t->_prev != -1) { + sched->tasks[t->_prev]._next = taskid; + } else { + sched->_first = taskid; + } + + /* Correctly set the exec time. */ + t->_req_exec_time = now + (uint64_t) delay; + coalesce_delta = delay >> sched->coalesce_shift; + if (sched->max_coalesce_time_delta >= 0 + && coalesce_delta > sched->max_coalesce_time_delta) { + coalesce_delta = sched->max_coalesce_time_delta; + } + t->_min_exec_time = t->_req_exec_time - (uint64_t) coalesce_delta; + } + + /* + * If the head of the queue changed, reset the timer so the correct + * event fires (unless we're in the precessing loop; then we'll check + * anyway, so don't bother with sending a message). + */ + if (!sched->_in_process_loop && sched->_first != -1 + && (sched->_first == taskid || oldfirst == taskid)) { + + xtimer_remove(&sched->_timer); + + // If the next event is sufficiently close, just fire it. + if (0 <= (int64_t) (now - sched->tasks[sched->_first]._min_exec_time)) { + DEBUG("Firing immediately\n"); + msg_try_send(&expired, sched->_pid); + } else { + uint64_t delay_to_first = (uint64_t) + (sched->tasks[sched->_first]._req_exec_time - now); + DEBUG("Scheduled in %d milliseconds\n", (int) (delay_to_first / 1000)); + xtimer_set_msg64(&sched->_timer, delay_to_first, &expired, + sched->_pid); + } + } + + mutex_unlock(&sched->_lock); + return 0; +} diff --git a/sys/xtimer/xtimer_core.c b/sys/xtimer/xtimer_core.c index 1ac645838e9c..0c8711af7c87 100644 --- a/sys/xtimer/xtimer_core.c +++ b/sys/xtimer/xtimer_core.c @@ -170,20 +170,22 @@ static inline void _lltimer_set(uint32_t target) int _xtimer_set_absolute(xtimer_t *timer, uint32_t target) { - uint32_t now = _xtimer_now(); + uint32_t now; int res = 0; DEBUG("timer_set_absolute(): now=%" PRIu32 " target=%" PRIu32 "\n", now, target); + unsigned state = irq_disable(); timer->next = NULL; + now = _xtimer_now(); if ((target >= now) && ((target - XTIMER_BACKOFF) < now)) { + irq_restore(state); /* backoff */ xtimer_spin_until(target + XTIMER_BACKOFF); _shoot(timer); return 0; } - unsigned state = irq_disable(); if (_is_set(timer)) { _remove(timer); } From e47a1abf1940fa33bc332631ed891e67eaa793dc Mon Sep 17 00:00:00 2001 From: Sam Kumar Date: Fri, 12 May 2017 18:43:32 -0700 Subject: [PATCH 2/3] Fix PM_BLOCKER_INITIAL --- cpu/sam0_common/include/periph_cpu_common.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpu/sam0_common/include/periph_cpu_common.h b/cpu/sam0_common/include/periph_cpu_common.h index 15d924b06ea7..c6c654083b81 100644 --- a/cpu/sam0_common/include/periph_cpu_common.h +++ b/cpu/sam0_common/include/periph_cpu_common.h @@ -64,8 +64,6 @@ typedef uint32_t gpio_t; * @{ */ #define PM_NUM_MODES (3) -/** @todo we block all modes per default, until PM is cleanly implemented */ -#define PM_BLOCKER_INITIAL { .val_u32 = 0x01010101 } /** @} */ #ifndef DOXYGEN From fe86f341b708c244de5db694ffb8740aab075169 Mon Sep 17 00:00:00 2001 From: Sam Kumar Date: Fri, 12 May 2017 18:43:51 -0700 Subject: [PATCH 3/3] Fix use of fd_new in posix_sockets --- sys/posix/sockets/posix_sockets.c | 3 +-- 1 file 
changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/posix/sockets/posix_sockets.c b/sys/posix/sockets/posix_sockets.c index 97b0d779260e..48ff281668c2 100644 --- a/sys/posix/sockets/posix_sockets.c +++ b/sys/posix/sockets/posix_sockets.c @@ -490,8 +490,7 @@ int accept(int socket, struct sockaddr *restrict address, sa_len); } - int fd = fd_new(new_s - _socket_pool, socket_read, socket_write, - socket_close); + int fd = vfs_bind(VFS_ANY_FD, 0, &socket_ops, new_s); if (fd < 0) { errno = ENFILE; res = -1;