diff --git a/amy b/amy
index 8852d18d1..1230166b6 160000
--- a/amy
+++ b/amy
@@ -1 +1 @@
-Subproject commit 8852d18d1c2ed9145a4a5462a3cf193c934800b0
+Subproject commit 1230166b659c4c5aaab62464d3c7e54f0814700b
diff --git a/tulip/README.md b/tulip/README.md
index a65f6ed0b..3840ffcaf 100644
--- a/tulip/README.md
+++ b/tulip/README.md
@@ -1,6 +1,8 @@
 # Tulip folder structure
 
 ```
+amyboard - Tulip (headless) for the AMYboard
+amyrepl - MicroPython build with AMY included, but no display or Tulip-specific code
 esp32s3 - all TulipCC hardware specific files for the supported boards 
 fs - the filesystem that gets flashed as /sys on first run -- examples, images, etc
 linux - all Tulip Desktop for Linux specific files
@@ -8,6 +10,7 @@ macos - all Tulip Desktop for macOS specific files
 shared - code shared between all Tulip ports (hardware & Desktop)
 shared/py - Python modules that get loaded into Tulip
 shared/desktop - code shared between Tulip Desktop ports (macOS, iOS, Linux)
+shared/ulab - our fork of ulab, a numpy/scipy wrapper for micropython
 web - all Tulip Desktop for Web specific files
 ```
 
diff --git a/tulip/amyboard/CMakeLists.txt b/tulip/amyboard/CMakeLists.txt
new file mode 100644
index 000000000..31e9eaef0
--- /dev/null
+++ b/tulip/amyboard/CMakeLists.txt
@@ -0,0 +1,77 @@
+# Top-level cmake file for building Tulip (headless) for the AMYboard, based on the MicroPython ESP32 port.
+
+cmake_minimum_required(VERSION 3.12)
+
+execute_process(COMMAND bash -c "../shared/grab_submodules.sh"
+                    WORKING_DIRECTORY ".."
+                    OUTPUT_VARIABLE GIT_SUBMOD_RESULT)
+
+# We have to do some nasty stuff to get the LVGL MP submodule compiled
+# We generate a lvgl.pp.c file, which is just the compiler preprocessor running in this env
+execute_process(COMMAND bash -c "xtensa-esp32s3-elf-gcc -E -DLVGL_PREPROCESS -I ../../../lv_binding_micropython_tulip/pycparser/utils/fake_libc_include -I../../../lv_binding_micropython_tulip -I. -I../../../lv_binding_micropython_tulip/lvgl/src ../../../lv_binding_micropython_tulip/lvgl/lvgl.h > ../build/lvgl.pp.c"
+                    WORKING_DIRECTORY "."
+                )
+
+# Then we run a python script which generates a MP module for LVGL based on the source files (and your conf and stuff)
+# This gets compiled into Tulip 
+execute_process(COMMAND bash -c "python3 ../../../lv_binding_micropython_tulip/gen/gen_mpy.py -M lvgl -MP lv -MD ../build/lv_mpy.json -E ../build/lvgl.pp.c ../../../lv_binding_micropython_tulip/lvgl/lvgl.h > ../build/lv_mpy.c"
+                    WORKING_DIRECTORY "."
+                )
+
+# We also have to copy over mpconfigport.h
+execute_process(COMMAND bash -c "cp ../mpconfigport.h ../../../micropython/ports/esp32/mpconfigport.h" WORKING_DIRECTORY ".")
+
+# Turn this on for debugging submodules
+#file(WRITE "submod" "${GIT_SUBMOD_RESULT}")
+
+
+# Default to this port's own board (boards/AMYBOARD) when none is given on the command line.
+if(NOT MICROPY_BOARD)
+    set(MICROPY_BOARD AMYBOARD)
+endif()
+
+# Set the board directory and check that it exists.
+if(NOT MICROPY_BOARD_DIR)
+    set(MICROPY_BOARD_DIR ${CMAKE_CURRENT_LIST_DIR}/boards/${MICROPY_BOARD})
+endif()
+if(NOT EXISTS ${MICROPY_BOARD_DIR}/mpconfigboard.cmake)
+    message(FATAL_ERROR "Invalid MICROPY_BOARD specified: ${MICROPY_BOARD}")
+endif()
+
+# Define the output sdkconfig so it goes in the build directory.
+set(SDKCONFIG ${CMAKE_BINARY_DIR}/sdkconfig)
+
+# Save the manifest file set from the cmake command line.
+set(MICROPY_USER_FROZEN_MANIFEST ${MICROPY_FROZEN_MANIFEST})
+
+# Include board config; this is expected to set (among other options):
+# - SDKCONFIG_DEFAULTS
+# - IDF_TARGET
+include(${MICROPY_BOARD_DIR}/mpconfigboard.cmake)
+
+# Set the frozen manifest file. Note if MICROPY_FROZEN_MANIFEST is set from the cmake
+# command line, then it will override the default and any manifest set by the board.
+if (MICROPY_USER_FROZEN_MANIFEST)
+    set(MICROPY_FROZEN_MANIFEST ${MICROPY_USER_FROZEN_MANIFEST})
+elseif (NOT MICROPY_FROZEN_MANIFEST)
+    set(MICROPY_FROZEN_MANIFEST ${CMAKE_CURRENT_LIST_DIR}/boards/manifest.py)
+endif()
+
+# Concatenate all sdkconfig files into a combined one for the IDF to use.
+file(WRITE ${CMAKE_BINARY_DIR}/sdkconfig.combined.in "")
+foreach(SDKCONFIG_DEFAULT ${SDKCONFIG_DEFAULTS})
+    file(READ ${SDKCONFIG_DEFAULT} CONTENTS)
+    file(APPEND ${CMAKE_BINARY_DIR}/sdkconfig.combined.in "${CONTENTS}")
+endforeach()
+configure_file(${CMAKE_BINARY_DIR}/sdkconfig.combined.in ${CMAKE_BINARY_DIR}/sdkconfig.combined COPYONLY)
+set(SDKCONFIG_DEFAULTS ${CMAKE_BINARY_DIR}/sdkconfig.combined)
+
+# Include main IDF cmake file.
+include($ENV{IDF_PATH}/tools/cmake/project.cmake)
+
+# Set the location of the main component for the project (one per target).
+#set(EXTRA_COMPONENT_DIRS main components)
+
+# Define the project.
+project(micropython)
+idf_build_set_property(COMPILE_OPTIONS "-fdiagnostics-color=always" APPEND)
diff --git a/tulip/amyboard/boards/AMYBOARD/board.json b/tulip/amyboard/boards/AMYBOARD/board.json
new file mode 100644
index 000000000..a4cf989dd
--- /dev/null
+++ b/tulip/amyboard/boards/AMYBOARD/board.json
@@ -0,0 +1,18 @@
+{
+    "deploy": [
+        "../deploy_s3.md"
+    ],
+    "docs": "",
+    "features": [
+        "BLE",
+        "WiFi"
+    ],
+    "images": [
+        "generic_s3.jpg"
+    ],
+    "mcu": "esp32s3",
+    "product": "AMYboard",
+    "thumbnail": "",
+    "url": "https://www.espressif.com/en/products/modules",
+    "vendor": "Espressif"
+}
diff --git a/tulip/amyboard/boards/AMYBOARD/mpconfigboard.cmake b/tulip/amyboard/boards/AMYBOARD/mpconfigboard.cmake
new file mode 100644
index 000000000..33bb2af95
--- /dev/null
+++ b/tulip/amyboard/boards/AMYBOARD/mpconfigboard.cmake
@@ -0,0 +1,29 @@
+# Board configuration for AMYBOARD, included by the port's top-level CMakeLists.txt.
+set(IDF_TARGET esp32s3)
+
+set(MICROPY_PY_TINYUSB ON)
+
+
+# Added to the compile definitions of the MicroPython target by esp32_common.cmake.
+set(BOARD_DEFINITION1 TULIP4_R11)
+set(BOARD_DEFINITION2 MAKERFABS)
+
+# sdkconfig fragments; the top-level CMakeLists.txt concatenates them in this order.
+# NOTE(review): boards/N32R8/sdkconfig.board is not added alongside this file -- confirm it
+# exists in this port's boards/ directory, otherwise the file(READ) at configure time fails.
+set(SDKCONFIG_DEFAULTS
+    ../../micropython/ports/esp32/boards/sdkconfig.base
+    ../../micropython/ports/esp32/boards/sdkconfig.usb
+    ../../micropython/ports/esp32/boards/sdkconfig.240mhz
+    boards/sdkconfig.tulip
+    boards/N32R8/sdkconfig.board
+    boards/AMYBOARD/sdkconfig.board
+)
+
+# Board-specific sources; esp32_common.cmake prefixes each with the port directory.
+list(APPEND MICROPY_SOURCE_BOARD
+     gt911_touchscreen.c
+     esp_lcd_touch_gt911.c
+     esp32s3_display.c
+     usb_host.c
+)
diff --git a/tulip/amyboard/boards/AMYBOARD/mpconfigboard.h b/tulip/amyboard/boards/AMYBOARD/mpconfigboard.h
new file mode 100644
index 000000000..34c15eb85
--- /dev/null
+++ b/tulip/amyboard/boards/AMYBOARD/mpconfigboard.h
@@ -0,0 +1,9 @@
+#include "pins.h"
+#define MICROPY_HW_BOARD_NAME               "TulipCC"
+#define MICROPY_HW_MCU_NAME                 "ESP32S3"
+
+#define MICROPY_PY_MACHINE_DAC              (0)
+
+// Enable UART REPL for modules that have an external USB-UART and don't use native USB.
+#define MICROPY_HW_ENABLE_UART_REPL         (1)
+
diff --git a/tulip/amyboard/boards/AMYBOARD/pins.csv b/tulip/amyboard/boards/AMYBOARD/pins.csv
new file mode 100644
index 000000000..46d28cf1a
--- /dev/null
+++ b/tulip/amyboard/boards/AMYBOARD/pins.csv
@@ -0,0 +1,5 @@
+I2C_SCL,GPIO9
+I2C_SDA,GPIO8
+FG_INT,GPIO21
+UART0_TX,GPIO43
+UART0_RX,GPIO44
diff --git a/tulip/amyboard/boards/AMYBOARD/sdkconfig.board b/tulip/amyboard/boards/AMYBOARD/sdkconfig.board
new file mode 100644
index 000000000..36b7a96e0
--- /dev/null
+++ b/tulip/amyboard/boards/AMYBOARD/sdkconfig.board
@@ -0,0 +1,4 @@
+# Nothing added to the N32 defaults
+
+
+
diff --git a/tulip/amyboard/boards/manifest.py b/tulip/amyboard/boards/manifest.py
new file mode 100644
index 000000000..fcd857b65
--- /dev/null
+++ b/tulip/amyboard/boards/manifest.py
@@ -0,0 +1,29 @@
+# Freeze selected modules from $(PORT_DIR)/modules, but not _boot.py: we have our own.
+freeze("$(PORT_DIR)/modules", "apa106.py")
+freeze("$(PORT_DIR)/modules", "inisetup.py")
+freeze("$(PORT_DIR)/modules", "espnow.py")
+freeze("$(PORT_DIR)/modules", "flashbdev.py")
+
+include("$(MPY_DIR)/extmod/asyncio")
+
+# Useful networking-related packages.
+#require("mip")
+require("ntptime")
+#require("webrepl")
+
+# Require some micropython-lib modules.
+# require("aioespnow")
+require("dht")
+require("ds18x20")
+require("onewire")
+require("umqtt.robust")
+require("umqtt.simple")
+
+freeze("$(PORT_DIR)/../shared/py")
+freeze("$(MPY_DIR)/../amy", "amy.py")
+freeze("$(MPY_DIR)/../amy", "juno.py")
+freeze("$(MPY_DIR)/../amy", "amy_wave.py")
+freeze("$(MPY_DIR)/../amy/experiments", "tulip_piano.py")
+freeze("$(MPY_DIR)/../amy/experiments", "piano_params.py")
+
+#freeze("$(MPY_DIR)/lib/micropython-lib/micropython/utarfile", "utarfile.py")
diff --git a/tulip/amyboard/boards/sdkconfig.tulip b/tulip/amyboard/boards/sdkconfig.tulip
new file mode 100644
index 000000000..b6170b264
--- /dev/null
+++ b/tulip/amyboard/boards/sdkconfig.tulip
@@ -0,0 +1,33 @@
+CONFIG_SPIRAM=y
+CONFIG_SPIRAM_CACHE_WORKAROUND=y
+CONFIG_SPIRAM_IGNORE_NOTFOUND=y
+#CONFIG_COMPILER_OPTIMIZATION_SIZE=y
+CONFIG_COMPILER_OPTIMIZATION_PERF=y
+#CONFIG_COMPILER_OPTIMIZATION_NONE=y
+CONFIG_USB_HOST_HUBS_SUPPORTED=y
+CONFIG_USB_HOST_HUB_MULTI_LEVEL=y
+CONFIG_USB_HOST_EXT_PORT_SUPPORT_LS=y
+#CONFIG_FREERTOS_PLACE_FUNCTIONS_INTO_FLASH=n
+CONFIG_FREERTOS_HZ=1000
+
+CONFIG_SPIRAM_TYPE_AUTO=y
+CONFIG_SPIRAM_CLK_IO=30
+CONFIG_SPIRAM_CS_IO=26
+CONFIG_SPIRAM_BOOT_INIT=y
+CONFIG_SPIRAM_IGNORE_NOTFOUND=y
+CONFIG_SPIRAM_USE_CAPS_ALLOC=y
+CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC=y
+
+CONFIG_FREERTOS_USE_TRACE_FACILITY=n
+CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS=y
+CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y
+
+
+CONFIG_SPIRAM_FETCH_INSTRUCTIONS=y
+CONFIG_SPIRAM_RODATA=y
+CONFIG_LCD_RGB_ISR_IRAM_SAFE=n
+CONFIG_LCD_RGB_RESTART_IN_VSYNC=y
+
+CONFIG_LWIP_PPP_SUPPORT=n
+
+CONFIG_ESP_TIMER_TASK_STACK_SIZE=8192
diff --git a/tulip/amyboard/boot.py b/tulip/amyboard/boot.py
new file mode 100644
index 000000000..85d5d2f65
--- /dev/null
+++ b/tulip/amyboard/boot.py
@@ -0,0 +1,2 @@
+# boot.py
+# Put anything here you want to run on Tulip startup
diff --git a/tulip/amyboard/esp32_common.cmake b/tulip/amyboard/esp32_common.cmake
new file mode 100644
index 000000000..194b348bf
--- /dev/null
+++ b/tulip/amyboard/esp32_common.cmake
@@ -0,0 +1,428 @@
+# Set location of base MicroPython directory.
+if(NOT MICROPY_DIR)
+    get_filename_component(MICROPY_DIR ${CMAKE_CURRENT_LIST_DIR}/../../micropython ABSOLUTE)
+endif()
+
+# Set location of base MicroPython esp32 port (which this is based on).
+if(NOT MICROPY_ESP32_DIR)
+    get_filename_component(MICROPY_ESP32_DIR ${CMAKE_CURRENT_LIST_DIR}/../../micropython/ports/esp32 ABSOLUTE)
+endif()
+
+# Set location of this port's directory (the directory containing this file).
+if(NOT MICROPY_PORT_DIR)
+    get_filename_component(MICROPY_PORT_DIR ${CMAKE_CURRENT_LIST_DIR}/ ABSOLUTE)
+endif()
+
+
+# Set location of the tulip shared directory.
+if(NOT TULIP_SHARED_DIR)
+    get_filename_component(TULIP_SHARED_DIR ${CMAKE_CURRENT_LIST_DIR}/../shared ABSOLUTE)
+endif()
+
+# Set location of the ulab directory.
+if(NOT ULAB_DIR)
+    get_filename_component(ULAB_DIR ${CMAKE_CURRENT_LIST_DIR}/../shared/ulab/code ABSOLUTE)
+endif()
+
+# Set location of the amy directory.
+if(NOT AMY_DIR)
+    get_filename_component(AMY_DIR ${CMAKE_CURRENT_LIST_DIR}/../../amy ABSOLUTE)
+endif()
+
+# Set TULIP_ESP32S3_DIR; in this port it resolves to the directory containing this file.
+if(NOT TULIP_ESP32S3_DIR)
+    get_filename_component(TULIP_ESP32S3_DIR ${CMAKE_CURRENT_LIST_DIR} ABSOLUTE)
+endif()
+
+
+# Set location of lvgl_mp dir
+if(NOT LV_BINDING_DIR)
+    get_filename_component(LV_BINDING_DIR ${CMAKE_CURRENT_LIST_DIR}/../../lv_binding_micropython_tulip ABSOLUTE)
+endif()
+
+# Set location of lvgl dir
+if(NOT LVGL_DIR)
+    get_filename_component(LVGL_DIR ${CMAKE_CURRENT_LIST_DIR}/../../lv_binding_micropython_tulip/lvgl ABSOLUTE)
+endif()
+
+file(GLOB_RECURSE LVGL_SOURCES ${LVGL_DIR}/src/*.c)
+
+
+# Include core source components.
+include(${MICROPY_DIR}/py/py.cmake)
+
+if(NOT CMAKE_BUILD_EARLY_EXPANSION)
+    # Enable extmod components that will be configured by extmod.cmake.
+    # A board may also have enabled additional components.
+    set(MICROPY_PY_BTREE ON)
+
+    include(${MICROPY_DIR}/py/usermod.cmake)
+    include(${MICROPY_DIR}/extmod/extmod.cmake)
+endif()
+
+list(APPEND MICROPY_QSTRDEFS_PORT
+    ${MICROPY_PORT_DIR}/qstrdefsport.h
+)
+
+list(APPEND MICROPY_SOURCE_SHARED
+    ${MICROPY_DIR}/shared/readline/readline.c
+    ${MICROPY_DIR}/shared/netutils/netutils.c
+    ${MICROPY_DIR}/shared/timeutils/timeutils.c
+    ${MICROPY_DIR}/shared/runtime/interrupt_char.c
+    ${MICROPY_DIR}/shared/runtime/mpirq.c
+    ${MICROPY_DIR}/shared/runtime/stdout_helpers.c
+    ${MICROPY_DIR}/shared/runtime/sys_stdio_mphal.c
+    ${MICROPY_DIR}/shared/runtime/pyexec.c
+)
+
+
+list(APPEND MICROPY_SOURCE_LIB
+    ${MICROPY_DIR}/lib/littlefs/lfs1.c
+    ${MICROPY_DIR}/lib/littlefs/lfs1_util.c
+    ${MICROPY_DIR}/lib/littlefs/lfs2.c
+    ${MICROPY_DIR}/lib/littlefs/lfs2_util.c
+    ${MICROPY_DIR}/lib/mbedtls_errors/esp32_mbedtls_errors.c
+    ${MICROPY_DIR}/lib/oofatfs/ff.c
+    ${MICROPY_DIR}/lib/oofatfs/ffunicode.c
+)
+
+
+list(APPEND MICROPY_SOURCE_DRIVERS
+    ${MICROPY_DIR}/drivers/bus/softspi.c
+    ${MICROPY_DIR}/drivers/dht/dht.c
+)
+
+
+string(CONCAT GIT_SUBMODULES "${GIT_SUBMODULES} " lib/tinyusb)
+if(MICROPY_PY_TINYUSB)
+    set(TINYUSB_SRC "${MICROPY_DIR}/lib/tinyusb/src")
+    string(TOUPPER OPT_MCU_${IDF_TARGET} tusb_mcu)
+
+    list(APPEND MICROPY_DEF_TINYUSB
+        CFG_TUSB_MCU=${tusb_mcu}
+    )
+
+    list(APPEND MICROPY_SOURCE_TINYUSB
+        ${TINYUSB_SRC}/tusb.c
+        ${TINYUSB_SRC}/common/tusb_fifo.c
+        ${TINYUSB_SRC}/device/usbd.c
+        ${TINYUSB_SRC}/device/usbd_control.c
+        ${TINYUSB_SRC}/class/cdc/cdc_device.c
+        ${TINYUSB_SRC}/portable/synopsys/dwc2/dcd_dwc2.c
+        ${MICROPY_DIR}/shared/tinyusb/mp_usbd.c
+        ${MICROPY_DIR}/shared/tinyusb/mp_usbd_cdc.c
+        ${MICROPY_DIR}/shared/tinyusb/mp_usbd_descriptor.c
+    )
+
+    list(APPEND MICROPY_INC_TINYUSB
+        ${TINYUSB_SRC}
+        ${MICROPY_DIR}/shared/tinyusb/
+    )
+
+    list(APPEND MICROPY_LINK_TINYUSB
+        -Wl,--wrap=dcd_event_handler
+    )
+endif()
+
+# Port sources: files local to this port first, then files compiled from the MicroPython esp32 port.
+list(APPEND MICROPY_SOURCE_PORT
+    ${MICROPY_PORT_DIR}/main.c
+    ${MICROPY_PORT_DIR}/multicast.c
+    ${MICROPY_PORT_DIR}/help.c
+    ${MICROPY_PORT_DIR}/build/lv_mpy.c
+    ${MICROPY_PORT_DIR}/network_common.c
+    ${MICROPY_PORT_DIR}/esp_lcd_touch.c
+    ${MICROPY_PORT_DIR}/modsocket.c
+    ${MICROPY_PORT_DIR}/mphalport.c
+    ${MICROPY_PORT_DIR}/usb.c
+
+    # Taken directly from ${MICROPY_ESP32_DIR}; each file must be listed exactly once.
+    ${MICROPY_ESP32_DIR}/panichandler.c
+    ${MICROPY_ESP32_DIR}/adc.c
+    ${MICROPY_ESP32_DIR}/uart.c
+    ${MICROPY_ESP32_DIR}/usb_serial_jtag.c
+    ${MICROPY_ESP32_DIR}/gccollect.c
+    ${MICROPY_ESP32_DIR}/fatfs_port.c
+    ${MICROPY_ESP32_DIR}/machine_bitstream.c
+    ${MICROPY_ESP32_DIR}/machine_sdcard.c
+    ${MICROPY_ESP32_DIR}/machine_timer.c
+    ${MICROPY_ESP32_DIR}/machine_pin.c
+    ${MICROPY_ESP32_DIR}/machine_touchpad.c
+    ${MICROPY_ESP32_DIR}/machine_dac.c
+    ${MICROPY_ESP32_DIR}/machine_i2c.c
+    ${MICROPY_ESP32_DIR}/network_lan.c
+    ${MICROPY_ESP32_DIR}/network_wlan.c
+    ${MICROPY_ESP32_DIR}/modesp.c
+    ${MICROPY_ESP32_DIR}/esp32_nvs.c
+    ${MICROPY_ESP32_DIR}/esp32_partition.c
+    ${MICROPY_ESP32_DIR}/esp32_rmt.c
+    ${MICROPY_ESP32_DIR}/esp32_ulp.c
+    ${MICROPY_ESP32_DIR}/modesp32.c
+    ${MICROPY_ESP32_DIR}/machine_hw_spi.c
+    ${MICROPY_ESP32_DIR}/mpthreadport.c
+    ${MICROPY_ESP32_DIR}/machine_rtc.c
+    ${MICROPY_ESP32_DIR}/modespnow.c
+)
+
+list(TRANSFORM MICROPY_SOURCE_BOARD PREPEND ${MICROPY_PORT_DIR}/)
+
+list(APPEND MICROPY_SOURCE_PORT ${CMAKE_BINARY_DIR}/pins.c)
+
+# Extra C sources appended to the extmod group: Tulip shared code, AMY, and ulab.
+list(APPEND MICROPY_SOURCE_EXTMOD
+    ${TULIP_SHARED_DIR}/modtulip.c
+    ${TULIP_SHARED_DIR}/polyfills.c
+    ${TULIP_SHARED_DIR}/smallfont.c
+    ${TULIP_SHARED_DIR}/display.c
+    ${TULIP_SHARED_DIR}/bresenham.c
+    ${TULIP_SHARED_DIR}/tulip_helpers.c
+    ${TULIP_SHARED_DIR}/editor.c
+    ${TULIP_SHARED_DIR}/keyscan.c
+    ${TULIP_SHARED_DIR}/help.c
+    ${TULIP_SHARED_DIR}/alles.c
+    ${TULIP_SHARED_DIR}/ui.c
+    ${TULIP_SHARED_DIR}/midi.c
+    ${TULIP_SHARED_DIR}/sounds.c
+    ${TULIP_SHARED_DIR}/tsequencer.c
+    ${TULIP_SHARED_DIR}/lodepng.c
+    ${TULIP_SHARED_DIR}/lvgl_u8g2.c
+    ${TULIP_SHARED_DIR}/u8fontdata.c
+    ${TULIP_SHARED_DIR}/u8g2_fonts.c
+    ${AMY_DIR}/src/dsps_biquad_f32_ae32.S
+    ${AMY_DIR}/src/algorithms.c
+    ${AMY_DIR}/src/custom.c
+    ${AMY_DIR}/src/patches.c
+    ${AMY_DIR}/src/amy.c
+    ${AMY_DIR}/src/delay.c
+    ${AMY_DIR}/src/envelope.c
+    ${AMY_DIR}/src/filters.c
+    ${AMY_DIR}/src/oscillators.c
+    ${AMY_DIR}/src/transfer.c
+    ${AMY_DIR}/src/sequencer.c
+    ${AMY_DIR}/src/partials.c
+    ${AMY_DIR}/src/pcm.c
+    ${AMY_DIR}/src/log2_exp2.c
+    # ulab sources, rooted at ${ULAB_DIR}.
+    ${ULAB_DIR}/scipy/integrate/integrate.c
+    ${ULAB_DIR}/scipy/linalg/linalg.c
+    ${ULAB_DIR}/scipy/optimize/optimize.c
+    ${ULAB_DIR}/scipy/signal/signal.c
+    ${ULAB_DIR}/scipy/special/special.c
+    ${ULAB_DIR}/ndarray_operators.c
+    ${ULAB_DIR}/ulab_tools.c
+    ${ULAB_DIR}/ndarray.c
+    ${ULAB_DIR}/numpy/ndarray/ndarray_iter.c
+    ${ULAB_DIR}/ndarray_properties.c
+    ${ULAB_DIR}/numpy/approx.c
+    ${ULAB_DIR}/numpy/bitwise.c
+    ${ULAB_DIR}/numpy/compare.c
+    ${ULAB_DIR}/numpy/carray/carray.c
+    ${ULAB_DIR}/numpy/carray/carray_tools.c
+    ${ULAB_DIR}/numpy/create.c
+    ${ULAB_DIR}/numpy/fft/fft.c
+    ${ULAB_DIR}/numpy/fft/fft_tools.c
+    ${ULAB_DIR}/numpy/filter.c
+    ${ULAB_DIR}/numpy/io/io.c
+    ${ULAB_DIR}/numpy/linalg/linalg.c
+    ${ULAB_DIR}/numpy/linalg/linalg_tools.c
+    ${ULAB_DIR}/numpy/numerical.c
+    ${ULAB_DIR}/numpy/poly.c
+    ${ULAB_DIR}/numpy/random/random.c
+    ${ULAB_DIR}/numpy/stats.c
+    ${ULAB_DIR}/numpy/transform.c
+    ${ULAB_DIR}/numpy/vector.c
+    ${ULAB_DIR}/numpy/numpy.c
+    ${ULAB_DIR}/scipy/scipy.c
+    ${ULAB_DIR}/user/user.c
+    ${ULAB_DIR}/utils/utils.c
+    ${ULAB_DIR}/ulab.c
+)
+
+list(APPEND MICROPY_SOURCE_QSTR
+    ${MICROPY_SOURCE_PY}
+    ${MICROPY_SOURCE_EXTMOD}
+    ${MICROPY_SOURCE_USERMOD}
+    ${MICROPY_SOURCE_SHARED}
+    ${MICROPY_SOURCE_LIB}
+    ${MICROPY_SOURCE_PORT}
+    ${MICROPY_SOURCE_BOARD}
+    ${MICROPY_SOURCE_TINYUSB}
+)
+
+list(APPEND IDF_COMPONENTS
+    app_update
+    bootloader_support
+    #bt
+    driver
+    esp_driver_tsens
+    esp_adc
+    esp_app_format
+    esp_bootloader_format
+    esp_common
+    esp_eth
+    esp_driver_uart
+    esp_driver_i2s
+    esp_driver_i2c
+    esp_driver_sdmmc
+    esp_driver_sdspi
+    esp_driver_spi
+    esp_driver_gpio
+    esp_driver_ledc
+    esp_event
+    esp_hw_support
+    esp_lcd
+    esp_mm
+    esp_netif
+    esp_partition
+    esp_pm
+    esp_psram
+    esp_ringbuf
+    esp_rom
+    esp_system
+    esp_timer
+    esp_wifi
+    freertos
+    hal
+    heap
+    log
+    lwip
+    mbedtls
+    newlib
+    nvs_flash
+    sdmmc
+    soc
+    spi_flash
+    ulp
+    usb
+    vfs
+    xtensa
+)
+
+# Register the main IDF component.
+idf_component_register(
+    SRCS
+        ${MICROPY_SOURCE_PY}
+        ${MICROPY_SOURCE_EXTMOD}
+        ${MICROPY_SOURCE_SHARED}
+        ${MICROPY_SOURCE_LIB}
+        ${MICROPY_SOURCE_DRIVERS}
+        ${MICROPY_SOURCE_PORT}
+        ${MICROPY_SOURCE_BOARD}
+        ${MICROPY_SOURCE_TINYUSB}
+        ${LVGL_SOURCES}
+    INCLUDE_DIRS
+        .
+        ${MICROPY_INC_CORE}
+        ${MICROPY_INC_USERMOD}
+        ${MICROPY_ESP32_DIR}
+        ${MICROPY_BOARD_DIR}
+        ${CMAKE_BINARY_DIR}
+        ${MICROPY_INC_TINYUSB}
+        ../../tulip/shared
+        ../../amy/src
+        ../../tulip/shared/ulab/code
+        ${LV_BINDING_DIR}
+        ${LVGL_DIR}/src
+    REQUIRES
+        ${IDF_COMPONENTS}
+)
+
+                    
+# Set the MicroPython target as the current (main) IDF component target.
+set(MICROPY_TARGET ${COMPONENT_TARGET})
+
+# Define mpy-cross flags, for use with frozen code.
+set(MICROPY_CROSS_FLAGS -march=xtensawin)
+
+# Set compile options for this port.
+target_compile_definitions(${MICROPY_TARGET} PUBLIC
+    ${MICROPY_DEF_CORE}
+    MICROPY_ESP_IDF_4=1
+    MICROPY_VFS_FAT=1
+    MICROPY_VFS_LFS2=1
+    MICROPY_VFS_LFS1=1
+    MODULE_ULAB_ENABLED=1
+    FFCONF_H=\"${MICROPY_OOFATFS_DIR}/ffconf.h\"
+    LFS1_NO_MALLOC LFS1_NO_DEBUG LFS1_NO_WARN LFS1_NO_ERROR LFS1_NO_ASSERT
+    LFS2_NO_MALLOC LFS2_NO_ASSERT
+    ESP_PLATFORM
+    TULIP
+    #AMY_DEBUG
+    LV_CONF_INCLUDE_SIMPLE
+    ${BOARD_DEFINITION1}
+    ${BOARD_DEFINITION2}
+    ${MICROPY_DEF_TINYUSB}
+)
+
+#LFS2_NO_DEBUG LFS2_NO_WARN LFS2_NO_ERROR 
+
+# Disable some warnings to keep the build output clean.
+target_compile_options(${MICROPY_TARGET} PUBLIC
+    -Wno-clobbered
+    -Wno-uninitialized
+    -Wno-dangling-pointer
+    -Wno-deprecated-declarations
+    -Wno-missing-field-initializers
+    -Wno-unused-const-variable
+    -fsingle-precision-constant
+    -Wno-strict-aliasing
+    -DESP_PLATFORM
+    -DSTATIC=static
+    -DLFS2_NO_DEBUG
+)
+
+# Additional include directories needed for private NimBLE headers.
+#target_include_directories(${MICROPY_TARGET} PUBLIC
+#    ${IDF_PATH}/components/bt/host/nimble/nimble
+#)
+
+target_link_options(${MICROPY_TARGET} PUBLIC
+     ${MICROPY_LINK_TINYUSB}
+)
+
+# Add additional extmod and usermod components.
+target_link_libraries(${MICROPY_TARGET} micropy_extmod_btree)
+target_link_libraries(${MICROPY_TARGET} usermod)
+
+# Collect all of the include directories and compile definitions for the IDF components.
+foreach(comp ${IDF_COMPONENTS})
+    micropy_gather_target_properties(__idf_${comp})
+    micropy_gather_target_properties(${comp})
+endforeach()
+
+
+# Include the main MicroPython cmake rules.
+include(${MICROPY_DIR}/py/mkrules.cmake)
+
+
+
+# Generate source files for named pins (requires mkrules.cmake for MICROPY_GENHDR_DIR).
+
+set(GEN_PINS_PREFIX "${MICROPY_ESP32_DIR}/boards/pins_prefix.c")
+set(GEN_PINS_MKPINS "${MICROPY_ESP32_DIR}/boards/make-pins.py")
+set(GEN_PINS_SRC "${CMAKE_BINARY_DIR}/pins.c")
+set(GEN_PINS_HDR "${MICROPY_GENHDR_DIR}/pins.h")
+
+if(EXISTS "${MICROPY_BOARD_DIR}/pins.csv")
+    set(GEN_PINS_BOARD_CSV "${MICROPY_BOARD_DIR}/pins.csv")
+    set(GEN_PINS_BOARD_CSV_ARG --board-csv "${GEN_PINS_BOARD_CSV}")
+endif()
+
+target_sources(${MICROPY_TARGET} PRIVATE ${GEN_PINS_HDR})
+
+add_custom_command(
+    OUTPUT ${GEN_PINS_SRC} ${GEN_PINS_HDR}
+    COMMAND ${Python3_EXECUTABLE} ${GEN_PINS_MKPINS} ${GEN_PINS_BOARD_CSV_ARG}
+        --prefix ${GEN_PINS_PREFIX} --output-source ${GEN_PINS_SRC} --output-header ${GEN_PINS_HDR}
+    DEPENDS
+        ${MICROPY_MPVERSION}
+        ${GEN_PINS_MKPINS}
+        ${GEN_PINS_BOARD_CSV}
+        ${GEN_PINS_PREFIX}
+    VERBATIM
+    COMMAND_EXPAND_LISTS
+)
+
+
+
+
diff --git a/tulip/amyboard/main.c b/tulip/amyboard/main.c
new file mode 100644
index 000000000..d2e505dea
--- /dev/null
+++ b/tulip/amyboard/main.c
@@ -0,0 +1,489 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * Development of the code in this file was sponsored by Microbric Pty Ltd
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2016 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "esp_system.h"
+#include "nvs_flash.h"
+#include "esp_task.h"
+#include "esp_event.h"
+#include "esp_log.h"
+#include "esp_psram.h"
+
+#include "py/stackctrl.h"
+#include "py/nlr.h"
+#include "py/compile.h"
+#include "py/runtime.h"
+#include "py/persistentcode.h"
+#include "py/repl.h"
+#include "py/gc.h"
+#include "py/mphal.h"
+#include "shared/readline/readline.h"
+#include "shared/runtime/pyexec.h"
+#include "uart.h"
+
+
+#include "usb.h"
+#include "usb_serial_jtag.h"
+#include "modmachine.h"
+#include "modnetwork.h"
+#include "mpthreadport.h"
+#ifdef TDECK
+#include "tdeck_display.h"
+#include "tdeck_keyboard.h"
+#endif
+
+#include "tsequencer.h"
+
+
+#if MICROPY_BLUETOOTH_NIMBLE
+#include "extmod/modbluetooth.h"
+#endif
+
+#if MICROPY_ESPNOW
+#include "modespnow.h"
+#endif
+
+// Settings for memory-mapped location of SPIRAM.
+#if CONFIG_IDF_TARGET_ESP32 || CONFIG_IDF_TARGET_ESP32S2
+#define IDF_TARGET_PSRAM_ADDR_START (SOC_EXTRAM_DATA_LOW)
+#define IDF_TARGET_PSRAM_SIZE (SOC_EXTRAM_DATA_SIZE)
+#elif CONFIG_IDF_TARGET_ESP32S3
+#define IDF_TARGET_PSRAM_ADDR_START (SOC_DROM_HIGH)
+#define IDF_TARGET_PSRAM_SIZE (SOC_EXTRAM_DATA_HIGH - IDF_TARGET_PSRAM_ADDR_START)
+#endif
+
+// MicroPython runs as a task under FreeRTOS
+#define MP_TASK_PRIORITY        (ESP_TASK_PRIO_MIN + 1)
+
+// Set the margin for detecting stack overflow, depending on the CPU architecture.
+#if CONFIG_IDF_TARGET_ESP32C3
+#define MP_TASK_STACK_LIMIT_MARGIN (2048)
+#else
+#define MP_TASK_STACK_LIMIT_MARGIN (1024)
+#endif
+
+#include "display.h"
+#include "alles.h"
+#include "tasks.h"
+
+#ifdef TULIP_DIY
+#include "ft5x06_touchscreen.h"
+#elif defined MAKERFABS
+#include "gt911_touchscreen.h"
+#endif
+#ifdef TDECK
+#include "tdeck_keyboard.h"
+#else
+#include "usb_host.h"
+#endif
+
+TaskHandle_t display_handle;
+TaskHandle_t usb_handle;
+TaskHandle_t touchscreen_handle;
+TaskHandle_t tulip_mp_handle;
+TaskHandle_t midi_handle;
+TaskHandle_t alles_handle;
+TaskHandle_t alles_parse_handle;
+TaskHandle_t alles_receive_handle;
+TaskHandle_t amy_render_handle;
+TaskHandle_t alles_fill_buffer_handle;
+TaskHandle_t idle_0_handle;
+TaskHandle_t idle_1_handle;
+TaskHandle_t sequencer_handle;
+
+// For CPU usage
+unsigned long last_task_counters[MAX_TASKS];
+
+
+typedef void (*esp_alloc_failed_hook_t) (size_t size, uint32_t caps, const char * function_name);
+#include "esp_heap_caps.h"
+
+// Failed-allocation hook for heap_caps_register_failed_alloc_callback(): prints the request and its caps.
+void esp_alloc_failed(size_t size, uint32_t caps, const char *function_name) {
+    static const struct { uint32_t bit; const char *label; } cap_names[] = {
+        {MALLOC_CAP_SPIRAM, "spiram "}, {MALLOC_CAP_INTERNAL, "internal "}, {MALLOC_CAP_32BIT, "32bit "},
+        {MALLOC_CAP_DEFAULT, "default "}, {MALLOC_CAP_IRAM_8BIT, "iram8bit "}, {MALLOC_CAP_RTCRAM, "rtcram "},
+        {MALLOC_CAP_8BIT, "8bit "}, {MALLOC_CAP_EXEC, "exec "}, {MALLOC_CAP_DMA, "dma "},
+    };
+    printf("alloc failed size %u function %s caps: ", (unsigned)size, function_name);
+    for(size_t k = 0; k < sizeof(cap_names)/sizeof(cap_names[0]); k++) {
+        if(caps & cap_names[k].bit) printf("%s", cap_names[k].label);
+    }
+    printf("\n");
+}
+
+// Report CPU usage as a percentage, measured since the previous call (0 if it cannot be measured).
+// When debug is non-zero, also print the task list, a per-task breakdown and heap statistics.
+float compute_cpu_usage(uint8_t debug) {
+    TaskStatus_t *pxTaskStatusArray;
+    volatile UBaseType_t uxArraySize, x, i;
+
+    const char* const tasks[] = {
+         "IDLE0", "IDLE1", "Tmr Svc", "ipc0", "ipc1", "main", "wifi", "esp_timer", "sys_evt", "tiT",
+         DISPLAY_TASK_NAME, USB_TASK_NAME, TOUCHSCREEN_TASK_NAME, TULIP_MP_TASK_NAME, MIDI_TASK_NAME, ALLES_TASK_NAME,
+         ALLES_PARSE_TASK_NAME, ALLES_RECEIVE_TASK_NAME, ALLES_RENDER_TASK_NAME, ALLES_FILL_BUFFER_TASK_NAME, SEQUENCER_TASK_NAME, 0
+    };
+    const uint8_t cores[] = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, DISPLAY_TASK_COREID, USB_TASK_COREID, TOUCHSCREEN_TASK_COREID, TULIP_MP_TASK_COREID,
+        MIDI_TASK_COREID, ALLES_TASK_COREID, ALLES_PARSE_TASK_COREID, ALLES_RECEIVE_TASK_COREID, ALLES_RENDER_TASK_COREID, ALLES_FILL_BUFFER_TASK_COREID,
+        SEQUENCER_TASK_COREID};
+    // cores[] has one entry per real name in tasks[]; bound the loops by it as well as by MAX_TASKS.
+    const UBaseType_t tracked = sizeof(cores) / sizeof(cores[0]);
+
+    uxArraySize = uxTaskGetNumberOfTasks();
+    pxTaskStatusArray = pvPortMalloc( uxArraySize * sizeof( TaskStatus_t ) );
+    if(pxTaskStatusArray == NULL) return 0.0; // no memory for the task snapshot: nothing to measure
+    uxArraySize = uxTaskGetSystemState( pxTaskStatusArray, uxArraySize, NULL );
+    if(debug) {
+        printf("%u tasks running now\n", (unsigned)uxArraySize);
+        for(x=0; x<uxArraySize; x++) { // for each task
+            printf("_%s_ ", pxTaskStatusArray[x].pcTaskName);
+        }
+        printf("\n");
+    }
+
+    unsigned long counter_since_last[MAX_TASKS] = {0};
+    unsigned long ulTotalRunTime_per_core[2] = {0, 0};
+
+    // We have to check for the names we want to track
+    for(i=0; i<MAX_TASKS && i<tracked; i++) { // for each name
+        for(x=0; x<uxArraySize; x++) { // for each task
+            if(strcmp(pxTaskStatusArray[x].pcTaskName, tasks[i])==0) {
+                counter_since_last[i] = pxTaskStatusArray[x].ulRunTimeCounter - last_task_counters[i];
+                last_task_counters[i] = pxTaskStatusArray[x].ulRunTimeCounter;
+                ulTotalRunTime_per_core[cores[i]] += counter_since_last[i];
+            }
+        }
+    }
+    if(debug) {
+        printf("------ CPU usage since last call to tulip.cpu()\n");
+        for(i=0; i<MAX_TASKS && i<tracked; i++) {
+            printf("%d %-15s\t%-15lu\t\t%2.2f%%\n", cores[i], tasks[i], counter_since_last[i], ((float)counter_since_last[i])/ulTotalRunTime_per_core[cores[i]] * 100.0);
+        }
+    }
+    vPortFree(pxTaskStatusArray);
+
+    // Also print heap info
+    if(debug){
+        fprintf(stderr, "SPIRAM:\n "); fflush(stderr);
+        heap_caps_print_heap_info(MALLOC_CAP_SPIRAM);
+
+        fprintf(stderr, "INTERNAL:\n "); fflush(stderr);
+        heap_caps_print_heap_info(MALLOC_CAP_INTERNAL);
+    }
+
+    unsigned long freeTime = counter_since_last[0] + counter_since_last[1]; // add IDLE0 + IDLE1
+    unsigned long ulTotalRunTime = ulTotalRunTime_per_core[0] + ulTotalRunTime_per_core[1]; // add total counts
+    if(ulTotalRunTime == 0) return 0.0; // no tracked task ran: avoid returning 0/0 (NaN)
+    return 100.0 - (((float)freeTime/(float)ulTotalRunTime) * 100.0); // return CPU usage
+}
+
+// Log sink that discards everything it is given; meant as a log target during raw REPL mode.
+int vprintf_null(const char *format, va_list ap) {
+    (void)format; (void)ap; // deliberately ignored
+    return 0;
+}
+
+extern void setup_lvgl();
+uint8_t lvgl_setup = 0;
+
+// FreeRTOS task body that hosts the MicroPython interpreter.
+// It performs one-time setup (thread support, console transport, machine_init,
+// GC heap allocation) and then runs the soft_reset sequence: initialise the VM,
+// execute _boot.py / boot.py / main.py, and service the REPL until it exits.
+// pvParameter is not used.
+void mp_task(void *pvParameter) {
+    // Stack pointer at task entry; used below as the top of the MicroPython stack.
+    volatile uint32_t sp = (uint32_t)esp_cpu_get_sp();
+    //volatile uint32_t sp = (uint32_t)get_sp();
+    #if MICROPY_PY_THREAD
+    mp_thread_init(pxTaskGetStackStart(NULL), TULIP_MP_TASK_STACK_SIZE / sizeof(uintptr_t));
+    #endif
+    
+    // Console transport: native USB when CONFIG_USB_ENABLED, otherwise the
+    // USB Serial/JTAG peripheral (only initialised on TDECK builds).
+    #if CONFIG_USB_ENABLED
+    usb_init();
+    #elif CONFIG_ESP_CONSOLE_USB_SERIAL_JTAG
+    #ifdef TDECK
+    fprintf(stderr, "init jtag\n");
+    usb_serial_jtag_init();
+    #endif
+    #endif
+
+    #if MICROPY_HW_ENABLE_UART_REPL
+    fprintf(stderr, "init uart repl\n");
+    uart_stdout_init();
+    #endif
+    
+    machine_init();
+
+    //esp_err_t err = esp_event_loop_create_default();
+    //if (err != ESP_OK) {
+    //    ESP_LOGE("esp_init", "can't create event loop: 0x%x\n", err);
+    //}
+
+    // Route failed heap allocations to esp_alloc_failed, then allocate the GC
+    // heap once (before the soft_reset label) with 8-bit + SPIRAM capability flags.
+    // NOTE(review): the heap_caps_malloc() result is not checked for NULL before
+    // gc_init() uses it below - confirm this cannot fail on supported boards.
+    heap_caps_register_failed_alloc_callback(esp_alloc_failed);
+    uint32_t caps = MALLOC_CAP_8BIT | MALLOC_CAP_SPIRAM;
+    size_t mp_task_heap_size = MP_TASK_HEAP_SIZE; 
+    void *mp_task_heap = heap_caps_malloc(mp_task_heap_size, caps);
+
+soft_reset:
+
+    // initialise the stack pointer for the main thread
+    mp_stack_set_top((void *)sp);
+    mp_stack_set_limit(TULIP_MP_TASK_STACK_SIZE - MP_TASK_STACK_LIMIT_MARGIN);
+
+    // The heap end is computed with arithmetic on a void pointer (a GNU C extension).
+    gc_init(mp_task_heap, mp_task_heap + mp_task_heap_size);
+    mp_init();
+
+    mp_obj_list_append(mp_sys_path, MP_OBJ_NEW_QSTR(MP_QSTR__slash_lib));
+
+    readline_init0();
+
+    // LVGL is set up on the first pass only; lvgl_setup latches to 1.
+    if(!lvgl_setup) {
+        setup_lvgl();
+        lvgl_setup = 1;
+    }
+    
+    // Start each VM session with no recorded native-code allocations.
+    MP_STATE_PORT(native_code_pointers) = MP_OBJ_NULL;
+    
+    // initialise peripherals
+    machine_pins_init();
+    #if MICROPY_PY_MACHINE_I2S
+    //machine_i2s_init0();
+    #endif
+    
+    // run boot-up scripts
+    pyexec_frozen_module("_boot.py", false);
+    
+    pyexec_file_if_exists("boot.py");
+    
+    // main.py runs only in friendly-REPL mode; a forced exit skips the REPL
+    // and goes straight to teardown.
+    if (pyexec_mode_kind == PYEXEC_MODE_FRIENDLY_REPL) {
+            int ret = pyexec_file_if_exists("main.py");
+            if (ret & PYEXEC_FORCED_EXIT) {
+                goto soft_reset_exit;
+        }
+    }
+
+    // Alternate between raw and friendly REPL until one returns non-zero.
+    for (;;) {
+        if (pyexec_mode_kind == PYEXEC_MODE_RAW_REPL) {
+            //vprintf_like_t vprintf_log = esp_log_set_vprintf(vprintf_null);
+            if (pyexec_raw_repl() != 0) {
+                break;
+            }
+            //esp_log_set_vprintf(vprintf_log);
+        } else {
+            if (pyexec_friendly_repl() != 0) {
+                break;
+            }
+        }
+    }
+
+soft_reset_exit:
+
+    #if MICROPY_BLUETOOTH_NIMBLE
+    mp_bluetooth_deinit();
+    #endif
+
+    // NOTE(review): this tests MICROPY_ESPNOW, while this port's mpconfigport.h
+    // defines MICROPY_PY_ESPNOW - confirm which macro is intended.
+    #if MICROPY_ESPNOW
+    espnow_deinit(mp_const_none);
+    MP_STATE_PORT(espnow_singleton) = NULL;
+    #endif
+
+    machine_timer_deinit_all();
+
+    #if MICROPY_PY_THREAD
+    mp_thread_deinit();
+    #endif
+
+    // Free any native code pointers that point to iRAM.
+    if (MP_STATE_PORT(native_code_pointers) != MP_OBJ_NULL) {
+        size_t len;
+        mp_obj_t *items;
+        mp_obj_list_get(MP_STATE_PORT(native_code_pointers), &len, &items);
+        for (size_t i = 0; i < len; ++i) {
+            heap_caps_free(MP_OBJ_TO_PTR(items[i]));
+        }
+    }
+
+    gc_sweep_all();
+
+    mp_hal_stdout_tx_str("MPY: soft reboot\r\n");
+    // A "soft reboot" on this port restarts the whole chip.
+    // NOTE(review): esp_restart() is documented as not returning, so the
+    // deinit calls and the `goto soft_reset` below appear to be unreachable - confirm
+    // whether they are kept intentionally.
+    esp_restart();
+    // deinitialise peripherals
+    machine_pwm_deinit_all();
+    // TODO: machine_rmt_deinit_all();
+    machine_pins_deinit();
+    machine_deinit();
+    #if MICROPY_PY_USOCKET_EVENTS
+    usocket_events_deinit();
+    #endif
+
+    mp_deinit();
+    fflush(stdout);
+    goto soft_reset;
+}
+
+// Default board start-up hook: initialise NVS flash. When the first attempt
+// reports no free pages or a newer on-flash format, the partition is erased
+// and initialisation is attempted once more; other results are left alone.
+void boardctrl_startup(void) {
+    switch (nvs_flash_init()) {
+        case ESP_ERR_NVS_NO_FREE_PAGES:
+        case ESP_ERR_NVS_NEW_VERSION_FOUND:
+            nvs_flash_erase();
+            nvs_flash_init();
+            break;
+        default:
+            break;
+    }
+}
+
+// Touchscreen driver entry points; which ones are declared depends on the board macro.
+#ifdef TULIP_DIY
+extern void ft5x06_init();
+extern void run_ft5x06();
+#elif defined MAKERFABS
+extern void run_gt911();
+#endif
+
+// MIDI task entry point; app_main starts it on non-TDECK builds.
+extern void run_midi();
+
+
+// T-Deck builds additionally use a keyboard task and the GT911 touch task.
+#ifdef TDECK
+extern void run_tdeck_keyboard();
+extern void run_gt911();
+#endif
+
+// NOTE(review): neither xStack nor static_mp_handle is referenced by mp_task or
+// app_main below - confirm whether they are still needed.
+uint8_t * xStack;
+StaticTask_t static_mp_handle;
+
+// Application entry point: runs the board start-up hook, then launches the
+// MIDI, USB, display, touchscreen, Alles and MicroPython tasks in that order
+// (subject to the board macros), pausing between launches, and finally
+// initialises the sequencer.
+// NOTE(review): the return values of xTaskCreatePinnedToCore() are not checked.
+void app_main(void) {
+    // Hook for a board to run code at start up.
+    // This defaults to initialising NVS.
+    MICROPY_BOARD_STARTUP();
+    for(uint8_t i=0;i<MAX_TASKS;i++) last_task_counters[i] = 0;
+
+    // Grab the idle tasks
+    idle_0_handle = xTaskGetIdleTaskHandleForCPU(0);
+    idle_1_handle = xTaskGetIdleTaskHandleForCPU(1);
+
+
+    #ifdef TDECK
+    // turn on TDeck peripheral 
+    gpio_config_t peri_gpio_config = {
+        .mode = GPIO_MODE_OUTPUT,
+        .pin_bit_mask = 1ULL << TDECK_PERI_GPIO
+    };
+    gpio_config(&peri_gpio_config);
+    gpio_set_level(TDECK_PERI_GPIO, 1);
+    delay_ms(500);
+    #endif
+
+    // The MIDI task is not started on TDECK builds.
+    #ifndef TDECK
+    fprintf(stderr,"Starting MIDI on core %d\n", MIDI_TASK_COREID);
+    xTaskCreatePinnedToCore(run_midi, MIDI_TASK_NAME, MIDI_TASK_STACK_SIZE / sizeof(StackType_t), NULL, MIDI_TASK_PRIORITY, &midi_handle, MIDI_TASK_COREID);
+    fflush(stderr);
+    delay_ms(100);
+    #endif
+    
+
+    #ifndef TULIP4_R10_V0 // v0 doesn't do usb
+    #ifndef TDECK // TDECK doesn't send power to USB
+    fprintf(stderr,"Starting USB host on core %d\n", USB_TASK_COREID);
+    xTaskCreatePinnedToCore(run_usb, USB_TASK_NAME, (USB_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, USB_TASK_PRIORITY, &usb_handle, USB_TASK_COREID);
+    fflush(stderr);
+    delay_ms(100);
+    #endif
+    #endif
+
+    // Display task: T-Deck uses its own display routine, every other board the ESP32-S3 one.
+    fprintf(stderr,"Starting display on core %d\n", DISPLAY_TASK_COREID);
+    #ifdef TDECK
+    delay_ms(100);
+    xTaskCreatePinnedToCore(run_tdeck_display, DISPLAY_TASK_NAME, (DISPLAY_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, DISPLAY_TASK_PRIORITY, &display_handle, DISPLAY_TASK_COREID);
+    #else
+    xTaskCreatePinnedToCore(run_esp32s3_display, DISPLAY_TASK_NAME, (DISPLAY_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, DISPLAY_TASK_PRIORITY, &display_handle, DISPLAY_TASK_COREID);
+    #endif
+    fflush(stderr);
+    delay_ms(100);
+
+    // The message below is printed unconditionally; a touchscreen task is only
+    // created when TULIP_DIY, MAKERFABS or TDECK is defined.
+    fprintf(stderr,"Starting touchscreen on core %d \n", TOUCHSCREEN_TASK_COREID);
+    #ifdef TULIP_DIY
+    ft5x06_init();
+    xTaskCreatePinnedToCore(run_ft5x06, TOUCHSCREEN_TASK_NAME, (TOUCHSCREEN_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, TOUCHSCREEN_TASK_PRIORITY, &touchscreen_handle, TOUCHSCREEN_TASK_COREID);
+    #elif defined MAKERFABS
+    xTaskCreatePinnedToCore(run_gt911, TOUCHSCREEN_TASK_NAME, (TOUCHSCREEN_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, TOUCHSCREEN_TASK_PRIORITY, &touchscreen_handle, TOUCHSCREEN_TASK_COREID);
+    #elif defined TDECK
+    delay_ms(500);
+    xTaskCreatePinnedToCore(run_gt911, TOUCHSCREEN_TASK_NAME, (TOUCHSCREEN_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, TOUCHSCREEN_TASK_PRIORITY, &touchscreen_handle, TOUCHSCREEN_TASK_COREID);
+    #endif
+    fflush(stderr);
+    delay_ms(100);
+
+    // run_alles() is called directly from this task; the pinned-task variant is
+    // commented out below.
+    // NOTE(review): the log line still reports ALLES_TASK_COREID - confirm it is accurate.
+    fprintf(stderr,"Starting Alles on core %d\n", ALLES_TASK_COREID);
+    run_alles();
+
+    //xTaskCreatePinnedToCore(run_alles, ALLES_TASK_NAME, (ALLES_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, ALLES_TASK_PRIORITY, &alles_handle, ALLES_TASK_COREID);
+    fflush(stderr);
+    delay_ms(500);
+    
+    fprintf(stderr,"Starting MicroPython on core %d\n", TULIP_MP_TASK_COREID);
+    xTaskCreatePinnedToCore(mp_task, TULIP_MP_TASK_NAME, (TULIP_MP_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, TULIP_MP_TASK_PRIORITY, &tulip_mp_handle, TULIP_MP_TASK_COREID);
+    fflush(stderr);
+    delay_ms(100);
+
+    // On T-Deck the keyboard task reuses the USB task's name, stack size,
+    // priority, handle and core.
+    #ifdef TDECK
+    delay_ms(3000); // wait for touchscreen
+    fprintf(stderr,"Starting T-Deck keyboard on core %d\n", USB_TASK_COREID);
+    xTaskCreatePinnedToCore(run_tdeck_keyboard, USB_TASK_NAME, (USB_TASK_STACK_SIZE) / sizeof(StackType_t), NULL, USB_TASK_PRIORITY, &usb_handle, USB_TASK_COREID);
+    fflush(stderr);
+    delay_ms(10);
+    #endif
+
+    tsequencer_init();
+
+
+}
+
+// Last-resort handler for a failed NLR jump: prints the offending value
+// pointer and restarts the chip.
+void nlr_jump_fail(void *val) {
+    printf("NLR jump failed, val=%p\n", val);
+    esp_restart();
+}
+
+// modussl_mbedtls uses this function but it's not enabled in ESP IDF,
+// so provide a no-op definition here; the threshold argument is ignored.
+void mbedtls_debug_set_threshold(int threshold) {
+    (void)threshold;
+}
+
+// Copy emitted native code into memory allocated with MALLOC_CAP_EXEC and
+// return the address of the copy.
+//   buf   - source bytes
+//   len   - byte count; rounded up to a multiple of 4 before allocating and copying
+//   reloc - when non-NULL, handed to mp_native_relocate() together with buf and
+//           the destination address before the copy is made
+// Every allocation is appended to the native_code_pointers list (created on
+// first use). m_malloc_fail() is called if the allocation fails.
+void *esp_native_code_commit(void *buf, size_t len, void *reloc) {
+    const size_t aligned_len = (len + 3) & ~3;
+    uint32_t *exec_mem = heap_caps_malloc(aligned_len, MALLOC_CAP_EXEC);
+    if (!exec_mem) {
+        m_malloc_fail(aligned_len);
+    }
+
+    // Lazily create the tracking list, then record this allocation in it.
+    if (MP_STATE_PORT(native_code_pointers) == MP_OBJ_NULL) {
+        MP_STATE_PORT(native_code_pointers) = mp_obj_new_list(0, NULL);
+    }
+    mp_obj_list_append(MP_STATE_PORT(native_code_pointers), MP_OBJ_TO_PTR(exec_mem));
+
+    if (reloc != NULL) {
+        mp_native_relocate(reloc, buf, (uintptr_t)exec_mem);
+    }
+    memcpy(exec_mem, buf, aligned_len);
+    return exec_mem;
+}
+
+MP_REGISTER_ROOT_POINTER(mp_obj_t native_code_pointers);
diff --git a/tulip/amyboard/main/CMakeLists.txt b/tulip/amyboard/main/CMakeLists.txt
new file mode 100644
index 000000000..beede3bbf
--- /dev/null
+++ b/tulip/amyboard/main/CMakeLists.txt
@@ -0,0 +1,5 @@
+# This component's build configuration lives in ../esp32_common.cmake;
+# this file only includes it.
+
+
+include(../esp32_common.cmake)
diff --git a/tulip/amyboard/main/idf_component.yml b/tulip/amyboard/main/idf_component.yml
new file mode 100644
index 000000000..59dafdb94
--- /dev/null
+++ b/tulip/amyboard/main/idf_component.yml
@@ -0,0 +1,7 @@
+## IDF Component Manager Manifest File
+dependencies:
+  espressif/esp_lcd_touch_ft5x06: "^1.0.5~1"
+  idf:
+    version: ">=5.0.2"
+  espressif/esp_tinyusb: "~1.0.0"
+  espressif/mdns: "~1.1.0"
diff --git a/tulip/amyboard/modules/_boot.py b/tulip/amyboard/modules/_boot.py
new file mode 100644
index 000000000..651fc7b10
--- /dev/null
+++ b/tulip/amyboard/modules/_boot.py
@@ -0,0 +1,13 @@
+import gc
+import os
+from flashbdev import bdev
+
+# Mount the flash block device as the root filesystem. If the mount raises
+# OSError, fall back to inisetup.setup() and keep the VFS object it returns.
+try:
+    if bdev:
+        os.mount(bdev, "/")
+except OSError:
+    # Imported here so inisetup is only loaded when the mount fails.
+    import inisetup
+
+    vfs = inisetup.setup()
+
+# Run a garbage collection pass once boot-time setup is done.
+gc.collect()
diff --git a/tulip/amyboard/modules/apa106.py b/tulip/amyboard/modules/apa106.py
new file mode 100644
index 000000000..ef971d78b
--- /dev/null
+++ b/tulip/amyboard/modules/apa106.py
@@ -0,0 +1,8 @@
+# APA106 driver for MicroPython on ESP32
+# MIT license; Copyright (c) 2016 Damien P. George
+
+from neopixel import NeoPixel
+
+
+class APA106(NeoPixel):
+    # NeoPixel subclass whose only change is the ORDER class attribute.
+    # NOTE(review): presumably ORDER selects the colour-channel byte order used
+    # by the NeoPixel base class - confirm against neopixel.py.
+    ORDER = (0, 1, 2, 3)
diff --git a/tulip/amyboard/modules/espnow.py b/tulip/amyboard/modules/espnow.py
new file mode 100644
index 000000000..6956a3a93
--- /dev/null
+++ b/tulip/amyboard/modules/espnow.py
@@ -0,0 +1,30 @@
+# espnow module for MicroPython on ESP32
+# MIT license; Copyright (c) 2022 Glenn Moloney @glenn20
+
+from _espnow import *
+
+
+class ESPNow(ESPNowBase):
+    # Static buffers for alloc free receipt of messages with ESPNow.irecv().
+    _data = [None, bytearray(MAX_DATA_LEN)]
+    _none_tuple = (None, None)
+
+    def __init__(self):
+        super().__init__()
+
+    def irecv(self, timeout_ms=None):
+        n = self.recvinto(self._data, timeout_ms)
+        return self._data if n else self._none_tuple
+
+    def recv(self, timeout_ms=None):
+        n = self.recvinto(self._data, timeout_ms)
+        return [bytes(x) for x in self._data] if n else self._none_tuple
+
+    def irq(self, callback):
+        super().irq(callback, self)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.irecv()  # Use alloc free irecv() method
diff --git a/tulip/amyboard/modules/flashbdev.py b/tulip/amyboard/modules/flashbdev.py
new file mode 100644
index 000000000..1ee6ff779
--- /dev/null
+++ b/tulip/amyboard/modules/flashbdev.py
@@ -0,0 +1,7 @@
+from esp32 import Partition
+
+# MicroPython's partition table uses "vfs", TinyUF2 uses "ffat".
+bdev = Partition.find(Partition.TYPE_DATA, label="vfs")
+if not bdev:
+    bdev = Partition.find(Partition.TYPE_DATA, label="ffat", block_size=512)
+bdev = bdev[0] if bdev else None
diff --git a/tulip/amyboard/modules/inisetup.py b/tulip/amyboard/modules/inisetup.py
new file mode 100644
index 000000000..c8a33d582
--- /dev/null
+++ b/tulip/amyboard/modules/inisetup.py
@@ -0,0 +1,53 @@
+import os
+from flashbdev import bdev
+
+
+def check_bootsec():
+    buf = bytearray(bdev.ioctl(5, 0))  # 5 is SEC_SIZE
+    bdev.readblocks(0, buf)
+    empty = True
+    for b in buf:
+        if b != 0xFF:
+            empty = False
+            break
+    if empty:
+        return True
+    fs_corrupted()
+
+
+def fs_corrupted():
+    import time
+    import micropython
+
+    # Allow this loop to be stopped via Ctrl-C.
+    micropython.kbd_intr(3)
+
+    while 1:
+        print(
+            """\
+The filesystem appears to be corrupted. If you had important data there, you
+may want to make a flash snapshot to try to recover it. Otherwise, perform
+factory reprogramming of MicroPython firmware (completely erase flash, followed
+by firmware programming).
+"""
+        )
+        time.sleep(3)
+
+
+def setup():
+    check_bootsec()
+    print("Performing initial setup")
+    os.VfsLfs2.mkfs(bdev)
+    vfs = os.VfsLfs2(bdev)
+    os.mount(vfs, "/")
+    with open("boot.py", "w") as f:
+        f.write(
+            """\
+# This file is executed on every boot (including wake-boot from deepsleep)
+#import esp
+#esp.osdebug(None)
+#import webrepl
+#webrepl.start()
+"""
+        )
+    return vfs
diff --git a/tulip/amyboard/mpconfigport.h b/tulip/amyboard/mpconfigport.h
new file mode 100644
index 000000000..fc19d5fbc
--- /dev/null
+++ b/tulip/amyboard/mpconfigport.h
@@ -0,0 +1,405 @@
+// Options to control how MicroPython is built for this port,
+// overriding defaults in py/mpconfig.h.
+
+// Board-specific definitions
+#include "mpconfigboard.h"
+
+#include <stdint.h>
+#include <alloca.h>
+#include "esp_random.h"
+#include "esp_system.h"
+#include "freertos/FreeRTOS.h"
+#include "driver/i2s_std.h"
+#include "esp_wifi_types.h"
+
+
+// This is Tulip specific stuff. Unfortunately we cannot override some of this in ports/esp32, 
+// so we have to copy it over there as a build step.
+// These definitions come before the `#ifndef`-guarded defaults further down
+// in this header, so the values set here are the ones that take effect.
+#define MICROPY_HW_I2C0_SCL                 (I2C_SCL)
+#define MICROPY_HW_I2C0_SDA                 (I2C_SDA)
+
+#define MICROPY_HW_ENABLE_SDCARD            (0)
+#define MICROPY_PY_MACHINE_I2S              (0)
+// Bluetooth is switched off; because MICROPY_PY_BLUETOOTH is defined here, the
+// `#ifndef MICROPY_PY_BLUETOOTH` block later in this file is skipped entirely.
+#define MICROPY_PY_BLUETOOTH (0)
+#define MICROPY_BLUETOOTH_NIMBLE (0)
+// Console transport: T-Deck uses the USB Serial/JTAG peripheral, all other
+// boards use native USB CDC. The two must not both be enabled (checked below).
+#ifdef TDECK
+#define MICROPY_HW_USB_CDC (0)
+#define MICROPY_HW_ESP_USB_SERIAL_JTAG (1)
+#else
+#define MICROPY_HW_USB_CDC (1)
+#define MICROPY_HW_ESP_USB_SERIAL_JTAG (0)
+#endif
+#define MICROPY_HW_ENABLE_USBDEV (1)
+#define MICROPY_ENABLE_SCHEDULER (1)
+#define MICROPY_SCHEDULER_DEPTH             (128)
+
+
+#ifndef MICROPY_CONFIG_ROM_LEVEL
+#define MICROPY_CONFIG_ROM_LEVEL            (MICROPY_CONFIG_ROM_LEVEL_EXTRA_FEATURES)
+#endif
+
+// object representation and NLR handling
+#define MICROPY_OBJ_REPR                    (MICROPY_OBJ_REPR_A)
+#if CONFIG_IDF_TARGET_ARCH_XTENSA
+#define MICROPY_NLR_SETJMP                  (1)
+#endif
+
+// memory allocation policies
+#define MICROPY_ALLOC_PATH_MAX              (128)
+
+// Initial Python heap size.  This starts small but adds new heap areas on demand due to
+// the settings MICROPY_GC_SPLIT_HEAP and MICROPY_GC_SPLIT_HEAP_AUTO.  The value is
+// different for different MCUs and is chosen so they can grow the heap once (double it)
+// and still have enough internal RAM to start WiFi and make a HTTPS request.
+#ifndef MICROPY_GC_INITIAL_HEAP_SIZE
+#if CONFIG_IDF_TARGET_ESP32
+#define MICROPY_GC_INITIAL_HEAP_SIZE        (56 * 1024)
+#elif CONFIG_IDF_TARGET_ESP32S2 && !CONFIG_SPIRAM
+#define MICROPY_GC_INITIAL_HEAP_SIZE        (36 * 1024)
+#else
+#define MICROPY_GC_INITIAL_HEAP_SIZE        (64 * 1024)
+#endif
+#endif
+
+// emitters
+#define MICROPY_PERSISTENT_CODE_LOAD        (1)
+#if CONFIG_IDF_TARGET_ARCH_RISCV
+#if CONFIG_ESP_SYSTEM_PMP_IDRAM_SPLIT
+#define MICROPY_EMIT_RV32                   (0)
+#else
+#define MICROPY_EMIT_RV32                   (1)
+#endif
+#else
+#define MICROPY_EMIT_XTENSAWIN              (1)
+#endif
+
+// optimisations
+#ifndef MICROPY_OPT_COMPUTED_GOTO
+#define MICROPY_OPT_COMPUTED_GOTO           (1)
+#endif
+
+// Python internal features
+#define MICROPY_READER_VFS                  (1)
+#define MICROPY_ENABLE_GC                   (1)
+#define MICROPY_STACK_CHECK_MARGIN          (1024)
+#define MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF (1)
+#define MICROPY_LONGINT_IMPL                (MICROPY_LONGINT_IMPL_MPZ)
+#define MICROPY_ERROR_REPORTING             (MICROPY_ERROR_REPORTING_NORMAL)
+#define MICROPY_WARNINGS                    (1)
+#define MICROPY_FLOAT_IMPL                  (MICROPY_FLOAT_IMPL_FLOAT)
+#define MICROPY_STREAMS_POSIX_API           (1)
+#define MICROPY_USE_INTERNAL_ERRNO          (0) // errno.h from xtensa-esp32-elf/sys-include/sys
+#define MICROPY_USE_INTERNAL_PRINTF         (0) // ESP32 SDK requires its own printf
+#define MICROPY_VFS                         (1)
+
+// control over Python builtins
+#define MICROPY_PY_STR_BYTES_CMP_WARN       (1)
+#define MICROPY_PY_ALL_INPLACE_SPECIAL_METHODS (1)
+#define MICROPY_PY_BUILTINS_HELP_TEXT       esp32_help_text
+#define MICROPY_PY_IO_BUFFEREDWRITER        (1)
+#define MICROPY_PY_TIME_GMTIME_LOCALTIME_MKTIME (1)
+#define MICROPY_PY_TIME_TIME_TIME_NS        (1)
+#define MICROPY_PY_TIME_INCLUDEFILE         "ports/esp32/modtime.c"
+#define MICROPY_PY_THREAD                   (1)
+#define MICROPY_PY_THREAD_GIL               (1)
+#define MICROPY_PY_THREAD_GIL_VM_DIVISOR    (32)
+
+#define MICROPY_GC_SPLIT_HEAP               (1)
+#define MICROPY_GC_SPLIT_HEAP_AUTO          (1)
+
+// extended modules
+#ifndef MICROPY_PY_ESPNOW
+#define MICROPY_PY_ESPNOW                   (1)
+#endif
+#ifndef MICROPY_PY_BLUETOOTH
+#define MICROPY_PY_BLUETOOTH                (1)
+#define MICROPY_PY_BLUETOOTH_USE_SYNC_EVENTS (1)
+#define MICROPY_PY_BLUETOOTH_USE_SYNC_EVENTS_WITH_INTERLOCK (1)
+// Event stack size is the RTOS stack size minus an allowance for the stack used
+// by the NimBLE functions that call into invoke_irq_handler().
+// MICROPY_STACK_CHECK_MARGIN is further subtracted from this value to set the stack limit.
+#define MICROPY_PY_BLUETOOTH_SYNC_EVENT_STACK_SIZE (CONFIG_BT_NIMBLE_TASK_STACK_SIZE - 1024)
+#define MICROPY_PY_BLUETOOTH_ENABLE_CENTRAL_MODE (1)
+#define MICROPY_PY_BLUETOOTH_ENABLE_PAIRING_BONDING (1)
+#define MICROPY_BLUETOOTH_NIMBLE            (1)
+#define MICROPY_BLUETOOTH_NIMBLE_BINDINGS_ONLY (1)
+#endif
+#define MICROPY_PY_HASHLIB_MD5              (1)
+#define MICROPY_PY_HASHLIB_SHA1             (1)
+#define MICROPY_PY_HASHLIB_SHA256           (1)
+#define MICROPY_PY_CRYPTOLIB                (1)
+#define MICROPY_PY_RANDOM_SEED_INIT_FUNC    (esp_random())
+#define MICROPY_PY_OS_INCLUDEFILE           "ports/esp32/modos.c"
+#define MICROPY_PY_OS_DUPTERM               (1)
+#define MICROPY_PY_OS_DUPTERM_NOTIFY        (1)
+#define MICROPY_PY_OS_SYNC                  (1)
+#define MICROPY_PY_OS_UNAME                 (1)
+#define MICROPY_PY_OS_URANDOM               (1)
+#define MICROPY_PY_MACHINE                  (1)
+#define MICROPY_PY_MACHINE_INCLUDEFILE      "ports/esp32/modmachine.c"
+#define MICROPY_PY_MACHINE_RESET            (1)
+#define MICROPY_PY_MACHINE_BARE_METAL_FUNCS (1)
+#define MICROPY_PY_MACHINE_DISABLE_IRQ_ENABLE_IRQ (1)
+#define MICROPY_PY_MACHINE_ADC              (1)
+#define MICROPY_PY_MACHINE_ADC_INCLUDEFILE  "ports/esp32/machine_adc.c"
+#define MICROPY_PY_MACHINE_ADC_ATTEN_WIDTH  (1)
+#define MICROPY_PY_MACHINE_ADC_INIT         (1)
+#define MICROPY_PY_MACHINE_ADC_READ         (1)
+#define MICROPY_PY_MACHINE_ADC_READ_UV      (1)
+#define MICROPY_PY_MACHINE_ADC_BLOCK        (1)
+#define MICROPY_PY_MACHINE_ADC_BLOCK_INCLUDEFILE "ports/esp32/machine_adc_block.c"
+#define MICROPY_PY_MACHINE_PIN_MAKE_NEW     mp_pin_make_new
+#define MICROPY_PY_MACHINE_BITSTREAM        (1)
+#define MICROPY_PY_MACHINE_DHT_READINTO     (1)
+#define MICROPY_PY_MACHINE_PULSE            (1)
+#define MICROPY_PY_MACHINE_PWM              (1)
+#define MICROPY_PY_MACHINE_PWM_DUTY         (1)
+#define MICROPY_PY_MACHINE_PWM_INCLUDEFILE  "ports/esp32/machine_pwm.c"
+#define MICROPY_PY_MACHINE_I2C              (1)
+#define MICROPY_PY_MACHINE_I2C_TRANSFER_WRITE1 (1)
+#define MICROPY_PY_MACHINE_SOFTI2C          (1)
+#define MICROPY_PY_MACHINE_SPI              (1)
+#define MICROPY_PY_MACHINE_SOFTSPI          (1)
+#ifndef MICROPY_PY_MACHINE_DAC
+#define MICROPY_PY_MACHINE_DAC              (SOC_DAC_SUPPORTED)
+#endif
+#ifndef MICROPY_PY_MACHINE_I2S
+#define MICROPY_PY_MACHINE_I2S              (SOC_I2S_SUPPORTED)
+#endif
+#define MICROPY_PY_MACHINE_I2S_INCLUDEFILE  "ports/esp32/machine_i2s.c"
+#define MICROPY_PY_MACHINE_I2S_FINALISER    (1)
+#define MICROPY_PY_MACHINE_I2S_CONSTANT_RX  (I2S_DIR_RX)
+#define MICROPY_PY_MACHINE_I2S_CONSTANT_TX  (I2S_DIR_TX)
+#define MICROPY_PY_MACHINE_UART             (1)
+#define MICROPY_PY_MACHINE_UART_INCLUDEFILE "ports/esp32/machine_uart.c"
+#define MICROPY_PY_MACHINE_UART_SENDBREAK   (1)
+#define MICROPY_PY_MACHINE_UART_IRQ         (1)
+#define MICROPY_PY_MACHINE_WDT              (1)
+#define MICROPY_PY_MACHINE_WDT_INCLUDEFILE  "ports/esp32/machine_wdt.c"
+#define MICROPY_PY_NETWORK (1)
+#ifndef MICROPY_PY_NETWORK_HOSTNAME_DEFAULT
+#if CONFIG_IDF_TARGET_ESP32
+#define MICROPY_PY_NETWORK_HOSTNAME_DEFAULT "mpy-esp32"
+#elif CONFIG_IDF_TARGET_ESP32S2
+#define MICROPY_PY_NETWORK_HOSTNAME_DEFAULT "mpy-esp32s2"
+#elif CONFIG_IDF_TARGET_ESP32S3
+#define MICROPY_PY_NETWORK_HOSTNAME_DEFAULT "mpy-esp32s3"
+#elif CONFIG_IDF_TARGET_ESP32C3
+#define MICROPY_PY_NETWORK_HOSTNAME_DEFAULT "mpy-esp32c3"
+#elif CONFIG_IDF_TARGET_ESP32C6
+#define MICROPY_PY_NETWORK_HOSTNAME_DEFAULT "mpy-esp32c6"
+#endif
+#endif
+#define MICROPY_PY_NETWORK_INCLUDEFILE      "ports/esp32/modnetwork.h"
+#define MICROPY_PY_NETWORK_MODULE_GLOBALS_INCLUDEFILE "ports/esp32/modnetwork_globals.h"
+#ifndef MICROPY_PY_NETWORK_WLAN
+#define MICROPY_PY_NETWORK_WLAN             (1)
+#endif
+#ifndef MICROPY_HW_ENABLE_SDCARD
+#define MICROPY_HW_ENABLE_SDCARD            (1)
+#endif
+#define MICROPY_HW_SOFTSPI_MIN_DELAY        (0)
+#define MICROPY_HW_SOFTSPI_MAX_BAUDRATE     (esp_rom_get_cpu_ticks_per_us() * 1000000 / 200) // roughly
+#define MICROPY_PY_SSL                      (1)
+#define MICROPY_SSL_MBEDTLS                 (1)
+#define MICROPY_PY_WEBSOCKET                (1)
+#define MICROPY_PY_WEBREPL                  (1)
+#define MICROPY_PY_ONEWIRE                  (1)
+#define MICROPY_PY_SOCKET_EVENTS            (MICROPY_PY_WEBREPL)
+#define MICROPY_PY_BLUETOOTH_RANDOM_ADDR    (1)
+
+// fatfs configuration
+#define MICROPY_FATFS_ENABLE_LFN            (1)
+#define MICROPY_FATFS_RPATH                 (2)
+#define MICROPY_FATFS_MAX_SS                (4096)
+#define MICROPY_FATFS_LFN_CODE_PAGE         437 /* 1=SFN/ANSI 437=LFN/U.S.(OEM) */
+
+// task size
+#ifndef MICROPY_TASK_STACK_SIZE
+#define MICROPY_TASK_STACK_SIZE             (16 * 1024)
+#endif
+
+#define MP_STATE_PORT MP_STATE_VM
+
+#ifndef MICROPY_HW_ENABLE_USBDEV
+#define MICROPY_HW_ENABLE_USBDEV            (SOC_USB_OTG_SUPPORTED)
+#endif
+
+#if MICROPY_HW_ENABLE_USBDEV
+#define MICROPY_SCHEDULER_STATIC_NODES        (1)
+#define MICROPY_HW_USB_CDC_DTR_RTS_BOOTLOADER (1)
+
+#ifndef MICROPY_HW_USB_VID
+#define USB_ESPRESSIF_VID 0x303A
+#if CONFIG_TINYUSB_DESC_USE_ESPRESSIF_VID
+#define MICROPY_HW_USB_VID  (USB_ESPRESSIF_VID)
+#else
+#define MICROPY_HW_USB_VID  (CONFIG_TINYUSB_DESC_CUSTOM_VID)
+#endif
+#endif
+
+#ifndef MICROPY_HW_USB_PID
+#if CONFIG_TINYUSB_DESC_USE_DEFAULT_PID
+#define _PID_MAP(itf, n) ((CFG_TUD_##itf) << (n))
+// A combination of interfaces must have a unique product id, since PC will save device driver after the first plug.
+// Same VID/PID with different interface e.g MSC (first), then CDC (later) will possibly cause system error on PC.
+// Auto ProductID layout's Bitmap:
+//   [MSB]         HID | MSC | CDC          [LSB]
+#define USB_TUSB_PID (0x4000 | _PID_MAP(CDC, 0) | _PID_MAP(MSC, 1) | _PID_MAP(HID, 2) | \
+    _PID_MAP(MIDI, 3))  // | _PID_MAP(AUDIO, 4) | _PID_MAP(VENDOR, 5) )
+#define MICROPY_HW_USB_PID  (USB_TUSB_PID)
+#else
+#define MICROPY_HW_USB_PID  (CONFIG_TINYUSB_DESC_CUSTOM_PID)
+#endif
+#endif
+
+#ifndef MICROPY_HW_USB_MANUFACTURER_STRING
+#ifdef CONFIG_TINYUSB_DESC_MANUFACTURER_STRING
+#define MICROPY_HW_USB_MANUFACTURER_STRING CONFIG_TINYUSB_DESC_MANUFACTURER_STRING
+#else
+#define MICROPY_HW_USB_MANUFACTURER_STRING "MicroPython"
+#endif
+#endif
+
+#ifndef MICROPY_HW_USB_PRODUCT_FS_STRING
+#ifdef CONFIG_TINYUSB_DESC_PRODUCT_STRING
+#define MICROPY_HW_USB_PRODUCT_FS_STRING CONFIG_TINYUSB_DESC_PRODUCT_STRING
+#else
+#define MICROPY_HW_USB_PRODUCT_FS_STRING "Board in FS mode"
+#endif
+#endif
+
+#endif // MICROPY_HW_ENABLE_USBDEV
+
+// Enable stdio over native USB peripheral CDC via TinyUSB
+#ifndef MICROPY_HW_USB_CDC
+#define MICROPY_HW_USB_CDC                  (MICROPY_HW_ENABLE_USBDEV)
+#endif
+
+// Enable stdio over USB Serial/JTAG peripheral
+#ifndef MICROPY_HW_ESP_USB_SERIAL_JTAG
+#define MICROPY_HW_ESP_USB_SERIAL_JTAG      (SOC_USB_SERIAL_JTAG_SUPPORTED && !MICROPY_HW_USB_CDC)
+#endif
+
+#if MICROPY_HW_USB_CDC && MICROPY_HW_ESP_USB_SERIAL_JTAG
+#error "Invalid build config: Can't enable both native USB and USB Serial/JTAG peripheral"
+#endif
+
+// type definitions for the specific machine
+
+#define MICROPY_MAKE_POINTER_CALLABLE(p) ((void *)((mp_uint_t)(p)))
+#if SOC_CPU_IDRAM_SPLIT_USING_PMP && !CONFIG_ESP_SYSTEM_PMP_IDRAM_SPLIT
+// On targets with this configuration all RAM is executable so no need for a custom commit function.
+#else
+void *esp_native_code_commit(void *, size_t, void *);
+#define MP_PLAT_COMMIT_EXEC(buf, len, reloc) esp_native_code_commit(buf, len, reloc)
+#endif
+#define MP_SSIZE_MAX (0x7fffffff)
+
+#if MICROPY_PY_SOCKET_EVENTS
+#define MICROPY_PY_SOCKET_EVENTS_HANDLER extern void socket_events_handler(void); socket_events_handler();
+#else
+#define MICROPY_PY_SOCKET_EVENTS_HANDLER
+#endif
+
+// Hook executed while MicroPython waits for events. It handles pending
+// scheduled work via mp_handle_pending(true), runs the optional socket-events
+// handler, and then idles: with threading enabled the GIL is released around
+// ulTaskNotifyTake(pdFALSE, 1); otherwise `waiti 0` is executed.
+// NOTE(review): both expansions end in `while (0);` - the trailing semicolon
+// means a use written as `MICROPY_EVENT_POLL_HOOK;` expands to an extra empty
+// statement. Left unchanged because call sites may omit their own semicolon - confirm.
+#if MICROPY_PY_THREAD
+#define MICROPY_EVENT_POLL_HOOK \
+    do { \
+        extern void mp_handle_pending(bool); \
+        mp_handle_pending(true); \
+        MICROPY_PY_SOCKET_EVENTS_HANDLER \
+        MP_THREAD_GIL_EXIT(); \
+        ulTaskNotifyTake(pdFALSE, 1); \
+        MP_THREAD_GIL_ENTER(); \
+    } while (0);
+#else
+#define MICROPY_EVENT_POLL_HOOK \
+    do { \
+        extern void mp_handle_pending(bool); \
+        mp_handle_pending(true); \
+        MICROPY_PY_SOCKET_EVENTS_HANDLER \
+        asm ("waiti 0"); \
+    } while (0);
+#endif
+
+// Functions that should go in IRAM
+// For ESP32 with SPIRAM workaround, firmware is larger and uses more static IRAM,
+// so in that configuration don't put too many functions in IRAM.
+#if !(CONFIG_IDF_TARGET_ESP32 && CONFIG_SPIRAM && CONFIG_SPIRAM_CACHE_WORKAROUND)
+#define MICROPY_WRAP_MP_BINARY_OP(f) IRAM_ATTR f
+#endif
+#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) IRAM_ATTR f
+#define MICROPY_WRAP_MP_LOAD_GLOBAL(f) IRAM_ATTR f
+#define MICROPY_WRAP_MP_LOAD_NAME(f) IRAM_ATTR f
+#define MICROPY_WRAP_MP_MAP_LOOKUP(f) IRAM_ATTR f
+#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) IRAM_ATTR f
+#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) IRAM_ATTR f
+#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) IRAM_ATTR f
+
+#define UINT_FMT "%u"
+#define INT_FMT "%d"
+
+typedef int32_t mp_int_t; // must be pointer size
+typedef uint32_t mp_uint_t; // must be pointer size
+typedef long mp_off_t;
+// ssize_t, off_t as required by POSIX-signatured functions in stream.h
+#include <sys/types.h>
+
+// board specifics
+#define MICROPY_PY_SYS_PLATFORM "esp32"
+
+// ESP32-S3 extended IO for 47 & 48
+#ifndef MICROPY_HW_ESP32S3_EXTENDED_IO
+#define MICROPY_HW_ESP32S3_EXTENDED_IO      (1)
+#endif
+
+#ifndef MICROPY_HW_ENABLE_MDNS_QUERIES
+#define MICROPY_HW_ENABLE_MDNS_QUERIES      (1)
+#endif
+
+#ifndef MICROPY_HW_ENABLE_MDNS_RESPONDER
+#define MICROPY_HW_ENABLE_MDNS_RESPONDER    (1)
+#endif
+
+#ifndef MICROPY_BOARD_ENTER_BOOTLOADER
+// RTC has a register to trigger bootloader on these targets
+#if CONFIG_IDF_TARGET_ESP32S2 || CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32C2 || CONFIG_IDF_TARGET_ESP32C3
+#define MICROPY_ESP32_USE_BOOTLOADER_RTC    (1)
+#define MICROPY_BOARD_ENTER_BOOTLOADER(nargs, args) machine_bootloader_rtc()
+#endif
+#endif
+
+#ifdef MICROPY_BOARD_ENTER_BOOTLOADER
+#define MICROPY_PY_MACHINE_BOOTLOADER       (1)
+#else
+#define MICROPY_PY_MACHINE_BOOTLOADER       (0)
+#endif
+
+// Workaround for upstream bug https://github.com/espressif/esp-idf/issues/14273
+// Can be removed if a fix is available in supported ESP-IDF versions.
+#define MICROPY_PY_MATH_GAMMA_FIX_NEGINF (1)
+
+#ifndef MICROPY_BOARD_STARTUP
+#define MICROPY_BOARD_STARTUP boardctrl_startup
+#endif
+
+void boardctrl_startup(void);
+
+#ifndef MICROPY_PY_NETWORK_LAN
+#if CONFIG_IDF_TARGET_ESP32 || (CONFIG_ETH_USE_SPI_ETHERNET && (CONFIG_ETH_SPI_ETHERNET_KSZ8851SNL || CONFIG_ETH_SPI_ETHERNET_DM9051 || CONFIG_ETH_SPI_ETHERNET_W5500))
+#define MICROPY_PY_NETWORK_LAN              (1)
+#else
+#define MICROPY_PY_NETWORK_LAN              (0)
+#endif
+#endif
+
+#if MICROPY_PY_NETWORK_LAN && CONFIG_ETH_USE_SPI_ETHERNET
+#ifndef MICROPY_PY_NETWORK_LAN_SPI_CLOCK_SPEED_MZ
+#define MICROPY_PY_NETWORK_LAN_SPI_CLOCK_SPEED_MZ       (20)
+#endif
+#endif
+
+// The minimum string length threshold for string printing to stdout operations to be GIL-aware.
+#ifndef MICROPY_PY_STRING_TX_GIL_THRESHOLD
+#define MICROPY_PY_STRING_TX_GIL_THRESHOLD  (20)
+#endif
diff --git a/tulip/amyboard/mphalport.c b/tulip/amyboard/mphalport.c
new file mode 100644
index 000000000..8a6687946
--- /dev/null
+++ b/tulip/amyboard/mphalport.c
@@ -0,0 +1,275 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * Development of the code in this file was sponsored by Microbric Pty Ltd
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2014 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "esp_timer.h"
+
+#include "py/obj.h"
+#include "py/objstr.h"
+#include "py/stream.h"
+#include "py/mpstate.h"
+#include "py/mphal.h"
+#include "extmod/misc.h"
+#include "shared/timeutils/timeutils.h"
+#include "shared/runtime/pyexec.h"
+#ifndef TDECK
+#include "shared/tinyusb/mp_usbd.h"
+#include "shared/tinyusb/mp_usbd_cdc.h"
+#else
+#include "usb_serial_jtag.h"
+#endif
+#include "usb.h"
+#include "uart.h"
+#include "display.h"
+#if MICROPY_PY_STRING_TX_GIL_THRESHOLD < 0
+#error "MICROPY_PY_STRING_TX_GIL_THRESHOLD must be positive"
+#endif
+
+TaskHandle_t mp_main_task_handle;
+
+static uint8_t stdin_ringbuf_array[260];
+ringbuf_t stdin_ringbuf = {stdin_ringbuf_array, sizeof(stdin_ringbuf_array), 0, 0};
+
+portMUX_TYPE mp_atomic_mux = portMUX_INITIALIZER_UNLOCKED;
+
+// Raise an OSError if the ESP-IDF result `code` is not ESP_OK; returns normally on ESP_OK.
+#if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_NORMAL
+void check_esp_err_(esp_err_t code)
+#else // detailed error reporting: the call site (func/line/file) is passed in as well
+void check_esp_err_(esp_err_t code, const char *func, const int line, const char *file)
+#endif
+{
+    if (code != ESP_OK) {
+        // map known esp-idf error codes to posix errno values; any other code is passed through negated
+        uint32_t pcode = -code;
+        switch (code) {
+            case ESP_ERR_NO_MEM:
+                pcode = MP_ENOMEM;
+                break;
+            case ESP_ERR_TIMEOUT:
+                pcode = MP_ETIMEDOUT;
+                break;
+            case ESP_ERR_NOT_SUPPORTED:
+                pcode = MP_EOPNOTSUPP;
+                break;
+        }
+        // construct the message string object by hand; without memory for it, raise with the code only
+        mp_obj_str_t *o_str = m_new_obj_maybe(mp_obj_str_t);
+        if (o_str == NULL) {
+            mp_raise_OSError(pcode);
+            return;
+        }
+        o_str->base.type = &mp_type_str;
+        #if MICROPY_ERROR_REPORTING > MICROPY_ERROR_REPORTING_NORMAL
+        char err_msg[64];
+        esp_err_to_name_r(code, err_msg, sizeof(err_msg));
+        vstr_t vstr;
+        vstr_init(&vstr, 80);
+        vstr_printf(&vstr, "0x%04X %s in function '%s' at line %d in file '%s'", code, err_msg, func, line, file);
+        o_str->data = (const byte *)vstr_null_terminated_str(&vstr);
+        #else
+        o_str->data = (const byte *)esp_err_to_name(code); // esp_err_to_name returns a pointer to a constant string
+        #endif
+        o_str->len = strlen((char *)o_str->data);
+        o_str->hash = qstr_compute_hash(o_str->data, o_str->len);
+        // raise OSError(pcode, message)
+        mp_obj_t args[2] = { MP_OBJ_NEW_SMALL_INT(pcode), MP_OBJ_FROM_PTR(o_str)};
+        nlr_raise(mp_obj_exception_make_new(&mp_type_OSError, 2, 0, args));
+    }
+}
+
+uintptr_t mp_hal_stdio_poll(uintptr_t poll_flags) {
+    uintptr_t ret = 0; // accumulates the requested poll flags that are ready on any enabled backend
+    #if MICROPY_HW_ESP_USB_SERIAL_JTAG
+    usb_serial_jtag_poll_rx(); // presumably pulls pending USB-serial-JTAG input into stdin_ringbuf - TODO confirm
+    if ((poll_flags & MP_STREAM_POLL_RD) && ringbuf_peek(&stdin_ringbuf) != -1) {
+        ret |= MP_STREAM_POLL_RD;
+    }
+    if (poll_flags & MP_STREAM_POLL_WR) {
+        ret |= MP_STREAM_POLL_WR; // writing is reported as ready whenever it is asked for
+    }
+    #endif
+    #if MICROPY_HW_USB_CDC
+    ret |= mp_usbd_cdc_poll_interfaces(poll_flags);
+    #endif
+    #if MICROPY_PY_OS_DUPTERM
+    ret |= mp_os_dupterm_poll(poll_flags);
+    #endif
+    return ret;
+}
+
+int mp_hal_stdin_rx_chr(void) {
+    for (;;) { // loop until stdin_ringbuf yields a byte
+        #if MICROPY_HW_ESP_USB_SERIAL_JTAG
+        usb_serial_jtag_poll_rx();
+        #endif
+        #if MICROPY_HW_USB_CDC
+        mp_usbd_cdc_poll_interfaces(0);
+        #endif
+        int c = ringbuf_get(&stdin_ringbuf);
+        if (c != -1) {
+            return c;
+        }
+        MICROPY_EVENT_POLL_HOOK // nothing buffered: run the event-poll hook, then retry
+    }
+}
+
+mp_uint_t mp_hal_stdout_tx_strn(const char *str, size_t len) {
+    if(len) {
+        display_tfb_str((unsigned char*)str, len, 0, tfb_fg_pal_color, tfb_bg_pal_color); // NOTE(review): looks like this mirrors output to the display text buffer - confirm
+    }
+    // Only release the GIL if many characters are being sent
+    mp_uint_t ret = len;
+    bool did_write = false;
+    #if MICROPY_HW_ENABLE_UART_REPL || CONFIG_ESP_CONSOLE_USB_SERIAL_JTAG_ENABLED
+    bool release_gil = len > MICROPY_PY_STRING_TX_GIL_THRESHOLD;
+    #if MICROPY_DEBUG_PRINTERS && MICROPY_DEBUG_VERBOSE && MICROPY_PY_THREAD_GIL
+    // If verbose debug output is enabled some strings are printed before the
+    // GIL mutex is set up.  When that happens, no Python code is running and
+    // therefore the interpreter doesn't care about the GIL not being ready.
+    release_gil = release_gil && (MP_STATE_VM(gil_mutex).handle != NULL);
+    #endif
+    if (release_gil) {
+        MP_THREAD_GIL_EXIT();
+    }
+    #if MICROPY_HW_ESP_USB_SERIAL_JTAG
+    usb_serial_jtag_tx_strn(str, len);
+    did_write = true;
+    #endif
+    #if MICROPY_HW_ENABLE_UART_REPL
+    uart_stdout_tx_strn(str, len);
+    did_write = true;
+    #endif
+    if (release_gil) {
+        MP_THREAD_GIL_ENTER();
+    }
+    #endif // MICROPY_HW_ENABLE_UART_REPL || CONFIG_ESP_CONSOLE_USB_SERIAL_JTAG_ENABLED
+    #if MICROPY_HW_USB_CDC
+    mp_uint_t cdc_res = mp_usbd_cdc_tx_strn(str, len);
+    if (cdc_res > 0) {
+        did_write = true;
+        ret = MIN(cdc_res, ret); // report the smallest count accepted by any backend
+    }
+    #endif
+    int dupterm_res = mp_os_dupterm_tx_strn(str, len);
+    if (dupterm_res >= 0) {
+        did_write = true;
+        ret = MIN((mp_uint_t)dupterm_res, ret);
+    }
+    return did_write ? ret : 0; // 0 when no UART/USB/dupterm backend took the data; the display write is not counted
+}
+
+uint32_t mp_hal_ticks_ms(void) {
+    return (uint32_t)(esp_timer_get_time() / 1000); // esp_timer microseconds -> milliseconds, truncated to 32 bits
+}
+
+uint32_t mp_hal_ticks_us(void) {
+    return (uint32_t)esp_timer_get_time(); // esp_timer microseconds, truncated to 32 bits
+}
+
+void mp_hal_delay_ms(uint32_t ms) {
+    uint64_t us = (uint64_t)ms * 1000ULL; // requested delay in microseconds
+    uint64_t dt; // microseconds elapsed since t0
+    uint64_t t0 = esp_timer_get_time();
+    for (;;) {
+        mp_handle_pending(true);
+        MICROPY_PY_SOCKET_EVENTS_HANDLER
+        MP_THREAD_GIL_EXIT(); // the GIL is released around the yield/sleep below and re-taken in both branches
+        uint64_t t1 = esp_timer_get_time();
+        dt = t1 - t0;
+        if (dt + portTICK_PERIOD_MS * 1000ULL >= us) { // at most one RTOS tick of the delay is left
+            // doing a vTaskDelay would take us beyond requested delay time
+            taskYIELD();
+            MP_THREAD_GIL_ENTER();
+            t1 = esp_timer_get_time();
+            dt = t1 - t0;
+            break;
+        } else {
+            ulTaskNotifyTake(pdFALSE, 1); // block for up to 1 tick, or until a task notification arrives
+            MP_THREAD_GIL_ENTER();
+        }
+    }
+    if (dt < us) {
+        // do the remaining delay accurately
+        mp_hal_delay_us(us - dt);
+    }
+}
+
+void mp_hal_delay_us(uint32_t us) {
+    // these constants are tested for a 240MHz clock
+    const uint32_t this_overhead = 5; // microseconds charged for calling this function
+    const uint32_t pend_overhead = 150; // microseconds budgeted for one mp_handle_pending() call
+
+    // return if requested delay is less than calling overhead
+    if (us < this_overhead) {
+        return;
+    }
+    us -= this_overhead;
+
+    uint64_t t0 = esp_timer_get_time();
+    for (;;) { // busy-wait: this loop never sleeps or yields
+        uint64_t dt = esp_timer_get_time() - t0;
+        if (dt >= us) {
+            return;
+        }
+        if (dt + pend_overhead < us) {
+            // we have enough time to service pending events
+            // (don't use MICROPY_EVENT_POLL_HOOK because it also yields)
+            mp_handle_pending(true);
+        }
+    }
+}
+
+uint64_t mp_hal_time_ns(void) {
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    // Whole seconds plus the microsecond remainder, each scaled to nanoseconds.
+    return now.tv_sec * 1000000000ULL
+           + (uint64_t)now.tv_usec * 1000ULL;
+}
+
+// Wake up the main task if it is sleeping, by giving a task notification to mp_main_task_handle.
+void mp_hal_wake_main_task(void) {
+    xTaskNotifyGive(mp_main_task_handle);
+}
+
+// Wake up the main task if it is sleeping, to be called from an ISR.  NOTE(review): currently a no-op - the notify code below is commented out; confirm this is intended.
+void mp_hal_wake_main_task_from_isr(void) {
+    /*
+    BaseType_t xHigherPriorityTaskWoken = pdFALSE;
+    vTaskNotifyGiveFromISR(mp_main_task_handle, &xHigherPriorityTaskWoken);
+    if (xHigherPriorityTaskWoken == pdTRUE) {
+        portYIELD_FROM_ISR();
+    }
+    */
+}
diff --git a/tulip/amyboard/pins.h b/tulip/amyboard/pins.h
new file mode 100644
index 000000000..b70af5a3a
--- /dev/null
+++ b/tulip/amyboard/pins.h
@@ -0,0 +1,31 @@
+// pins.h
+// Pin assignments (and other MCU constants) for the AMYboard build.
+
+#ifndef AMYBOARD_PINS_H
+#define AMYBOARD_PINS_H
+
+#define SPI_LL_DATA_MAX_BIT_LEN (1 << 18)
+
+#define TOUCH_INT 16
+#define I2C_SCL 8
+#define I2C_SDA 18
+#define I2C_NUM I2C_NUM_0
+#define I2C_CLK_FREQ 400000
+#define TOUCH_RST -1
+#define ESP_INTR_FLAG_DEFAULT 0
+#define CONFIG_I2S_LRCLK 5
+#define CONFIG_I2S_BCLK 7
+#define CONFIG_I2S_DIN 6
+#define CONFIG_I2S_NUM 0
+#define MIDI_OUT_PIN UART_PIN_NO_CHANGE
+#define MIDI_IN_PIN UART_PIN_NO_CHANGE
+
+#endif // AMYBOARD_PINS_H
+
+
+
+
+
+
+
+
diff --git a/tulip/amyboard/tasks.h b/tulip/amyboard/tasks.h
new file mode 100644
index 000000000..5627493bc
--- /dev/null
+++ b/tulip/amyboard/tasks.h
@@ -0,0 +1,87 @@
+#ifdef ESP_PLATFORM
+#include "esp_task.h"
+#ifndef TASKS_H
+#define TASKS_H
+#include "alles.h"
+
+// All of the ESP tasks that we create in one place
+//#define DISPLAY_TASK_PRIORITY (ESP_TASK_PRIO_MAX - 2) //(ESP_TASK_PRIO_MIN + 5)
+#define DISPLAY_TASK_PRIORITY (ESP_TASK_PRIO_MIN)
+
+#define USB_TASK_PRIORITY (ESP_TASK_PRIO_MIN + 1)
+#define TOUCHSCREEN_TASK_PRIORITY (ESP_TASK_PRIO_MIN + 1)
+
+//#define SEQUENCER_TASK_PRIORITY (ESP_TASK_PRIO_MIN + 1) // Can be low because it sets its own timers?
+#define SEQUENCER_TASK_PRIORITY (ESP_TASK_PRIO_MAX - 1) // NOTE(review): set near the top of the range, unlike the disabled low setting above - confirm which is intended
+
+//#define TULIP_MP_TASK_PRIORITY (ESP_TASK_PRIO_MAX - 2)
+#define TULIP_MP_TASK_PRIORITY (ESP_TASK_PRIO_MIN + 1)
+
+#define MIDI_TASK_PRIORITY (ESP_TASK_PRIO_MAX - 2)
+
+//#define ALLES_TASK_PRIORITY (ESP_TASK_PRIO_MIN + 2)
+
+#define ALLES_PARSE_TASK_PRIORITY (ESP_TASK_PRIO_MIN +2)
+#define ALLES_RECEIVE_TASK_PRIORITY (ESP_TASK_PRIO_MIN + 3)
+#define ALLES_RENDER_TASK_PRIORITY (ESP_TASK_PRIO_MAX )
+#define ALLES_FILL_BUFFER_TASK_PRIORITY (ESP_TASK_PRIO_MAX )
+
+// Since display is on core0, things on core0 will be slower than things on core1
+#define DISPLAY_TASK_COREID (0)
+#define USB_TASK_COREID (1)
+#define TOUCHSCREEN_TASK_COREID  (0)
+#define TULIP_MP_TASK_COREID (1)
+#define SEQUENCER_TASK_COREID (0)
+#define MIDI_TASK_COREID (0)
+#define ALLES_TASK_COREID (1)
+#define ALLES_PARSE_TASK_COREID (0)
+#define ALLES_RECEIVE_TASK_COREID (1)
+#define ALLES_RENDER_TASK_COREID (0)
+#define ALLES_FILL_BUFFER_TASK_COREID (1)
+
+#define DISPLAY_TASK_STACK_SIZE    (4 * 1024) 
+#define USB_TASK_STACK_SIZE    (4 * 1024) 
+#define TOUCHSCREEN_TASK_STACK_SIZE (4 * 1024)
+#define TULIP_MP_TASK_STACK_SIZE      (32 * 1024)
+#define SEQUENCER_TASK_STACK_SIZE (2 * 1024)
+#define MIDI_TASK_STACK_SIZE (4 * 1024)
+#define ALLES_TASK_STACK_SIZE    (4 * 1024) 
+#define ALLES_PARSE_TASK_STACK_SIZE (8 * 1024)
+#define ALLES_RECEIVE_TASK_STACK_SIZE (4 * 1024)
+#define ALLES_RENDER_TASK_STACK_SIZE (8 * 1024)
+#define ALLES_FILL_BUFFER_TASK_STACK_SIZE (8 * 1024)
+
+#define MP_TASK_HEAP_SIZE (2 * 1024 * 1024)
+
+#define DISPLAY_TASK_NAME           "display_task"
+#define USB_TASK_NAME               "usb_task"
+#define TOUCHSCREEN_TASK_NAME       "tscreen_task"
+#define SEQUENCER_TASK_NAME         "seq_task"
+#define TULIP_MP_TASK_NAME          "tulip_mp_task"
+#define MIDI_TASK_NAME              "midi_task"
+#define ALLES_TASK_NAME             "alles_task"
+#define ALLES_PARSE_TASK_NAME       "alles_par_task"
+#define ALLES_RECEIVE_TASK_NAME     "alles_rec_task"
+#define ALLES_RENDER_TASK_NAME      "alles_r_task"
+#define ALLES_FILL_BUFFER_TASK_NAME "alles_fb_task"
+
+#define MAX_TASKS 21 // includes system tasks
+
+extern TaskHandle_t display_handle;
+extern TaskHandle_t usb_handle;
+extern TaskHandle_t sequencer_handle;
+extern TaskHandle_t touchscreen_handle;
+extern TaskHandle_t tulip_mp_handle;
+extern TaskHandle_t midi_handle;
+extern TaskHandle_t alles_handle;
+extern TaskHandle_t alles_parse_handle;
+extern TaskHandle_t alles_receive_handle;
+extern TaskHandle_t amy_render_handle;
+extern TaskHandle_t alles_fill_buffer_handle;
+extern TaskHandle_t idle_0_handle;
+extern TaskHandle_t idle_1_handle;
+// For CPU usage accounting: one counter slot per task, MAX_TASKS in total
+extern unsigned long last_task_counters[MAX_TASKS];
+#endif // TASKS_H
+
+#endif // ESP_PLATFORM
diff --git a/tulip/amyrepl/Makefile b/tulip/amyrepl/Makefile
new file mode 100644
index 000000000..e82d15b61
--- /dev/null
+++ b/tulip/amyrepl/Makefile
@@ -0,0 +1,208 @@
+################################################################################
+# Initial setup of Makefile environment.
+
+TOP = ../../micropython
+
+# Set parallel flag to # of CPUs (nproc on Linux, sysctl on macOS/BSD, otherwise 1)
+CPUS ?= $(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
+MAKEFLAGS += --jobs=$(CPUS)
+
+# Select the variant to build for:
+ifdef VARIANT_DIR
+# Custom variant path - remove trailing slash and get the final component of
+# the path as the variant name.
+VARIANT ?= $(notdir $(VARIANT_DIR:/=))
+else
+# If not given on the command line, then default to standard.
+VARIANT ?= standard
+VARIANT_DIR ?= variants/$(VARIANT)
+endif
+
+ifeq ($(wildcard $(VARIANT_DIR)/.),)
+$(error Invalid VARIANT specified: $(VARIANT_DIR))
+endif
+
+# If the build directory is not given, make it reflect the variant name.
+BUILD ?= build-$(VARIANT)/tulip/obj
+
+include $(TOP)/py/mkenv.mk
+include $(VARIANT_DIR)/mpconfigvariant.mk
+
+# Use the default frozen manifest, variants may override this.
+FROZEN_MANIFEST ?= variants/manifest.py
+
+# Qstr definitions (must come before including py.mk).
+QSTR_DEFS = qstrdefsport.h
+
+# Include py core make definitions.
+include $(TOP)/py/py.mk
+include $(TOP)/extmod/extmod.mk
+
+################################################################################
+# Project specific settings and compiler/linker flags.
+
+CC = emcc
+LD = emcc
+NODE ?= node
+TERSER ?= npx terser
+
+INC += -I.
+INC += -I$(TOP)
+INC += -I$(BUILD)
+INC += -I$(VARIANT_DIR)
+INC += -I../shared/
+INC += -I../shared/ulab/code/
+INC += -I../shared/desktop/
+
+
+ULAB_DIR = ../shared/ulab/code
+EXTMOD_SRC_C += $(addprefix $(ULAB_DIR)/, \
+	scipy/integrate/integrate.c \
+	scipy/linalg/linalg.c \
+	scipy/optimize/optimize.c \
+	scipy/signal/signal.c \
+	scipy/special/special.c \
+	ndarray_operators.c \
+	ulab_tools.c \
+	ndarray.c \
+	numpy/ndarray/ndarray_iter.c \
+	ndarray_properties.c \
+	numpy/approx.c \
+	numpy/bitwise.c \
+	numpy/compare.c \
+	numpy/carray/carray.c \
+	numpy/carray/carray_tools.c \
+	numpy/create.c \
+	numpy/fft/fft.c \
+	numpy/fft/fft_tools.c \
+	numpy/filter.c \
+	numpy/io/io.c \
+	numpy/linalg/linalg.c \
+	numpy/linalg/linalg_tools.c \
+	numpy/numerical.c \
+	numpy/poly.c \
+	numpy/random/random.c \
+	numpy/stats.c \
+	numpy/transform.c \
+	numpy/vector.c \
+	numpy/numpy.c \
+	scipy/scipy.c \
+	user/user.c \
+	utils/utils.c \
+	ulab.c \
+	)
+
+
+CFLAGS += -std=c99 -Wall -Werror -Wdouble-promotion -Wfloat-conversion
+CFLAGS += -Os -DNDEBUG  -DMODULE_ULAB_ENABLED=1 -DAMY_IS_EXTERNAL
+CFLAGS += $(INC)
+
+EXPORTED_FUNCTIONS_EXTRA += ,\
+	_mp_js_do_exec,\
+	_mp_js_do_exec_async,\
+	_mp_js_do_import,\
+	_mp_js_register_js_module,\
+	_proxy_c_free_obj,\
+	_proxy_c_init,\
+	_proxy_c_to_js_call,\
+	_proxy_c_to_js_delete_attr,\
+	_proxy_c_to_js_dir,\
+	_proxy_c_to_js_get_array,\
+	_proxy_c_to_js_get_dict,\
+	_proxy_c_to_js_get_iter,\
+	_proxy_c_to_js_get_type,\
+	_proxy_c_to_js_has_attr,\
+	_proxy_c_to_js_iternext,\
+	_proxy_c_to_js_lookup_attr,\
+	_proxy_c_to_js_resume,\
+	_proxy_c_to_js_store_attr,\
+	_proxy_convert_mp_to_js_obj_cside
+
+EXPORTED_RUNTIME_METHODS_EXTRA += ,\
+	PATH,\
+	PATH_FS,\
+	UTF8ToString,\
+	getValue,\
+	lengthBytesUTF8,\
+	setValue,\
+	stringToUTF8
+
+JSFLAGS += -s EXPORTED_FUNCTIONS="\
+	_free,\
+	_malloc,\
+	_mp_js_init,\
+	_mp_js_repl_init,\
+	_mp_js_repl_process_char,\
+	_mp_hal_get_interrupt_char,\
+	_mp_sched_keyboard_interrupt$(EXPORTED_FUNCTIONS_EXTRA)"
+JSFLAGS += -s EXPORTED_RUNTIME_METHODS="\
+	ccall,\
+	cwrap,\
+	FS$(EXPORTED_RUNTIME_METHODS_EXTRA)"
+JSFLAGS += --js-library library.js
+JSFLAGS += -s SUPPORT_LONGJMP=emscripten
+JSFLAGS += -s INITIAL_MEMORY=128mb -s TOTAL_STACK=64mb -s ALLOW_MEMORY_GROWTH=1 
+JSFLAGS += -s MODULARIZE -s EXPORT_NAME=_createMicroPythonModule -s ASYNCIFY -s ASYNCIFY_STACK_SIZE=128000 -s ASSERTIONS
+################################################################################
+# Source files and libraries.
+
+SRC_SHARED = $(addprefix shared/,\
+	runtime/interrupt_char.c \
+	runtime/stdout_helpers.c \
+	runtime/pyexec.c \
+	readline/readline.c \
+	timeutils/timeutils.c \
+	)
+
+SRC_C += \
+	lexer_dedent.c \
+	main.c \
+	modjs.c \
+	modjsffi.c \
+	mphalport.c \
+	objjsproxy.c \
+	proxy_c.c \
+
+# List of sources for qstr extraction.
+SRC_QSTR += $(SRC_C) $(SRC_SHARED) $(EXTMOD_SRC_C)
+
+SRC_JS += \
+	api.js \
+	objpyproxy.js \
+	proxy_js.js \
+
+OBJ += $(PY_O)
+OBJ += $(addprefix $(BUILD)/, $(SRC_SHARED:.c=.o))
+OBJ += $(addprefix $(BUILD)/, $(SRC_C:.c=.o))
+OBJ += $(addprefix $(BUILD)/, $(EXTMOD_SRC_C:.c=.o))
+
+################################################################################
+# Main targets.
+
+.PHONY: all repl min test test_min
+
+all: $(BUILD)/micropython.mjs
+
+$(BUILD)/micropython.mjs: $(OBJ) library.js $(SRC_JS)
+	$(ECHO) "LINK $@"
+	$(Q)emcc $(LDFLAGS) -o $@ $(OBJ) $(JSFLAGS)
+	$(Q)cat $(SRC_JS) >> $@
+
+$(BUILD)/micropython.min.mjs: $(BUILD)/micropython.mjs
+	$(TERSER) $< --compress --module -o $@
+
+repl: $(BUILD)/micropython.mjs
+	$(NODE) $<
+
+min: $(BUILD)/micropython.min.mjs
+# run-tests.py is started from $(TOP)/tests, so it is given an absolute path to the build output.
+test: $(BUILD)/micropython.mjs $(TOP)/tests/run-tests.py
+	cd $(TOP)/tests && MICROPY_MICROPYTHON_MJS=$(abspath $<) ./run-tests.py --target webassembly
+
+test_min: $(BUILD)/micropython.min.mjs $(TOP)/tests/run-tests.py
+	cd $(TOP)/tests && MICROPY_MICROPYTHON_MJS=$(abspath $<) ./run-tests.py --target webassembly
+
+################################################################################
+# Remaining make rules.
+
+include $(TOP)/py/mkrules.mk
diff --git a/tulip/amyrepl/README.md b/tulip/amyrepl/README.md
new file mode 100644
index 000000000..8a3029aa0
--- /dev/null
+++ b/tulip/amyrepl/README.md
@@ -0,0 +1,187 @@
+MicroPython WebAssembly
+=======================
+
+MicroPython for [WebAssembly](https://webassembly.org/).
+
+Dependencies
+------------
+
+Building the webassembly port bears the same requirements as the standard
+MicroPython ports with the addition of Emscripten, and optionally terser for
+the minified file.
+
+The output includes `micropython.mjs` (a JavaScript wrapper for the
+MicroPython runtime) and `micropython.wasm` (actual MicroPython compiled to
+WASM).
+
+Build instructions
+------------------
+
+In order to build `micropython.mjs`, run:
+
+    $ make
+
+To generate the minified file `micropython.min.mjs`, run:
+
+    $ make min
+
+Running with Node.js
+--------------------
+
+Access the repl with:
+
+    $ make repl
+
+This is the same as running:
+
+    $ node build-standard/tulip/obj/micropython.mjs
+
+The initial MicroPython GC heap size may be modified using:
+
+    $ node build-standard/tulip/obj/micropython.mjs -X heapsize=64k
+
+Where the heap size may be given in bytes, or with a `k` or `m` suffix.
+
+MicroPython scripts may be executed using:
+
+    $ node build-standard/tulip/obj/micropython.mjs hello.py
+
+Alternatively `micropython.mjs` may be accessed by other JavaScript programs in node
+using a dynamic `import()` and the general API outlined below. For example:
+
+```javascript
+const mp_mjs = await import("micropython.mjs");
+const mp = await mp_mjs.loadMicroPython();
+
+mp.runPython("print('hello world')");
+```
+
+Or without await notation:
+
+```javascript
+import("micropython.mjs").then((mp_mjs) => {
+    mp_mjs.loadMicroPython().then((mp) => {
+        mp.runPython("print('hello world')");
+    });
+});
+```
+
+Running with HTML
+-----------------
+
+The following code demonstrates the simplest way to load `micropython.mjs` in a
+browser, create an interpreter context, and run some Python code:
+
+```html
+<!doctype html>
+<html>
+  <head>
+    <script src="build-standard/micropython.mjs" type="module"></script>
+  </head>
+  <body>
+    <script type="module">
+      const mp = await loadMicroPython();
+      mp.runPython("print('hello world')");
+    </script>
+  </body>
+</html>
+```
+
+The output in the above example will go to the JavaScript console.  It's possible
+to instead capture the output and print it somewhere else, for example in an
+HTML element.  The following example shows how to do this, and also demonstrates
+the use of top-level await and the `js` module:
+
+```html
+<!doctype html>
+<html>
+  <head>
+    <script src="build-standard/micropython.mjs" type="module"></script>
+  </head>
+  <body>
+    <pre id="micropython-stdout"></pre>
+    <script type="module">
+      const stdoutWriter = (line) => {
+        document.getElementById("micropython-stdout").innerText += line + "\n";
+      };
+      const mp = await loadMicroPython({stdout:stdoutWriter});
+      await mp.runPythonAsync(`
+        import js
+        url = "https://api.github.com/users/micropython"
+        print(f"fetching {url}...")
+        res = await js.fetch(url)
+        json = await res.json()
+        for i in dir(json):
+          print(f"{i}: {json[i]}")
+      `);
+    </script>
+  </body>
+</html>
+```
+
+MicroPython code execution will suspend the browser so be sure to atomize usage
+within this environment. Unfortunately interrupts have not been implemented for the
+browser.
+
+Testing
+-------
+
+Run the test suite using:
+
+    $ make test
+
+API
+---
+
+The following functions have been exposed to JavaScript through the interpreter
+context, created and returned by `loadMicroPython()`.
+
+- `PyProxy`: the type of the object that proxies Python objects.
+
+- `FS`: the Emscripten filesystem object.
+
+- `globals`: an object exposing the globals from the Python `__main__` module,
+  with methods `get(key)`, `set(key, value)` and `delete(key)`.
+
+- `registerJsModule(name, module)`: register a JavaScript object as importable
+  from Python with the given name.
+
+- `pyimport`: import a Python module and return it.
+
+- `runPython(code)`: execute Python code and return the result.
+
+- `runPythonAsync(code)`: execute Python code and return the result, allowing for
+  top-level await expressions (this call must be await'ed on the JavaScript side).
+
+- `replInit()`: initialise the REPL.
+
+- `replProcessChar(chr)`: process an incoming character at the REPL.
+
+- `replProcessCharWithAsyncify(chr)`: process an incoming character at the REPL,
+  for use when ASYNCIFY is enabled.
+
+Type conversions
+----------------
+
+Read-only objects (booleans, numbers, strings, etc) are converted when passed between
+Python and JavaScript.  The conversions are:
+
+- JavaScript `null` converts to/from Python `None`.
+- JavaScript `undefined` converts to/from Python `js.undefined`.
+
+The conversion between `null` and `None` matches the behaviour of the Python `json`
+module.
+
+Proxying
+--------
+
+A Python `dict` instance is proxied such that:
+
+    for (const key in dict) {
+        print(key, dict[key]);
+    }
+
+works as expected on the JavaScript side and iterates through the keys of the
+Python `dict`.  Furthermore, when JavaScript accesses a key that does not exist
+in the Python dict, the JavaScript code receives `undefined` instead of a
+`KeyError` exception being raised.
diff --git a/tulip/amyrepl/api.js b/tulip/amyrepl/api.js
new file mode 100644
index 000000000..5510058cf
--- /dev/null
+++ b/tulip/amyrepl/api.js
@@ -0,0 +1,298 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Options:
+// - pystack: size in words of the MicroPython Python stack (default 2 * 1024).
+// - heapsize: size in bytes of the MicroPython GC heap (default 1024 * 1024).
+// - url: if set, returned by Module.locateFile instead of scriptDirectory + path.
+// - stdin: function to return input characters.
+// - stdout: function passed each decoded line of output (linebuffer), or a
+//   one-byte Uint8Array per character (no linebuffer).  By default Emscripten
+//   handles output: console in a browser, process.stdout.write in node.
+// - stderr: same behaviour as stdout but for error output.
+// - linebuffer: whether to buffer line-by-line to stdout/stderr (default true).
+export async function loadMicroPython(options) {
+    const { pystack, heapsize, url, stdin, stdout, stderr, linebuffer } =
+        Object.assign(
+            { pystack: 2 * 1024, heapsize: 1024 * 1024, linebuffer: true },
+            options,
+        );
+    let Module = {};
+    Module.locateFile = (path, scriptDirectory) =>
+        url || scriptDirectory + path;
+    Module._textDecoder = new TextDecoder();
+    if (stdin !== undefined) {
+        Module.stdin = stdin;
+    }
+    if (stdout !== undefined) {
+        if (linebuffer) {
+            Module._stdoutBuffer = [];
+            Module.stdout = (c) => {
+                if (c === 10) { // newline: flush the buffered bytes as one string (the newline is dropped)
+                    stdout(
+                        Module._textDecoder.decode(
+                            new Uint8Array(Module._stdoutBuffer),
+                        ),
+                    );
+                    Module._stdoutBuffer = [];
+                } else {
+                    Module._stdoutBuffer.push(c);
+                }
+            };
+        } else {
+            Module.stdout = (c) => stdout(new Uint8Array([c]));
+        }
+    }
+    if (stderr !== undefined) {
+        if (linebuffer) {
+            Module._stderrBuffer = [];
+            Module.stderr = (c) => {
+                if (c === 10) { // newline: flush the buffered bytes as one string (the newline is dropped)
+                    stderr(
+                        Module._textDecoder.decode(
+                            new Uint8Array(Module._stderrBuffer),
+                        ),
+                    );
+                    Module._stderrBuffer = [];
+                } else {
+                    Module._stderrBuffer.push(c);
+                }
+            };
+        } else {
+            Module.stderr = (c) => stderr(new Uint8Array([c]));
+        }
+    }
+    Module = await _createMicroPythonModule(Module);
+    globalThis.Module = Module;
+    proxy_js_init();
+    const pyimport = (name) => {
+        const value = Module._malloc(3 * 4); // 12-byte result slot; presumably released by the *_with_free call below
+        Module.ccall(
+            "mp_js_do_import",
+            "null",
+            ["string", "pointer"],
+            [name, value],
+        );
+        return proxy_convert_mp_to_js_obj_jsside_with_free(value);
+    };
+    Module.ccall(
+        "mp_js_init",
+        "null",
+        ["number", "number"],
+        [pystack, heapsize],
+    );
+    Module.ccall("proxy_c_init", "null", [], []);
+    return {
+        _module: Module,
+        PyProxy: PyProxy,
+        FS: Module.FS,
+        globals: {
+            __dict__: pyimport("__main__").__dict__,
+            get(key) {
+                return this.__dict__[key];
+            },
+            set(key, value) {
+                this.__dict__[key] = value;
+            },
+            delete(key) {
+                delete this.__dict__[key];
+            },
+        },
+        registerJsModule(name, module) {
+            const value = Module._malloc(3 * 4);
+            proxy_convert_js_to_mp_obj_jsside(module, value);
+            Module.ccall(
+                "mp_js_register_js_module",
+                "null",
+                ["string", "pointer"],
+                [name, value],
+            );
+            Module._free(value);
+        },
+        pyimport: pyimport,
+        runPython(code) {
+            const len = Module.lengthBytesUTF8(code);
+            const buf = Module._malloc(len + 1); // +1 for the terminating NUL written by stringToUTF8
+            Module.stringToUTF8(code, buf, len + 1);
+            const value = Module._malloc(3 * 4);
+            Module.ccall(
+                "mp_js_do_exec",
+                "number",
+                ["pointer", "number", "pointer"],
+                [buf, len, value],
+            );
+            Module._free(buf);
+            return proxy_convert_mp_to_js_obj_jsside_with_free(value);
+        },
+        runPythonAsync(code) {
+            const len = Module.lengthBytesUTF8(code);
+            const buf = Module._malloc(len + 1);
+            Module.stringToUTF8(code, buf, len + 1);
+            const value = Module._malloc(3 * 4);
+            Module.ccall(
+                "mp_js_do_exec_async",
+                "number",
+                ["pointer", "number", "pointer"],
+                [buf, len, value],
+                { async: true },
+            );
+            Module._free(buf);
+            const ret = proxy_convert_mp_to_js_obj_jsside_with_free(value);
+            if (ret instanceof PyProxyThenable) {
+                return Promise.resolve(ret);
+            }
+            return ret;
+        },
+        replInit() {
+            Module.ccall("mp_js_repl_init", "null", ["null"]);
+        },
+        replProcessChar(chr) {
+            return Module.ccall(
+                "mp_js_repl_process_char",
+                "number",
+                ["number"],
+                [chr],
+            );
+        },
+        // Needed if the GC/asyncify is enabled.
+        async replProcessCharWithAsyncify(chr) {
+            return Module.ccall(
+                "mp_js_repl_process_char",
+                "number",
+                ["number"],
+                [chr],
+                { async: true },
+            );
+        },
+    };
+}
+
+globalThis.loadMicroPython = loadMicroPython;
+
+async function runCLI() {
+    const fs = await import("fs");
+    let heap_size = 128 * 1024; // CLI default GC heap, overridden by -X heapsize=N[k|m]
+    let contents = "";
+    let repl = true;
+
+    for (let i = 2; i < process.argv.length; i++) {
+        if (process.argv[i] === "-X" && i < process.argv.length - 1) {
+            if (process.argv[i + 1].includes("heapsize=")) {
+                heap_size = parseInt(process.argv[i + 1].split("heapsize=")[1]);
+                const suffix = process.argv[i + 1].substr(-1).toLowerCase();
+                if (suffix === "k") {
+                    heap_size *= 1024;
+                } else if (suffix === "m") {
+                    heap_size *= 1024 * 1024;
+                }
+                ++i;
+            }
+        } else {
+            contents += fs.readFileSync(process.argv[i], "utf8"); // any other argument is a script file to run
+            repl = false;
+        }
+    }
+
+    if (process.stdin.isTTY === false) {
+        contents = fs.readFileSync(0, "utf8"); // piped stdin: read it from fd 0, replacing any file contents read above
+        repl = false;
+    }
+
+    const mp = await loadMicroPython({
+        heapsize: heap_size,
+        stdout: (data) => process.stdout.write(data),
+        linebuffer: false,
+    });
+
+    if (repl) {
+        mp.replInit();
+        process.stdin.setRawMode(true);
+        process.stdin.on("data", (data) => {
+            for (let i = 0; i < data.length; i++) {
+                mp.replProcessCharWithAsyncify(data[i]).then((result) => {
+                    if (result) {
+                        process.exit();
+                    }
+                });
+            }
+        });
+    } else {
+        // If the script to run ends with a running of the asyncio main loop, then inject
+        // a simple `asyncio.run` hook that starts the main task.  This is primarily to
+        // support running the standard asyncio tests.
+        if (contents.endsWith("asyncio.run(main())\n")) {
+            const asyncio = mp.pyimport("asyncio");
+            asyncio.run = async (task) => {
+                await asyncio.create_task(task);
+            };
+        }
+
+        try {
+            mp.runPython(contents);
+        } catch (error) {
+            if (error.name === "PythonError") {
+                if (error.type === "SystemExit") {
+                    // SystemExit, this is a valid exception to successfully end a script.
+                } else {
+                    // An unhandled Python exception, print it out.
+                    console.error(error.message);
+                }
+            } else {
+                // A non-Python exception.  Re-raise it.
+                throw error;
+            }
+        }
+    }
+}
+
+// Check if Node is running (equivalent to ENVIRONMENT_IS_NODE).
+if (
+    typeof process === "object" &&
+    typeof process.versions === "object" &&
+    typeof process.versions.node === "string"
+) {
+    // Check if this module is run from the command line via `node micropython.mjs`.
+    //
+    // See https://stackoverflow.com/questions/6398196/detect-if-called-through-require-or-directly-by-command-line/66309132#66309132
+    //
+    // Note:
+    // - `resolve()` is used to handle symlinks
+    // - `includes()` is used to handle cases where the file extension was omitted when passed to node
+
+    if (process.argv.length > 1) {
+        const path = await import("path");
+        const url = await import("url");
+
+        const pathToThisFile = path.resolve(url.fileURLToPath(import.meta.url));
+        const pathPassedToNode = path.resolve(process.argv[1]);
+        const isThisFileBeingRunViaCLI =
+            pathToThisFile.includes(pathPassedToNode);
+
+        if (isThisFileBeingRunViaCLI) {
+            runCLI(); // async function: the returned promise is not awaited here
+        }
+    }
+}
diff --git a/tulip/amyrepl/asyncio/__init__.py b/tulip/amyrepl/asyncio/__init__.py
new file mode 100644
index 000000000..ba1ca6351
--- /dev/null
+++ b/tulip/amyrepl/asyncio/__init__.py
@@ -0,0 +1,9 @@
+# MicroPython asyncio module, for use with webassembly port
+# MIT license; Copyright (c) 2024 Damien P. George
+
+from .core import *
+from .funcs import wait_for, wait_for_ms, gather
+from .event import Event
+from .lock import Lock
+
+__version__ = (3, 0, 0)
diff --git a/tulip/amyrepl/asyncio/core.py b/tulip/amyrepl/asyncio/core.py
new file mode 100644
index 000000000..47846fc25
--- /dev/null
+++ b/tulip/amyrepl/asyncio/core.py
@@ -0,0 +1,258 @@
+# MicroPython asyncio module, for use with webassembly port
+# MIT license; Copyright (c) 2019-2024 Damien P. George
+
+from time import ticks_ms as ticks, ticks_diff, ticks_add
+import sys, js, jsffi
+
+# Import TaskQueue and Task from built-in C code.
+from _asyncio import TaskQueue, Task
+
+
+################################################################################
+# Exceptions
+
+
+class CancelledError(BaseException):  # derives from BaseException, so a plain `except Exception` does not catch it
+    pass
+
+
+class TimeoutError(Exception):  # module-level timeout exception; an ordinary Exception subclass
+    pass
+
+
+# Single reused context dict for Loop.call_exception_handler; _run_iter fills "exception" and "future" before each call.
+_exc_context = {"message": "Task exception wasn't retrieved", "exception": None, "future": None}
+
+
+################################################################################
+# Sleep functions
+
+
+# Reusable iterator: "yields" once (queueing the current task for self.state), then raises StopIteration
+class SingletonGenerator:
+    def __init__(self):
+        self.state = None  # wake-up tick stored by sleep_ms(); None means no sleep is pending
+        self.exc = StopIteration()  # created once and re-raised on every completion
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.state is not None:
+            _task_queue.push(cur_task, self.state)  # requeue the running task, keyed by the wake-up tick
+            self.state = None
+            return None
+        else:
+            self.exc.__traceback__ = None  # presumably drops the traceback kept on the reused instance
+            raise self.exc
+
+
+# Pause task execution for the given time (integer in milliseconds, uPy extension)
+# Use a SingletonGenerator to do it without allocating on the heap
+def sleep_ms(t, sgen=SingletonGenerator()):  # the default sgen is built once and intentionally shared by every call
+    assert sgen.state is None  # the shared generator must not already hold a pending wake-up time
+    sgen.state = ticks_add(ticks(), max(0, t))  # negative durations are clamped to zero
+    return sgen
+
+
+# Pause task execution for the given time (in seconds); converted to whole milliseconds with int()
+def sleep(t):
+    return sleep_ms(int(t * 1000))
+
+
+################################################################################
+# Main run loop
+
+asyncio_timer = None  # value returned by js.setTimeout for the pending _run_iter call (see _schedule_run_iter)
+
+
+class TopLevelCoro:  # passed to Task() in place of a coroutine for _top_level_task; never instantiated
+    @staticmethod
+    def set(resolve, reject):  # store both callbacks as class attributes
+        TopLevelCoro.resolve = resolve
+        TopLevelCoro.reject = reject
+
+    @staticmethod
+    def send(value):  # calls the stored resolve callback with no arguments; `value` is ignored
+        TopLevelCoro.resolve()
+
+
+class ThenableEvent:  # lets one task wait for an object exposing .then() and receive its fulfilled value
+    def __init__(self, thenable):
+        self.result = None  # Result of the thenable
+        self.waiting = None  # Task waiting on completion of this thenable
+        thenable.then(self.set)  # NOTE(review): only a fulfilment callback is registered; no rejection handler here
+
+    def set(self, value=None):
+        # Thenable/Promise is fulfilled, set result and schedule any waiting task.
+        self.result = value
+        if self.waiting:
+            _task_queue.push(self.waiting)
+            self.waiting = None
+
+    def remove(self, task):  # `task` is unused: only a single waiter is tracked, and it is cleared
+        self.waiting = None
+
+    # async
+    def wait(self):
+        # Set the calling task as the task waiting on this thenable.
+        self.waiting = cur_task
+        # Set calling task's data to this object so it can be removed if needed.
+        cur_task.data = self
+        # Wait for the thenable to fulfill.
+        yield
+        # Return the result of the thenable.
+        return self.result
+
+
+# Ensure the awaitable is a task
+def _promote_to_task(aw):
+    return aw if isinstance(aw, Task) else create_task(aw)
+
+
+def _schedule_run_iter(dt):  # (re)arm the JS timer so that _run_iter is called with delay `dt`
+    global asyncio_timer
+    if asyncio_timer is not None:
+        js.clearTimeout(asyncio_timer)  # cancel the previously stored timer before replacing it
+    asyncio_timer = js.setTimeout(_run_iter, dt)
+
+
+def _run_iter():  # run every task that is due now, then return (re-arming the timer if the next one is in the future)
+    global cur_task
+    excs_all = (CancelledError, Exception)  # To prevent heap allocation in loop
+    excs_stop = (CancelledError, StopIteration)  # To prevent heap allocation in loop
+    while True:
+        # Wait until the head of _task_queue is ready to run
+        t = _task_queue.peek()
+        if t:
+            # A task waiting on _task_queue; "ph_key" is time to schedule task at
+            dt = max(0, ticks_diff(t.ph_key, ticks()))
+        else:
+            # No tasks can be woken so finished running
+            cur_task = _top_level_task
+            return
+
+        if dt > 0:
+            # Head task is not due yet: hand control back and ask to be called again in dt
+            cur_task = _top_level_task
+            _schedule_run_iter(dt)
+            return
+
+        # Get next task to run and continue it
+        t = _task_queue.pop()
+        cur_task = t
+        try:
+            # Continue running the coroutine, it's responsible for rescheduling itself
+            exc = t.data
+            if not exc:
+                t.coro.send(None)
+            else:
+                # If the task is finished and on the run queue and gets here, then it
+                # had an exception and was not await'ed on.  Throwing into it now will
+                # raise StopIteration and the code below will catch this and run the
+                # call_exception_handler function.
+                t.data = None
+                t.coro.throw(exc)
+        except excs_all as er:  # StopIteration (normal completion) is an Exception subclass, so it lands here too
+            # Check the task is not on any event queue
+            assert t.data is None
+            # This task is done.
+            if t.state:
+                # Task was running but is now finished.
+                waiting = False
+                if t.state is True:
+                    # "None" indicates that the task is complete and not await'ed on (yet).
+                    t.state = None
+                elif callable(t.state):
+                    # The task has a callback registered to be called on completion.
+                    t.state(t, er)
+                    t.state = False
+                    waiting = True
+                else:
+                    # Schedule any other tasks waiting on the completion of this task.
+                    while t.state.peek():
+                        _task_queue.push(t.state.pop())
+                        waiting = True
+                    # "False" indicates that the task is complete and has been await'ed on.
+                    t.state = False
+                if not waiting and not isinstance(er, excs_stop):
+                    # An exception ended this detached task, so queue it for later
+                    # execution to handle the uncaught exception if no other task retrieves
+                    # the exception in the meantime (this is handled by Task.throw).
+                    _task_queue.push(t)
+                # Save return value of coro to pass up to caller.
+                t.data = er
+            elif t.state is None:
+                # Task is already finished and nothing await'ed on the task,
+                # so call the exception handler.
+
+                # Save exception raised by the coro for later use.
+                t.data = exc
+
+                # Create exception context and call the exception handler.
+                _exc_context["exception"] = exc
+                _exc_context["future"] = t
+                Loop.call_exception_handler(_exc_context)
+
+
+# Create and schedule a new task from a coroutine; returns the new Task.
+def create_task(coro):
+    if not hasattr(coro, "send"):  # duck-typed check: any object with a `send` attribute is accepted
+        raise TypeError("coroutine expected")
+    t = Task(coro, globals())
+    _task_queue.push(t)  # pushed without an explicit wake-up time
+    return t
+
+
+# Task used to suspend and resume top-level await.
+_top_level_task = Task(TopLevelCoro, globals())  # the TopLevelCoro class itself is passed as the "coroutine"
+
+################################################################################
+# Event loop wrapper
+
+
+cur_task = _top_level_task  # task currently running; _run_iter resets it to _top_level_task before returning
+
+
+class Loop:  # used as a namespace: methods take no `self` and are called on the class itself
+    _exc_handler = None  # custom handler, or None to use default_exception_handler
+
+    def create_task(coro):
+        return create_task(coro)  # resolves to the module-level create_task, not this method
+
+    def close():  # no-op
+        pass
+
+    def set_exception_handler(handler):
+        Loop._exc_handler = handler
+
+    def get_exception_handler():
+        return Loop._exc_handler
+
+    def default_exception_handler(loop, context):  # writes message, future/coro and the traceback to sys.stderr
+        print(context["message"], file=sys.stderr)
+        print("future:", context["future"], "coro=", context["future"].coro, file=sys.stderr)
+        sys.print_exception(context["exception"], sys.stderr)
+
+    def call_exception_handler(context):  # calls the custom handler if set, else the default, as handler(Loop, context)
+        (Loop._exc_handler or Loop.default_exception_handler)(Loop, context)
+
+
+def get_event_loop():  # returns the Loop class itself (no instance is created)
+    return Loop
+
+
+def current_task():
+    if cur_task is None:
+        raise RuntimeError("no running event loop")
+    return cur_task
+
+
+def new_event_loop():  # replace the global task queue with a fresh one and return the Loop class
+    global _task_queue
+    _task_queue = TaskQueue(_schedule_run_iter)  # TaskQueue of Task instances.
+    return Loop
+
+
+# Initialise default event loop (creates _task_queue at import time).
+new_event_loop()
diff --git a/tulip/amyrepl/lexer_dedent.c b/tulip/amyrepl/lexer_dedent.c
new file mode 100644
index 000000000..555caea89
--- /dev/null
+++ b/tulip/amyrepl/lexer_dedent.c
@@ -0,0 +1,105 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "lexer_dedent.h"
+
+typedef struct _mp_reader_mem_dedent_t {
+    size_t free_len; // if >0 mem is freed on close by: m_del(char, beg, free_len)
+    const byte *beg; // start of the source buffer
+    const byte *cur; // next byte to be returned by readbyte
+    const byte *end; // one past the last byte (buf + len)
+    size_t dedent_prefix; // number of bytes skipped after each newline, as computed by dedent()
+} mp_reader_mem_dedent_t;
+
+// Work out the amount of common whitespace among all non-empty lines (0 when there is no such line).
+static size_t dedent(const byte *text, size_t len) {
+    size_t min_prefix = -1; // SIZE_MAX sentinel: no line with a non-whitespace character seen yet
+    size_t cur_prefix = 0; // whitespace bytes counted so far on the current line
+    bool start_of_line = true; // still inside the leading whitespace of the current line
+    for (const byte *t = text; t < text + len; ++t) {
+        if (*t == '\n') {
+            start_of_line = true;
+            cur_prefix = 0;
+        } else if (start_of_line) {
+            if (unichar_isspace(*t)) {
+                ++cur_prefix;
+            } else {
+                if (cur_prefix < min_prefix) {
+                    min_prefix = cur_prefix;
+                    if (min_prefix == 0) {
+                        return min_prefix; // cannot get smaller, stop scanning
+                    }
+                }
+                start_of_line = false;
+            }
+        }
+    }
+    return min_prefix == (size_t)-1 ? 0 : min_prefix; // empty/blank-only source: never hand the sentinel to the reader
+}
+
+static mp_uint_t mp_reader_mem_dedent_readbyte(void *data) { // returns the next source byte, or MP_READER_EOF at the end
+    mp_reader_mem_dedent_t *reader = (mp_reader_mem_dedent_t *)data;
+    if (reader->cur < reader->end) {
+        byte c = *reader->cur++;
+        if (c == '\n') { // after a newline, skip the common indent of the following line
+            for (size_t i = 0; i < reader->dedent_prefix && reader->cur < reader->end; ++i) { // never read past end
+                if (*reader->cur == '\n') { // line shorter than the prefix (blank line): keep its newline
+                    break;
+                }
+                ++reader->cur;
+            }
+        }
+        return c;
+    } else {
+        return MP_READER_EOF;
+    }
+}
+
+static void mp_reader_mem_dedent_close(void *data) { // reader close callback: release the buffer (if owned) and the state
+    mp_reader_mem_dedent_t *reader = (mp_reader_mem_dedent_t *)data;
+    if (reader->free_len > 0) { // free_len == 0 means the buffer is not freed here
+        m_del(char, (char *)reader->beg, reader->free_len);
+    }
+    m_del_obj(mp_reader_mem_dedent_t, reader);
+}
+
+static void mp_reader_new_mem_dedent(mp_reader_t *reader, const byte *buf, size_t len, size_t free_len) { // fill *reader with a dedenting reader over buf[0..len)
+    mp_reader_mem_dedent_t *rm = m_new_obj(mp_reader_mem_dedent_t);
+    rm->free_len = free_len;
+    rm->beg = buf;
+    rm->cur = buf; // NOTE(review): the prefix is only skipped after a '\n', so the first line keeps its indent — confirm intended
+    rm->end = buf + len;
+    rm->dedent_prefix = dedent(buf, len); // computed once, over the whole buffer
+    reader->data = rm;
+    reader->readbyte = mp_reader_mem_dedent_readbyte;
+    reader->close = mp_reader_mem_dedent_close;
+}
+
+mp_lexer_t *mp_lexer_new_from_str_len_dedent(qstr src_name, const char *str, size_t len, size_t free_len) { // public entry point, declared in lexer_dedent.h
+    mp_reader_t reader;
+    mp_reader_new_mem_dedent(&reader, (const byte *)str, len, free_len);
+    return mp_lexer_new(src_name, reader); // the reader struct is passed by value
+}
diff --git a/tulip/amyrepl/lexer_dedent.h b/tulip/amyrepl/lexer_dedent.h
new file mode 100644
index 000000000..a8cc2526b
--- /dev/null
+++ b/tulip/amyrepl/lexer_dedent.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MICROPY_INCLUDED_WEBASSEMBLY_LEXER_DEDENT_H
+#define MICROPY_INCLUDED_WEBASSEMBLY_LEXER_DEDENT_H
+
+#include "py/lexer.h"
+
+// This function creates a new "dedenting lexer" which automatically dedents the input
+// source code if every non-empty line in that source starts with a common whitespace
+// prefix.  Dedenting happens as bytes are read; if free_len > 0, str is freed on close.
+mp_lexer_t *mp_lexer_new_from_str_len_dedent(qstr src_name, const char *str, size_t len, size_t free_len);
+
+#endif // MICROPY_INCLUDED_WEBASSEMBLY_LEXER_DEDENT_H
diff --git a/tulip/amyrepl/library.h b/tulip/amyrepl/library.h
new file mode 100644
index 000000000..04b408d71
--- /dev/null
+++ b/tulip/amyrepl/library.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017, 2018 Rami Ali
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "py/obj.h"
+
+extern void mp_js_write(const char *str, mp_uint_t len); // implementation is not in library.js — TODO confirm where it lives
+extern int mp_js_ticks_ms(void); // library.js: Date.now() - MP_JS_EPOCH
+extern void mp_js_hook(void); // library.js: under Node, reads one byte from stdin and checks it against the interrupt char
+extern double mp_js_time_ms(void); // library.js: Date.now()
+extern uint32_t mp_js_random_u32(void); // library.js: one value from crypto.getRandomValues
diff --git a/tulip/amyrepl/library.js b/tulip/amyrepl/library.js
new file mode 100644
index 000000000..3f6c9cb61
--- /dev/null
+++ b/tulip/amyrepl/library.js
@@ -0,0 +1,75 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017, 2018 Rami Ali
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+mergeInto(LibraryManager.library, {
+    // This string will be emitted directly into the output file by Emscripten.
+    mp_js_ticks_ms__postset: "var MP_JS_EPOCH = Date.now()",
+
+    mp_js_ticks_ms: () => Date.now() - MP_JS_EPOCH, // milliseconds elapsed since MP_JS_EPOCH was captured
+
+    mp_js_hook: () => { // does nothing outside Node
+        if (ENVIRONMENT_IS_NODE) {
+            const mp_interrupt_char = Module.ccall(
+                "mp_hal_get_interrupt_char",
+                "number",
+                ["number"], // NOTE(review): an argument type/value pair is passed here; confirm the C function takes one
+                ["null"],
+            );
+            const fs = require("fs");
+
+            const buf = Buffer.alloc(1); // one-byte read buffer
+            try {
+                const n = fs.readSync(process.stdin.fd, buf, 0, 1); // synchronous read of at most one byte from stdin
+                if (n > 0) {
+                    if (buf[0] === mp_interrupt_char) {
+                        Module.ccall(
+                            "mp_sched_keyboard_interrupt",
+                            "null",
+                            ["null"],
+                            ["null"],
+                        );
+                    } else {
+                        process.stdout.write(String.fromCharCode(buf[0])); // any other byte is written to stdout
+                    }
+                }
+            } catch (e) {
+                if (e.code === "EAGAIN") { // deliberately ignored; every other error is re-thrown
+                } else {
+                    throw e;
+                }
+            }
+        }
+    },
+
+    mp_js_time_ms: () => Date.now(), // wall-clock time in milliseconds, as reported by Date.now()
+
+    // Node prior to v19 did not expose "crypto" as a global, so make sure it exists.
+    mp_js_random_u32__postset:
+        "if (globalThis.crypto === undefined) { globalThis.crypto = require('crypto'); }",
+
+    mp_js_random_u32: () =>
+        globalThis.crypto.getRandomValues(new Uint32Array(1))[0], // first (only) element of a freshly filled Uint32Array
+});
diff --git a/tulip/amyrepl/main.c b/tulip/amyrepl/main.c
new file mode 100644
index 000000000..c542f0cd7
--- /dev/null
+++ b/tulip/amyrepl/main.c
@@ -0,0 +1,247 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2013-2021 Damien P. George and 2017, 2018 Rami Ali
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "py/builtin.h"
+#include "py/compile.h"
+#include "py/runtime.h"
+#include "py/repl.h"
+#include "py/gc.h"
+#include "py/mperrno.h"
+#include "extmod/vfs.h"
+#include "extmod/vfs_posix.h"
+#include "shared/runtime/pyexec.h"
+
+#include "emscripten.h"
+#include "lexer_dedent.h"
+#include "library.h"
+#include "proxy_c.h"
+
+// This counter tracks the current depth of calls into C code that originated
+// externally, ie from JavaScript.  When the counter is 0 that corresponds to
+// the top-level call into C.  It is changed only by external_call_depth_inc/dec.
+static size_t external_call_depth = 0;
+
+#if MICROPY_GC_SPLIT_HEAP_AUTO
+static void gc_collect_top_level(void); // forward declaration; defined further down in this file
+#endif
+
+void external_call_depth_inc(void) { // mark entry into C from outside; pair with external_call_depth_dec()
+    ++external_call_depth;
+    #if MICROPY_GC_SPLIT_HEAP_AUTO
+    if (external_call_depth == 1) { // outermost entry only: run a collection if one was requested via gc_collect()
+        gc_collect_top_level();
+    }
+    #endif
+}
+
+void external_call_depth_dec(void) { // mark exit from an external call; no underflow check is performed
+    --external_call_depth;
+}
+
+void mp_js_init(int pystack_size, int heap_size) { // allocate the pystack/heap (when enabled) and initialise the runtime
+    #if MICROPY_ENABLE_PYSTACK
+    mp_obj_t *pystack = (mp_obj_t *)malloc(pystack_size * sizeof(mp_obj_t)); // pystack_size counts mp_obj_t slots, not bytes
+    mp_pystack_init(pystack, pystack + pystack_size);
+    #endif
+
+    #if MICROPY_ENABLE_GC
+    char *heap = (char *)malloc(heap_size * sizeof(char)); // NOTE(review): malloc results here and above are not checked for NULL
+    gc_init(heap, heap + heap_size);
+    #endif
+
+    #if MICROPY_GC_SPLIT_HEAP_AUTO
+    // When MICROPY_GC_SPLIT_HEAP_AUTO is enabled, set the GC threshold to a low
+    // value so that a collection is triggered before the heap fills up.  The actual
+    // garbage collection will happen later when control returns to the top-level,
+    // via the `gc_collect_pending` flag and `gc_collect_top_level()`.
+    MP_STATE_MEM(gc_alloc_threshold) = 16 * 1024 / MICROPY_BYTES_PER_GC_BLOCK;
+    #endif
+
+    mp_init();
+
+    #if MICROPY_VFS_POSIX
+    {
+        // Mount the host FS at the root of our internal VFS
+        mp_obj_t args[2] = {
+            MP_OBJ_TYPE_GET_SLOT(&mp_type_vfs_posix, make_new)(&mp_type_vfs_posix, 0, 0, NULL),
+            MP_OBJ_NEW_QSTR(qstr_from_str("/")),
+        };
+        mp_vfs_mount(2, args, (mp_map_t *)&mp_const_empty_map);
+        MP_STATE_VM(vfs_cur) = MP_STATE_VM(vfs_mount_table); // make the just-mounted VFS the current one
+    }
+    mp_obj_list_append(mp_sys_path, MP_OBJ_NEW_QSTR(MP_QSTR__slash_lib)); // append "/lib" to sys.path
+    #endif
+}
+
+void mp_js_register_js_module(const char *name, uint32_t *value) { // store a converted JS value in the loaded-modules dict under `name`
+    mp_obj_t module_name = MP_OBJ_NEW_QSTR(qstr_from_str(name));
+    mp_obj_t module = proxy_convert_js_to_mp_obj_cside(value);
+    mp_map_t *mp_loaded_modules_map = &MP_STATE_VM(mp_loaded_modules_dict).map;
+    mp_map_lookup(mp_loaded_modules_map, module_name, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = module; // adds or overwrites the entry
+}
+
+void mp_js_do_import(const char *name, uint32_t *out) { // import `name`; write the module, or the raised exception, to `out`
+    external_call_depth_inc();
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_obj_t ret = mp_import_name(qstr_from_str(name), mp_const_none, MP_OBJ_NEW_SMALL_INT(0));
+        // Return the leaf of the import, eg for "a.b.c" return "c".
+        const char *m = name; // start of the current dotted component
+        const char *n = name; // scanning cursor
+        for (;; ++n) {
+            if (*n == '\0' || *n == '.') {
+                if (m != name) { // first component is skipped; presumably `ret` already refers to it
+                    ret = mp_load_attr(ret, qstr_from_strn(m, n - m));
+                }
+                m = n + 1;
+                if (*n == '\0') {
+                    break;
+                }
+            }
+        }
+        nlr_pop();
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_obj_cside(ret, out);
+    } else {
+        // uncaught exception: convert it for the JS side instead of propagating
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_exc_cside(nlr.ret_val, out);
+    }
+}
+
+void mp_js_do_exec(const char *src, size_t len, uint32_t *out) { // dedent, compile and run `src`; result or exception goes to `out`
+    external_call_depth_inc();
+    mp_parse_input_kind_t input_kind = MP_PARSE_FILE_INPUT;
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_lexer_t *lex = mp_lexer_new_from_str_len_dedent(MP_QSTR__lt_stdin_gt_, src, len, 0); // free_len 0: src is not freed by the reader
+        qstr source_name = lex->source_name;
+        mp_parse_tree_t parse_tree = mp_parse(lex, input_kind);
+        mp_obj_t module_fun = mp_compile(&parse_tree, source_name, false);
+        mp_obj_t ret = mp_call_function_0(module_fun);
+        nlr_pop();
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_obj_cside(ret, out);
+    } else {
+        // uncaught exception: convert it for the JS side instead of propagating
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_exc_cside(nlr.ret_val, out);
+    }
+}
+
+void mp_js_do_exec_async(const char *src, size_t len, uint32_t *out) { // mp_js_do_exec with top-level await enabled for this call
+    mp_compile_allow_top_level_await = true;
+    mp_js_do_exec(src, len, out); // catches Python exceptions itself, so the flag below is reset in that case too
+    mp_compile_allow_top_level_await = false;
+}
+
+void mp_js_repl_init(void) { // thin wrapper around pyexec_event_repl_init()
+    pyexec_event_repl_init();
+}
+
+int mp_js_repl_process_char(int c) { // feed one character to the event REPL and return its result unchanged
+    external_call_depth_inc();
+    int ret = pyexec_event_repl_process_char(c);
+    external_call_depth_dec();
+    return ret;
+}
+
+#if MICROPY_GC_SPLIT_HEAP_AUTO
+
+static bool gc_collect_pending = false; // set by gc_collect(), consumed by gc_collect_top_level()
+
+// The largest new region that is available to become Python heap (a fixed 128 MiB here).
+size_t gc_get_max_new_split(void) {
+    return 128 * 1024 * 1024;
+}
+
+// Don't collect anything.  Instead require the heap to grow, and record that a collection is wanted.
+void gc_collect(void) {
+    gc_collect_pending = true;
+}
+
+// Collect at the top-level, where there are no root pointers from stack/registers.
+static void gc_collect_top_level(void) {
+    if (gc_collect_pending) { // only when gc_collect() asked for one since the last run
+        gc_collect_pending = false;
+        gc_collect_start(); // no stack/register scan is done between start and end
+        gc_collect_end();
+    }
+}
+
+#else
+
+static void gc_scan_func(void *begin, void *end) { // callback for the emscripten scan helpers used in gc_collect()
+    gc_collect_root((void **)begin, (void **)end - (void **)begin + 1); // length in pointer-sized words; the +1 includes the word at `end`
+}
+
+void gc_collect(void) { // immediate collection, passing stack and register contents to gc_scan_func as roots
+    gc_collect_start();
+    emscripten_scan_stack(gc_scan_func);
+    emscripten_scan_registers(gc_scan_func);
+    gc_collect_end();
+}
+
+#endif
+
+#if !MICROPY_VFS
+mp_lexer_t *mp_lexer_new_from_file(qstr filename) { // stub: always raises OSError(ENOENT)
+    mp_raise_OSError(MP_ENOENT);
+}
+
+mp_import_stat_t mp_import_stat(const char *path) { // stub: every path is reported as non-existent
+    return MP_IMPORT_STAT_NO_EXIST;
+}
+
+mp_obj_t mp_builtin_open(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs) { // stub: ignores its arguments and returns None
+    return mp_const_none;
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_open_obj, 1, mp_builtin_open);
+#endif
+
+void nlr_jump_fail(void *val) { // never returns: spins forever; `val` is unused
+    while (1) {
+        ;
+    }
+}
+
+void NORETURN __fatal_error(const char *msg) { // never returns: spins forever; `msg` is not printed
+    while (1) {
+        ;
+    }
+}
+
+#ifndef NDEBUG
+void MP_WEAK __assert_func(const char *file, int line, const char *func, const char *expr) { // `func` is not included in the message
+    printf("Assertion '%s' failed, at file %s:%d\n", expr, file, line);
+    __fatal_error("Assertion failed"); // does not return
+}
+#endif
diff --git a/tulip/amyrepl/modjs.c b/tulip/amyrepl/modjs.c
new file mode 100644
index 000000000..bed09086a
--- /dev/null
+++ b/tulip/amyrepl/modjs.c
@@ -0,0 +1,55 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "py/objmodule.h"
+#include "py/runtime.h"
+#include "proxy_c.h"
+
+#if MICROPY_PY_JS
+
+/******************************************************************************/
+// js module
+
+void mp_module_js_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) { // forward attribute access on `js` to a proxy with ref 0
+    mp_obj_jsproxy_t global_this; // temporary on the stack; only .ref is set (NOTE(review): .base is left uninitialised)
+    global_this.ref = 0; // presumably ref 0 denotes the JS global object, going by the variable name — confirm in proxy_c
+    mp_obj_jsproxy_attr(MP_OBJ_FROM_PTR(&global_this), attr, dest);
+}
+
+static const mp_rom_map_elem_t mp_module_js_globals_table[] = { // the only static global is __name__
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_js) },
+};
+static MP_DEFINE_CONST_DICT(mp_module_js_globals, mp_module_js_globals_table);
+
+const mp_obj_module_t mp_module_js = { // module object registered below as `js`
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t *)&mp_module_js_globals,
+};
+
+MP_REGISTER_MODULE(MP_QSTR_js, mp_module_js);
+MP_REGISTER_MODULE_DELEGATION(mp_module_js, mp_module_js_attr);
+
+#endif // MICROPY_PY_JS
diff --git a/tulip/amyrepl/modjsffi.c b/tulip/amyrepl/modjsffi.c
new file mode 100644
index 000000000..ac3d86023
--- /dev/null
+++ b/tulip/amyrepl/modjsffi.c
@@ -0,0 +1,121 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "emscripten.h"
+#include "py/gc.h"
+#include "py/objmodule.h"
+#include "py/runtime.h"
+#include "proxy_c.h"
+
+#if MICROPY_PY_JSFFI
+
+/******************************************************************************/
+// jsffi module
+
+// JS helper: decodes the value stored at `out` into a JavaScript value, then
+// writes it back to `out` through the force-double-proxy converter.
+EM_JS(void, proxy_convert_mp_to_js_then_js_to_mp_obj_jsside, (uint32_t * out), {
+    const js_value = proxy_convert_mp_to_js_obj_jsside(out);
+    proxy_convert_js_to_mp_obj_jsside_force_double_proxy(js_value, out);
+});
+
+// jsffi.create_proxy(arg): converts `arg` to its JS-side encoding, passes it
+// through proxy_convert_mp_to_js_then_js_to_mp_obj_jsside(), and returns the
+// resulting value converted back to a MicroPython object.
+static mp_obj_t mp_jsffi_create_proxy(mp_obj_t arg) {
+    // Three-word scratch buffer shared by all three conversion steps.
+    uint32_t pv[3];
+    proxy_convert_mp_to_js_obj_cside(arg, pv);
+    proxy_convert_mp_to_js_then_js_to_mp_obj_jsside(pv);
+    mp_obj_t proxied = proxy_convert_js_to_mp_obj_cside(pv);
+    return proxied;
+}
+static MP_DEFINE_CONST_FUN_OBJ_1(mp_jsffi_create_proxy_obj, mp_jsffi_create_proxy);
+
+// JS helper: decodes the value stored at `out`, runs it through
+// PyProxy.toJs(), and writes the result back to `out`.
+EM_JS(void, proxy_convert_mp_to_js_then_js_to_js_then_js_to_mp_obj_jsside, (uint32_t * out), {
+    const py_side = proxy_convert_mp_to_js_obj_jsside(out);
+    proxy_convert_js_to_mp_obj_jsside(PyProxy.toJs(py_side), out);
+});
+
+// jsffi.to_js(arg): converts `arg` to its JS-side encoding, applies
+// PyProxy.toJs() on the JS side, and returns the result converted back to a
+// MicroPython object.
+static mp_obj_t mp_jsffi_to_js(mp_obj_t arg) {
+    // Three-word scratch buffer shared by all three conversion steps.
+    uint32_t pv[3];
+    proxy_convert_mp_to_js_obj_cside(arg, pv);
+    proxy_convert_mp_to_js_then_js_to_js_then_js_to_mp_obj_jsside(pv);
+    mp_obj_t converted = proxy_convert_js_to_mp_obj_cside(pv);
+    return converted;
+}
+static MP_DEFINE_CONST_FUN_OBJ_1(mp_jsffi_to_js_obj, mp_jsffi_to_js);
+
+// Writes two 32-bit values to `out`: the length of proxy_js_ref at offset 0
+// and the number of its entries that are not `undefined` at offset 4.
+// *FORMAT-OFF*
+EM_JS(void, js_get_proxy_js_ref_info, (uint32_t * out), {
+    let live = 0;
+    for (const slot of proxy_js_ref) {
+        if (slot === undefined) {
+            continue;
+        }
+        live += 1;
+    }
+    Module.setValue(out, proxy_js_ref.length, "i32");
+    Module.setValue(out + 4, live, "i32");
+});
+// *FORMAT-ON*
+
+// jsffi.mem_info(): returns a 7-tuple of small ints describing the GC heap
+// (total, used, free) followed by the allocated size and used-entry count of
+// the C-side and then the JS-side proxy reference tables.
+static mp_obj_t mp_jsffi_mem_info(void) {
+    // Count the occupied slots of the C-side reference list.
+    mp_obj_list_t *c_refs = (mp_obj_list_t *)MP_OBJ_TO_PTR(MP_STATE_PORT(proxy_c_ref));
+    mp_int_t c_refs_used = 0;
+    for (size_t idx = 0; idx < c_refs->len; ++idx) {
+        c_refs_used += (c_refs->items[idx] != MP_OBJ_NULL) ? 1 : 0;
+    }
+
+    // js_refs[0] = allocated size, js_refs[1] = used entries (filled in by JS).
+    uint32_t js_refs[2];
+    js_get_proxy_js_ref_info(js_refs);
+
+    gc_info_t heap;
+    gc_info(&heap);
+
+    mp_obj_t report[] = {
+        MP_OBJ_NEW_SMALL_INT(heap.total), // GC heap total bytes
+        MP_OBJ_NEW_SMALL_INT(heap.used), // GC heap used bytes
+        MP_OBJ_NEW_SMALL_INT(heap.free), // GC heap free bytes
+        MP_OBJ_NEW_SMALL_INT(c_refs->len), // proxy_c_ref allocated size
+        MP_OBJ_NEW_SMALL_INT(c_refs_used), // proxy_c_ref used
+        MP_OBJ_NEW_SMALL_INT(js_refs[0]), // proxy_js_ref allocated size
+        MP_OBJ_NEW_SMALL_INT(js_refs[1]), // proxy_js_ref used
+    };
+    return mp_obj_new_tuple(MP_ARRAY_SIZE(report), report);
+}
+static MP_DEFINE_CONST_FUN_OBJ_0(mp_jsffi_mem_info_obj, mp_jsffi_mem_info);
+
+static const mp_rom_map_elem_t mp_module_jsffi_globals_table[] = { // names exported by the jsffi module
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_jsffi) },
+
+    { MP_ROM_QSTR(MP_QSTR_JsProxy), MP_ROM_PTR(&mp_type_jsproxy) }, // type object not defined in this file
+    { MP_ROM_QSTR(MP_QSTR_JsException), MP_ROM_PTR(&mp_type_JsException) }, // type object not defined in this file
+    { MP_ROM_QSTR(MP_QSTR_create_proxy), MP_ROM_PTR(&mp_jsffi_create_proxy_obj) },
+    { MP_ROM_QSTR(MP_QSTR_to_js), MP_ROM_PTR(&mp_jsffi_to_js_obj) },
+    { MP_ROM_QSTR(MP_QSTR_mem_info), MP_ROM_PTR(&mp_jsffi_mem_info_obj) },
+};
+static MP_DEFINE_CONST_DICT(mp_module_jsffi_globals, mp_module_jsffi_globals_table);
+
+const mp_obj_module_t mp_module_jsffi = { // module object registered below under the name jsffi
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t *)&mp_module_jsffi_globals, // backed by the constant table defined here
+};
+
+MP_REGISTER_MODULE(MP_QSTR_jsffi, mp_module_jsffi);
+
+#endif // MICROPY_PY_JSFFI
diff --git a/tulip/amyrepl/modtime.c b/tulip/amyrepl/modtime.c
new file mode 100644
index 000000000..1b1e63d4d
--- /dev/null
+++ b/tulip/amyrepl/modtime.c
@@ -0,0 +1,51 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "py/obj.h"
+#include "shared/timeutils/timeutils.h"
+#include "library.h"
+
+// Build the 8-tuple (year, month, mday, hour, minute, second, weekday,
+// yearday) for the current time reported by mp_hal_time_ms().
+static mp_obj_t mp_time_localtime_get(void) {
+    // Whole seconds since the epoch; the millisecond remainder is discarded.
+    const uint64_t epoch_secs = mp_hal_time_ms() / 1000;
+
+    timeutils_struct_time_t now;
+    timeutils_seconds_since_epoch_to_struct_time(epoch_secs, &now);
+
+    mp_obj_t fields[8] = {
+        mp_obj_new_int(now.tm_year),
+        mp_obj_new_int(now.tm_mon),
+        mp_obj_new_int(now.tm_mday),
+        mp_obj_new_int(now.tm_hour),
+        mp_obj_new_int(now.tm_min),
+        mp_obj_new_int(now.tm_sec),
+        mp_obj_new_int(now.tm_wday),
+        mp_obj_new_int(now.tm_yday),
+    };
+    return mp_obj_new_tuple(8, fields);
+}
+
+// Current time in seconds since the Epoch as a float, computed from the
+// millisecond value returned by mp_hal_time_ms().
+static mp_obj_t mp_time_time_get(void) {
+    const mp_float_t now_ms = (mp_float_t)mp_hal_time_ms();
+    return mp_obj_new_float(now_ms / 1000);
+}
diff --git a/tulip/amyrepl/mpconfigport.h b/tulip/amyrepl/mpconfigport.h
new file mode 100644
index 000000000..ab56162ca
--- /dev/null
+++ b/tulip/amyrepl/mpconfigport.h
@@ -0,0 +1,128 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2013-2022 Damien P. George
+ * Copyright (c) 2017, 2018 Rami Ali
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Options to control how MicroPython is built for this port, overriding
+// defaults in py/mpconfig.h.
+
+#include <stdint.h>
+#include <stdlib.h> // for malloc, for MICROPY_GC_SPLIT_HEAP_AUTO
+
+// Variant-specific definitions.
+#include "mpconfigvariant.h"
+
+#ifndef MICROPY_CONFIG_ROM_LEVEL // the variant header included above may already have chosen a level
+#define MICROPY_CONFIG_ROM_LEVEL (MICROPY_CONFIG_ROM_LEVEL_EXTRA_FEATURES)
+#endif
+
+#define MICROPY_ALLOC_PATH_MAX      (256)
+#define MICROPY_PERSISTENT_CODE_LOAD (1)
+#define MICROPY_COMP_ALLOW_TOP_LEVEL_AWAIT (1)
+#define MICROPY_READER_VFS          (MICROPY_VFS) // follows MICROPY_VFS, which is given its default further down
+#define MICROPY_ENABLE_GC           (1)
+#define MICROPY_ENABLE_PYSTACK      (1)
+#define MICROPY_STACK_CHECK         (0)
+#define MICROPY_KBD_EXCEPTION       (1)
+#define MICROPY_REPL_EVENT_DRIVEN   (1)
+#define MICROPY_LONGINT_IMPL        (MICROPY_LONGINT_IMPL_MPZ)
+#define MICROPY_ENABLE_DOC_STRING   (1)
+#define MICROPY_WARNINGS            (1)
+#define MICROPY_ERROR_PRINTER       (&mp_stderr_print) // declared extern at the end of this header
+#define MICROPY_FLOAT_IMPL          (MICROPY_FLOAT_IMPL_DOUBLE)
+#define MICROPY_USE_INTERNAL_ERRNO  (1)
+#define MICROPY_USE_INTERNAL_PRINTF (0)
+
+#define MICROPY_EPOCH_IS_1970       (1)
+#define MICROPY_PY_ASYNCIO_TASK_QUEUE_PUSH_CALLBACK (1)
+#define MICROPY_PY_RANDOM_SEED_INIT_FUNC (mp_js_random_u32()) // prototype is at the end of this header
+#define MICROPY_PY_TIME_GMTIME_LOCALTIME_MKTIME (1)
+#define MICROPY_PY_TIME_TIME_TIME_NS (1)
+#define MICROPY_PY_TIME_INCLUDEFILE "ports/webassembly/modtime.c" // NOTE(review): names the webassembly port path, not this directory's modtime.c -- confirm which copy is intended
+#ifndef MICROPY_VFS // VFS defaults to enabled unless defined earlier
+#define MICROPY_VFS                 (1)
+#endif
+#define MICROPY_VFS_POSIX           (MICROPY_VFS)
+#define MICROPY_PY_SYS_PLATFORM     "webassembly"
+
+#ifndef MICROPY_PY_JS // js module defaults on when the ROM level is at least "extra features"
+#define MICROPY_PY_JS (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
+#endif
+
+#ifndef MICROPY_PY_JSFFI // jsffi module uses the same default rule as MICROPY_PY_JS
+#define MICROPY_PY_JSFFI (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
+#endif
+
+#define MICROPY_EVENT_POLL_HOOK \
+    do { \
+        extern void mp_handle_pending(bool); \
+        mp_handle_pending(true); \
+    } while (0); // NOTE: the expansion itself ends in ';', so it is a complete statement without a caller-supplied semicolon
+
+// Whether the VM will periodically call mp_js_hook(), which checks for
+// interrupt characters on stdin (or equivalent input).  Defaults to 0 unless defined earlier; when enabled, mp_js_hook() runs once every MICROPY_VM_HOOK_COUNT polls.
+#ifndef MICROPY_VARIANT_ENABLE_JS_HOOK
+#define MICROPY_VARIANT_ENABLE_JS_HOOK (0)
+#endif
+
+#if MICROPY_VARIANT_ENABLE_JS_HOOK
+#define MICROPY_VM_HOOK_COUNT (10) // reload value of the countdown used by MICROPY_VM_HOOK_POLL
+#define MICROPY_VM_HOOK_INIT static uint vm_hook_divisor = MICROPY_VM_HOOK_COUNT;
+#define MICROPY_VM_HOOK_POLL if (--vm_hook_divisor == 0) { \
+        vm_hook_divisor = MICROPY_VM_HOOK_COUNT; \
+        extern void mp_js_hook(void); \
+        mp_js_hook(); \
+} // countdown reached zero: reload it and call the JS hook
+#define MICROPY_VM_HOOK_LOOP MICROPY_VM_HOOK_POLL // same poll is used for the loop hook
+#define MICROPY_VM_HOOK_RETURN MICROPY_VM_HOOK_POLL // and for the return hook
+#endif
+
+// type definitions for the specific machine
+
+#define MP_SSIZE_MAX (0x7fffffff) // largest positive value of a signed 32-bit integer
+
+// This port is intended to be 32-bit, but unfortunately, int32_t for
+// different targets may be defined in different ways - either as int
+// or as long. This requires different printf formatting specifiers
+// to print such value. So, we avoid int32_t and use int directly.
+#define UINT_FMT "%u"
+#define INT_FMT "%d"
+typedef int mp_int_t; // must be pointer size
+typedef unsigned mp_uint_t; // must be pointer size
+typedef long mp_off_t;
+
+#define MICROPY_HW_BOARD_NAME "JS"
+#define MICROPY_HW_MCU_NAME "Emscripten"
+
+#define MP_STATE_PORT MP_STATE_VM // port-state accesses expand to VM-state accesses
+
+#if MICROPY_VFS
+// _GNU_SOURCE must be defined to get definitions of DT_xxx symbols from dirent.h.
+#define _GNU_SOURCE
+#endif
+
+extern const struct _mp_print_t mp_stderr_print; // referenced by MICROPY_ERROR_PRINTER in this header
+
+uint32_t mp_js_random_u32(void); // referenced by MICROPY_PY_RANDOM_SEED_INIT_FUNC in this header
diff --git a/tulip/amyrepl/mphalport.c b/tulip/amyrepl/mphalport.c
new file mode 100644
index 000000000..3935068b9
--- /dev/null
+++ b/tulip/amyrepl/mphalport.c
@@ -0,0 +1,79 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2013, 2014 Damien P. George and 2017, 2018 Rami Ali
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <unistd.h>
+#include "py/mphal.h"
+#include "library.h"
+
+// Output backend for mp_stderr_print: writes `len` bytes of `str` to the
+// standard error descriptor.  `env` is unused and the result of write() is
+// ignored.
+static void stderr_print_strn(void *env, const char *str, size_t len) {
+    (void)env;
+    write(STDERR_FILENO, str, len);
+}
+
+// Printer object whose output function is stderr_print_strn.
+const mp_print_t mp_stderr_print = {NULL, stderr_print_strn};
+
+// Writes `len` bytes of `str` to standard output and returns the result of
+// write() converted to mp_uint_t (so an error return of -1 becomes a large
+// unsigned value).
+mp_uint_t mp_hal_stdout_tx_strn(const char *str, size_t len) {
+    return write(STDOUT_FILENO, str, len);
+}
+
+// Busy-waits until at least `ms` milliseconds have elapsed according to
+// mp_hal_ticks_ms().  The loop body is empty, so nothing else runs meanwhile.
+void mp_hal_delay_ms(mp_uint_t ms) {
+    const uint32_t t0 = mp_hal_ticks_ms();
+    for (;;) {
+        if (mp_hal_ticks_ms() - t0 >= ms) {
+            break;
+        }
+    }
+}
+
+// Busy-waits until at least `us` microseconds have elapsed according to
+// mp_hal_ticks_us().  The loop body is empty, so nothing else runs meanwhile.
+void mp_hal_delay_us(mp_uint_t us) {
+    const uint32_t t0 = mp_hal_ticks_us();
+    for (;;) {
+        if (mp_hal_ticks_us() - t0 >= us) {
+            break;
+        }
+    }
+}
+
+mp_uint_t mp_hal_ticks_us(void) { // microsecond tick derived from the millisecond counter, so it advances in steps of 1000
+    return mp_js_ticks_ms() * 1000; // NOTE(review): the multiply happens in mp_js_ticks_ms()'s return type -- confirm its wrap-around behaviour is acceptable
+}
+
+mp_uint_t mp_hal_ticks_ms(void) { // millisecond tick, taken directly from the JS-side helper
+    return mp_js_ticks_ms();
+}
+
+mp_uint_t mp_hal_ticks_cpu(void) { // no CPU cycle counter is provided here
+    return 0; // always reports 0
+}
+
+// Returns the value of mp_js_time_ms(), taken as a double and then truncated
+// to an unsigned 64-bit integer.
+uint64_t mp_hal_time_ms(void) {
+    return (uint64_t)(double)mp_js_time_ms();
+}
+
+// Returns mp_hal_time_ms() scaled to nanoseconds; the value therefore only
+// changes in whole-millisecond steps.
+uint64_t mp_hal_time_ns(void) {
+    const uint64_t now_ms = mp_hal_time_ms();
+    return now_ms * 1000000ULL;
+}
+
+extern int mp_interrupt_char; // defined outside this file
+
+int mp_hal_get_interrupt_char(void) { // accessor for the current value of mp_interrupt_char
+    return mp_interrupt_char;
+}
diff --git a/tulip/amyrepl/mphalport.h b/tulip/amyrepl/mphalport.h
new file mode 100644
index 000000000..a90de8ec5
--- /dev/null
+++ b/tulip/amyrepl/mphalport.h
@@ -0,0 +1,63 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2013, 2014 Damien P. George and 2017, 2018 Rami Ali
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "py/obj.h"
+#include "shared/runtime/interrupt_char.h"
+
+#define mp_hal_stdin_rx_chr() (0) // stdin read is stubbed out: always evaluates to 0
+mp_uint_t mp_hal_stdout_tx_strn(const char *str, size_t len);
+
+void mp_hal_delay_ms(mp_uint_t ms);
+void mp_hal_delay_us(mp_uint_t us);
+mp_uint_t mp_hal_ticks_ms(void);
+mp_uint_t mp_hal_ticks_us(void);
+mp_uint_t mp_hal_ticks_cpu(void);
+uint64_t mp_hal_time_ms(void); // wall-clock style time in milliseconds, as a 64-bit value
+
+int mp_hal_get_interrupt_char(void);
+
+#if MICROPY_VFS_POSIX
+
+#include <errno.h>
+
+// This macro is used to implement PEP 475 to retry specified syscalls on EINTR.  `ret` receives the value of `syscall`; when it is -1 with errno == EINTR, pending events are handled and the call is retried; for any other errno the `raise` statement runs (it can read the local `err`).
+#define MP_HAL_RETRY_SYSCALL(ret, syscall, raise) \
+    { \
+        for (;;) { \
+            ret = syscall; \
+            if (ret == -1) { \
+                int err = errno; \
+                if (err == EINTR) { \
+                    mp_handle_pending(true); \
+                    continue; \
+                } \
+                raise; \
+            } \
+            break; \
+        } \
+    }
+
+#endif
diff --git a/tulip/amyrepl/node_run.sh b/tulip/amyrepl/node_run.sh
new file mode 100755
index 000000000..466ffe39e
--- /dev/null
+++ b/tulip/amyrepl/node_run.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+# Run the built MicroPython JavaScript bundle under Node.js, forwarding all
+# command-line arguments unchanged.  The script path is quoted so that a
+# checkout directory containing spaces or glob characters still works.
+node "$(dirname "$0")/build/micropython.js" "$@"
diff --git a/tulip/amyrepl/objjsproxy.c b/tulip/amyrepl/objjsproxy.c
new file mode 100644
index 000000000..167d4382b
--- /dev/null
+++ b/tulip/amyrepl/objjsproxy.c
@@ -0,0 +1,550 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "emscripten.h"
+#include "py/objmodule.h"
+#include "py/runtime.h"
+#include "proxy_c.h"
+
+// Returns whether the JS object referenced by `jsref` has a property named by
+// the C string `str`, as decided by the JavaScript `in` operator.
+EM_JS(bool, has_attr, (int jsref, const char *str), {
+    const target = proxy_js_ref[jsref];
+    const name = UTF8ToString(str);
+    return name in target;
+});
+
+// Looks up property `str` on the JS object referenced by `jsref`.  On success
+// the value is written to `out` and true is returned; false means the
+// attribute does not exist.
+// *FORMAT-OFF*
+EM_JS(bool, lookup_attr, (int jsref, const char *str, uint32_t * out), {
+    const target = proxy_js_ref[jsref];
+    const name = UTF8ToString(str);
+
+    // A property counts as missing only when its value is `undefined` AND the
+    // `in` operator does not find it; a property that exists with the value
+    // `undefined` is still reported as present.
+    let found = target[name];
+    if (found === undefined && !(name in target)) {
+        return false;
+    }
+
+    // Plain JS functions taken from anything other than globalThis are bound
+    // to their owner.  Functions carrying `_ref` are proxies of Python
+    // functions: they are left unbound so that, when passed back to Python,
+    // they can be unwrapped into the true Python function.
+    const needs_bind = typeof found === "function" && target !== globalThis && !("_ref" in found);
+    if (needs_bind) {
+        found = found.bind(target);
+    }
+
+    proxy_convert_js_to_mp_obj_jsside(found, out);
+    return true;
+});
+// *FORMAT-ON*
+
+// Sets property `attr_ptr` (a C string) on the JS object referenced by `jsref`
+// to the value encoded at `value_ref`.
+EM_JS(void, store_attr, (int jsref, const char *attr_ptr, uint32_t * value_ref), {
+    const name = UTF8ToString(attr_ptr);
+    const js_value = proxy_convert_mp_to_js_obj_jsside(value_ref);
+    const target = proxy_js_ref[jsref];
+    target[name] = js_value;
+});
+
+// Calls the JS function referenced by `f_ref` with no arguments and writes
+// its result to `out`.
+EM_JS(void, call0, (int f_ref, uint32_t * out), {
+    // The function must be copied into a local before it is called.
+    // Writing proxy_js_ref[f_ref]() would be a method-style call with "this"
+    // bound to proxy_js_ref, which is not the intended receiver.  Calling
+    // through an independent variable makes it a plain function call, so
+    // "this" is correct (it will be "undefined").
+    const target = proxy_js_ref[f_ref];
+    const result = target();
+    proxy_convert_js_to_mp_obj_jsside(result, out);
+});
+
+// Calls the JS function referenced by `f_ref` with the one argument encoded
+// at `a0` and writes its result to `out`.
+// NOTE(review): declared as returning int although the JS body has no return
+// statement; jsproxy_call in this file ignores the result.
+EM_JS(int, call1, (int f_ref, uint32_t * a0, uint32_t * out), {
+    const first = proxy_convert_mp_to_js_obj_jsside(a0);
+    const target = proxy_js_ref[f_ref];
+    const result = target(first);
+    proxy_convert_js_to_mp_obj_jsside(result, out);
+});
+
+// Calls the JS function referenced by `f_ref` with the two arguments encoded
+// at `a0` and `a1` and writes its result to `out`.
+// NOTE(review): declared as returning int although the JS body has no return
+// statement; jsproxy_call in this file ignores the result.
+EM_JS(int, call2, (int f_ref, uint32_t * a0, uint32_t * a1, uint32_t * out), {
+    const first = proxy_convert_mp_to_js_obj_jsside(a0);
+    const second = proxy_convert_mp_to_js_obj_jsside(a1);
+    const target = proxy_js_ref[f_ref];
+    const result = target(first, second);
+    proxy_convert_js_to_mp_obj_jsside(result, out);
+});
+
+// Calls the JS function referenced by `f_ref` with `n_args` arguments read
+// from consecutive 12-byte (three 32-bit word) records starting at `value`,
+// and writes its result to `out`.
+// NOTE(review): declared as returning int although the JS body has no return
+// statement; jsproxy_call in this file ignores the result.
+EM_JS(int, calln, (int f_ref, uint32_t n_args, uint32_t * value, uint32_t * out), {
+    const target = proxy_js_ref[f_ref];
+    const js_args = [];
+    let record = value;
+    for (let k = 0; k < n_args; ++k) {
+        js_args.push(proxy_convert_mp_to_js_obj_jsside(record));
+        record += 12;
+    }
+    const result = target(... js_args);
+    proxy_convert_js_to_mp_obj_jsside(result, out);
+});
+
+// Calls the JS function referenced by `f_ref` with a single object argument
+// built from `n_kw` keyword pairs.  `key` is an array of 32-bit C-string
+// addresses and `value` an array of 12-byte encoded values.  The result is
+// written to `out`.
+EM_JS(void, call0_kwarg, (int f_ref, uint32_t n_kw, uint32_t * key, uint32_t * value, uint32_t * out), {
+    const target = proxy_js_ref[f_ref];
+    const kwargs = {};
+    for (let k = 0; k < n_kw; ++k) {
+        const name = UTF8ToString(getValue(key + k * 4, "i32"));
+        kwargs[name] = proxy_convert_mp_to_js_obj_jsside(value + k * 12);
+    }
+    const result = target(kwargs);
+    proxy_convert_js_to_mp_obj_jsside(result, out);
+});
+
+// Calls the JS function referenced by `f_ref` with the positional argument
+// encoded at `arg0` followed by an object built from `n_kw` keyword pairs
+// (`key`: 32-bit C-string addresses, `value`: 12-byte encoded values).  The
+// result is written to `out`.
+EM_JS(void, call1_kwarg, (int f_ref, uint32_t * arg0, uint32_t n_kw, uint32_t * key, uint32_t * value, uint32_t * out), {
+    const target = proxy_js_ref[f_ref];
+    const positional = proxy_convert_mp_to_js_obj_jsside(arg0);
+    const kwargs = {};
+    for (let k = 0; k < n_kw; ++k) {
+        const name = UTF8ToString(getValue(key + k * 4, "i32"));
+        kwargs[name] = proxy_convert_mp_to_js_obj_jsside(value + k * 12);
+    }
+    const result = target(positional, kwargs);
+    proxy_convert_js_to_mp_obj_jsside(result, out);
+});
+
+// Constructs a new object via Reflect.construct() using the JS constructor
+// referenced by `f_ref` and `n_args` arguments read from consecutive 12-byte
+// records starting at `args`; the new object is written to `out`.
+EM_JS(void, js_reflect_construct, (int f_ref, uint32_t n_args, uint32_t * args, uint32_t * out), {
+    const ctor = proxy_js_ref[f_ref];
+    const ctor_args = [];
+    let record = args;
+    for (let k = 0; k < n_args; ++k) {
+        ctor_args.push(proxy_convert_mp_to_js_obj_jsside(record));
+        record += 12;
+    }
+    const instance = Reflect.construct(ctor, ctor_args);
+    proxy_convert_js_to_mp_obj_jsside(instance, out);
+});
+
+// Obtains an iterator from the JS object referenced by `f_ref` by calling its
+// Symbol.iterator method, and writes the iterator to `out`.
+EM_JS(void, js_get_iter, (int f_ref, uint32_t * out), {
+    const iterable = proxy_js_ref[f_ref];
+    const iterator = iterable[Symbol.iterator]();
+    proxy_convert_js_to_mp_obj_jsside(iterator, out);
+});
+
+// Advances the JS iterator referenced by `f_ref`.  Returns false when the
+// iterator reports it is done; otherwise writes the produced value to `out`
+// and returns true.
+EM_JS(bool, js_iter_next, (int f_ref, uint32_t * out), {
+    const iterator = proxy_js_ref[f_ref];
+    const step = iterator.next();
+    if (!step.done) {
+        proxy_convert_js_to_mp_obj_jsside(step.value, out);
+        return true;
+    }
+    return false;
+});
+
+// Reads element `index_ref` of the JS object referenced by `f_ref`, after the
+// index has been passed through python_index_semantics(), and writes the
+// element to `out`.
+EM_JS(void, js_subscr_load, (int f_ref, uint32_t * index_ref, uint32_t * out), {
+    const container = proxy_js_ref[f_ref];
+    const raw_index = proxy_convert_mp_to_js_obj_jsside(index_ref);
+    const js_index = python_index_semantics(container, raw_index);
+    proxy_convert_js_to_mp_obj_jsside(container[js_index], out);
+});
+
+// Stores the value encoded at `value` into element `idx` of the JS object
+// referenced by `f_ref`.
+// NOTE(review): unlike js_subscr_load, the index is not passed through
+// python_index_semantics() here -- confirm that is intended.
+EM_JS(void, js_subscr_store, (int f_ref, uint32_t * idx, uint32_t * value), {
+    const container = proxy_js_ref[f_ref];
+    const js_index = proxy_convert_mp_to_js_obj_jsside(idx);
+    container[js_index] = proxy_convert_mp_to_js_obj_jsside(value);
+});
+
+// Print handler for JsProxy objects: emits "<JsProxy N>" where N is the
+// proxy's JS reference.  `kind` is not used.
+static void jsproxy_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
+    const mp_obj_jsproxy_t *proxy = MP_OBJ_TO_PTR(self_in);
+    mp_printf(print, "<JsProxy %d>", proxy->ref);
+}
+
+// Call handler for JsProxy objects.  Positional-only calls pass every
+// argument through to the JS function.  Calls with keyword arguments accept
+// at most one positional argument and pass the keywords as one trailing JS
+// object.  Returns the JS result converted to a MicroPython object.
+static mp_obj_t jsproxy_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
+    mp_obj_jsproxy_t *self = MP_OBJ_TO_PTR(self_in);
+
+    if (n_kw != 0) {
+        // Keyword call: zero or one positional argument is allowed.
+        mp_arg_check_num(n_args, n_kw, 0, 1, true);
+
+        // kw_names[k] holds the address of the k-th keyword's C string and
+        // kw_values holds PVN words per converted keyword value.  Keyword
+        // name/value pairs follow the positional arguments in `args`.
+        uint32_t kw_names[n_kw];
+        uint32_t kw_values[PVN * n_kw];
+        const mp_obj_t *kw = args + n_args;
+        for (size_t k = 0; k < n_kw; ++k) {
+            kw_names[k] = (uintptr_t)mp_obj_str_get_str(kw[k * 2]);
+            proxy_convert_mp_to_js_obj_cside(kw[k * 2 + 1], &kw_values[k * PVN]);
+        }
+
+        uint32_t result[3];
+        if (n_args != 0) {
+            // Exactly one positional argument (enforced by the check above).
+            uint32_t positional[PVN];
+            proxy_convert_mp_to_js_obj_cside(args[0], positional);
+            call1_kwarg(self->ref, positional, n_kw, kw_names, kw_values, result);
+        } else {
+            call0_kwarg(self->ref, n_kw, kw_names, kw_values, result);
+        }
+        return proxy_convert_js_to_mp_obj_cside(result);
+    }
+
+    // Positional-only call: any argument count up to the function-object limit.
+    mp_arg_check_num(n_args, n_kw, 0, MP_OBJ_FUN_ARGS_MAX, false);
+
+    switch (n_args) {
+        case 0: {
+            uint32_t result[3];
+            call0(self->ref, result);
+            return proxy_convert_js_to_mp_obj_cside(result);
+        }
+        case 1: {
+            uint32_t first[PVN];
+            uint32_t result[PVN];
+            proxy_convert_mp_to_js_obj_cside(args[0], first);
+            call1(self->ref, first, result);
+            return proxy_convert_js_to_mp_obj_cside(result);
+        }
+        case 2: {
+            uint32_t first[PVN];
+            proxy_convert_mp_to_js_obj_cside(args[0], first);
+            uint32_t second[PVN];
+            proxy_convert_mp_to_js_obj_cside(args[1], second);
+            uint32_t result[3];
+            call2(self->ref, first, second, result);
+            return proxy_convert_js_to_mp_obj_cside(result);
+        }
+        default: {
+            // Three or more arguments: pack them as consecutive PVN-word records.
+            uint32_t packed[PVN * n_args];
+            for (size_t k = 0; k < n_args; ++k) {
+                proxy_convert_mp_to_js_obj_cside(args[k], &packed[k * PVN]);
+            }
+            uint32_t result[3];
+            calln(self->ref, n_args, packed, result);
+            return proxy_convert_js_to_mp_obj_cside(result);
+        }
+    }
+}
+
+EM_JS(void, proxy_js_free_obj, (int js_ref), {
+    if (js_ref >= PROXY_JS_REF_NUM_STATIC) {
+        proxy_js_ref[js_ref] = undefined;
+        if (js_ref < proxy_js_ref_next) {
+            proxy_js_ref_next = js_ref;
+        }
+    }
+});
+
+// Finaliser for JsProxy objects: frees the proxy's slot in the JS reference
+// table and returns None.
+static mp_obj_t jsproxy___del__(mp_obj_t self_in) {
+    const mp_obj_jsproxy_t *proxy = MP_OBJ_TO_PTR(self_in);
+    proxy_js_free_obj(proxy->ref);
+    return mp_const_none;
+}
+static MP_DEFINE_CONST_FUN_OBJ_1(jsproxy___del___obj, jsproxy___del__);
+
+// Implements the `new` pseudo-attribute of a JsProxy.  args[0] is the proxy
+// of the JS constructor; the remaining arguments are converted and passed to
+// Reflect.construct() on the JS side.  Returns the constructed object
+// converted to a MicroPython object.
+static mp_obj_t jsproxy_reflect_construct(size_t n_args, const mp_obj_t *args) {
+    int arg0 = mp_obj_jsproxy_get_ref(args[0]);
+    n_args -= 1;
+    args += 1;
+    // Each converted argument occupies PVN words (it is written at
+    // &args_conv[i * PVN] below), so the buffer needs PVN * n_args entries.
+    // Sizing it as n_args alone overflowed the stack for any constructor
+    // argument.  At least one record is reserved so that the array never has
+    // zero length when the constructor is called without arguments.
+    uint32_t args_conv[PVN * (n_args > 0 ? n_args : 1)];
+    for (unsigned int i = 0; i < n_args; ++i) {
+        proxy_convert_mp_to_js_obj_cside(args[i], &args_conv[i * PVN]);
+    }
+    uint32_t out[PVN];
+    js_reflect_construct(arg0, n_args, args_conv, out);
+    return proxy_convert_js_to_mp_obj_cside(out);
+}
+static MP_DEFINE_CONST_FUN_OBJ_VAR(jsproxy_reflect_construct_obj, 1, jsproxy_reflect_construct);
+
+// Subscript handler for JsProxy objects.  `value` selects the operation:
+// MP_OBJ_SENTINEL loads proxy[index], MP_OBJ_NULL requests deletion (reported
+// as unsupported by returning MP_OBJ_NULL), anything else is stored at
+// proxy[index] and None is returned.
+static mp_obj_t jsproxy_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
+    mp_obj_jsproxy_t *self = MP_OBJ_TO_PTR(self_in);
+
+    if (value == MP_OBJ_NULL) {
+        // Delete subscript: not supported.
+        return MP_OBJ_NULL;
+    }
+
+    uint32_t js_index[PVN];
+    proxy_convert_mp_to_js_obj_cside(index, js_index);
+
+    if (value == MP_OBJ_SENTINEL) {
+        // Load subscript.
+        uint32_t loaded[PVN];
+        js_subscr_load(self->ref, js_index, loaded);
+        return proxy_convert_js_to_mp_obj_cside(loaded);
+    }
+
+    // Store subscript.
+    uint32_t js_value[PVN];
+    proxy_convert_mp_to_js_obj_cside(value, js_value);
+    js_subscr_store(self->ref, js_index, js_value);
+    return mp_const_none;
+}
+
+// Attribute handler for JsProxy objects.  dest[0] == MP_OBJ_NULL requests a
+// load; otherwise dest[1] == MP_OBJ_NULL requests a delete (ignored here) and
+// any other dest[1] is the value to store.
+void mp_obj_jsproxy_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
+    mp_obj_jsproxy_t *self = MP_OBJ_TO_PTR(self_in);
+
+    if (dest[0] != MP_OBJ_NULL) {
+        if (dest[1] != MP_OBJ_NULL) {
+            // Store attribute; clearing dest[0] reports that it was handled.
+            uint32_t js_value[PVN];
+            proxy_convert_mp_to_js_obj_cside(dest[1], js_value);
+            store_attr(self->ref, qstr_str(attr), js_value);
+            dest[0] = MP_OBJ_NULL;
+        }
+        // Otherwise: delete attribute, for which nothing is done.
+        return;
+    }
+
+    // Load attribute.
+    if (attr == MP_QSTR___del__) {
+        // For finaliser.
+        dest[0] = MP_OBJ_FROM_PTR(&jsproxy___del___obj);
+        dest[1] = self_in;
+        return;
+    }
+
+    uint32_t found[PVN];
+    if (lookup_attr(self->ref, qstr_str(attr), found)) {
+        dest[0] = proxy_convert_js_to_mp_obj_cside(found);
+        return;
+    }
+
+    if (attr == MP_QSTR_new) {
+        // Special case to handle construction of JS objects.
+        // JS objects don't have a ".new" attribute, doing "Obj.new" is a Pyodide idiom for "new Obj".
+        // It translates to the JavaScript "Reflect.construct(Obj, Array(...args))".
+        dest[0] = MP_OBJ_FROM_PTR(&jsproxy_reflect_construct_obj);
+        dest[1] = self_in;
+    }
+}
+
+/******************************************************************************/
+// jsproxy iterator
+
+typedef struct _jsproxy_it_t { // iterator state that jsproxy_new_it places in the caller's iter_buf
+    mp_obj_base_t base; // type is set to mp_type_polymorph_iter by jsproxy_new_it
+    mp_fun_1_t iternext; // set to jsproxy_it_iternext by jsproxy_new_it
+    mp_obj_jsproxy_t *iter; // converted result of js_get_iter on the iterated proxy
+} jsproxy_it_t;
+
+// Produces the next item of a JsProxy iterator, or MP_OBJ_STOP_ITERATION when
+// the underlying JS iterator reports it is done.
+static mp_obj_t jsproxy_it_iternext(mp_obj_t self_in) {
+    jsproxy_it_t *it = MP_OBJ_TO_PTR(self_in);
+    uint32_t item[3];
+    if (!js_iter_next(it->iter->ref, item)) {
+        return MP_OBJ_STOP_ITERATION;
+    }
+    return proxy_convert_js_to_mp_obj_cside(item);
+}
+
+// Creates an iterator over the JS object behind `self_in`.  The iterator
+// state is stored in the caller-supplied `iter_buf` (which the assert
+// requires to be large enough) and wraps the JS iterator from js_get_iter().
+static mp_obj_t jsproxy_new_it(mp_obj_t self_in, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(jsproxy_it_t) <= sizeof(mp_obj_iter_buf_t));
+
+    jsproxy_it_t *it = (jsproxy_it_t *)iter_buf;
+    it->base.type = &mp_type_polymorph_iter;
+    it->iternext = jsproxy_it_iternext;
+
+    // Ask the JS side for the iterator and keep its converted form.
+    mp_obj_jsproxy_t *source = MP_OBJ_TO_PTR(self_in);
+    uint32_t js_iterator[3];
+    js_get_iter(source->ref, js_iterator);
+    it->iter = proxy_convert_js_to_mp_obj_cside(js_iterator);
+
+    return MP_OBJ_FROM_PTR(it);
+}
+
+/******************************************************************************/
+// jsproxy generator
+
+enum { // states stepped through by jsproxy_gen_resume
+    JSOBJ_GEN_STATE_WAITING, // next resume yields the stored thenable
+    JSOBJ_GEN_STATE_COMPLETED, // thenable already yielded; next resume returns the sent value
+    JSOBJ_GEN_STATE_EXHAUSTED, // finished; further resumes return None
+};
+
+typedef struct _jsproxy_gen_t { // generator-like wrapper resumed by jsproxy_gen_resume
+    mp_obj_base_t base;
+    mp_obj_t thenable; // object yielded by the resume taken in the WAITING state
+    int state; // one of the JSOBJ_GEN_STATE_* values
+} jsproxy_gen_t;
+
+// Resumes the generator wrapper `self_in` and stores the outcome in *ret_val.
+// - A non-null `throw_value` is reported back as MP_VM_RETURN_EXCEPTION.
+// - In the WAITING state the stored thenable is yielded.
+// - In the COMPLETED state `send_value` is returned as the final result.
+// - Afterwards (or in any other state) None is returned.
+mp_vm_return_kind_t jsproxy_gen_resume(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t *ret_val) {
+    jsproxy_gen_t *gen = MP_OBJ_TO_PTR(self_in);
+
+    if (throw_value != MP_OBJ_NULL) {
+        *ret_val = throw_value;
+        return MP_VM_RETURN_EXCEPTION;
+    }
+
+    if (gen->state == JSOBJ_GEN_STATE_WAITING) {
+        *ret_val = gen->thenable;
+        gen->state = JSOBJ_GEN_STATE_COMPLETED;
+        return MP_VM_RETURN_YIELD;
+    }
+
+    if (gen->state == JSOBJ_GEN_STATE_COMPLETED) {
+        *ret_val = send_value;
+        gen->state = JSOBJ_GEN_STATE_EXHAUSTED;
+        return MP_VM_RETURN_NORMAL;
+    }
+
+    // Trying to resume an already stopped generator.
+    // This is an optimised "raise StopIteration(None)".
+    *ret_val = mp_const_none;
+    return MP_VM_RETURN_NORMAL;
+}
+
+static mp_obj_t jsproxy_gen_resume_and_raise(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, bool raise_stop_iteration) { // Resume the generator and turn the VM return kind into a return value or a raised exception.
+    mp_obj_t ret;
+    switch (jsproxy_gen_resume(self_in, send_value, throw_value, &ret)) {
+        case MP_VM_RETURN_NORMAL:
+        default:
+            // A normal return means the generator has finished, i.e. StopIteration:
+            // either raise it, or return the mp_make_stop_iteration() result as an optimisation.
+            if (ret == mp_const_none) {
+                ret = MP_OBJ_NULL; // a None result is forwarded as MP_OBJ_NULL
+            }
+            if (raise_stop_iteration) {
+                mp_raise_StopIteration(ret);
+            } else {
+                return mp_make_stop_iteration(ret);
+            }
+            // NOTE(review): no break above -- presumably mp_raise_StopIteration() never returns; confirm.
+        case MP_VM_RETURN_YIELD:
+            return ret; // the yielded value (the thenable) is handed to the caller
+
+        case MP_VM_RETURN_EXCEPTION:
+            nlr_raise(ret); // re-raise the exception that was thrown in
+    }
+}
+
+static mp_obj_t jsproxy_gen_instance_iternext(mp_obj_t self_in) { // iternext slot: resume with None; completion is returned via mp_make_stop_iteration() rather than raised.
+    return jsproxy_gen_resume_and_raise(self_in, mp_const_none, MP_OBJ_NULL, false);
+}
+
+static mp_obj_t jsproxy_gen_instance_send(mp_obj_t self_in, mp_obj_t send_value) { // send(value) method: resume with send_value; completion raises StopIteration.
+    return jsproxy_gen_resume_and_raise(self_in, send_value, MP_OBJ_NULL, true);
+}
+static MP_DEFINE_CONST_FUN_OBJ_2(jsproxy_gen_instance_send_obj, jsproxy_gen_instance_send);
+
+static mp_obj_t jsproxy_gen_instance_throw(size_t n_args, const mp_obj_t *args) {
+    // Implements the method throw(type[, value[, traceback]]).
+    //
+    // CPython forwards every argument down the call chain and interprets them
+    // at the point where they are finally consumed (native generators and
+    // user-defined objects with a throw() method treat them differently).
+    // MicroPython avoids carrying several values around by collapsing them to
+    // a single object right here:
+    // - args[2], when supplied and not None, is the object passed on (in the
+    //   standard case an exception instance whose class is args[1]);
+    // - otherwise args[1] is passed on (in the standard case an exception
+    //   class or instance);
+    // - args[3] is always ignored.
+    // args[0] is the generator the exception is thrown into.
+
+    const bool have_value = n_args > 2 && args[2] != mp_const_none;
+    mp_obj_t thrown = have_value ? args[2] : args[1];
+
+    return jsproxy_gen_resume_and_raise(args[0], mp_const_none, thrown, true);
+}
+static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(jsproxy_gen_instance_throw_obj, 2, 4, jsproxy_gen_instance_throw);
+
+static mp_obj_t jsproxy_gen_instance_close(mp_obj_t self_in) { // close() method: throw GeneratorExit into the generator and report a successful close as None.
+    mp_obj_t ret;
+    switch (jsproxy_gen_resume(self_in, mp_const_none, MP_OBJ_FROM_PTR(&mp_const_GeneratorExit_obj), &ret)) {
+        case MP_VM_RETURN_YIELD:
+            mp_raise_msg(&mp_type_RuntimeError, MP_ERROR_TEXT("generator ignored GeneratorExit"));
+            // NOTE(review): jsproxy_gen_resume() returns MP_VM_RETURN_EXCEPTION whenever throw_value is set, so this case looks unreachable here.
+        // Swallow GeneratorExit (== successful close), and re-raise any other
+        case MP_VM_RETURN_EXCEPTION:
+            // ret should always be an instance of an exception class
+            if (mp_obj_is_subclass_fast(MP_OBJ_FROM_PTR(mp_obj_get_type(ret)), MP_OBJ_FROM_PTR(&mp_type_GeneratorExit))) {
+                return mp_const_none;
+            }
+            nlr_raise(ret); // anything other than GeneratorExit propagates to the caller
+
+        default:
+            // The only choice left is MP_VM_RETURN_NORMAL which is successful close
+            return mp_const_none;
+    }
+}
+static MP_DEFINE_CONST_FUN_OBJ_1(jsproxy_gen_instance_close_obj, jsproxy_gen_instance_close);
+
+static const mp_rom_map_elem_t jsproxy_gen_instance_locals_dict_table[] = { // Methods exposed on mp_type_jsproxy_gen instances.
+    { MP_ROM_QSTR(MP_QSTR_close), MP_ROM_PTR(&jsproxy_gen_instance_close_obj) },
+    { MP_ROM_QSTR(MP_QSTR_send), MP_ROM_PTR(&jsproxy_gen_instance_send_obj) },
+    { MP_ROM_QSTR(MP_QSTR_throw), MP_ROM_PTR(&jsproxy_gen_instance_throw_obj) },
+};
+static MP_DEFINE_CONST_DICT(jsproxy_gen_instance_locals_dict, jsproxy_gen_instance_locals_dict_table); // dict object built from the table above
+
+MP_DEFINE_CONST_OBJ_TYPE( // Type of the generator objects created by jsproxy_new_gen().
+    mp_type_jsproxy_gen,
+    MP_QSTR_generator, // the type's name
+    MP_TYPE_FLAG_ITER_IS_ITERNEXT, // the iter slot below holds an iternext function
+    iter, jsproxy_gen_instance_iternext,
+    locals_dict, &jsproxy_gen_instance_locals_dict // close / send / throw
+    );
+
+static mp_obj_t jsproxy_new_gen(mp_obj_t self_in, mp_obj_iter_buf_t *iter_buf) {
+    assert(sizeof(jsproxy_gen_t) <= sizeof(mp_obj_iter_buf_t)); // the generator lives inside iter_buf
+    jsproxy_gen_t *gen = (jsproxy_gen_t *)iter_buf;
+    gen->state = JSOBJ_GEN_STATE_WAITING;
+    gen->thenable = self_in;
+    gen->base.type = &mp_type_jsproxy_gen;
+    return MP_OBJ_FROM_PTR(gen);
+}
+
+/******************************************************************************/
+
+#if MICROPY_PY_ASYNCIO
+extern mp_obj_t mp_asyncio_context; // defined outside this file; jsproxy_getiter() skips it when MP_OBJ_NULL and otherwise reads it as a dict
+#endif
+
+static mp_obj_t jsproxy_getiter(mp_obj_t self_in, mp_obj_iter_buf_t *iter_buf) {
+    mp_obj_jsproxy_t *proxy = MP_OBJ_TO_PTR(self_in);
+    if (!has_attr(proxy->ref, "then")) {
+        // No "then" attribute, so not a thenable: use the JavaScript iterator path.
+        return jsproxy_new_it(self_in, iter_buf);
+    }
+    #if MICROPY_PY_ASYNCIO
+    // If asyncio is active and the caller is a task, the thenable is wrapped in a
+    // ThenableEvent and the task waits on that event.  The task is then decoupled
+    // from the thenable and remains cancellable.
+    if (mp_asyncio_context != MP_OBJ_NULL) {
+        mp_obj_t task = mp_obj_dict_get(mp_asyncio_context, MP_OBJ_NEW_QSTR(MP_QSTR_cur_task));
+        if (task != mp_const_none) {
+            mp_obj_t event_cls = mp_obj_dict_get(mp_asyncio_context, MP_OBJ_NEW_QSTR(MP_QSTR_ThenableEvent));
+            mp_obj_t event = mp_call_function_1(event_cls, self_in);
+            mp_obj_t wait_method[2];
+            mp_load_method(event, MP_QSTR_wait, wait_method);
+            return mp_getiter(mp_call_method_n_kw(0, 0, wait_method), iter_buf);
+        }
+    }
+    #endif
+    // Outside an asyncio task: a generator that yields the thenable once.
+    return jsproxy_new_gen(self_in, iter_buf);
+}
+
+MP_DEFINE_CONST_OBJ_TYPE( // Python type of the objects created by mp_obj_new_jsproxy().
+    mp_type_jsproxy,
+    MP_QSTR_JsProxy, // the type's name
+    MP_TYPE_FLAG_ITER_IS_GETITER, // the iter slot below holds a getiter function
+    print, jsproxy_print,
+    call, jsproxy_call,
+    attr, mp_obj_jsproxy_attr,
+    subscr, jsproxy_subscr,
+    iter, jsproxy_getiter // picks the thenable generator or the JS iterator
+    );
+
+mp_obj_t mp_obj_new_jsproxy(int ref) { // Create a JsProxy object that stores the JavaScript-side reference `ref`.
+    mp_obj_jsproxy_t *o = mp_obj_malloc_with_finaliser(mp_obj_jsproxy_t, &mp_type_jsproxy); // allocated through the "with finaliser" allocator
+    o->ref = ref;
+    return MP_OBJ_FROM_PTR(o);
+}
diff --git a/tulip/amyrepl/objpyproxy.js b/tulip/amyrepl/objpyproxy.js
new file mode 100644
index 000000000..3b94f8aad
--- /dev/null
+++ b/tulip/amyrepl/objpyproxy.js
@@ -0,0 +1,241 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+class PyProxy { // JavaScript-side handle for a Python object, identified by the integer reference `_ref`.
+    constructor(ref) {
+        this._ref = ref;
+    }
+
+    // Convert js_obj -- which is possibly a PyProxy -- to a JavaScript object.
+    static toJs(js_obj) { // Tuples/lists become arrays and dicts become plain objects (recursively); anything else is returned unchanged.
+        if (!(js_obj instanceof PyProxy)) {
+            return js_obj;
+        }
+
+        const type = Module.ccall(
+            "proxy_c_to_js_get_type",
+            "number",
+            ["number"],
+            [js_obj._ref],
+        );
+
+        if (type === 1 || type === 2) {
+            // List or tuple.
+            const array_ref = Module._malloc(2 * 4); // receives two 32-bit words: length, items pointer
+            const item = Module._malloc(3 * 4); // 3-word scratch buffer reused for every converted element
+            Module.ccall(
+                "proxy_c_to_js_get_array",
+                "null",
+                ["number", "pointer"],
+                [js_obj._ref, array_ref],
+            );
+            const len = Module.getValue(array_ref, "i32");
+            const items_ptr = Module.getValue(array_ref + 4, "i32");
+            const js_array = [];
+            for (let i = 0; i < len; ++i) {
+                Module.ccall(
+                    "proxy_convert_mp_to_js_obj_cside",
+                    "null",
+                    ["pointer", "pointer"],
+                    [Module.getValue(items_ptr + i * 4, "i32"), item], // element i is read as a 4-byte value
+                );
+                const js_item = proxy_convert_mp_to_js_obj_jsside(item);
+                js_array.push(PyProxy.toJs(js_item)); // recurse so nested containers are converted too
+            }
+            Module._free(array_ref);
+            Module._free(item);
+            return js_array;
+        }
+
+        if (type === 3) {
+            // Dict.
+            const map_ref = Module._malloc(2 * 4); // receives two 32-bit words: slot count, table pointer
+            const item = Module._malloc(3 * 4); // 3-word scratch buffer reused for keys and values
+            Module.ccall(
+                "proxy_c_to_js_get_dict",
+                "null",
+                ["number", "pointer"],
+                [js_obj._ref, map_ref],
+            );
+            const alloc = Module.getValue(map_ref, "i32");
+            const table_ptr = Module.getValue(map_ref + 4, "i32");
+            const js_dict = {};
+            for (let i = 0; i < alloc; ++i) { // each table slot is 8 bytes: key at +0, value at +4
+                const mp_key = Module.getValue(table_ptr + i * 8, "i32");
+                if (mp_key > 8) { // presumably skips unused slots, whose key is a small marker value -- TODO confirm
+                    // Convert key to JS object.
+                    Module.ccall(
+                        "proxy_convert_mp_to_js_obj_cside",
+                        "null",
+                        ["pointer", "pointer"],
+                        [mp_key, item],
+                    );
+                    const js_key = proxy_convert_mp_to_js_obj_jsside(item);
+
+                    // Convert value to JS object.
+                    const mp_value = Module.getValue(
+                        table_ptr + i * 8 + 4,
+                        "i32",
+                    );
+                    Module.ccall(
+                        "proxy_convert_mp_to_js_obj_cside",
+                        "null",
+                        ["pointer", "pointer"],
+                        [mp_value, item],
+                    );
+                    const js_value = proxy_convert_mp_to_js_obj_jsside(item);
+
+                    // Populate JS dict.
+                    js_dict[js_key] = PyProxy.toJs(js_value); // js_key is used as a property name of a plain object
+                }
+            }
+            Module._free(map_ref);
+            Module._free(item);
+            return js_dict;
+        }
+
+        // Cannot convert to JS, leave as a PyProxy.
+        return js_obj;
+    }
+}
+
+// This handler's goal is to allow minimal introspection
+// of Python references from the JS world/utilities.
+const py_proxy_handler = {
+    isExtensible() {
+        return true;
+    },
+    ownKeys(target) {
+        const value = Module._malloc(3 * 4);
+        Module.ccall(
+            "proxy_c_to_js_dir",
+            "null",
+            ["number", "pointer"],
+            [target._ref, value],
+        );
+        const dir = proxy_convert_mp_to_js_obj_jsside_with_free(value);
+        return PyProxy.toJs(dir).filter((attr) => !attr.startsWith("__"));
+    },
+    getOwnPropertyDescriptor(target, prop) {
+        return {
+            value: target[prop],
+            enumerable: true,
+            writable: true,
+            configurable: true,
+        };
+    },
+    has(target, prop) {
+        return Module.ccall(
+            "proxy_c_to_js_has_attr",
+            "number",
+            ["number", "string"],
+            [target._ref, prop],
+        );
+    },
+    get(target, prop) {
+        if (prop === "_ref") {
+            return target._ref;
+        }
+        if (prop === "then") {
+            return null;
+        }
+
+        if (prop === Symbol.iterator) {
+            // Get the Python object iterator, and return a JavaScript generator.
+            const iter_ref = Module.ccall(
+                "proxy_c_to_js_get_iter",
+                "number",
+                ["number"],
+                [target._ref],
+            );
+            return function* () {
+                const value = Module._malloc(3 * 4);
+                while (true) {
+                    const valid = Module.ccall(
+                        "proxy_c_to_js_iternext",
+                        "number",
+                        ["number", "pointer"],
+                        [iter_ref, value],
+                    );
+                    if (!valid) {
+                        break;
+                    }
+                    yield proxy_convert_mp_to_js_obj_jsside(value);
+                }
+                Module._free(value);
+            };
+        }
+
+        const value = Module._malloc(3 * 4);
+        Module.ccall(
+            "proxy_c_to_js_lookup_attr",
+            "null",
+            ["number", "string", "pointer"],
+            [target._ref, prop, value],
+        );
+        return proxy_convert_mp_to_js_obj_jsside_with_free(value);
+    },
+    set(target, prop, value) {
+        const value_conv = Module._malloc(3 * 4);
+        proxy_convert_js_to_mp_obj_jsside(value, value_conv);
+        const ret = Module.ccall(
+            "proxy_c_to_js_store_attr",
+            "number",
+            ["number", "string", "number"],
+            [target._ref, prop, value_conv],
+        );
+        Module._free(value_conv);
+        return ret;
+    },
+    deleteProperty(target, prop) {
+        return Module.ccall(
+            "proxy_c_to_js_delete_attr",
+            "number",
+            ["number", "string"],
+            [target._ref, prop],
+        );
+    },
+};
+
+// PyProxy of a Python generator, that implements the thenable interface.
+class PyProxyThenable {
+    constructor(ref) {
+        this._ref = ref;
+    }
+
+    then(resolve, reject) {
+        const values = Module._malloc(3 * 3 * 4);
+        proxy_convert_js_to_mp_obj_jsside(resolve, values + 3 * 4);
+        proxy_convert_js_to_mp_obj_jsside(reject, values + 2 * 3 * 4);
+        Module.ccall(
+            "proxy_c_to_js_resume",
+            "null",
+            ["number", "pointer"],
+            [this._ref, values],
+        );
+        return proxy_convert_mp_to_js_obj_jsside_with_free(values);
+    }
+}
diff --git a/tulip/amyrepl/proxy_c.c b/tulip/amyrepl/proxy_c.c
new file mode 100644
index 000000000..00abc43bf
--- /dev/null
+++ b/tulip/amyrepl/proxy_c.c
@@ -0,0 +1,615 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "emscripten.h"
+#include "py/builtin.h"
+#include "py/runtime.h"
+#include "proxy_c.h"
+
+// Number of static entries at the start of proxy_c_ref.
+#define PROXY_C_REF_NUM_STATIC (1)
+
+// These constants should match the constants in proxy_js.js.
+
+enum { // Kind tags for Python values passed to JavaScript; stored in out[0] by the mp_to_js converters.
+    PROXY_KIND_MP_EXCEPTION = -1, // written by proxy_convert_mp_to_js_exc_cside()
+    PROXY_KIND_MP_NULL = 0, // MP_OBJ_NULL
+    PROXY_KIND_MP_NONE = 1,
+    PROXY_KIND_MP_BOOL = 2,
+    PROXY_KIND_MP_INT = 3,
+    PROXY_KIND_MP_FLOAT = 4,
+    PROXY_KIND_MP_STR = 5,
+    PROXY_KIND_MP_CALLABLE = 6,
+    PROXY_KIND_MP_GENERATOR = 7,
+    PROXY_KIND_MP_OBJECT = 8, // any other object, referenced through proxy_c_ref
+    PROXY_KIND_MP_JSPROXY = 9, // payload is a JavaScript-side reference
+    PROXY_KIND_MP_EXISTING = 10, // payload is the value returned by js_check_existing()
+};
+
+enum { // Kind tags for JavaScript values passed to Python; read from value[0] by proxy_convert_js_to_mp_obj_cside().
+    PROXY_KIND_JS_UNDEFINED = 0,
+    PROXY_KIND_JS_NULL = 1,
+    PROXY_KIND_JS_BOOLEAN = 2,
+    PROXY_KIND_JS_INTEGER = 3,
+    PROXY_KIND_JS_DOUBLE = 4,
+    PROXY_KIND_JS_STRING = 5, // value[1] is the length, value[2] the data pointer
+    PROXY_KIND_JS_OBJECT = 6, // value[1] is a JavaScript-side reference
+    PROXY_KIND_JS_PYPROXY = 7, // value[1] is an index into proxy_c_ref
+};
+
+MP_DEFINE_CONST_OBJ_TYPE( // Type with no slots, used only for the object that stands for JavaScript `undefined`.
+    mp_type_undefined,
+    MP_QSTR_undefined,
+    MP_TYPE_FLAG_NONE
+    );
+
+static const mp_obj_base_t mp_const_undefined_obj = {&mp_type_undefined}; // the single instance of that type
+
+#define mp_const_undefined (MP_OBJ_FROM_PTR(&mp_const_undefined_obj)) // object handle for that instance
+
+MP_DEFINE_EXCEPTION(JsException, Exception) // exception type JsException, derived from Exception
+
+// Index to start searching for the next available slot in proxy_c_ref.
+static size_t proxy_c_ref_next;
+
+void proxy_c_init(void) { // Create the empty reference list and pointer dict used by the proxy layer.
+    MP_STATE_PORT(proxy_c_ref) = mp_obj_new_list(0, NULL);
+    MP_STATE_PORT(proxy_c_dict) = mp_obj_new_dict(0);
+    mp_obj_list_append(MP_STATE_PORT(proxy_c_ref), MP_OBJ_NULL); // occupy slot 0, the single static entry
+    proxy_c_ref_next = PROXY_C_REF_NUM_STATIC; // searches for free slots start after the static entries
+}
+
+MP_REGISTER_ROOT_POINTER(mp_obj_t proxy_c_ref); // list of Python objects referenced from JavaScript, indexed by id
+MP_REGISTER_ROOT_POINTER(mp_obj_t proxy_c_dict); // dict from object pointer (as int) to its id in proxy_c_ref
+
+// Store obj in proxy_c_ref and return its id (slot index).  obj cannot be MP_OBJ_NULL.
+static inline size_t proxy_c_add_obj(mp_obj_t obj) {
+    // Search for the first free slot in proxy_c_ref.
+    size_t id = 0; // 0 doubles as "no free slot found yet"
+    mp_obj_list_t *l = (mp_obj_list_t *)MP_OBJ_TO_PTR(MP_STATE_PORT(proxy_c_ref));
+    while (proxy_c_ref_next < l->len) {
+        if (l->items[proxy_c_ref_next] == MP_OBJ_NULL) {
+            // Free slot found, reuse it.
+            id = proxy_c_ref_next;
+            ++proxy_c_ref_next; // the next search resumes after this slot
+            l->items[id] = obj;
+            break;
+        }
+        ++proxy_c_ref_next;
+    }
+
+    if (id == 0) {
+        // No free slots, so grow proxy_c_ref by one (append at the end of the list).
+        id = l->len;
+        mp_obj_list_append(MP_STATE_PORT(proxy_c_ref), obj);
+        proxy_c_ref_next = l->len; // points one past the end, so the next call appends again
+    }
+
+    // Add the object to proxy_c_dict, keyed by the object pointer, with value the object id.
+    mp_obj_t obj_key = mp_obj_new_int_from_uint((uintptr_t)obj);
+    mp_map_elem_t *elem = mp_map_lookup(mp_obj_dict_get_map(MP_STATE_PORT(proxy_c_dict)), obj_key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
+    elem->value = mp_obj_new_int_from_uint(id); // an existing entry for the same pointer is overwritten with the new id
+
+    return id;
+}
+
+EM_JS(int, js_check_existing, (int c_ref), { // JavaScript helper: forwards to proxy_js_check_existing(c_ref) and returns its result.
+    return proxy_js_check_existing(c_ref);
+});
+
+// Return -1 if obj has no entry in proxy_c_dict, else the result of js_check_existing() for its id.  obj cannot be MP_OBJ_NULL.
+static inline int proxy_c_check_existing(mp_obj_t obj) {
+    mp_obj_t obj_key = mp_obj_new_int_from_uint((uintptr_t)obj); // the dict is keyed by the object's address
+    mp_map_elem_t *elem = mp_map_lookup(mp_obj_dict_get_map(MP_STATE_PORT(proxy_c_dict)), obj_key, MP_MAP_LOOKUP);
+    if (elem == NULL) {
+        return -1;
+    }
+    uint32_t c_ref = mp_obj_int_get_truncated(elem->value);
+    return js_check_existing(c_ref); // the caller treats a value >= 0 as an existing JavaScript-side reference
+}
+
+static inline mp_obj_t proxy_c_get_obj(uint32_t c_ref) { // Return the object stored in slot c_ref of proxy_c_ref; the index is not range-checked here.
+    return ((mp_obj_list_t *)MP_OBJ_TO_PTR(MP_STATE_PORT(proxy_c_ref)))->items[c_ref];
+}
+
+void proxy_c_free_obj(uint32_t c_ref) {
+    if (c_ref < PROXY_C_REF_NUM_STATIC) {
+        return; // the static entries at the start of proxy_c_ref are never released
+    }
+    mp_obj_list_t *refs = (mp_obj_list_t *)MP_OBJ_TO_PTR(MP_STATE_PORT(proxy_c_ref));
+    mp_map_t *ids_by_ptr = mp_obj_dict_get_map(MP_STATE_PORT(proxy_c_dict));
+    // Drop the pointer -> id entry only if it still names this id; a more recent proxy of the same object may have replaced it.
+    mp_obj_t obj_key = mp_obj_new_int_from_uint((uintptr_t)refs->items[c_ref]);
+    mp_map_elem_t *found = mp_map_lookup(ids_by_ptr, obj_key, MP_MAP_LOOKUP);
+    if (found != NULL && mp_obj_int_get_truncated(found->value) == c_ref) {
+        mp_map_lookup(ids_by_ptr, obj_key, MP_MAP_LOOKUP_REMOVE_IF_FOUND);
+    }
+    // Empty the slot so the GC can reclaim the object, and let the next slot search start no later than here.
+    refs->items[c_ref] = MP_OBJ_NULL;
+    proxy_c_ref_next = MIN(proxy_c_ref_next, c_ref);
+}
+
+mp_obj_t proxy_convert_js_to_mp_obj_cside(uint32_t *value) {
+    switch (value[0]) { // value[0] is the PROXY_KIND_JS_* tag, value[1] and value[2] the payload
+        case PROXY_KIND_JS_UNDEFINED:
+            return mp_const_undefined;
+        case PROXY_KIND_JS_NULL:
+            return mp_const_none;
+        case PROXY_KIND_JS_BOOLEAN:
+            return mp_obj_new_bool(value[1]);
+        case PROXY_KIND_JS_INTEGER:
+            return mp_obj_new_int(value[1]);
+        case PROXY_KIND_JS_DOUBLE:
+            return mp_obj_new_float_from_d(*(double *)&value[1]);
+        case PROXY_KIND_JS_STRING: {
+            mp_obj_t str = mp_obj_new_str((void *)value[2], value[1]);
+            free((void *)value[2]); // the character buffer is released once it has been copied into the str
+            return str;
+        }
+        case PROXY_KIND_JS_PYPROXY:
+            return proxy_c_get_obj(value[1]);
+        default: return mp_obj_new_jsproxy(value[1]); // PROXY_KIND_JS_OBJECT
+    }
+}
+
+void proxy_convert_mp_to_js_obj_cside(mp_obj_t obj, uint32_t *out) { // Encode obj into out: out[0] gets a PROXY_KIND_MP_* tag, out[1] and out[2] the payload (if any).
+    uint32_t kind;
+    int js_ref;
+    if (obj == MP_OBJ_NULL) {
+        kind = PROXY_KIND_MP_NULL;
+    } else if (obj == mp_const_none) {
+        kind = PROXY_KIND_MP_NONE;
+    } else if (mp_obj_is_bool(obj)) {
+        kind = PROXY_KIND_MP_BOOL;
+        out[1] = mp_obj_is_true(obj);
+    } else if (mp_obj_is_int(obj)) {
+        kind = PROXY_KIND_MP_INT;
+        out[1] = mp_obj_get_int_truncated(obj); // TODO support big int
+    } else if (mp_obj_is_float(obj)) {
+        kind = PROXY_KIND_MP_FLOAT;
+        *(double *)&out[1] = mp_obj_get_float(obj); // the double occupies out[1] and out[2]
+    } else if (mp_obj_is_str(obj)) {
+        kind = PROXY_KIND_MP_STR;
+        size_t len;
+        const char *str = mp_obj_str_get_data(obj, &len);
+        out[1] = len;
+        out[2] = (uintptr_t)str; // pointer to the string's own data; no copy is made here
+    } else if (obj == mp_const_undefined) {
+        kind = PROXY_KIND_MP_JSPROXY;
+        out[1] = 1; // presumably the JavaScript-side reference reserved for `undefined` -- TODO confirm
+    } else if (mp_obj_is_jsproxy(obj)) {
+        kind = PROXY_KIND_MP_JSPROXY;
+        out[1] = mp_obj_jsproxy_get_ref(obj); // hand back the JavaScript reference the proxy wraps
+    } else if ((js_ref = proxy_c_check_existing(obj)) >= 0) {
+        kind = PROXY_KIND_MP_EXISTING;
+        out[1] = js_ref;
+    } else if (mp_obj_get_type(obj) == &mp_type_JsException) {
+        mp_obj_exception_t *exc = MP_OBJ_TO_PTR(obj);
+        if (exc->args->len > 0 && mp_obj_is_jsproxy(exc->args->items[0])) {
+            kind = PROXY_KIND_MP_JSPROXY;
+            out[1] = mp_obj_jsproxy_get_ref(exc->args->items[0]); // pass on the JavaScript object held as the first exception argument
+        } else {
+            kind = PROXY_KIND_MP_OBJECT;
+            out[1] = proxy_c_add_obj(obj);
+        }
+    } else {
+        if (mp_obj_is_callable(obj)) {
+            kind = PROXY_KIND_MP_CALLABLE;
+        } else if (mp_obj_is_type(obj, &mp_type_gen_instance)) {
+            kind = PROXY_KIND_MP_GENERATOR;
+        } else {
+            kind = PROXY_KIND_MP_OBJECT;
+        }
+        out[1] = proxy_c_add_obj(obj); // register the object and pass its id
+    }
+    out[0] = kind;
+}
+
+void proxy_convert_mp_to_js_exc_cside(void *exc, uint32_t *out) { // Encode exception exc as text: out[0] = tag, out[1] = length, out[2] = malloc'ed buffer.
+    out[0] = PROXY_KIND_MP_EXCEPTION;
+    vstr_t vstr;
+    mp_print_t print;
+    vstr_init_print(&vstr, 64, &print);
+    vstr_add_str(&vstr, qstr_str(mp_obj_get_type(MP_OBJ_FROM_PTR(exc))->name)); // text starts with the exception type name
+    vstr_add_char(&vstr, '\x04'); // byte 0x04 separates the type name from the printed exception
+    mp_obj_print_exception(&print, MP_OBJ_FROM_PTR(exc));
+    char *s = malloc(vstr_len(&vstr) + 1); // NOTE(review): result is not checked for NULL, and the extra byte is never written
+    memcpy(s, vstr_str(&vstr), vstr_len(&vstr));
+    out[1] = vstr_len(&vstr);
+    out[2] = (uintptr_t)s; // this function does not free the buffer
+    vstr_clear(&vstr);
+}
+
+void proxy_c_to_js_call(uint32_t c_ref, uint32_t n_args, uint32_t *args_value, uint32_t *out) { // Call the Python object in slot c_ref with n_args converted arguments; out receives the result or the exception.
+    external_call_depth_inc();
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_obj_t args[n_args];
+        for (size_t i = 0; i < n_args; ++i) {
+            args[i] = proxy_convert_js_to_mp_obj_cside(args_value + i * 3); // each argument occupies three 32-bit words
+        }
+        mp_obj_t obj = proxy_c_get_obj(c_ref);
+        mp_obj_t member = mp_call_function_n_kw(obj, n_args, 0, args); // positional arguments only
+        nlr_pop();
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_obj_cside(member, out);
+    } else {
+        // uncaught exception
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_exc_cside(nlr.ret_val, out);
+    }
+}
+
+void proxy_c_to_js_dir(uint32_t c_ref, uint32_t *out) { // List the names of the object in slot c_ref: its keys for a dict, the dir() result otherwise.
+    external_call_depth_inc();
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_obj_t obj = proxy_c_get_obj(c_ref);
+        mp_obj_t dir;
+        if (mp_obj_is_dict_or_ordereddict(obj)) {
+            mp_map_t *map = mp_obj_dict_get_map(obj);
+            dir = mp_obj_new_list(0, NULL);
+            for (size_t i = 0; i < map->alloc; i++) {
+                if (mp_map_slot_is_filled(map, i)) {
+                    mp_obj_list_append(dir, map->table[i].key); // collect the key of every filled slot
+                }
+            }
+        } else {
+            mp_obj_t args[1] = { obj };
+            dir = mp_builtin_dir_obj.fun.var(1, args); // invoke the builtin dir function object with one argument
+        }
+        nlr_pop();
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_obj_cside(dir, out);
+    } else {
+        // uncaught exception
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_exc_cside(nlr.ret_val, out);
+    }
+}
+
+bool proxy_c_to_js_has_attr(uint32_t c_ref, const char *attr_in) {
+    // Tell whether the Python object registered under c_ref has something named
+    // attr_in: a key for dict objects, an attribute or method for anything else.
+    mp_obj_t target = proxy_c_get_obj(c_ref);
+    qstr name = qstr_from_str(attr_in);
+
+    if (!mp_obj_is_dict_or_ordereddict(target)) {
+        // dest[0] is left as MP_OBJ_NULL when the lookup finds nothing.
+        mp_obj_t dest[2];
+        mp_load_method_protected(target, name, dest, true);
+        return dest[0] != MP_OBJ_NULL;
+    }
+
+    mp_map_t *keys = mp_obj_dict_get_map(target);
+    return mp_map_lookup(keys, MP_OBJ_NEW_QSTR(name), MP_MAP_LOOKUP) != NULL;
+}
+
+void proxy_c_to_js_lookup_attr(uint32_t c_ref, const char *attr_in, uint32_t *out) { // Look up attr_in on the object in slot c_ref; out receives the value or the exception.
+    external_call_depth_inc();
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_obj_t obj = proxy_c_get_obj(c_ref);
+        qstr attr = qstr_from_str(attr_in);
+        mp_obj_t member;
+        if (mp_obj_is_dict_or_ordereddict(obj)) {
+            // Lookup the requested attribute as a key in the target dict, and
+            // return `undefined` if not found (instead of raising `KeyError`).
+            mp_obj_dict_t *self = MP_OBJ_TO_PTR(obj);
+            mp_map_elem_t *elem = mp_map_lookup(&self->map, MP_OBJ_NEW_QSTR(attr), MP_MAP_LOOKUP);
+            if (elem == NULL) {
+                member = mp_const_undefined;
+            } else {
+                member = elem->value;
+            }
+        } else {
+            // Lookup the requested attribute as a member/method of the target object.
+            member = mp_load_attr(obj, attr);
+        }
+        nlr_pop();
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_obj_cside(member, out);
+    } else {
+        // uncaught exception
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_exc_cside(nlr.ret_val, out);
+    }
+}
+
+static bool proxy_c_to_js_store_helper(uint32_t c_ref, const char *attr_in, uint32_t *value_in) { // Store (value_in != NULL) or delete (value_in == NULL) attr_in on the object in slot c_ref; false if an exception was raised.
+    external_call_depth_inc();
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_obj_t obj = proxy_c_get_obj(c_ref);
+        qstr attr = qstr_from_str(attr_in);
+
+        mp_obj_t value = MP_OBJ_NULL; // stays MP_OBJ_NULL for a delete request
+        if (value_in != NULL) {
+            value = proxy_convert_js_to_mp_obj_cside(value_in);
+        }
+
+        if (mp_obj_is_dict_or_ordereddict(obj)) { // for dicts the name is used as a key
+            if (value == MP_OBJ_NULL) {
+                mp_obj_dict_delete(obj, MP_OBJ_NEW_QSTR(attr));
+            } else {
+                mp_obj_dict_store(obj, MP_OBJ_NEW_QSTR(attr), value);
+            }
+        } else {
+            mp_store_attr(obj, attr, value); // value may be MP_OBJ_NULL here (the delete case)
+        }
+        nlr_pop();
+        external_call_depth_dec();
+        return true;
+    } else {
+        // uncaught exception
+        external_call_depth_dec();
+        return false; // the exception object itself is not reported to the caller
+    }
+}
+
+bool proxy_c_to_js_store_attr(uint32_t c_ref, const char *attr_in, uint32_t *value_in) { // Set attr_in (or dict key attr_in) to the converted value; true on success.
+    return proxy_c_to_js_store_helper(c_ref, attr_in, value_in);
+}
+
+bool proxy_c_to_js_delete_attr(uint32_t c_ref, const char *attr_in) { // Delete attr_in (or dict key attr_in); true on success.
+    return proxy_c_to_js_store_helper(c_ref, attr_in, NULL); // a NULL value selects the delete path
+}
+
+uint32_t proxy_c_to_js_get_type(uint32_t c_ref) {
+    // Result: 1 = tuple, 2 = list, 3 = dict, 4 = any other type (exact type match only).
+    const mp_obj_type_t *t = mp_obj_get_type(proxy_c_get_obj(c_ref));
+    uint32_t code = 4;
+    if (t == &mp_type_tuple) {
+        code = 1;
+    } else if (t == &mp_type_list) {
+        code = 2;
+    } else if (t == &mp_type_dict) {
+        code = 3;
+    }
+    return code;
+}
+
+void proxy_c_to_js_get_array(uint32_t c_ref, uint32_t *out) { // Expose the item array of the object in slot c_ref: out[0] = length, out[1] = pointer to the items.
+    mp_obj_t obj = proxy_c_get_obj(c_ref);
+    size_t len;
+    mp_obj_t *items;
+    mp_obj_get_array(obj, &len, &items);
+    out[0] = len;
+    out[1] = (uintptr_t)items; // address of the existing item storage; nothing is copied
+}
+
+void proxy_c_to_js_get_dict(uint32_t c_ref, uint32_t *out) { // Expose the map of the dict in slot c_ref: out[0] = number of allocated slots, out[1] = pointer to the table.
+    mp_obj_t obj = proxy_c_get_obj(c_ref);
+    mp_map_t *map = mp_obj_dict_get_map(obj);
+    out[0] = map->alloc; // allocated slots, not the number of entries in use
+    out[1] = (uintptr_t)map->table;
+}
+
+EM_JS(void, js_get_error_info, (int jsref, uint32_t * out_name, uint32_t * out_message), { // JavaScript helper: encode the name and message of proxy_js_ref[jsref] into the two output buffers.
+    const error = proxy_js_ref[jsref];
+    proxy_convert_js_to_mp_obj_jsside(error.name, out_name);
+    proxy_convert_js_to_mp_obj_jsside(error.message, out_message);
+});
+
+mp_obj_t mp_obj_jsproxy_make_js_exception(mp_obj_t error) { // Build a JsException with args (error, name, message) from a JsProxy of a JavaScript error.
+    uint32_t out_name[PVN];
+    uint32_t out_message[PVN];
+    js_get_error_info(mp_obj_jsproxy_get_ref(error), out_name, out_message);
+    mp_obj_t args[3] = {
+        error, // the original proxy is kept as the first exception argument
+        proxy_convert_js_to_mp_obj_cside(out_name),
+        proxy_convert_js_to_mp_obj_cside(out_message),
+    };
+    return mp_obj_new_exception_args(&mp_type_JsException, MP_ARRAY_SIZE(args), args);
+}
+
+/******************************************************************************/
+// Bridge Python iterator to JavaScript iterator protocol.
+
+uint32_t proxy_c_to_js_get_iter(uint32_t c_ref) { // Create an iterator over the object in slot c_ref and return the id of the slot holding it.
+    mp_obj_t obj = proxy_c_get_obj(c_ref);
+    mp_obj_t iter = mp_getiter(obj, NULL); // NOTE(review): unlike its neighbours this call is not wrapped in nlr_push/nlr_pop
+    return proxy_c_add_obj(iter);
+}
+
+bool proxy_c_to_js_iternext(uint32_t c_ref, uint32_t *out) { // Advance the iterator in slot c_ref: false when finished, true when out holds an item or an exception.
+    external_call_depth_inc();
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_obj_t obj = proxy_c_get_obj(c_ref);
+        mp_obj_t iter = mp_iternext_allow_raise(obj); // despite its name, `iter` holds the next item
+        if (iter == MP_OBJ_STOP_ITERATION) {
+            external_call_depth_dec();
+            nlr_pop();
+            return false;
+        }
+        nlr_pop();
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_obj_cside(iter, out);
+        return true;
+    } else {
+        external_call_depth_dec();
+        if (mp_obj_is_subclass_fast(MP_OBJ_FROM_PTR(((mp_obj_base_t *)nlr.ret_val)->type), MP_OBJ_FROM_PTR(&mp_type_StopIteration))) {
+            return false; // a raised StopIteration also just ends the iteration
+        } else {
+            // uncaught exception
+            proxy_convert_mp_to_js_exc_cside(nlr.ret_val, out);
+            return true; // out carries the encoded exception
+        }
+    }
+}
+
+/******************************************************************************/
+// Bridge Python generator to JavaScript thenable.
+
+static const mp_obj_fun_builtin_var_t resume_obj;
+
+EM_JS(void, js_then_resolve, (uint32_t * ret_value, uint32_t * resolve), {
+    const ret_value_js = proxy_convert_mp_to_js_obj_jsside(ret_value);
+    const resolve_js = proxy_convert_mp_to_js_obj_jsside(resolve);
+    resolve_js(ret_value_js);
+});
+
+EM_JS(void, js_then_reject, (uint32_t * ret_value, uint32_t * reject), {
+    // The ret_value object should be a Python exception.  Convert it to a
+    // JavaScript PythonError and pass it as the reason to reject the promise.
+    let ret_value_js;
+    try {
+        ret_value_js = proxy_convert_mp_to_js_obj_jsside(ret_value);
+    } catch(error) {
+        ret_value_js = error;
+    }
+    const reject_js = proxy_convert_mp_to_js_obj_jsside(reject);
+    reject_js(ret_value_js);
+});
+
+// *FORMAT-OFF*
+EM_JS(void, js_then_continue, (int jsref, uint32_t * py_resume, uint32_t * resolve, uint32_t * reject, uint32_t * out), {
+    const py_resume_js = proxy_convert_mp_to_js_obj_jsside(py_resume);
+    const resolve_js = proxy_convert_mp_to_js_obj_jsside(resolve);
+    const reject_js = proxy_convert_mp_to_js_obj_jsside(reject);
+    const ret = proxy_js_ref[jsref].then(
+        (result) => {
+            // The Promise is fulfilled on the JavaScript side.  Take the result and
+            // send it to the encapsulating generator on the Python side, so it
+            // becomes the result of the "yield from" that deferred to this Promise.
+            py_resume_js(result, null, resolve_js, reject_js);
+        },
+        (reason) => {
+            // The Promise is rejected on the JavaScript side.  Take the reason and
+            // throw it into the encapsulating generator on the Python side.
+            py_resume_js(null, reason, resolve_js, reject_js);
+        },
+    );
+    proxy_convert_js_to_mp_obj_jsside(ret, out);
+});
+// *FORMAT-ON*
+
+EM_JS(void, create_promise, (uint32_t * out_set, uint32_t * out_promise), {
+    const out_set_js = proxy_convert_mp_to_js_obj_jsside(out_set);
+    const promise = new Promise(out_set_js);
+    proxy_convert_js_to_mp_obj_jsside(promise, out_promise);
+});
+
+static mp_obj_t proxy_resume_execute(mp_obj_t self_in, mp_obj_t send_value, mp_obj_t throw_value, mp_obj_t resolve, mp_obj_t reject) {
+    if (throw_value != MP_OBJ_NULL && throw_value != mp_const_none) {
+        if (send_value == mp_const_none) {
+            send_value = MP_OBJ_NULL;
+        }
+        // Ensure that the `throw_value` is a proper Python exception instance.
+        if (mp_obj_is_jsproxy(throw_value)) {
+            throw_value = mp_obj_jsproxy_make_js_exception(throw_value);
+        } else {
+            throw_value = mp_make_raise_obj(throw_value);
+        }
+    } else {
+        throw_value = MP_OBJ_NULL;
+        if (send_value == mp_const_undefined) {
+            send_value = mp_const_none;
+        }
+    }
+    // Resume self_in with the prepared send/throw value; ret_kind picks the branch below.
+    mp_obj_t ret_value;
+    mp_vm_return_kind_t ret_kind = mp_resume(self_in, send_value, throw_value, &ret_value);
+
+    if (ret_kind == MP_VM_RETURN_NORMAL) {
+        uint32_t out_ret_value[PVN];
+        uint32_t out_resolve[PVN];
+        proxy_convert_mp_to_js_obj_cside(ret_value, out_ret_value);
+        proxy_convert_mp_to_js_obj_cside(resolve, out_resolve);
+        js_then_resolve(out_ret_value, out_resolve);
+        return mp_const_none;
+    } else if (ret_kind == MP_VM_RETURN_YIELD) {
+        // If ret_value is None then there has been a top-level await of an asyncio primitive.
+        // Otherwise, ret_value should be a JS thenable.
+        // NOTE(review): that is not checked before mp_obj_jsproxy_get_ref() below -- confirm.
+        if (ret_value == mp_const_none) {
+            // Waiting on an asyncio primitive to complete, eg a Task or Event.
+            //
+            // Completion of this primitive will occur when the asyncio.core._top_level_task
+            // Task is made runable and its coroutine's send() method is called.  Need to
+            // construct a Promise that resolves when that send() method is called, because
+            // that will resume the top-level await from the JavaScript side.
+            //
+            // This is accomplished via the asyncio.core.TopLevelCoro class and its methods.
+            mp_obj_t asyncio = mp_import_name(MP_QSTR_asyncio_dot_core, mp_const_none, MP_OBJ_NEW_SMALL_INT(0));
+            mp_obj_t asyncio_core = mp_load_attr(asyncio, MP_QSTR_core);
+            mp_obj_t top_level_coro = mp_load_attr(asyncio_core, MP_QSTR_TopLevelCoro);
+            mp_obj_t top_level_coro_set = mp_load_attr(top_level_coro, MP_QSTR_set);
+            uint32_t out_set[PVN];
+            proxy_convert_mp_to_js_obj_cside(top_level_coro_set, out_set);
+            uint32_t out_promise[PVN];
+            create_promise(out_set, out_promise);
+            ret_value = proxy_convert_js_to_mp_obj_cside(out_promise);
+        }
+
+        mp_obj_t py_resume = mp_obj_new_bound_meth(MP_OBJ_FROM_PTR(&resume_obj), self_in);
+        int ref = mp_obj_jsproxy_get_ref(ret_value);
+        uint32_t out_py_resume[PVN];
+        uint32_t out_resolve[PVN];
+        uint32_t out_reject[PVN];
+        proxy_convert_mp_to_js_obj_cside(py_resume, out_py_resume);
+        proxy_convert_mp_to_js_obj_cside(resolve, out_resolve);
+        proxy_convert_mp_to_js_obj_cside(reject, out_reject);
+        uint32_t out[PVN];
+        js_then_continue(ref, out_py_resume, out_resolve, out_reject, out);
+        return proxy_convert_js_to_mp_obj_cside(out);
+    } else { // ret_kind == MP_VM_RETURN_EXCEPTION;
+        // Pass the exception through as an object to reject the promise (don't raise/throw it).
+        uint32_t out_ret_value[PVN];
+        uint32_t out_reject[PVN];
+        proxy_convert_mp_to_js_exc_cside(ret_value, out_ret_value);
+        proxy_convert_mp_to_js_obj_cside(reject, out_reject);
+        js_then_reject(out_ret_value, out_reject);
+        return mp_const_none;
+    }
+}
+
+static mp_obj_t resume_fun(size_t n_args, const mp_obj_t *args) {
+    return proxy_resume_execute(args[0], args[1], args[2], args[3], args[4]);
+}
+static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(resume_obj, 5, 5, resume_fun);
+
+void proxy_c_to_js_resume(uint32_t c_ref, uint32_t *args) {
+    external_call_depth_inc();
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) == 0) {
+        mp_obj_t obj = proxy_c_get_obj(c_ref);
+        mp_obj_t resolve = proxy_convert_js_to_mp_obj_cside(args + 1 * PVN); // 2nd proxy value
+        mp_obj_t reject = proxy_convert_js_to_mp_obj_cside(args + 2 * PVN); // 3rd proxy value
+        mp_obj_t ret = proxy_resume_execute(obj, mp_const_none, mp_const_none, resolve, reject);
+        nlr_pop();
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_obj_cside(ret, args); // result is written back over args
+    } else {
+        // uncaught exception: hand it to the JS caller through args as well
+        external_call_depth_dec();
+        proxy_convert_mp_to_js_exc_cside(nlr.ret_val, args);
+    }
+}
diff --git a/tulip/amyrepl/proxy_c.h b/tulip/amyrepl/proxy_c.h
new file mode 100644
index 000000000..d3567c195
--- /dev/null
+++ b/tulip/amyrepl/proxy_c.h
@@ -0,0 +1,62 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MICROPY_INCLUDED_WEBASSEMBLY_PROXY_C_H
+#define MICROPY_INCLUDED_WEBASSEMBLY_PROXY_C_H
+
+#include "py/obj.h"
+
+// proxy value number of items
+#define PVN (3)
+
+typedef struct _mp_obj_jsproxy_t {
+    mp_obj_base_t base;
+    int ref;
+} mp_obj_jsproxy_t;
+
+extern const mp_obj_type_t mp_type_jsproxy;
+extern const mp_obj_type_t mp_type_JsException;
+
+void external_call_depth_inc(void);
+void external_call_depth_dec(void);
+
+void proxy_c_init(void);
+mp_obj_t proxy_convert_js_to_mp_obj_cside(uint32_t *value);
+void proxy_convert_mp_to_js_obj_cside(mp_obj_t obj, uint32_t *out);
+void proxy_convert_mp_to_js_exc_cside(void *exc, uint32_t *out);
+
+mp_obj_t mp_obj_new_jsproxy(int ref);
+void mp_obj_jsproxy_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest);
+
+static inline bool mp_obj_is_jsproxy(mp_obj_t o) {
+    return mp_obj_get_type(o) == &mp_type_jsproxy;
+}
+
+static inline int mp_obj_jsproxy_get_ref(mp_obj_t o) {
+    mp_obj_jsproxy_t *self = MP_OBJ_TO_PTR(o);
+    return self->ref;
+}
+
+#endif // MICROPY_INCLUDED_WEBASSEMBLY_PROXY_C_H
diff --git a/tulip/amyrepl/proxy_js.js b/tulip/amyrepl/proxy_js.js
new file mode 100644
index 000000000..9e7c233e3
--- /dev/null
+++ b/tulip/amyrepl/proxy_js.js
@@ -0,0 +1,313 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023-2024 Damien P. George
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Number of static entries at the start of proxy_js_ref.
+const PROXY_JS_REF_NUM_STATIC = 2;
+
+// These constants should match the constants in proxy_c.c.
+
+const PROXY_KIND_MP_EXCEPTION = -1;
+const PROXY_KIND_MP_NULL = 0;
+const PROXY_KIND_MP_NONE = 1;
+const PROXY_KIND_MP_BOOL = 2;
+const PROXY_KIND_MP_INT = 3;
+const PROXY_KIND_MP_FLOAT = 4;
+const PROXY_KIND_MP_STR = 5;
+const PROXY_KIND_MP_CALLABLE = 6;
+const PROXY_KIND_MP_GENERATOR = 7;
+const PROXY_KIND_MP_OBJECT = 8;
+const PROXY_KIND_MP_JSPROXY = 9;
+const PROXY_KIND_MP_EXISTING = 10;
+
+const PROXY_KIND_JS_UNDEFINED = 0;
+const PROXY_KIND_JS_NULL = 1;
+const PROXY_KIND_JS_BOOLEAN = 2;
+const PROXY_KIND_JS_INTEGER = 3;
+const PROXY_KIND_JS_DOUBLE = 4;
+const PROXY_KIND_JS_STRING = 5;
+const PROXY_KIND_JS_OBJECT = 6;
+const PROXY_KIND_JS_PYPROXY = 7;
+
+class PythonError extends Error {
+    constructor(exc_type, exc_details) {
+        super(exc_details);
+        this.name = "PythonError";
+        this.type = exc_type;
+    }
+}
+
+function proxy_js_init() {
+    globalThis.proxy_js_ref = [globalThis, undefined];
+    globalThis.proxy_js_ref_next = PROXY_JS_REF_NUM_STATIC;
+    globalThis.proxy_js_map = new Map();
+    globalThis.proxy_js_existing = [undefined];
+    globalThis.pyProxyFinalizationRegistry = new FinalizationRegistry(
+        (cRef) => {
+            globalThis.proxy_js_map.delete(cRef);
+            Module.ccall("proxy_c_free_obj", "null", ["number"], [cRef]);
+        },
+    );
+}
+
+// Check if the c_ref (Python proxy index) has a corresponding JavaScript-side PyProxy
+// associated with it.  If so, take a concrete reference to this PyProxy from the WeakRef
+// and put it in proxy_js_existing, to be referenced and reused by PROXY_KIND_MP_EXISTING.
+function proxy_js_check_existing(c_ref) {
+    const existing_obj = globalThis.proxy_js_map.get(c_ref)?.deref();
+    if (existing_obj === undefined) {
+        return -1;
+    }
+
+    // Search for a free slot in proxy_js_existing.
+    for (let i = 0; i < globalThis.proxy_js_existing.length; ++i) {
+        if (globalThis.proxy_js_existing[i] === undefined) {
+            // Free slot found, put existing_obj here and return the index.
+            globalThis.proxy_js_existing[i] = existing_obj;
+            return i;
+        }
+    }
+
+    // No free slot, so append to proxy_js_existing and return the new index.
+    globalThis.proxy_js_existing.push(existing_obj);
+    return globalThis.proxy_js_existing.length - 1;
+}
+
+// js_obj cannot be undefined
+function proxy_js_add_obj(js_obj) {
+    // Search for the first free slot in proxy_js_ref.
+    while (proxy_js_ref_next < proxy_js_ref.length) {
+        if (proxy_js_ref[proxy_js_ref_next] === undefined) {
+            // Free slot found, reuse it.
+            const id = proxy_js_ref_next;
+            ++proxy_js_ref_next;
+            proxy_js_ref[id] = js_obj;
+            return id;
+        }
+        ++proxy_js_ref_next;
+    }
+
+    // No free slots, so grow proxy_js_ref by one (append at the end of the array).
+    const id = proxy_js_ref.length;
+    proxy_js_ref[id] = js_obj;
+    proxy_js_ref_next = proxy_js_ref.length;
+    return id;
+}
+
+function proxy_call_python(target, argumentsList) {
+    let args = 0;
+
+    // Strip trailing "undefined" arguments.
+    while (
+        argumentsList.length > 0 &&
+        argumentsList[argumentsList.length - 1] === undefined
+    ) {
+        argumentsList.pop();
+    }
+
+    if (argumentsList.length > 0) {
+        // TODO use stackAlloc/stackRestore?
+        args = Module._malloc(argumentsList.length * 3 * 4);
+        for (const i in argumentsList) {
+            proxy_convert_js_to_mp_obj_jsside(
+                argumentsList[i],
+                args + i * 3 * 4,
+            );
+        }
+    }
+    const value = Module._malloc(3 * 4);
+    Module.ccall(
+        "proxy_c_to_js_call",
+        "null",
+        ["number", "number", "number", "pointer"],
+        [target, argumentsList.length, args, value],
+    );
+    if (argumentsList.length > 0) {
+        Module._free(args);
+    }
+    const ret = proxy_convert_mp_to_js_obj_jsside_with_free(value);
+    if (ret instanceof PyProxyThenable) {
+        // In Python when an async function is called it creates the
+        // corresponding "generator", which must then be executed at
+        // the top level by an asyncio-like scheduler.  In JavaScript
+        // the semantics for async functions is that they are started
+        // immediately (their non-async prefix code is executed immediately)
+        // and only if they await do they return a Promise to delay the
+        // execution of the remainder of the function.
+        //
+        // Emulate the JavaScript behaviour here by resolving the Python
+        // async function.  We assume that the caller who gets this
+        // return is JavaScript.
+        return Promise.resolve(ret);
+    }
+    return ret;
+}
+
+function proxy_convert_js_to_mp_obj_jsside(js_obj, out) {
+    let kind;
+    if (js_obj === undefined) {
+        kind = PROXY_KIND_JS_UNDEFINED;
+    } else if (js_obj === null) {
+        kind = PROXY_KIND_JS_NULL;
+    } else if (typeof js_obj === "boolean") {
+        kind = PROXY_KIND_JS_BOOLEAN;
+        Module.setValue(out + 4, js_obj, "i32");
+    } else if (typeof js_obj === "number") {
+        if (Number.isInteger(js_obj)) {
+            kind = PROXY_KIND_JS_INTEGER;
+            Module.setValue(out + 4, js_obj, "i32");
+        } else {
+            kind = PROXY_KIND_JS_DOUBLE;
+            // double must be stored to an address that's a multiple of 8
+            const temp = (out + 4) & ~7;
+            Module.setValue(temp, js_obj, "double");
+            const double_lo = Module.getValue(temp, "i32");
+            const double_hi = Module.getValue(temp + 4, "i32");
+            Module.setValue(out + 4, double_lo, "i32");
+            Module.setValue(out + 8, double_hi, "i32");
+        }
+    } else if (typeof js_obj === "string") {
+        kind = PROXY_KIND_JS_STRING;
+        const len = Module.lengthBytesUTF8(js_obj);
+        const buf = Module._malloc(len + 1);
+        Module.stringToUTF8(js_obj, buf, len + 1);
+        Module.setValue(out + 4, len, "i32");
+        Module.setValue(out + 8, buf, "i32");
+    } else if (
+        js_obj instanceof PyProxy ||
+        (typeof js_obj === "function" && "_ref" in js_obj) ||
+        js_obj instanceof PyProxyThenable
+    ) {
+        kind = PROXY_KIND_JS_PYPROXY;
+        Module.setValue(out + 4, js_obj._ref, "i32");
+    } else {
+        kind = PROXY_KIND_JS_OBJECT;
+        const id = proxy_js_add_obj(js_obj);
+        Module.setValue(out + 4, id, "i32");
+    }
+    Module.setValue(out + 0, kind, "i32");
+}
+
+function proxy_convert_js_to_mp_obj_jsside_force_double_proxy(js_obj, out) {
+    if (
+        js_obj instanceof PyProxy ||
+        (typeof js_obj === "function" && "_ref" in js_obj) ||
+        js_obj instanceof PyProxyThenable
+    ) {
+        const kind = PROXY_KIND_JS_OBJECT;
+        const id = proxy_js_add_obj(js_obj);
+        Module.setValue(out + 4, id, "i32");
+        Module.setValue(out + 0, kind, "i32");
+    } else {
+        proxy_convert_js_to_mp_obj_jsside(js_obj, out);
+    }
+}
+
+function proxy_convert_mp_to_js_obj_jsside(value) {
+    const kind = Module.getValue(value, "i32");
+    let obj;
+    if (kind === PROXY_KIND_MP_EXCEPTION) {
+        // Exception
+        const str_len = Module.getValue(value + 4, "i32");
+        const str_ptr = Module.getValue(value + 8, "i32");
+        const str = Module.UTF8ToString(str_ptr, str_len);
+        Module._free(str_ptr);
+        const str_split = str.split("\x04");
+        throw new PythonError(str_split[0], str_split[1]);
+    }
+    if (kind === PROXY_KIND_MP_NULL) {
+        // MP_OBJ_NULL
+        throw new Error("NULL object");
+    }
+    if (kind === PROXY_KIND_MP_NONE) {
+        // None
+        obj = null;
+    } else if (kind === PROXY_KIND_MP_BOOL) {
+        // bool
+        obj = Module.getValue(value + 4, "i32") ? true : false;
+    } else if (kind === PROXY_KIND_MP_INT) {
+        // int
+        obj = Module.getValue(value + 4, "i32");
+    } else if (kind === PROXY_KIND_MP_FLOAT) {
+        // float
+        // double must be loaded from an address that's a multiple of 8
+        const temp = (value + 4) & ~7;
+        const double_lo = Module.getValue(value + 4, "i32");
+        const double_hi = Module.getValue(value + 8, "i32");
+        Module.setValue(temp, double_lo, "i32");
+        Module.setValue(temp + 4, double_hi, "i32");
+        obj = Module.getValue(temp, "double");
+    } else if (kind === PROXY_KIND_MP_STR) {
+        // str
+        const str_len = Module.getValue(value + 4, "i32");
+        const str_ptr = Module.getValue(value + 8, "i32");
+        obj = Module.UTF8ToString(str_ptr, str_len);
+    } else if (kind === PROXY_KIND_MP_JSPROXY) {
+        // js proxy
+        const id = Module.getValue(value + 4, "i32");
+        obj = proxy_js_ref[id];
+    } else if (kind === PROXY_KIND_MP_EXISTING) {
+        const id = Module.getValue(value + 4, "i32");
+        obj = globalThis.proxy_js_existing[id];
+        globalThis.proxy_js_existing[id] = undefined;
+    } else {
+        // obj
+        const id = Module.getValue(value + 4, "i32");
+        if (kind === PROXY_KIND_MP_CALLABLE) {
+            obj = (...args) => {
+                return proxy_call_python(id, args);
+            };
+            obj._ref = id;
+        } else if (kind === PROXY_KIND_MP_GENERATOR) {
+            obj = new PyProxyThenable(id);
+        } else {
+            // PROXY_KIND_MP_OBJECT
+            const target = new PyProxy(id);
+            obj = new Proxy(target, py_proxy_handler);
+        }
+        globalThis.pyProxyFinalizationRegistry.register(obj, id);
+        globalThis.proxy_js_map.set(id, new WeakRef(obj));
+    }
+    return obj;
+}
+
+function proxy_convert_mp_to_js_obj_jsside_with_free(value) {
+    const ret = proxy_convert_mp_to_js_obj_jsside(value);
+    Module._free(value);
+    return ret;
+}
+
+function python_index_semantics(target, index_in) {
+    let index = index_in;
+    if (typeof index === "number") {
+        if (index < 0) {
+            index += target.length;
+        }
+        if (index < 0 || index >= target.length) {
+            throw new PythonError("IndexError", "index out of range");
+        }
+    }
+    return index;
+}
diff --git a/tulip/amyrepl/qstrdefsport.h b/tulip/amyrepl/qstrdefsport.h
new file mode 100644
index 000000000..421344bd4
--- /dev/null
+++ b/tulip/amyrepl/qstrdefsport.h
@@ -0,0 +1,4 @@
+// qstrs specific to this port
+// *FORMAT-OFF*
+Q(/lib)
+Q(asyncio.core)
diff --git a/tulip/amyrepl/variants/manifest.py b/tulip/amyrepl/variants/manifest.py
new file mode 100644
index 000000000..e282e3590
--- /dev/null
+++ b/tulip/amyrepl/variants/manifest.py
@@ -0,0 +1,28 @@
+# The asyncio package is built from the standard implementation but with the
+# core scheduler replaced with a custom scheduler that uses the JavaScript
+# runtime (with setTimeout and Promises) to control the scheduling.
+freeze("$(MPY_DIR)/../amy", "amy.py")  # AMY sources, addressed via MPY_DIR
+freeze("$(MPY_DIR)/../amy/experiments", "tulip_piano.py")
+freeze("$(MPY_DIR)/../amy/experiments", "piano_params.py")
+freeze("$(MPY_DIR)/../amy", "juno.py")
+
+package(
+    "asyncio",
+    (
+        "event.py",
+        "funcs.py",
+        "lock.py",
+    ),
+    base_path="$(MPY_DIR)/extmod",
+    opt=3,
+)
+
+package(
+    "asyncio",
+    (
+        "__init__.py",
+        "core.py",
+    ),
+    base_path="$(PORT_DIR)",
+    opt=3,
+)
diff --git a/tulip/amyrepl/variants/pyscript/manifest.py b/tulip/amyrepl/variants/pyscript/manifest.py
new file mode 100644
index 000000000..db088e70d
--- /dev/null
+++ b/tulip/amyrepl/variants/pyscript/manifest.py
@@ -0,0 +1,29 @@
+include("$(PORT_DIR)/variants/manifest.py")
+
+require("abc")
+require("base64")
+require("collections")
+require("collections-defaultdict")
+require("copy")
+require("datetime")
+require("fnmatch")
+require("functools")
+require("gzip")
+require("hmac")
+require("html")
+require("inspect")
+require("io")
+require("itertools")
+require("locale")
+require("logging")
+require("operator")
+require("os")
+require("os-path")
+require("pathlib")
+require("stat")
+require("tarfile")
+require("tarfile-write")
+require("time")
+require("unittest")
+require("uu")
+require("zlib")
diff --git a/tulip/amyrepl/variants/pyscript/mpconfigvariant.h b/tulip/amyrepl/variants/pyscript/mpconfigvariant.h
new file mode 100644
index 000000000..ed8e81280
--- /dev/null
+++ b/tulip/amyrepl/variants/pyscript/mpconfigvariant.h
@@ -0,0 +1,3 @@
+#define MICROPY_CONFIG_ROM_LEVEL                (MICROPY_CONFIG_ROM_LEVEL_FULL_FEATURES)
+#define MICROPY_GC_SPLIT_HEAP                   (1)
+#define MICROPY_GC_SPLIT_HEAP_AUTO              (1)
diff --git a/tulip/amyrepl/variants/pyscript/mpconfigvariant.mk b/tulip/amyrepl/variants/pyscript/mpconfigvariant.mk
new file mode 100644
index 000000000..016b96a99
--- /dev/null
+++ b/tulip/amyrepl/variants/pyscript/mpconfigvariant.mk
@@ -0,0 +1,3 @@
+JSFLAGS += -s ALLOW_MEMORY_GROWTH
+
+FROZEN_MANIFEST ?= variants/pyscript/manifest.py
diff --git a/tulip/amyrepl/variants/standard/mpconfigvariant.h b/tulip/amyrepl/variants/standard/mpconfigvariant.h
new file mode 100644
index 000000000..7be62ea7f
--- /dev/null
+++ b/tulip/amyrepl/variants/standard/mpconfigvariant.h
@@ -0,0 +1 @@
+#define MICROPY_VARIANT_ENABLE_JS_HOOK (1)
diff --git a/tulip/amyrepl/variants/standard/mpconfigvariant.mk b/tulip/amyrepl/variants/standard/mpconfigvariant.mk
new file mode 100644
index 000000000..62ee16190
--- /dev/null
+++ b/tulip/amyrepl/variants/standard/mpconfigvariant.mk
@@ -0,0 +1 @@
+JSFLAGS += -s ASYNCIFY
diff --git a/tulip/esp32s3/boards/manifest.py b/tulip/esp32s3/boards/manifest.py
index 34b37fcdf..d5fa4806a 100644
--- a/tulip/esp32s3/boards/manifest.py
+++ b/tulip/esp32s3/boards/manifest.py
@@ -23,4 +23,6 @@
 freeze("$(MPY_DIR)/../amy", "amy.py")
 freeze("$(MPY_DIR)/../amy", "juno.py")
 freeze("$(MPY_DIR)/../amy", "amy_wave.py")
+freeze("$(MPY_DIR)/../amy/experiments", "tulip_piano.py")
+freeze("$(MPY_DIR)/../amy/experiments", "piano_params.py")
 #freeze("$(MPY_DIR)/lib/micropython-lib/micropython/utarfile", "utarfile.py")
diff --git a/tulip/esp32s3/esp32_common.cmake b/tulip/esp32s3/esp32_common.cmake
index 2092ce128..8af0aa930 100644
--- a/tulip/esp32s3/esp32_common.cmake
+++ b/tulip/esp32s3/esp32_common.cmake
@@ -19,6 +19,11 @@ if(NOT TULIP_SHARED_DIR)
     get_filename_component(TULIP_SHARED_DIR ${CMAKE_CURRENT_LIST_DIR}/../shared ABSOLUTE)
 endif()
 
+# Set location of the ulab directory.
+if(NOT ULAB_DIR)
+    get_filename_component(ULAB_DIR ${CMAKE_CURRENT_LIST_DIR}/../shared/ulab/code ABSOLUTE)
+endif()
+
 # Set location of the amy directory.
 if(NOT AMY_DIR)
     get_filename_component(AMY_DIR ${CMAKE_CURRENT_LIST_DIR}/../../amy ABSOLUTE)
@@ -198,6 +203,39 @@ list(APPEND MICROPY_SOURCE_EXTMOD
     ${AMY_DIR}/src/partials.c
     ${AMY_DIR}/src/pcm.c
     ${AMY_DIR}/src/log2_exp2.c
+    ${ULAB_DIR}/scipy/integrate/integrate.c
+    ${ULAB_DIR}/scipy/linalg/linalg.c
+    ${ULAB_DIR}/scipy/optimize/optimize.c
+    ${ULAB_DIR}/scipy/signal/signal.c
+    ${ULAB_DIR}/scipy/special/special.c
+    ${ULAB_DIR}/ndarray_operators.c
+    ${ULAB_DIR}/ulab_tools.c
+    ${ULAB_DIR}/ndarray.c
+    ${ULAB_DIR}/numpy/ndarray/ndarray_iter.c
+    ${ULAB_DIR}/ndarray_properties.c
+    ${ULAB_DIR}/numpy/approx.c
+    ${ULAB_DIR}/numpy/bitwise.c
+    ${ULAB_DIR}/numpy/compare.c
+    ${ULAB_DIR}/numpy/carray/carray.c
+    ${ULAB_DIR}/numpy/carray/carray_tools.c
+    ${ULAB_DIR}/numpy/create.c
+    ${ULAB_DIR}/numpy/fft/fft.c
+    ${ULAB_DIR}/numpy/fft/fft_tools.c
+    ${ULAB_DIR}/numpy/filter.c
+    ${ULAB_DIR}/numpy/io/io.c
+    ${ULAB_DIR}/numpy/linalg/linalg.c
+    ${ULAB_DIR}/numpy/linalg/linalg_tools.c
+    ${ULAB_DIR}/numpy/numerical.c
+    ${ULAB_DIR}/numpy/poly.c
+    ${ULAB_DIR}/numpy/random/random.c
+    ${ULAB_DIR}/numpy/stats.c
+    ${ULAB_DIR}/numpy/transform.c
+    ${ULAB_DIR}/numpy/vector.c
+    ${ULAB_DIR}/numpy/numpy.c
+    ${ULAB_DIR}/scipy/scipy.c
+    ${ULAB_DIR}/user/user.c
+    ${ULAB_DIR}/utils/utils.c
+    ${ULAB_DIR}/ulab.c
 )
 
 list(APPEND MICROPY_SOURCE_QSTR
@@ -282,6 +320,7 @@ idf_component_register(
         ${MICROPY_INC_TINYUSB}
         ../../tulip/shared
         ../../amy/src
+        ${ULAB_DIR}  # same directory the ulab sources are listed from
         ${LV_BINDING_DIR}
         ${LVGL_DIR}/src
     REQUIRES
@@ -302,6 +341,7 @@ target_compile_definitions(${MICROPY_TARGET} PUBLIC
     MICROPY_VFS_FAT=1
     MICROPY_VFS_LFS2=1
     MICROPY_VFS_LFS1=1
+    MODULE_ULAB_ENABLED=1
     FFCONF_H=\"${MICROPY_OOFATFS_DIR}/ffconf.h\"
     LFS1_NO_MALLOC LFS1_NO_DEBUG LFS1_NO_WARN LFS1_NO_ERROR LFS1_NO_ASSERT
     LFS2_NO_MALLOC LFS2_NO_ASSERT
@@ -327,6 +367,7 @@ target_compile_options(${MICROPY_TARGET} PUBLIC
     -fsingle-precision-constant
     -Wno-strict-aliasing
     -DESP_PLATFORM
+    -DAMY_HAS_AUDIO_IN
     -DSTATIC=static
     -DLFS2_NO_DEBUG
 )
diff --git a/tulip/linux/Makefile b/tulip/linux/Makefile
index 859d9c475..2f2ef1a61 100644
--- a/tulip/linux/Makefile
+++ b/tulip/linux/Makefile
@@ -88,7 +88,7 @@ INC += -I$(BUILD)
 CWARN = -Wall -Werror
 CWARN += -Wextra -Wno-unused-parameter -Wno-unused-variable -Wno-old-style-declaration -Wno-unused-but-set-parameter -Wpointer-arith -Wdouble-promotion -Wno-float-conversion -Wno-missing-declarations  -Wno-unused-but-set-variable -Wno-sign-compare -Wno-gnu-variable-sized-type-not-at-end -Wno-undefined-internal
 CFLAGS += $(INC) $(CWARN) -std=gnu99 -DUNIX $(CFLAGS_MOD) $(COPT) -I$(VARIANT_DIR) $(CFLAGS_EXTRA) 
-CFLAGS += -DTULIP_DESKTOP
+CFLAGS += -DTULIP_DESKTOP -DAMY_HAS_AUDIO_IN
 CFLAGS += $(ARCHFLAGS) 
 
 # Debugging/Optimization
diff --git a/tulip/linux/variants/manifest.py b/tulip/linux/variants/manifest.py
index acccc016c..73c184a3b 100644
--- a/tulip/linux/variants/manifest.py
+++ b/tulip/linux/variants/manifest.py
@@ -3,4 +3,6 @@
 freeze("$(MPY_DIR)/../amy", "amy.py")
 freeze("$(MPY_DIR)/../amy", "juno.py")
 freeze("$(MPY_DIR)/../amy", "amy_wave.py")
+freeze("$(MPY_DIR)/../amy/experiments", "tulip_piano.py")
+freeze("$(MPY_DIR)/../amy/experiments", "piano_params.py")
 
diff --git a/tulip/macos/Makefile b/tulip/macos/Makefile
index cea92c985..d83893be3 100644
--- a/tulip/macos/Makefile
+++ b/tulip/macos/Makefile
@@ -21,6 +21,7 @@ MAKEFLAGS += --jobs=$(CPUS)
 # If the build directory is not given, make it reflect the variant name.
 BUILD ?= build-$(VARIANT)/tulip/obj
 
+
 TOP = ../../micropython
 
 VARIANT_DIR ?= ../../micropython/ports/unix/variants/$(VARIANT)
@@ -101,7 +102,7 @@ INC += -I$(BUILD)
 CWARN = -Wall -Werror
 CWARN += -Wextra -Wno-unused-parameter -Wno-unused-but-set-parameter -Wpointer-arith -Wdouble-promotion -Wfloat-conversion -Wno-missing-declarations  -Wno-unused-but-set-variable -Wno-sign-compare -Wno-gnu-variable-sized-type-not-at-end -Wno-undefined-internal
 CFLAGS += $(INC) $(CWARN) -std=gnu99 -DUNIX $(CFLAGS_MOD) $(COPT) -I$(VARIANT_DIR) $(CFLAGS_EXTRA) 
-CFLAGS += -DTULIP_DESKTOP -DMACOS
+CFLAGS += -DTULIP_DESKTOP -DMACOS -DAMY_HAS_AUDIO_IN
 CFLAGS += $(ARCHFLAGS) 
 
 # Debugging/Optimization
@@ -200,7 +201,6 @@ endif
 
 
 include ../shared/tulip.mk
-
 MICROPY_PORT_DIR=../../micropython/ports/unix
 
 ifeq ($(MICROPY_PY_SSL),1)
@@ -308,7 +308,6 @@ OBJ += $(addprefix $(BUILD)/, $(EXTMOD_SRC_C:.c=.o))
 OBJ += $(addprefix $(BUILD)/, $(LIB_SRC_C:.c=.o))
 
 
-
 # List of sources for qstr extraction
 SRC_QSTR += $(SRC_C) $(SRC_CXX) $(SHARED_SRC_C) $(EXTMOD_SRC_C) 
 
diff --git a/tulip/macos/variants/manifest.py b/tulip/macos/variants/manifest.py
index acccc016c..73c184a3b 100644
--- a/tulip/macos/variants/manifest.py
+++ b/tulip/macos/variants/manifest.py
@@ -3,4 +3,6 @@
 freeze("$(MPY_DIR)/../amy", "amy.py")
 freeze("$(MPY_DIR)/../amy", "juno.py")
 freeze("$(MPY_DIR)/../amy", "amy_wave.py")
+freeze("$(MPY_DIR)/../amy/experiments", "tulip_piano.py")
+freeze("$(MPY_DIR)/../amy/experiments", "piano_params.py")
 
diff --git a/tulip/shared/tulip.mk b/tulip/shared/tulip.mk
index 5727538a8..70540e551 100644
--- a/tulip/shared/tulip.mk
+++ b/tulip/shared/tulip.mk
@@ -1,8 +1,9 @@
 # tulip.mk
 
-TULIP_EXTMOD_DIR = ../shared
+TULIP_EXTMOD_DIR = $(TOP)/../tulip/shared
+ULAB_DIR = $(TOP)/../tulip/shared/ulab/code
 
-EXTMOD_SRC_C += $(addprefix ../amy/src/, \
+EXTMOD_SRC_C += $(addprefix $(TOP)/../amy/src/, \
 	amy.c \
 	algorithms.c \
 	custom.c \
@@ -39,9 +40,50 @@ EXTMOD_SRC_C += $(addprefix $(TULIP_EXTMOD_DIR)/, \
 	lvgl_u8g2.c \
 	)
 
+EXTMOD_SRC_C += $(addprefix $(ULAB_DIR)/, \
+	scipy/integrate/integrate.c \
+	scipy/linalg/linalg.c \
+	scipy/optimize/optimize.c \
+	scipy/signal/signal.c \
+	scipy/special/special.c \
+	ndarray_operators.c \
+	ulab_tools.c \
+	ndarray.c \
+	numpy/ndarray/ndarray_iter.c \
+	ndarray_properties.c \
+	numpy/approx.c \
+	numpy/bitwise.c \
+	numpy/compare.c \
+	numpy/carray/carray.c \
+	numpy/carray/carray_tools.c \
+	numpy/create.c \
+	numpy/fft/fft.c \
+	numpy/fft/fft_tools.c \
+	numpy/filter.c \
+	numpy/io/io.c \
+	numpy/linalg/linalg.c \
+	numpy/linalg/linalg_tools.c \
+	numpy/numerical.c \
+	numpy/poly.c \
+	numpy/random/random.c \
+	numpy/stats.c \
+	numpy/transform.c \
+	numpy/vector.c \
+	numpy/numpy.c \
+	scipy/scipy.c \
+	user/user.c \
+	utils/utils.c \
+	ulab.c \
+	)
+
+
+
+INC += -I$(ULAB_DIR)
+INC += -I$(TULIP_EXTMOD_DIR)
+INC += -I$(TOP)/../amy/src
+
+CFLAGS_EXTRA += -DMODULE_ULAB_ENABLED=1
 
-INC += -I$(TOP)/$(TULIP_EXTMOD_DIR)
-INC += -I../amy/src/
 
 #$(BUILD)/tulip/%.o: CFLAGS += -Wno-maybe-uninitialized -Wno-pointer-arith -Wno-unused-but-set-variable -Wno-format -Wno-sign-compare -Wno-old-style-declaration
 
diff --git a/tulip/shared/ulab/.gitignore b/tulip/shared/ulab/.gitignore
new file mode 100644
index 000000000..e7dd09527
--- /dev/null
+++ b/tulip/shared/ulab/.gitignore
@@ -0,0 +1,13 @@
+/micropython
+/circuitpython
+/*.exp
+/*.out
+/docs/manual/build/
+/docs/manual/source/**/*.pyi
+/docs/.ipynb_checkpoints/
+/docs/ulab-test.ipynb
+/code/.atom-build.yml
+build/micropython
+build/ulab
+
+.idea
\ No newline at end of file
diff --git a/tulip/shared/ulab/.readthedocs.yaml b/tulip/shared/ulab/.readthedocs.yaml
new file mode 100644
index 000000000..2d51cceab
--- /dev/null
+++ b/tulip/shared/ulab/.readthedocs.yaml
@@ -0,0 +1,24 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the version of Python and other tools you might need
+build:
+  os: ubuntu-20.04
+  tools:
+    python: "3.9"
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+  configuration: docs/manual/source/conf.py
+
+# If using Sphinx, optionally build your docs in additional formats such as PDF
+formats: all
+
+# Optionally declare the Python requirements required to build your docs
+python:
+  install:
+    - requirements: requirements.txt
diff --git a/tulip/shared/ulab/CONTRIBUTING.md b/tulip/shared/ulab/CONTRIBUTING.md
new file mode 100644
index 000000000..edfdc7a8a
--- /dev/null
+++ b/tulip/shared/ulab/CONTRIBUTING.md
@@ -0,0 +1,17 @@
+Contributions of any kind are always welcome. 
+
+# Contributing to the code base
+
+If you feel like adding to the code, you can simply issue a pull request. If you do so, please, try to adhere to `micropython`'s [coding conventions](https://github.com/micropython/micropython/blob/master/CODECONVENTIONS.md#c-code-conventions).
+
+# Documentation
+
+However, you can also contribute to the documentation (preferably via the [jupyter notebooks](https://github.com/v923z/micropython-ulab/tree/master/docs)).
+
+## Testing
+
+If you decide to lend a hand with testing, here are the steps:
+
+1. Write a test script that checks a particular function, or a set of related functions!
+1. Drop this script in one of the folders in [ulab tests](https://github.com/v923z/micropython-ulab/tree/master/tests)!
+1. Run the [./build.sh](https://github.com/v923z/micropython-ulab/blob/master/build.sh) script in the root directory of `ulab`! This will clone the latest `micropython`, compile the firmware for `unix`, execute all scripts in the `ulab/tests`, and compare the results to those in the expected results files, which are also in `ulab/tests`, and have an extension `.exp`. In case you have a new snippet, i.e., you have no expected results file, or if the results differ from those in the expected file, a new expected file will be generated in the root directory. You should inspect the contents of this file, and if they are satisfactory, then the file can be moved to the `ulab/tests` folder, alongside your snippet. 
diff --git a/tulip/shared/ulab/LICENSE b/tulip/shared/ulab/LICENSE
new file mode 100644
index 000000000..1d4df66d3
--- /dev/null
+++ b/tulip/shared/ulab/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Zoltán Vörös
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/tulip/shared/ulab/README.md b/tulip/shared/ulab/README.md
new file mode 100644
index 000000000..5c91d6714
--- /dev/null
+++ b/tulip/shared/ulab/README.md
@@ -0,0 +1,446 @@
+# ulab
+
+[![Documentation Status](https://readthedocs.org/projects/micropython-ulab/badge/?version=latest)](https://micropython-ulab.readthedocs.io/en/latest/index.html)
+
+`ulab` is a `numpy`-like array manipulation library for [micropython](http://micropython.org/) and [CircuitPython](https://circuitpython.org/).
+The module is written in C, defines compact containers (`ndarray`s) for numerical data of one to four
+dimensions, and is fast. The library is a software-only standard `micropython` user module,
+i.e., it has no hardware dependencies, and can be compiled for any platform. 8-, and 16-bit signed
+and unsigned integer `dtypes`, as well as `float`, and, optionally, `complex` are supported.
+The `float` implementation of `micropython` (32-bit `float`, or 64-bit `double`) is automatically
+detected and handled.
+
+1. [Supported functions and methods](#supported-functions-and-methods)
+    1. [ndarray methods](#ndarray-methods)
+    2. [numpy and scipy functions](#numpy-and-scipy-functions)
+    3. [ulab utilities](#ulab-utilities)
+    4. [user module](#user-module)
+4. [Usage](#usage)
+5. [Finding help](#finding-help)
+6. [Benchmarks](#benchmarks)
+7. [Firmware](#firmware)
+    1. [Customising the firmware](#customising-the-firmware)
+    1. [Platforms including ulab](#platforms-including-ulab)
+    1. [Compiling](#compiling)
+        1. [UNIX](#unix-port)
+        1. [STM-based boards](#stm-based-boards)
+        1. [ESP32-based boards](#esp32-based-boards)
+        1. [RP2-based boards](#rp2-based-boards)
+        1. [Compiling for CircuitPython](#compiling-for-circuitpython)
+8. [Issues, contributing, and testing](#issues-contributing-and-testing)
+    1. [Testing](#testing)
+
+# Supported functions and methods
+
+
+## ndarray methods
+
+`ulab` implements `numpy`'s `ndarray` with the `==`, `!=`, `<`, `<=`, `>`, `>=`, `+`, `-`, `/`, `*`, `**`,
+`+=`, `-=`, `*=`, `/=`, `**=` binary operators, and the `len`, `~`, `-`, `+`, `abs` unary operators that
+operate element-wise. Type-aware `ndarray`s can be initialised from any `micropython` iterable, lists of
+iterables via the `array` constructor, or by means of the `arange`, `concatenate`, `diag`, `eye`,
+`frombuffer`, `full`, `linspace`, `logspace`, `ones`, or `zeros`  functions.
+
+`ndarray`s can be sliced, and iterated on, and have a number of their own methods, and properties, such as `flatten()`, `itemsize`, `reshape()`,
+`shape`, `size`, `strides`, `tobytes()`, `tolist()`, and `transpose()` and `T`. If the firmware is compiled with `complex` support,
+the `imag`, and `real` properties are automatically included.
+
+## `numpy` and `scipy` functions
+
+In addition, `ulab` includes [universal functions](https://micropython-ulab.readthedocs.io/en/latest/numpy-universal.html), [many `numpy` functions](https://micropython-ulab.readthedocs.io/en/latest/numpy-functions.html), and functions from the [`numpy.fft`](https://micropython-ulab.readthedocs.io/en/latest/numpy-fft.html), [`numpy.linalg`](https://micropython-ulab.readthedocs.io/en/latest/numpy-linalg.html), [`numpy.random`](https://micropython-ulab.readthedocs.io/en/latest/numpy-random.html), [`scipy.linalg`](https://micropython-ulab.readthedocs.io/en/latest/scipy-linalg.html), [`scipy.optimize`](https://micropython-ulab.readthedocs.io/en/latest/scipy-optimize.html), [`scipy.signal`](https://micropython-ulab.readthedocs.io/en/latest/scipy-signal.html), and [`scipy.special`](https://micropython-ulab.readthedocs.io/en/latest/scipy-special.html) modules. A complete list of available routines can be found under [micropython-ulab](https://micropython-ulab.readthedocs.io/en/latest).
+
+## `ulab` utilities
+
+The [`utils`](https://micropython-ulab.readthedocs.io/en/latest/ulab-utils.html) module contains functions for
+interfacing with peripheral devices supporting the buffer protocol. These functions do not have an obvious
+`numpy` equivalent, but share a similar programming interface, and allow direct data input-output between
+numerical arrays and hardware components.
+
+## `user` module
+
+User-defined functions operating on numerical data can easily be added via the `user` module. This allows for transparent extensions, without having to change anything in the core. Hints as to how to work with `ndarray`s at the C level can be found in the [programming manual](https://micropython-ulab.readthedocs.io/en/latest/ulab-programming.html).
+
+# Usage
+
+`ulab` sports a `numpy/scipy`-compatible interface, which makes porting of `CPython` code straightforward. The following
+snippet should run equally well in `micropython`, or on a PC.
+
+```python
+try:
+    from ulab import numpy
+    from ulab import scipy
+except ImportError:
+    import numpy
+    import scipy.special
+
+x = numpy.array([1, 2, 3])
+scipy.special.erf(x)
+```
+
+# Finding help
+
+Documentation can be found on [readthedocs](https://readthedocs.org/) under
+[micropython-ulab](https://micropython-ulab.readthedocs.io/en/latest),
+as well as at [circuitpython-ulab](https://circuitpython.readthedocs.io/en/latest/shared-bindings/ulab/__init__.html).
+A number of practical examples are listed in Jeff Epler's excellent
+[circuitpython-ulab](https://learn.adafruit.com/ulab-crunch-numbers-fast-with-circuitpython/overview) overview.
+The [tricks](https://micropython-ulab.readthedocs.io/en/latest/ulab-tricks.html) chapter of the user manual discusses
+methods by which RAM and speed can be leveraged in particular numerical problems.
+
+# Benchmarks
+
+Representative numbers on performance can be found under [ulab samples](https://github.com/thiagofe/ulab_samples).
+
+# Firmware
+
+Pre-built, and up-to-date firmware files for select platforms can be downloaded
+from [micropython-builder](https://github.com/v923z/micropython-builder).
+## Customising the firmware
+
+If flash space is a concern, unnecessary functions can be excluded from the compiled firmware with
+pre-processor switches. In addition, `ulab` also has options for trading execution speed for firmware size.
+A thorough discussion on how the firmware can be customised can be found in the
+[corresponding section](https://micropython-ulab.readthedocs.io/en/latest/ulab-intro.html#customising-the-firmware)
+of the user manual.
+
+## Platforms including ulab
+
+`ulab` is also included in the following compiled `micropython` variants and derivatives:
+
+1. `CircuitPython` for SAMD51 and nRF microcontrollers https://github.com/adafruit/circuitpython
+1. `MicroPython for K210` https://github.com/loboris/MicroPython_K210_LoBo
+1. `MaixPy` https://github.com/sipeed/MaixPy
+1. `OpenMV` https://github.com/openmv/openmv
+1. `pimoroni-pico` https://github.com/pimoroni/pimoroni-pico
+
+## Compiling
+
+If you want to try the latest version of `ulab` on `micropython` or one of its forks, the firmware can be compiled
+from the source by following these steps:
+
+### UNIX port
+
+Simply clone the `ulab` repository with
+
+```bash
+git clone https://github.com/v923z/micropython-ulab.git ulab
+```
+and then run
+
+```bash
+./build.sh [matrix.dims] # Dimensions is 2 by default
+```
+This command will clone `micropython`, and build the `unix` port automatically, as well as run the test scripts. If you want an interactive `unix` session, you can launch it in
+
+```bash
+ulab/micropython/ports/unix
+```
+
+### STM-based boards
+
+First, you have to clone the `micropython` repository by running
+
+```bash
+git clone https://github.com/micropython/micropython.git
+```
+on the command line. This will create a new repository with the name `micropython`. Staying there, clone the `ulab` repository with
+
+```bash
+git clone https://github.com/v923z/micropython-ulab.git ulab
+```
+If you don't have the cross-compiler installed, you might want to do that now, for instance on Linux by executing
+
+```bash
+sudo apt-get install gcc-arm-none-eabi
+```
+
+If this step was successful, you can try to run the `make` command in the port's directory as
+
+```bash
+make BOARD=PYBV11 USER_C_MODULES=../../../ulab all
+```
+which will prepare the firmware for pyboard.v.11. Similarly,
+
+```bash
+make BOARD=PYBD_SF6 USER_C_MODULES=../../../ulab all
+```
+will compile for the SF6 member of the PYBD series. If your target is `unix`, you don't need to specify the `BOARD` parameter.
+
+Provided that you managed to compile the firmware, you would upload that by running either
+
+```bash
+dfu-util --alt 0 -D firmware.dfu
+```
+or
+
+```bash
+python pydfu.py -u firmware.dfu
+```
+
+In case you got stuck somewhere in the process, a bit more detailed instructions can be found under https://github.com/micropython/micropython/wiki/Getting-Started, and https://github.com/micropython/micropython/wiki/Pyboard-Firmware-Update.
+
+
+### ESP32-based boards
+
+`ulab` can be tested on the ESP32 in [wokwi's micropython emulator](https://wokwi.com/arduino/projects/322114140704342610) without having to compile the C code. This utility also offers the possibility to save and share your `micropython` code.
+
+Firmware for `Espressif` hardware can be built in two different ways, which are discussed in the next two paragraphs. A solution for issues with the firmware size is outlined in the [last paragraph](#what-to-do-if-the-firmware-is-too-large) of this section.
+
+#### Compiling with cmake
+
+Beginning with version 1.15, `micropython` switched to `cmake` on the ESP32 port. If your operating system supports `CMake > 3.12`, you can either simply download, and run the single [build script](https://github.com/v923z/micropython-ulab/blob/master/build/esp32-cmake.sh), or follow the steps in this section. Otherwise, you should skip to the [next one](#compiling-with-make), where the old, `make`-based approach is discussed.
+
+In case you encounter difficulties during the build process, you can consult the [general instructions for the ESP32](https://github.com/micropython/micropython/tree/master/ports/esp32#micropython-port-to-the-esp32).
+
+First, clone the `ulab`, the `micropython`, as well as the `espressif` repositories:
+
+```bash
+export BUILD_DIR=$(pwd)
+
+git clone https://github.com/v923z/micropython-ulab.git ulab
+git clone https://github.com/micropython/micropython.git
+
+cd $BUILD_DIR/micropython/
+
+git clone -b v4.0.2 --recursive https://github.com/espressif/esp-idf.git
+
+```
+Also later releases of `esp-idf` are possible (e.g. `v4.2.1`).
+
+Then install the `ESP-IDF` tools:
+
+```bash
+cd esp-idf
+./install.sh
+. ./export.sh
+```
+
+Next, build the `micropython` cross-compiler, and the `ESP` sub-modules:
+
+```bash
+cd $BUILD_DIR/micropython/mpy-cross
+make
+cd $BUILD_DIR/micropython/ports/esp32
+make submodules
+```
+At this point, all requirements are installed and built. We can now compile the firmware with `ulab`. In `$BUILD_DIR/micropython/ports/esp32` create a `makefile` with the following content:
+
+```bash
+BOARD = GENERIC
+USER_C_MODULES = $(BUILD_DIR)/ulab/code/micropython.cmake
+
+include Makefile
+```
+You specify with the `BOARD` variable, what you want to compile for, a generic board, or `TINYPICO` (for `micropython` version >1.15, use `UM_TINYPICO`), etc. Still in `$BUILD_DIR/micropython/ports/esp32`, you can now run `make`.
+
+#### Compiling with make
+
+If your operating system does not support a recent enough version of `CMake`, you have to stay with `micropython` version 1.14. The firmware can be compiled either by downloading and running the [build script](https://github.com/v923z/micropython-ulab/blob/master/build/esp32.sh), or following the steps below:
+
+First, clone `ulab` with
+
+```bash
+git clone https://github.com/v923z/micropython-ulab.git ulab
+```
+
+and then, in the same directory, `micropython`
+
+```bash
+git clone https://github.com/micropython/micropython.git
+```
+
+At this point, you should have `ulab`, and `micropython` side by side.
+
+With version 1.15, `micropython` switched to `cmake` on the `ESP32` port, thus breaking compatibility with user modules. `ulab` can, however, still be compiled with version 1.14. You can check out a particular version by pinning the release tag as
+
+```bash
+
+cd ./micropython/
+git checkout tags/v1.14
+
+```
+Next, update the submodules,
+
+```bash
+git submodule update --init
+cd ./mpy-cross && make # build cross-compiler (required)
+```
+and find the ESP commit hash
+
+```bash
+cd ./micropython/ports/esp32
+make ESPIDF= # will display supported ESP-IDF commit hashes
+# output should look like: """
+# ...
+# Supported git hash (v3.3): 9e70825d1e1cbf7988cf36981774300066580ea7
+# Supported git hash (v4.0) (experimental): 4c81978a3e2220674a432a588292a4c860eef27b
+```
+
+Choose an ESPIDF version from one of the options printed by the previous command:
+
+```bash
+ESPIDF_VER=9e70825d1e1cbf7988cf36981774300066580ea7
+```
+
+In the `micropython` directory, create a new directory with
+```bash
+mkdir esp32
+```
+Your `micropython` directory should now look like
+
+```bash
+ls
+ACKNOWLEDGEMENTS    CONTRIBUTING.md  esp32     lib      mpy-cross  README.md
+CODECONVENTIONS.md  docs             examples  LICENSE  ports      tests
+CODEOFCONDUCT.md    drivers          extmod    logo     py         tools
+```
+
+In `./micropython/esp32`, download the software development kit with
+
+```bash
+git clone https://github.com/espressif/esp-idf.git esp-idf
+cd ./esp-idf
+git checkout $ESPIDF_VER
+git submodule update --init --recursive # get idf submodules
+pip install -r ./requirements.txt # install python reqs
+```
+
+Next, still staying in `./micropython/esp32/esp-idf/`, install the ESP32 compiler. If using an ESP-IDF version >= 4.x (chosen by `$ESPIDF_VER` above), this can be done by running `. $BUILD_DIR/esp-idf/install.sh`. Otherwise, for version 3.x, run the following commands in `./micropython/esp32/esp-idf`:
+
+```bash
+# for 64 bit linux
+curl https://dl.espressif.com/dl/xtensa-esp32-elf-linux64-1.22.0-80-g6c4433a-5.2.0.tar.gz | tar xvz
+
+# for 32 bit
+# curl https://dl.espressif.com/dl/xtensa-esp32-elf-linux32-1.22.0-80-g6c4433a-5.2.0.tar.gz | tar xvz
+
+# don't worry about adding to path; we'll specify that later
+
+# also, see https://docs.espressif.com/projects/esp-idf/en/v3.3.2/get-started for more info
+```
+
+Finally, build the firmware:
+
+```bash
+cd ./micropython/ports/esp32
+# temporarily add esp32 compiler to path
+export PATH=../../esp32/esp-idf/xtensa-esp32-elf/bin:$PATH
+export ESPIDF=../../esp32/esp-idf # req'd by Makefile
+export BOARD=GENERIC # options are dirs in ./boards
+export USER_C_MODULES=../../../ulab # include ulab in firmware
+
+make submodules && make all
+```
+
+If it compiles without error, you can plug in your ESP32 via USB and then flash it with:
+
+```bash
+make erase && make deploy
+```
+
+#### What to do, if the firmware is too large?
+
+When selecting `BOARD=TINYPICO`, the firmware is built but fails to deploy, because it is too large for the standard partitions. We can rectify the problem by creating a new partition table. In order to do so, in `$BUILD_DIR/micropython/ports/esp32/`, copy the following 8 lines to a file named `partitions_ulab.csv`:
+
+```
+# Notes: the offset of the partition table itself is set in
+# $ESPIDF/components/partition_table/Kconfig.projbuild and the
+# offset of the factory/ota_0 partition is set in makeimg.py
+# Name,   Type, SubType, Offset,  Size, Flags
+nvs,      data, nvs,     0x9000,  0x6000,
+phy_init, data, phy,     0xf000,  0x1000,
+factory,  app,  factory, 0x10000, 0x200000,
+vfs,      data, fat,     0x220000, 0x180000,
+```
+This expands the `factory` partition by 128 kB, and reduces the size of `vfs` by the same amount. Having defined the new partition table, we should extend `sdkconfig.board` by adding the following two lines:
+
+```
+CONFIG_PARTITION_TABLE_CUSTOM=y
+CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions_ulab.csv"
+```
+This file can be found in `$BUILD_DIR/micropython/ports/esp32/boards/TINYPICO/`. Finally, run `make clean`, and `make`. The new firmware contains the modified partition table, and should fit on the microcontroller.
+
+### RP2-based boards
+
+RP2 firmware can be compiled either by downloading and running the single [build script](https://github.com/v923z/micropython-ulab/blob/master/build/rp2.sh)/[build script for Pico W](https://github.com/v923z/micropython-ulab/blob/master/build/rp2w.sh), or executing the commands below.
+
+First, clone `micropython`:
+
+```bash
+git clone https://github.com/micropython/micropython.git
+```
+
+Then, setup the required submodules:
+
+```bash
+cd micropython
+git submodule update --init lib/tinyusb
+git submodule update --init lib/pico-sdk
+cd lib/pico-sdk
+git submodule update --init lib/tinyusb
+```
+
+You'll also need to compile `mpy-cross`:
+
+```bash
+cd ../../mpy-cross
+make
+```
+
+That's all you need to do for the `micropython` repository. Now, let us clone `ulab` (in a directory outside the micropython repository):
+
+```bash
+cd ../../
+git clone https://github.com/v923z/micropython-ulab ulab
+```
+
+With this setup, we can now build the firmware. Back in the `micropython` repository, use these commands:
+
+```bash
+cd ports/rp2
+make USER_C_MODULES=/path/to/ulab/code/micropython.cmake
+```
+
+If `micropython` and `ulab` were in the same folder on the computer, you can set `USER_C_MODULES=../../../ulab/code/micropython.cmake`. The compiled firmware will be placed in `micropython/ports/rp2/build`.
+
+### Compiling for CircuitPython
+
+[Adafruit Industries](https://www.adafruit.com) always include a relatively recent version of `ulab` in their nightly builds. However, if you really need the bleeding edge, you can easily compile the firmware from the source. Simply clone `circuitpython`, and move the commit pointer to the latest version of `ulab` (`ulab` will automatically be cloned with `circuitpython`):
+
+```bash
+git clone https://github.com/adafruit/circuitpython.git
+
+cd circuitpython/extmod/ulab
+
+# update ulab here
+git checkout master
+git pull
+```
+You might have to check, whether the `CIRCUITPY_ULAB` variable is set to `1` for the port that you want to compile for. You find this piece of information in the `make` fragment:
+
+```bash
+circuitpython/ports/port_of_your_choice/mpconfigport.mk
+```
+After this, you would run `make` with the single `BOARD` argument, e.g.:
+
+```bash
+make BOARD=mini_sam_m4
+```
+
+# Issues, contributing, and testing
+
+If you find a problem with the code, please, raise an [issue](https://github.com/v923z/micropython-ulab/issues)! An issue should address a single problem, and should contain a minimal code snippet that demonstrates the difference from the expected behaviour. Reducing a problem to the bare minimum significantly increases the chances of a quick fix.
+
+Feature requests (porting a particular function from `numpy` or `scipy`) should also be posted at [ulab issue](https://github.com/v923z/micropython-ulab/issues).
+
+Contributions of any kind are always welcome. If you feel like adding to the code, you can simply issue a pull request. If you do so, please, try to adhere to `micropython`'s [coding conventions](https://github.com/micropython/micropython/blob/master/CODECONVENTIONS.md#c-code-conventions).
+
+However, you can also contribute to the documentation (preferably via the [jupyter notebooks](https://github.com/v923z/micropython-ulab/tree/master/docs)), or improve the [tests](https://github.com/v923z/micropython-ulab/tree/master/tests).
+
+## Testing
+
+If you decide to lend a hand with testing, here are the steps:
+
+1. Write a test script that checks a particular function, or a set of related functions!
+1. Drop this script in one of the folders in [ulab tests](https://github.com/v923z/micropython-ulab/tree/master/tests)!
+1. Run the [./build.sh](https://github.com/v923z/micropython-ulab/blob/master/build.sh) script in the root directory of `ulab`! This will clone the latest `micropython`, compile the firmware for `unix`, execute all scripts in the `ulab/tests`, and compare the results to those in the expected results files, which are also in `ulab/tests`, and have an extension `.exp`. In case you have a new snippet, i.e., you have no expected results file, or if the results differ from those in the expected file, a new expected file will be generated in the root directory. You should inspect the contents of this file, and if they are satisfactory, then the file can be moved to the `ulab/tests` folder, alongside your snippet.
diff --git a/tulip/shared/ulab/build-cp.sh b/tulip/shared/ulab/build-cp.sh
new file mode 100755
index 000000000..9081a618b
--- /dev/null
+++ b/tulip/shared/ulab/build-cp.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+set -e
+# POSIX compliant version
+readlinkf_posix() {
+  [ "${1:-}" ] || return 1
+  max_symlinks=40
+  CDPATH='' # to avoid changing to an unexpected directory
+
+  target=$1
+  [ -e "${target%/}" ] || target=${1%"${1##*[!/]}"} # trim trailing slashes
+  [ -d "${target:-/}" ] && target="$target/"
+
+  cd -P . 2>/dev/null || return 1
+  while [ "$max_symlinks" -ge 0 ] && max_symlinks=$((max_symlinks - 1)); do
+    if [ ! "$target" = "${target%/*}" ]; then
+      case $target in
+        /*) cd -P "${target%/*}/" 2>/dev/null || break ;;
+        *) cd -P "./${target%/*}" 2>/dev/null || break ;;
+      esac
+      target=${target##*/}
+    fi
+
+    if [ ! -L "$target" ]; then
+      target="${PWD%/}${target:+/}${target}"
+      printf '%s\n' "${target:-/}"
+      return 0
+    fi
+
+    # `ls -dl` format: "%s %u %s %s %u %s %s -> %s\n",
+    #   <file mode>, <number of links>, <owner name>, <group name>,
+    #   <size>, <date and time>, <pathname of link>, <contents of link>
+    # https://pubs.opengroup.org/onlinepubs/9699919799/utilities/ls.html
+    link=$(ls -dl -- "$target" 2>/dev/null) || break
+    target=${link#*" $target -> "}
+  done
+  return 1
+}
+NPROC=$(python3 -c 'import multiprocessing; print(multiprocessing.cpu_count())')
+HERE="$(dirname -- "$(readlinkf_posix -- "${0}")" )"
+[ -e circuitpython/py/py.mk ] || (git clone --branch main https://github.com/adafruit/circuitpython && cd circuitpython && make fetch-all-submodules && git submodule update --init lib/uzlib tools)
+rm -rf circuitpython/extmod/ulab; ln -s "$HERE" circuitpython/extmod/ulab
+dims=${1-2}
+make -C circuitpython/mpy-cross -j$NPROC
+make -k -C circuitpython/ports/unix -j$NPROC DEBUG=1 MICROPY_PY_FFI=0 MICROPY_PY_BTREE=0 MICROPY_SSL_AXTLS=0 MICROPY_PY_USSL=0 CFLAGS_EXTRA="-Wno-tautological-constant-out-of-range-compare -Wno-unknown-pragmas -DULAB_MAX_DIMS=$dims" BUILD=build-$dims PROG=micropython-$dims
+
+# bash test-common.sh "${dims}" "circuitpython/ports/unix/micropython-$dims"
+
+# Docs don't depend on the dimensionality, so only do it once
+if [ "$dims" -eq 2 ]; then
+    (cd circuitpython && sphinx-build -E -W -b html . _build/html)
+    (cd circuitpython && make check-stubs)
+fi
diff --git a/tulip/shared/ulab/build.sh b/tulip/shared/ulab/build.sh
new file mode 100755
index 000000000..7927d4ac7
--- /dev/null
+++ b/tulip/shared/ulab/build.sh
@@ -0,0 +1,70 @@
+#!/bin/sh
+
+GIT_HASH=`git describe --abbrev=8 --always`
+
+# POSIX compliant version
+readlinkf_posix() {
+  [ "${1:-}" ] || return 1
+  max_symlinks=40
+  CDPATH='' # to avoid changing to an unexpected directory
+
+  target=$1
+  [ -e "${target%/}" ] || target=${1%"${1##*[!/]}"} # trim trailing slashes
+  [ -d "${target:-/}" ] && target="$target/"
+
+  cd -P . 2>/dev/null || return 1
+  while [ "$max_symlinks" -ge 0 ] && max_symlinks=$((max_symlinks - 1)); do
+    if [ ! "$target" = "${target%/*}" ]; then
+      case $target in
+        /*) cd -P "${target%/*}/" 2>/dev/null || break ;;
+        *) cd -P "./${target%/*}" 2>/dev/null || break ;;
+      esac
+      target=${target##*/}
+    fi
+
+    if [ ! -L "$target" ]; then
+      target="${PWD%/}${target:+/}${target}"
+      printf '%s\n' "${target:-/}"
+      return 0
+    fi
+
+    # `ls -dl` format: "%s %u %s %s %u %s %s -> %s\n",
+    #   <file mode>, <number of links>, <owner name>, <group name>,
+    #   <size>, <date and time>, <pathname of link>, <contents of link>
+    # https://pubs.opengroup.org/onlinepubs/9699919799/utilities/ls.html
+    link=$(ls -dl -- "$target" 2>/dev/null) || break
+    target=${link#*" $target -> "}
+  done
+  return 1
+}
+NPROC=`python3 -c 'import multiprocessing; print(multiprocessing.cpu_count())'`
+PLATFORM=`python3 -c 'import sys; print(sys.platform)'`
+set -e
+HERE="$(dirname -- "$(readlinkf_posix -- "${0}")" )"
+dims=${1-2}
+if [ ! -d "micropython" ] ; then
+  git clone https://github.com/micropython/micropython
+else
+  git -C micropython pull
+fi
+make -C micropython/mpy-cross -j${NPROC}
+make -C micropython/ports/unix submodules
+make -C micropython/ports/unix -j${NPROC} USER_C_MODULES="${HERE}" DEBUG=1 STRIP=: MICROPY_PY_FFI=0 MICROPY_PY_BTREE=0 CFLAGS_EXTRA=-DULAB_MAX_DIMS=$dims CFLAGS_EXTRA+=-DULAB_HASH=$GIT_HASH BUILD=build-$dims PROG=micropython-$dims
+
+PROG="micropython/ports/unix/build-$dims/micropython-$dims"
+if [ ! -e "$PROG" ]; then
+  # Older MicroPython revision, executable is still in ports/unix.
+  PROG="micropython/ports/unix/micropython-$dims"
+fi
+
+bash test-common.sh "${dims}" "$PROG"
+
+# Build with single-precision float.
+make -C micropython/ports/unix -j${NPROC} USER_C_MODULES="${HERE}" DEBUG=1 STRIP=: MICROPY_PY_FFI=0 MICROPY_PY_BTREE=0 CFLAGS_EXTRA=-DMICROPY_FLOAT_IMPL=MICROPY_FLOAT_IMPL_FLOAT CFLAGS_EXTRA+=-DULAB_MAX_DIMS=$dims CFLAGS_EXTRA+=-DULAB_HASH=$GIT_HASH BUILD=build-nanbox-$dims PROG=micropython-nanbox-$dims
+
+# The unix nanbox variant builds as a 32-bit executable and requires gcc-multilib.
+# macOS doesn't support i386 builds so only build on linux.
+if [ $PLATFORM = linux ]; then
+    make -C micropython/ports/unix -j${NPROC} VARIANT=nanbox USER_C_MODULES="${HERE}" DEBUG=1 STRIP=: MICROPY_PY_FFI=0 MICROPY_PY_BTREE=0 CFLAGS_EXTRA=-DULAB_MAX_DIMS=$dims CFLAGS_EXTRA+=-DULAB_HASH=$GIT_HASH BUILD=build-nanbox-$dims PROG=micropython-nanbox-$dims
+fi
+
diff --git a/tulip/shared/ulab/build/esp32-cmake.sh b/tulip/shared/ulab/build/esp32-cmake.sh
new file mode 100644
index 000000000..0093c5b7a
--- /dev/null
+++ b/tulip/shared/ulab/build/esp32-cmake.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+export BUILD_DIR=$(pwd)
+
+echo "--- CLONING ULAB ---"
+git clone --depth 1 https://github.com/v923z/micropython-ulab.git ulab
+
+echo "--- CLONING MICROPYTHON ---"
+git clone --depth 1 https://github.com/micropython/micropython.git
+
+echo "--- CLONING ESP-IDF ---"
+cd $BUILD_DIR/micropython/
+git clone --depth 1 -b v4.0.2 --recursive https://github.com/espressif/esp-idf.git
+
+echo "--- INSTALL ESP-IDF ---"
+cd $BUILD_DIR/micropython/esp-idf
+./install.sh
+. ./export.sh
+
+echo "--- MPY-CROSS ---"
+cd $BUILD_DIR/micropython/mpy-cross
+make
+
+echo "--- ESP32 SUBMODULES ---"
+cd $BUILD_DIR/micropython/ports/esp32
+make submodules
+
+echo "--- PATCH MAKEFILE ---"
+cp $BUILD_DIR/micropython/ports/esp32/Makefile $BUILD_DIR/micropython/ports/esp32/MakefileOld
+echo "BOARD = GENERIC" > $BUILD_DIR/micropython/ports/esp32/Makefile
+echo "USER_C_MODULES = \$(BUILD_DIR)/ulab/code/micropython.cmake" >> $BUILD_DIR/micropython/ports/esp32/Makefile
+cat $BUILD_DIR/micropython/ports/esp32/MakefileOld >> $BUILD_DIR/micropython/ports/esp32/Makefile
+
+echo "--- MAKE ---"
+make
diff --git a/tulip/shared/ulab/build/esp32.sh b/tulip/shared/ulab/build/esp32.sh
new file mode 100644
index 000000000..d5571cdeb
--- /dev/null
+++ b/tulip/shared/ulab/build/esp32.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+export BUILD_DIR=$(pwd)
+
+git clone https://github.com/v923z/micropython-ulab.git ulab
+git clone https://github.com/micropython/micropython.git
+
+cd $BUILD_DIR/micropython/
+git checkout tags/v1.14
+
+git submodule update --init
+cd ./mpy-cross && make # build cross-compiler (required)
+
+cd $BUILD_DIR/micropython/ports/esp32
+make ESPIDF= # will display supported ESP-IDF commit hashes
+# output should look like: """
+# ...
+# Supported git hash (v3.3): 9e70825d1e1cbf7988cf36981774300066580ea7
+# Supported git hash (v4.0) (experimental): 4c81978a3e2220674a432a588292a4c860eef27b
+
+ESPIDF_VER=9e70825d1e1cbf7988cf36981774300066580ea7
+
+mkdir $BUILD_DIR/micropython/esp32
+
+cd $BUILD_DIR/micropython/esp32
+git clone https://github.com/espressif/esp-idf.git esp-idf
+cd $BUILD_DIR/micropython/esp32/esp-idf
+git checkout $ESPIDF_VER
+git submodule update --init --recursive # get idf submodules
+pip install -r ./requirements.txt # install python reqs
+
+curl https://dl.espressif.com/dl/xtensa-esp32-elf-linux64-1.22.0-80-g6c4433a-5.2.0.tar.gz | tar xvz
+
+cd $BUILD_DIR/micropython/ports/esp32
+# temporarily add esp32 compiler to path
+export PATH=$BUILD_DIR/micropython/esp32/esp-idf/xtensa-esp32-elf/bine:$PATH
+export ESPIDF=$BUILD_DIR/micropython/esp32/esp-idf
+export BOARD=GENERIC # board options are in ./board
+export USER_C_MODULES=$BUILD_DIR/ulab # include ulab in firmware
+
+make submodules & make all
diff --git a/tulip/shared/ulab/build/rp2.sh b/tulip/shared/ulab/build/rp2.sh
new file mode 100644
index 000000000..e89ab13c4
--- /dev/null
+++ b/tulip/shared/ulab/build/rp2.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Builds the MicroPython rp2 firmware with ulab; ulab and MicroPython are cloned, when they are missing.
+export BUILD_DIR="$(pwd)"
+export MPY_DIR="$BUILD_DIR/micropython"
+export ULAB_DIR="$BUILD_DIR/../code"
+
+if [ ! -d "$ULAB_DIR" ]; then
+    printf "Cloning ulab\n"
+    ULAB_DIR="$BUILD_DIR/ulab/code"
+    git clone https://github.com/v923z/micropython-ulab.git ulab
+fi
+
+if [ ! -d "$MPY_DIR" ]; then
+    printf "Cloning MicroPython\n"
+    git clone https://github.com/micropython/micropython.git micropython
+fi
+
+cd "$MPY_DIR"
+git submodule update --init
+cd ./mpy-cross && make # build cross-compiler (required)
+
+cd "$MPY_DIR/ports/rp2"
+rm -rf build # -f: a fresh checkout has no build directory yet
+make USER_C_MODULES="$ULAB_DIR/micropython.cmake"
diff --git a/tulip/shared/ulab/build/rp2w.sh b/tulip/shared/ulab/build/rp2w.sh
new file mode 100644
index 000000000..ada620ad8
--- /dev/null
+++ b/tulip/shared/ulab/build/rp2w.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Builds the MicroPython rp2 firmware for the RPI_PICO_W board with ulab as a user C module.
+export BOARD=RPI_PICO_W
+export BUILD_DIR="$(pwd)"
+export MPY_DIR="$BUILD_DIR/micropython"
+export ULAB_DIR="$BUILD_DIR/../code"
+
+if [ ! -d "$ULAB_DIR" ]; then
+    printf "Cloning ulab\n"
+    ULAB_DIR="$BUILD_DIR/ulab/code"
+    git clone https://github.com/v923z/micropython-ulab.git ulab
+fi
+
+if [ ! -d "$MPY_DIR" ]; then
+    printf "Cloning MicroPython\n"
+    git clone https://github.com/micropython/micropython.git micropython
+fi
+
+cd "$MPY_DIR"
+git submodule update --init
+cd ./mpy-cross && make # build cross-compiler (required)
+
+cd "$MPY_DIR/ports/rp2"
+make BOARD="$BOARD" clean
+make USER_C_MODULES="$ULAB_DIR/micropython.cmake" BOARD="$BOARD"
diff --git a/tulip/shared/ulab/code/micropython.cmake b/tulip/shared/ulab/code/micropython.cmake
new file mode 100644
index 000000000..66890c0db
--- /dev/null
+++ b/tulip/shared/ulab/code/micropython.cmake
@@ -0,0 +1,18 @@
+# ulab as a MicroPython user C module: an INTERFACE library, attached to the usermod target at the end.
+add_library(usermod_ulab INTERFACE)
+
+# Compile every .c file found below the directory of this file
+# (the search is recursive).
+file(GLOB_RECURSE ULAB_SOURCES "${CMAKE_CURRENT_LIST_DIR}/*.c")
+target_sources(usermod_ulab INTERFACE ${ULAB_SOURCES})
+
+# Headers are looked up relative to the same directory.
+target_include_directories(usermod_ulab INTERFACE "${CMAKE_CURRENT_LIST_DIR}")
+
+# Consumers are compiled with MODULE_ULAB_ENABLED=1.
+target_compile_definitions(usermod_ulab INTERFACE MODULE_ULAB_ENABLED=1)
+
+# Linking the interface library into usermod passes the sources, the include
+# path and the definition on to it.
+target_link_libraries(usermod INTERFACE usermod_ulab)
+
diff --git a/tulip/shared/ulab/code/micropython.mk b/tulip/shared/ulab/code/micropython.mk
new file mode 100644
index 000000000..e835d87b2
--- /dev/null
+++ b/tulip/shared/ulab/code/micropython.mk
@@ -0,0 +1,42 @@
+
+USERMODULES_DIR := $(USERMOD_DIR)
+# NOTE(review): USERMOD_DIR is not set in this file; presumably MicroPython's build provides it. Verify.
+# Add all C files to SRC_USERMOD: the sources are listed one by one, nothing is globbed.
+SRC_USERMOD += $(USERMODULES_DIR)/scipy/integrate/integrate.c
+SRC_USERMOD += $(USERMODULES_DIR)/scipy/linalg/linalg.c
+SRC_USERMOD += $(USERMODULES_DIR)/scipy/optimize/optimize.c
+SRC_USERMOD += $(USERMODULES_DIR)/scipy/signal/signal.c
+SRC_USERMOD += $(USERMODULES_DIR)/scipy/special/special.c
+SRC_USERMOD += $(USERMODULES_DIR)/ndarray_operators.c
+SRC_USERMOD += $(USERMODULES_DIR)/ulab_tools.c
+SRC_USERMOD += $(USERMODULES_DIR)/ndarray.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/ndarray/ndarray_iter.c
+SRC_USERMOD += $(USERMODULES_DIR)/ndarray_properties.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/approx.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/bitwise.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/compare.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/carray/carray.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/carray/carray_tools.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/create.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/fft/fft.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/fft/fft_tools.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/filter.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/io/io.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/linalg/linalg.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/linalg/linalg_tools.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/numerical.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/poly.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/random/random.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/stats.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/transform.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/vector.c
+
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/numpy.c
+SRC_USERMOD += $(USERMODULES_DIR)/scipy/scipy.c
+SRC_USERMOD += $(USERMODULES_DIR)/user/user.c
+SRC_USERMOD += $(USERMODULES_DIR)/utils/utils.c
+SRC_USERMOD += $(USERMODULES_DIR)/ulab.c
+# Put the module directory on the include path of the user module sources.
+CFLAGS_USERMOD += -I$(USERMODULES_DIR)
+# 'override' makes the append take effect even when CFLAGS_EXTRA is given on the make command line.
+override CFLAGS_EXTRA += -DMODULE_ULAB_ENABLED=1
diff --git a/tulip/shared/ulab/code/ndarray.c b/tulip/shared/ulab/code/ndarray.c
new file mode 100644
index 000000000..26ced6fc0
--- /dev/null
+++ b/tulip/shared/ulab/code/ndarray.c
@@ -0,0 +1,2145 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2024 Zoltán Vörös
+ *               2020 Jeff Epler for Adafruit Industries
+ *               2020 Taku Fukada
+*/
+
+#include <unistd.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/runtime.h"
+#include "py/binary.h"
+#include "py/obj.h"
+#include "py/objtuple.h"
+#include "py/objint.h"
+
+#include "ulab_tools.h"
+#include "ndarray.h"
+#include "ndarray_operators.h"
+#include "numpy/carray/carray.h"
+#include "numpy/carray/carray_tools.h"
+
+mp_uint_t ndarray_print_threshold = NDARRAY_PRINT_THRESHOLD;
+mp_uint_t ndarray_print_edgeitems = NDARRAY_PRINT_EDGEITEMS;
+
+//| """Manipulate numeric data similar to numpy
+//|
+//| `ulab` is a numpy-like module for micropython, meant to simplify and
+//| speed up common mathematical operations on arrays. The primary goal was to
+//| implement a small subset of numpy that might be useful in the context of a
+//| microcontroller. This means low-level data processing of linear (array) and
+//| two-dimensional (matrix) data.
+//|
+//| `ulab` is adapted from micropython-ulab, and the original project's
+//| documentation can be found at
+//| https://micropython-ulab.readthedocs.io/en/latest/
+//|
+//| `ulab` is modeled after numpy, and aims to be a compatible subset where
+//| possible.  Numpy's documentation can be found at
+//| https://docs.scipy.org/doc/numpy/index.html"""
+//|
+
+void ndarray_set_complex_value(void *p, size_t index, mp_obj_t value) {
+    // p holds interleaved (real, imaginary) mp_float_t pairs; this writes pair number index
+    mp_float_t *slot = (mp_float_t *)p + 2 * index;
+    mp_float_t re, im = MICROPY_FLOAT_CONST(0.0);
+    if(mp_obj_is_type(value, &mp_type_complex)) {
+        mp_obj_get_complex(value, &re, &im);
+    } else { // anything else has to convert to a float, and the imaginary part stays 0
+        re = mp_obj_get_float(value);
+    }
+    slot[0] = re;
+    slot[1] = im;
+}
+
+void ndarray_fill_array_iterable(mp_float_t *array, mp_obj_t iterable) { // copies the items of iterable into array as floats
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t item, iterator = mp_getiter(iterable, &iter_buf);
+    // array is not bounds-checked here: it has to offer one slot per item yielded
+    for(size_t n = 0; (item = mp_iternext(iterator)) != MP_OBJ_STOP_ITERATION; n++) {
+        array[n] = (mp_float_t)mp_obj_get_float(item);
+    }
+}
+
+#if ULAB_HAS_FUNCTION_ITERATOR
+size_t *ndarray_new_coords(uint8_t ndim) {
+    // hands back a freshly allocated coordinate vector of ndim entries, obtained via m_new0
+    return m_new0(size_t, ndim);
+}
+
+void ndarray_rewind_array(uint8_t ndim, uint8_t *array, size_t *shape, int32_t *strides, size_t *coords) {
+    // meant to be called, when the last axis is full: rewinds the pointer over that axis, advances it by
+    // one stride of the second-to-last axis, then carries any overflow through coords, axis by axis
+    // NOTE(review): array is passed by value, so only the changes to coords are visible to the caller
+    array -= shape[ULAB_MAX_DIMS - 1] * strides[ULAB_MAX_DIMS - 1];
+    array += strides[ULAB_MAX_DIMS - 2];
+    for(uint8_t i=1; i < ndim-1; i++) {
+        coords[ULAB_MAX_DIMS - 1 - i] += 1;
+        if(coords[ULAB_MAX_DIMS - 1 - i] == shape[ULAB_MAX_DIMS - 1 - i]) { // we are at a dimension boundary
+            array -= shape[ULAB_MAX_DIMS - 1 - i] * strides[ULAB_MAX_DIMS - 1 - i];
+            array += strides[ULAB_MAX_DIMS - 2 - i];
+            coords[ULAB_MAX_DIMS - 1 - i] = 0;
+            coords[ULAB_MAX_DIMS - 2 - i] += 1;
+        } else { // coordinates can change only, if the last coordinate changes
+            return;
+        }
+    }
+}
+#endif
+
+static int32_t *strides_from_shape(size_t *shape, uint8_t dtype) {
+    // allocates the byte strides of a densely packed array with the given shape and dtype
+    int32_t *dense = m_new(int32_t, ULAB_MAX_DIMS);
+    dense[ULAB_MAX_DIMS - 1] = (int32_t)ulab_binary_get_size(dtype);
+    for(int8_t axis = ULAB_MAX_DIMS - 2; axis >= 0; axis--) { // from the second-to-last axis outwards
+        dense[axis] = dense[axis + 1] * shape[axis + 1];
+    }
+    return dense;
+}
+
+size_t *ndarray_shape_vector(size_t a, size_t b, size_t c, size_t d) {
+    // returns a newly allocated shape vector of ULAB_MAX_DIMS entries, filled from the right: d is the last
+    // axis, then c, b, a; arguments that do not fit are dropped. WARNING: assumes ULAB_MAX_DIMS <= 4!
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+    shape[ULAB_MAX_DIMS - 1] = d;
+    #if ULAB_MAX_DIMS > 1
+    shape[ULAB_MAX_DIMS - 2] = c;
+    #endif
+    #if ULAB_MAX_DIMS > 2
+    shape[ULAB_MAX_DIMS - 3] = b;
+    #endif
+    #if ULAB_MAX_DIMS > 3
+    shape[ULAB_MAX_DIMS - 4] = a;
+    #endif
+    return shape;
+}
+
+bool ndarray_object_is_array_like(mp_obj_t o_in) {
+    // array-like means: an ndarray, or one of the built-in tuple, list and range types
+    // the checks short-circuit in the order given
+    bool array_like = mp_obj_is_type(o_in, &ulab_ndarray_type);
+    array_like = array_like || mp_obj_is_type(o_in, &mp_type_tuple);
+    array_like = array_like || mp_obj_is_type(o_in, &mp_type_list);
+    array_like = array_like || mp_obj_is_type(o_in, &mp_type_range);
+    return array_like;
+}
+
+void fill_array_iterable(mp_float_t *array, mp_obj_t iterable) {
+    // stores the float value of every item yielded by iterable in consecutive slots of array
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t item, iterator = mp_getiter(iterable, &iter_buf);
+    mp_float_t *slot = array;
+    while((item = mp_iternext(iterator)) != MP_OBJ_STOP_ITERATION) {
+        *slot++ = (mp_float_t)mp_obj_get_float(item);
+    }
+}
+
+#if NDARRAY_HAS_DTYPE
+#if ULAB_HAS_DTYPE_OBJECT
+void ndarray_dtype_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
+    (void)kind; // the object always prints as dtype('<name>'), whatever the print kind
+    dtype_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    mp_print_str(print, "dtype('");
+    if(self->dtype == NDARRAY_BOOL) { // the type code; NDARRAY_BOOLEAN is the value of an array's boolean flag
+        mp_print_str(print, "bool')");
+    } else if(self->dtype == NDARRAY_UINT8) {
+        mp_print_str(print, "uint8')");
+    } else if(self->dtype == NDARRAY_INT8) {
+        mp_print_str(print, "int8')");
+    } else if(self->dtype == NDARRAY_UINT16) {
+        mp_print_str(print, "uint16')");
+    } else if(self->dtype == NDARRAY_INT16) {
+        mp_print_str(print, "int16')");
+    }
+    #if ULAB_SUPPORTS_COMPLEX
+    else if(self->dtype == NDARRAY_COMPLEX) {
+        mp_print_str(print, "complex')");
+    }
+    #endif
+    else { // every remaining code is reported as float, with the width of mp_float_t
+        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+        mp_print_str(print, "float32')");
+        #else
+        mp_print_str(print, "float64')");
+        #endif
+    }
+}
+
+mp_obj_t ndarray_dtype_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) {
+    (void) type;
+    mp_arg_check_num(n_args, n_kw, 0, 1, true);
+    mp_map_t kw_args;
+    mp_map_init_fixed_table(&kw_args, n_kw, args + n_args);
+    // dtype(x) accepts an ndarray (its type code is taken over), an integer type code, or a type name
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+    mp_arg_val_t _args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, args, &kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, _args);
+    mp_obj_t arg = _args[0].u_obj; // None without arguments, in which case args[0] must not be read
+    dtype_obj_t *dtype = m_new_obj(dtype_obj_t);
+    dtype->base.type = &ulab_dtype_type;
+    if(mp_obj_is_type(arg, &ulab_ndarray_type)) {
+        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(arg); // return the dtype of the array
+        dtype->dtype = ndarray->dtype;
+    } else {
+        uint8_t _dtype;
+        if(mp_obj_is_int(arg)) {
+            _dtype = mp_obj_get_int(arg);
+            if((_dtype != NDARRAY_BOOL) && (_dtype != NDARRAY_UINT8)
+                && (_dtype != NDARRAY_INT8) && (_dtype != NDARRAY_UINT16)
+                && (_dtype != NDARRAY_INT16) && (_dtype != NDARRAY_FLOAT)) {
+                mp_raise_TypeError(MP_ERROR_TEXT("data type not understood"));
+            }
+        } else if(mp_obj_is_str_or_bytes(arg)) {
+            GET_STR_DATA_LEN(arg, _dtype_, len); // names match by prefix; len keeps memcmp inside the string
+            if((len >= 5) && (memcmp(_dtype_, "uint8", 5) == 0)) {
+                _dtype = NDARRAY_UINT8;
+            } else if((len >= 4) && (memcmp(_dtype_, "int8", 4) == 0)) {
+                _dtype = NDARRAY_INT8;
+            } else if((len >= 6) && (memcmp(_dtype_, "uint16", 6) == 0)) {
+                _dtype = NDARRAY_UINT16;
+            } else if((len >= 5) && (memcmp(_dtype_, "int16", 5) == 0)) {
+                _dtype = NDARRAY_INT16;
+            } else if((len >= 5) && (memcmp(_dtype_, "float", 5) == 0)) {
+                _dtype = NDARRAY_FLOAT;
+            }
+            #if ULAB_SUPPORTS_COMPLEX
+            else if((len >= 7) && (memcmp(_dtype_, "complex", 7) == 0)) {
+                _dtype = NDARRAY_COMPLEX;
+            }
+            #endif
+            else {
+                mp_raise_TypeError(MP_ERROR_TEXT("data type not understood"));
+            }
+        } else { // neither an ndarray, nor an integer, nor a string: this includes a missing argument
+            mp_raise_TypeError(MP_ERROR_TEXT("data type not understood"));
+        }
+        dtype->dtype = _dtype;
+    }
+    return MP_OBJ_FROM_PTR(dtype);
+}
+
+mp_obj_t ndarray_dtype(mp_obj_t self_in) {
+    // wraps the type code stored in the array in a freshly allocated dtype object
+    dtype_obj_t *wrapper = m_new_obj(dtype_obj_t);
+    wrapper->base.type = &ulab_dtype_type;
+    wrapper->dtype = ((ndarray_obj_t *)MP_OBJ_TO_PTR(self_in))->dtype;
+    return MP_OBJ_FROM_PTR(wrapper);
+}
+
+#else
+// this is the cheap implementation of the dtype: type codes are handed back as plain integers
+mp_obj_t ndarray_dtype(mp_obj_t self_in) {
+    uint8_t dtype;
+    if(mp_obj_is_type(self_in, &ulab_ndarray_type)) {
+        ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+        dtype = self->dtype;
+    } else { // we assume here that the input is a single character
+        GET_STR_DATA_LEN(self_in, _dtype, len); // NOTE(review): self_in is not checked to be a string first
+        if((len != 1) || ((*_dtype != NDARRAY_BOOL) && (*_dtype != NDARRAY_UINT8)
+            && (*_dtype != NDARRAY_INT8) && (*_dtype != NDARRAY_UINT16)
+            && (*_dtype != NDARRAY_INT16) && (*_dtype != NDARRAY_FLOAT)
+            #if ULAB_SUPPORTS_COMPLEX
+                && (*_dtype != NDARRAY_COMPLEX)
+            #endif
+        )) {
+            mp_raise_TypeError(MP_ERROR_TEXT("data type not understood"));
+        }
+        dtype = *_dtype; // the character itself is the type code
+    }
+    return mp_obj_new_int(dtype);
+}
+#endif /* ULAB_HAS_DTYPE_OBJECT */
+#endif /* NDARRAY_HAS_DTYPE */
+
+#if ULAB_HAS_PRINTOPTIONS
+mp_obj_t ndarray_set_printoptions(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // both settings are keyword-only; a value of None leaves the corresponding setting untouched
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_threshold, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
+        { MP_QSTR_edgeitems, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
+    };
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+    mp_uint_t *settings[] = { &ndarray_print_threshold, &ndarray_print_edgeitems }; // same order as allowed_args
+    for(size_t i = 0; i < MP_ARRAY_SIZE(allowed_args); i++) {
+        if(parsed[i].u_obj != mp_const_none) {
+            *settings[i] = mp_obj_get_int(parsed[i].u_obj);
+        }
+    }
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(ndarray_set_printoptions_obj, 0, ndarray_set_printoptions);
+
+mp_obj_t ndarray_get_printoptions(void) {
+    mp_obj_t options = mp_obj_new_dict(2); // a new dictionary with the two current print settings
+    mp_obj_dict_store(options, MP_OBJ_NEW_QSTR(MP_QSTR_threshold), mp_obj_new_int(ndarray_print_threshold));
+    mp_obj_dict_store(options, MP_OBJ_NEW_QSTR(MP_QSTR_edgeitems), mp_obj_new_int(ndarray_print_edgeitems));
+    return options;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_0(ndarray_get_printoptions_obj, ndarray_get_printoptions);
+#endif
+
+mp_obj_t ndarray_get_item(ndarray_obj_t *ndarray, void *array) {
+    // wraps the single element stored at array in a proper micropython object
+    if(ndarray->boolean) {
+        // boolean arrays are read byte by byte, and any non-zero byte counts as True
+        if(*(uint8_t *)array) {
+            return mp_const_true;
+        }
+        return mp_const_false;
+    }
+    #if ULAB_SUPPORTS_COMPLEX
+    if(ndarray->dtype == NDARRAY_COMPLEX) {
+        // a complex element is a pair of consecutive floats: real part first, imaginary part second
+        mp_float_t *parts = (mp_float_t *)array;
+        mp_float_t real = parts[0], imag = parts[1];
+        return mp_obj_new_complex(real, imag);
+    }
+    #endif
+    // every other dtype is decoded by mp_binary_get_val_array
+    return mp_binary_get_val_array(ndarray->dtype, array, 0);
+}
+
+static void ndarray_print_element(const mp_print_t *print, ndarray_obj_t *ndarray, uint8_t *array) { // prints the element at array
+    #if ULAB_SUPPORTS_COMPLEX
+        if(ndarray->dtype == NDARRAY_COMPLEX) {
+            // complex elements are printed as <real><sign><imag>j; real part first
+            mp_float_t fvalue = *(mp_float_t *)array;
+            mp_obj_print_helper(print, mp_obj_new_float(fvalue), PRINT_REPR);
+            // imaginary part: it is read half an item further on
+            array += ndarray->itemsize / 2;
+            fvalue = *(mp_float_t *)array;
+            if(fvalue >= MICROPY_FLOAT_CONST(0.0) || isnan(fvalue)) { // a "+" is added for non-negative values and NaN
+                mp_print_str(print, "+");
+            }
+            array += ndarray->itemsize / 2; // moves the local pointer only, which is not read again
+            mp_obj_print_helper(print, mp_obj_new_float(fvalue), PRINT_REPR);
+            mp_print_str(print, "j");
+        } else {
+            mp_obj_print_helper(print, ndarray_get_item(ndarray, array), PRINT_REPR);
+        }
+    #else
+        mp_obj_print_helper(print, ndarray_get_item(ndarray, array), PRINT_REPR);
+    #endif
+}
+
+static void ndarray_print_row(const mp_print_t *print, ndarray_obj_t *ndarray, uint8_t *array, int32_t stride, size_t n) {
+    // prints the n elements starting at array, stride bytes apart, as a bracketed, comma-separated list
+    // rows longer than both the print threshold and 2*edgeitems are abbreviated to head, ..., tail
+    if(n == 0) {
+        return;
+    }
+    mp_print_str(print, "[");
+    if((n <= ndarray_print_threshold) || (n <= 2*ndarray_print_edgeitems)) { // if the array is short, print everything
+        ndarray_print_element(print, ndarray, array);
+        array += stride;
+        for(size_t i=1; i < n; i++, array += stride) {
+            mp_print_str(print, ", ");
+            ndarray_print_element(print, ndarray, array);
+        }
+    } else {
+        // head: each element is followed by its separator, so that edgeitems == 0 prints nothing here
+        for(size_t i=0; i < ndarray_print_edgeitems; i++, array += stride) {
+            ndarray_print_element(print, ndarray, array);
+            mp_print_str(print, ", ");
+        }
+        mp_print_str(print, "...");
+        // skip the elided middle part; the pointer then sits on the first element of the tail
+        array += stride * (n - 2 * ndarray_print_edgeitems);
+        for(size_t i=0; i < ndarray_print_edgeitems; i++, array += stride) {
+            mp_print_str(print, ", ");
+            ndarray_print_element(print, ndarray, array);
+        }
+    }
+    mp_print_str(print, "]");
+}
+
+#if ULAB_MAX_DIMS > 1
+static void ndarray_print_bracket(const mp_print_t *print, const size_t condition, const size_t shape, const char *string) {
+    if(condition < shape) { // string is emitted only, while condition is below shape; otherwise nothing is printed
+        mp_print_str(print, string);
+    }
+}
+#endif
+
+void ndarray_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
+    (void)kind; // the output has the form array(<data>, dtype=<name>), whatever the print kind
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    uint8_t *array = (uint8_t *)self->array;
+    mp_print_str(print, "array(");
+    if(self->len == 0) {
+        mp_print_str(print, "[]");
+        if(self->ndim > 1) { // empty arrays of higher rank also show their shape
+            mp_print_str(print, ", shape=(");
+            #if ULAB_MAX_DIMS > 1
+            for(uint8_t ndim = self->ndim; ndim > 1; ndim--) {
+                mp_printf(print, "%d,", (int)self->shape[ULAB_MAX_DIMS - ndim]); // to the printer passed in, not stdout
+            }
+            #else
+            mp_printf(print, "%d,", (int)self->shape[0]);
+            #endif
+            mp_printf(print, "%d)", (int)self->shape[ULAB_MAX_DIMS - 1]); // %d takes an int, hence the cast
+        }
+    } else {
+        #if ULAB_MAX_DIMS > 3
+        size_t i=0;
+        ndarray_print_bracket(print, 0, self->shape[ULAB_MAX_DIMS-4], "["); // printed only, if the axis is not empty
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            ndarray_print_bracket(print, 0, self->shape[ULAB_MAX_DIMS-3], "[");
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                ndarray_print_bracket(print, 0, self->shape[ULAB_MAX_DIMS-2], "[");
+                do {
+                #endif
+                    ndarray_print_row(print, self, array, self->strides[ULAB_MAX_DIMS-1], self->shape[ULAB_MAX_DIMS-1]);
+                #if ULAB_MAX_DIMS > 1
+                    array += self->strides[ULAB_MAX_DIMS-2];
+                    k++;
+                    ndarray_print_bracket(print, k, self->shape[ULAB_MAX_DIMS-2], ",\n       "); // separator between rows only
+                } while(k < self->shape[ULAB_MAX_DIMS-2]);
+                ndarray_print_bracket(print, 0, self->shape[ULAB_MAX_DIMS-2], "]");
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                j++;
+                ndarray_print_bracket(print, j, self->shape[ULAB_MAX_DIMS-3], ",\n\n       ");
+                array -= self->strides[ULAB_MAX_DIMS-2] * self->shape[ULAB_MAX_DIMS-2];
+                array += self->strides[ULAB_MAX_DIMS-3];
+            } while(j < self->shape[ULAB_MAX_DIMS-3]);
+            ndarray_print_bracket(print, 0, self->shape[ULAB_MAX_DIMS-3], "]");
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            array -= self->strides[ULAB_MAX_DIMS-3] * self->shape[ULAB_MAX_DIMS-3];
+            array += self->strides[ULAB_MAX_DIMS-4];
+            i++;
+            ndarray_print_bracket(print, i, self->shape[ULAB_MAX_DIMS-4], ",\n\n       ");
+        } while(i < self->shape[ULAB_MAX_DIMS-4]);
+        ndarray_print_bracket(print, 0, self->shape[ULAB_MAX_DIMS-4], "]");
+        #endif
+    }
+    mp_print_str(print, ", dtype=");
+    if(self->boolean) {
+        mp_print_str(print, "bool)");
+    } else if(self->dtype == NDARRAY_UINT8) {
+        mp_print_str(print, "uint8)");
+    } else if(self->dtype == NDARRAY_INT8) {
+        mp_print_str(print, "int8)");
+    } else if(self->dtype == NDARRAY_UINT16) {
+        mp_print_str(print, "uint16)");
+    } else if(self->dtype == NDARRAY_INT16) {
+        mp_print_str(print, "int16)");
+    }
+    #if ULAB_SUPPORTS_COMPLEX
+    else if(self->dtype == NDARRAY_COMPLEX) {
+        mp_print_str(print, "complex)");
+    }
+    #endif /* ULAB_SUPPORTS_COMPLEX */
+    else {
+        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+        mp_print_str(print, "float32)");
+        #else
+        mp_print_str(print, "float64)");
+        #endif
+    }
+}
+
+void ndarray_assign_elements(ndarray_obj_t *ndarray, mp_obj_t iterable, uint8_t dtype, size_t *idx) {
+    // assigns a single row in the tensor: items are stored from position *idx on, and *idx is advanced
+    mp_obj_t item; // NOTE: mp_iternext is applied to iterable directly, i.e., it must already be an iterator
+    if(ndarray->boolean) {
+        uint8_t *array = (uint8_t *)ndarray->array;
+        array += *idx;
+        while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+            if(mp_obj_is_true(item)) { // falsy items are not written: the slot keeps its previous content
+                *array = 1;
+            }
+            array++;
+            (*idx)++;
+        }
+    } else {
+        while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+            #if ULAB_SUPPORTS_COMPLEX
+                mp_float_t real;
+                mp_float_t imag;
+                if(dtype == NDARRAY_COMPLEX) { // a complex item fills two consecutive float slots, so *idx moves by 2
+                    mp_obj_get_complex(item, &real, &imag);
+                    ndarray_set_value(NDARRAY_FLOAT, ndarray->array, (*idx)++, mp_obj_new_float(real));
+                    ndarray_set_value(NDARRAY_FLOAT, ndarray->array, (*idx)++, mp_obj_new_float(imag));
+                } else {
+                    ndarray_set_value(dtype, ndarray->array, (*idx)++, item);
+                }
+            #else
+                ndarray_set_value(dtype, ndarray->array, (*idx)++, item);
+            #endif
+        }
+    }
+}
+
+bool ndarray_is_dense(ndarray_obj_t *ndarray) {
+    // returns true, if the array is dense, false otherwise: the array is dense, if the stride of its
+    // first axis can be calculated from the shape; arrays without axes own no stride, and count as dense
+    int32_t stride = ndarray->itemsize;
+    for(uint8_t i = ULAB_MAX_DIMS - 1; i > ULAB_MAX_DIMS-ndarray->ndim; i--) {
+        stride *= ndarray->shape[i];
+    }
+    return (ndarray->ndim == 0) || (stride == ndarray->strides[ULAB_MAX_DIMS-ndarray->ndim]);
+}
+
+static size_t multiply_size(size_t a, size_t b) {
+    // returns a * b, and raises a ValueError, if the product cannot be represented as a size_t
+    if((a != 0) && (b > SIZE_MAX / a)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("array is too big"));
+    }
+    return a * b;
+}
+
+ndarray_obj_t *ndarray_new_ndarray(uint8_t ndim, size_t *shape, int32_t *strides, uint8_t dtype, uint8_t *buffer) {
+    // Creates the base ndarray with shape, and initialises the values to straight 0s
+    // optionally, values can be supplied via the last argument: buffer is then used as is, and not copied
+    ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);
+    ndarray->base.type = &ulab_ndarray_type;
+    ndarray->dtype = dtype == NDARRAY_BOOL ? NDARRAY_UINT8 : dtype; // booleans are stored with the uint8 type code
+    ndarray->boolean = dtype == NDARRAY_BOOL ? NDARRAY_BOOLEAN : NDARRAY_NUMERIC;
+    ndarray->ndim = ndim;
+    ndarray->len = ndim == 0 ? 0 : 1;
+    ndarray->itemsize = ulab_binary_get_size(dtype);
+    int32_t *_strides;
+    if(strides == NULL) { // no strides supplied: derive dense strides from the shape
+        _strides = strides_from_shape(shape, ndarray->dtype);
+    } else {
+        _strides = strides;
+    }
+    for(uint8_t i=ULAB_MAX_DIMS; i > ULAB_MAX_DIMS-ndim; i--) { // only the last ndim axes are copied
+        ndarray->shape[i-1] = shape[i-1];
+        ndarray->strides[i-1] = _strides[i-1];
+        ndarray->len = multiply_size(ndarray->len, shape[i-1]);
+    }
+    // reject element counts, whose size in bytes could not be represented as a size_t
+    if (SIZE_MAX / ndarray->itemsize <= ndarray->len) {
+      mp_raise_ValueError(MP_ERROR_TEXT("ndarray length overflows"));
+    }
+
+    // if the length is 0, still allocate a single item, so that contractions can be handled
+    size_t len = multiply_size(ndarray->itemsize, MAX(1, ndarray->len));
+    uint8_t *array;
+    array = buffer;
+    if(array == NULL) {
+        // this should set all elements to 0, irrespective of the dtype (all bits are zero)
+        // we could, perhaps, leave this step out, and initialise the array only, when needed
+        array = m_new0(byte, len);
+    }
+    ndarray->array = array;
+    ndarray->origin = array; // a new array is its own origin
+    return ndarray;
+}
+
+ndarray_obj_t *ndarray_new_dense_ndarray(uint8_t ndim, size_t *shape, uint8_t dtype) {
+    // creates a zero-filled dense array, i.e., one, where the strides are derived directly from the shape
+    // passing NULL strides makes ndarray_new_ndarray compute them; the inline version below is disabled
+    // int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);
+    // strides[ULAB_MAX_DIMS - 1] = (int32_t)ulab_binary_get_size(dtype);
+    // for(size_t i = ULAB_MAX_DIMS; i > 1; i--) {
+    //     strides[i-2] = strides[i-1] * MAX(1, shape[i-1]);
+    // }
+    return ndarray_new_ndarray(ndim, shape, NULL, dtype, NULL);
+}
+
+ndarray_obj_t *ndarray_new_ndarray_from_tuple(mp_obj_tuple_t *_shape, uint8_t dtype) {
+    // creates a dense array from a tuple of axis lengths; the tuple is right-aligned in the shape vector
+    // NOTE(review): _shape->len is not checked against ULAB_MAX_DIMS here; presumably the callers do. Verify.
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    for(size_t i = 0; i < _shape->len; i++) { // walks the tuple from its last item backwards
+        shape[ULAB_MAX_DIMS - 1 - i] = mp_obj_get_int(_shape->items[_shape->len - 1 - i]);
+    }
+    return ndarray_new_dense_ndarray(_shape->len, shape, dtype);
+}
+
+void ndarray_copy_array(ndarray_obj_t *source, ndarray_obj_t *target, uint8_t shift) {
+    // TODO: if the array is dense, the content could be copied in a single pass
+    // copies the content of source->array into target->array, one item of target->itemsize bytes at a time
+    // it is assumed that the dtypes in source and target are the same
+    // Since the target is a new array, it is supposed to be dense: it is written without gaps
+    uint8_t *sarray = (uint8_t *)source->array;
+    uint8_t *tarray = (uint8_t *)target->array;
+    #if ULAB_SUPPORTS_COMPLEX
+    if(source->dtype == NDARRAY_COMPLEX) { // shift is a byte offset, and is applied to complex sources only
+        sarray += shift;
+    }
+    #endif
+
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do { // NOTE: do-while loops run their body once, even if the length of the axis is 0
+                    memcpy(tarray, sarray, target->itemsize);
+                    tarray += target->itemsize;
+                    sarray += source->strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1]; // rewind the finished axis
+                sarray += source->strides[ULAB_MAX_DIMS - 2]; // and step along the next outer one
+                k++;
+            } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+            sarray += source->strides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+        sarray += source->strides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+}
+
+ndarray_obj_t *ndarray_new_view(ndarray_obj_t *source, uint8_t ndim, size_t *shape, int32_t *strides, int32_t offset) {
+    // builds a new array header on top of the data of source: no element is copied
+    ndarray_obj_t *view = m_new_obj(ndarray_obj_t);
+    view->base.type = &ulab_ndarray_type;
+    view->dtype = source->dtype;
+    view->boolean = source->boolean;
+    view->itemsize = source->itemsize;
+    // the geometry comes from the arguments, and only the last ndim axes are taken over
+    view->ndim = ndim;
+    view->len = ndim == 0 ? 0 : 1;
+    for(uint8_t axis = ULAB_MAX_DIMS; axis-- > ULAB_MAX_DIMS - ndim; ) {
+        view->shape[axis] = shape[axis];
+        view->strides[axis] = strides[axis];
+        view->len *= shape[axis];
+    }
+    // offset is counted in bytes from the data pointer of the source
+    view->array = (uint8_t *)source->array + offset;
+    view->origin = source->origin;
+    return view;
+}
+
+ndarray_obj_t *ndarray_copy_view(ndarray_obj_t *source) {
+    // Produces a deep copy of an ndarray, or of a view of it, for any number
+    // of dimensions. The elements are transferred as raw memory, so that no
+    // dtype-specific code is required.
+
+    // Boolean sources are re-created with the NDARRAY_BOOL type code
+    const uint8_t dtype = source->boolean ? NDARRAY_BOOL : source->dtype;
+    int32_t *strides = strides_from_shape(source->shape, source->dtype);
+
+    ndarray_obj_t *copy = ndarray_new_ndarray(source->ndim, source->shape, strides, dtype, NULL);
+    ndarray_copy_array(source, copy, 0);
+    return copy;
+}
+
+ndarray_obj_t *ndarray_copy_view_convert_type(ndarray_obj_t *source, uint8_t dtype) {
+    // creates a copy, similar to ndarray_copy_view, but it also converts the dtype, if necessary
+    //
+    // source: the array or view to be copied
+    // dtype:  the type code of the returned array
+    //
+    // If the type codes agree, the work is delegated to ndarray_copy_view. Otherwise, the
+    // elements are read out one by one, and written into a newly created array.
+    // Raises a TypeError, if a complex source is to be converted to a non-complex dtype.
+    if(dtype == source->dtype) {
+        return ndarray_copy_view(source);
+    }
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, dtype);
+    uint8_t *sarray = (uint8_t *)source->array;
+    uint8_t *array = (uint8_t *)ndarray->array;
+
+    #if ULAB_SUPPORTS_COMPLEX
+    // a complex number occupies two floats
+    uint8_t complex_size = 2 * sizeof(mp_float_t);
+    #endif
+
+    // The nested do-while loops walk the source along its strides, one loop per
+    // compiled-in dimension; the target pointer simply advances by its itemsize.
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    mp_obj_t item;
+                    #if ULAB_SUPPORTS_COMPLEX
+                    if(source->dtype == NDARRAY_COMPLEX) {
+                        if(dtype != NDARRAY_COMPLEX) {
+                            mp_raise_TypeError(MP_ERROR_TEXT("cannot convert complex type"));
+                        } else {
+                            memcpy(array, sarray, complex_size);
+                        }
+                    } else {
+                    #endif
+                        if((source->dtype == NDARRAY_FLOAT) && (dtype != NDARRAY_FLOAT)) {
+                            // floats must be treated separately, because they can't directly be converted to integer types
+                            // the value is rounded first, and then narrowed to a 32-bit integer
+                            mp_float_t f = ndarray_get_float_value(sarray, source->dtype);
+                            item = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(f));
+                        } else {
+                            item = mp_binary_get_val_array(source->dtype, sarray, 0);
+                        }
+                    #if ULAB_SUPPORTS_COMPLEX
+                        if(dtype == NDARRAY_COMPLEX) {
+                            // real source, complex target: the value is stored as a float at the
+                            // beginning of the target element
+                            // NOTE(review): the second float of the element is not written here;
+                            // presumably the new array is zero-initialised -- confirm
+                            ndarray_set_value(NDARRAY_FLOAT, array, 0, item);
+                        } else {
+                            ndarray_set_value(dtype, array, 0, item);
+                        }
+                    }
+                    #else
+                    ndarray_set_value(dtype, array, 0, item);
+                    #endif
+                    // the target advances by its itemsize, the source by its own stride
+                    array += ndarray->itemsize;
+                    sarray += source->strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                // rewind the innermost axis, and step along the next one
+                sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                sarray += source->strides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+            sarray += source->strides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+        sarray += source->strides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+    return ndarray;
+}
+
+#if NDARRAY_HAS_BYTESWAP
+mp_obj_t ndarray_byteswap(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // changes the endianness of an array
+    // if the dtype of the input is uint8/int8/bool, simply return a copy or view
+    //
+    // Arguments: the array (positional, required), and the keyword `inplace` (default False).
+    // With inplace=False, the bytes are swapped in a copy of the input; otherwise a view
+    // of the input is created, and the bytes of the input's own data are swapped.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_inplace, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_FALSE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    // NOTE(review): the first argument is not checked for being an ndarray here -- confirm
+    // that the callers guarantee this
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(args[0].u_obj);
+    ndarray_obj_t *ndarray = NULL;
+    // anything other than the False object is treated as a request for in-place operation
+    if(args[1].u_obj == mp_const_false) {
+        ndarray = ndarray_copy_view(self);
+    } else {
+        ndarray = ndarray_new_view(self, self->ndim, self->shape, self->strides, 0);
+    }
+    if((self->dtype == NDARRAY_BOOL) || (self->dtype == NDARRAY_UINT8) || (self->dtype == NDARRAY_INT8)) {
+        // single-byte elements: nothing to swap
+        return MP_OBJ_FROM_PTR(ndarray);
+    } else {
+        uint8_t *array = (uint8_t *)ndarray->array;
+        // one do-while loop per compiled-in dimension, the pointer follows the strides of the result
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        if(self->dtype == NDARRAY_FLOAT) {
+                            // floats are 4 or 8 bytes wide, depending on the float implementation
+                            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+                            SWAP(uint8_t, array[0], array[3]);
+                            SWAP(uint8_t, array[1], array[2]);
+                            #else
+                            SWAP(uint8_t, array[0], array[7]);
+                            SWAP(uint8_t, array[1], array[6]);
+                            SWAP(uint8_t, array[2], array[5]);
+                            SWAP(uint8_t, array[3], array[4]);
+                            #endif
+                        } else {
+                            // every remaining dtype is handled as a two-byte quantity
+                            // NOTE(review): a complex dtype, if enabled, would also end up here -- confirm
+                            SWAP(uint8_t, array[0], array[1]);
+                        }
+                        array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < ndarray->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    // rewind the innermost axis, and step along the next one
+                    array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+                    array += ndarray->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < ndarray->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                array -= ndarray->strides[ULAB_MAX_DIMS - 2] * ndarray->shape[ULAB_MAX_DIMS-2];
+                array += ndarray->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < ndarray->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            array -= ndarray->strides[ULAB_MAX_DIMS - 3] * ndarray->shape[ULAB_MAX_DIMS-3];
+            array += ndarray->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < ndarray->shape[ULAB_MAX_DIMS - 4]);
+        #endif
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(ndarray_byteswap_obj, 1, ndarray_byteswap);
+#endif
+
+#if NDARRAY_HAS_COPY
+mp_obj_t ndarray_copy(mp_obj_t self_in) {
+    // method wrapper: returns a deep copy of the array
+    ndarray_obj_t *copy = ndarray_copy_view(MP_OBJ_TO_PTR(self_in));
+    return MP_OBJ_FROM_PTR(copy);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_copy_obj, ndarray_copy);
+#endif
+
+ndarray_obj_t *ndarray_new_linear_array(size_t len, uint8_t dtype) {
+    // Creates a dense array of type `dtype` with `len` elements along the last axis;
+    // a length of 0 results in an array with ndim == 0.
+    // The shape buffer is zero-initialised, so that the entries belonging to the
+    // unused leading dimensions are never handed on as indeterminate values.
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    if(len == 0) {
+        return ndarray_new_dense_ndarray(0, shape, dtype);
+    }
+    shape[ULAB_MAX_DIMS-1] = len;
+    return ndarray_new_dense_ndarray(1, shape, dtype);
+}
+
+ndarray_obj_t *ndarray_from_iterable(mp_obj_t obj, uint8_t dtype) {
+    // returns an ndarray from an iterable micropython object
+    // if the input is an ndarray, returns the input...
+    // (in that case, `dtype` is not consulted, and no copy is made)
+    if(mp_obj_is_type(obj, &ulab_ndarray_type)) {
+        return MP_OBJ_TO_PTR(obj);
+    }
+    // ... otherwise, takes the values from the iterable, and creates the corresponding ndarray
+
+    // First, we have to figure out, whether the elements of the iterable are iterables themself
+    uint8_t ndim = 0;
+    size_t shape[ULAB_MAX_DIMS];
+    mp_obj_iter_buf_t iter_buf[ULAB_MAX_DIMS];
+    mp_obj_t iterable[ULAB_MAX_DIMS];
+    // inspect only the very first element in each dimension; this is fast,
+    // but not completely safe, e.g., length compatibility is not checked
+    mp_obj_t item = obj;
+
+    while(1) {
+        // an object without a length terminates the descent
+        if(mp_obj_len_maybe(item) == MP_OBJ_NULL) {
+            break;
+        }
+        if(ndim == ULAB_MAX_DIMS) {
+            mp_raise_ValueError(MP_ERROR_TEXT("too many dimensions"));
+        }
+        shape[ndim] = MP_OBJ_SMALL_INT_VALUE(mp_obj_len_maybe(item));
+        if(shape[ndim] == 0) {
+            // an empty level still counts as a dimension, but there is nothing to descend into
+            ndim++;
+            break;
+        }
+        iterable[ndim] = mp_getiter(item, &iter_buf[ndim]);
+        item = mp_iternext(iterable[ndim]);
+        ndim++;
+    }
+    // at this point, the lengths are stored in shape[0..ndim-1], outermost first
+    for(uint8_t i = 0; i < ndim; i++) {
+        // align all values to the right
+        shape[ULAB_MAX_DIMS - i - 1] = shape[ndim - 1 - i];
+    }
+
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, dtype);
+    // start over from the top: the iterators above were only used for probing the shape
+    item = obj;
+    for(uint8_t i = 0; i < ndim - 1; i++) {
+        // if ndim > 1, descend into the hierarchy
+        iterable[ULAB_MAX_DIMS - ndim + i] = mp_getiter(item, &iter_buf[ULAB_MAX_DIMS - ndim + i]);
+        item = mp_iternext(iterable[ULAB_MAX_DIMS - ndim + i]);
+    }
+
+    // idx is handed to ndarray_assign_elements by reference, and is shared by all rows
+    size_t idx = 0;
+    // TODO: this could surely be done in a more elegant way...
+    #if ULAB_MAX_DIMS > 3
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            do {
+            #endif
+                // `item` is an innermost row: its elements are stored in the array
+                iterable[ULAB_MAX_DIMS - 1] = mp_getiter(item, &iter_buf[ULAB_MAX_DIMS - 1]);
+                ndarray_assign_elements(ndarray, iterable[ULAB_MAX_DIMS - 1], ndarray->dtype, &idx);
+            #if ULAB_MAX_DIMS > 1
+                // fetch the next row, if there is a second-to-last level at all
+                item = ndim > 1 ? mp_iternext(iterable[ULAB_MAX_DIMS - 2]) : MP_OBJ_STOP_ITERATION;
+            } while(item != MP_OBJ_STOP_ITERATION);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            // advance one level higher, and re-create the iterator of the level below;
+            // the iterator slot doubles as the termination flag of the loop
+            item = ndim > 2 ? mp_iternext(iterable[ULAB_MAX_DIMS - 3]) : MP_OBJ_STOP_ITERATION;
+            if(item != MP_OBJ_STOP_ITERATION) {
+                iterable[ULAB_MAX_DIMS - 2] = mp_getiter(item, &iter_buf[ULAB_MAX_DIMS - 2]);
+                item = mp_iternext(iterable[ULAB_MAX_DIMS - 2]);
+            } else {
+                iterable[ULAB_MAX_DIMS - 2] = MP_OBJ_STOP_ITERATION;
+            }
+        } while(iterable[ULAB_MAX_DIMS - 2] != MP_OBJ_STOP_ITERATION);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        item = ndim > 3 ? mp_iternext(iterable[ULAB_MAX_DIMS - 4]) : MP_OBJ_STOP_ITERATION;
+        if(item != MP_OBJ_STOP_ITERATION) {
+            iterable[ULAB_MAX_DIMS - 3] = mp_getiter(item, &iter_buf[ULAB_MAX_DIMS - 3]);
+            item = mp_iternext(iterable[ULAB_MAX_DIMS - 3]);
+        } else {
+            iterable[ULAB_MAX_DIMS - 3] = MP_OBJ_STOP_ITERATION;
+        }
+    } while(iterable[ULAB_MAX_DIMS - 3] != MP_OBJ_STOP_ITERATION);
+    #endif
+
+    return ndarray;
+}
+
+static uint8_t ndarray_init_helper(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Parses the constructor arguments, and returns the type code requested through
+    // the `dtype` keyword; the default is NDARRAY_FLOAT.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_INT(NDARRAY_FLOAT) } },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    const mp_obj_t dtype_arg = parsed[1].u_obj;
+    #if ULAB_HAS_DTYPE_OBJECT
+    if(mp_obj_is_type(dtype_arg, &ulab_dtype_type)) {
+        // a dtype object carries the type code in its dtype member
+        dtype_obj_t *dtype_obj = MP_OBJ_TO_PTR(dtype_arg);
+        return dtype_obj->dtype;
+    }
+    #endif
+    // otherwise, this must be an integer defined as a class constant (ulab.numpy.uint8 etc.)
+    return mp_obj_get_int(dtype_arg);
+}
+
+static mp_obj_t ndarray_make_new_core(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args, mp_map_t *kw_args) {
+    // Shared implementation of the constructors: the first positional argument is
+    // either an ndarray, which is copied and converted to the requested dtype,
+    // or it is taken to be an iterable.
+    const uint8_t dtype = ndarray_init_helper(n_args, args, kw_args);
+    const mp_obj_t input = args[0];
+
+    ndarray_obj_t *result;
+    if(!mp_obj_is_type(input, &ulab_ndarray_type)) {
+        result = ndarray_from_iterable(input, dtype);
+    } else {
+        result = ndarray_copy_view_convert_type(MP_OBJ_TO_PTR(input), dtype);
+    }
+    return MP_OBJ_FROM_PTR(result);
+}
+
+mp_obj_t ndarray_array_constructor(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // function-style array constructor, the equivalent of numpy.array(...);
+    // the number of keyword arguments is read from the keyword map
+    const size_t n_kw = kw_args->used;
+    return ndarray_make_new_core(&ulab_ndarray_type, n_args, n_kw, pos_args, kw_args);
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(ndarray_array_constructor_obj, 1, ndarray_array_constructor);
+
+mp_obj_t ndarray_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) {
+    // class constructor: the argument count is validated first, then the
+    // keyword arguments are wrapped into a map for the common constructor code
+    mp_arg_check_num(n_args, n_kw, 1, 2, true);
+
+    // the keyword arguments are stored right behind the positional ones
+    mp_map_t keywords;
+    mp_map_init_fixed_table(&keywords, n_kw, &args[n_args]);
+    return ndarray_make_new_core(type, n_args, n_kw, args, &keywords);
+}
+
+// broadcasting is used at a number of places, always include
+bool ndarray_can_broadcast(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t *ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Decides, whether the two arrays are compatible under numpy's broadcasting
+    // rules: along each axis, the lengths must agree, or one of them must be 1
+    // (a length of 0, i.e., an unused axis, is accepted, too).
+    //
+    // On the way, the function fills in the broadcast shape, and the strides to be
+    // used for the two operands (0 along axes of length < 2), and it increments
+    // *ndim for each axis of non-zero length. When false is returned, the output
+    // arrays are only partially filled.
+
+    lstrides[ULAB_MAX_DIMS - 1] = lhs->strides[ULAB_MAX_DIMS - 1];
+    rstrides[ULAB_MAX_DIMS - 1] = rhs->strides[ULAB_MAX_DIMS - 1];
+
+    // the axes are processed from the last one towards the first
+    for(uint8_t n = 0; n < ULAB_MAX_DIMS; n++) {
+        const uint8_t axis = ULAB_MAX_DIMS - 1 - n;
+
+        if((lhs->shape[axis] != rhs->shape[axis]) && (lhs->shape[axis] > 1) && (rhs->shape[axis] > 1)) {
+            // two different lengths, and neither of them can be stretched
+            return false;
+        }
+
+        shape[axis] = MAX(lhs->shape[axis], rhs->shape[axis]);
+        if(shape[axis] > 0) {
+            (*ndim)++;
+        }
+        lstrides[axis] = (lhs->shape[axis] < 2) ? 0 : lhs->strides[axis];
+        rstrides[axis] = (rhs->shape[axis] < 2) ? 0 : rhs->strides[axis];
+    }
+    return true;
+}
+
+#if NDARRAY_HAS_INPLACE_OPS
+bool ndarray_can_broadcast_inplace(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rstrides) {
+    // Decides, whether rhs can be broadcast onto lhs without changing the shape
+    // of lhs: along each axis, the two lengths must agree, or the length on the
+    // right hand side must be 1 (or 0, i.e., an unused axis).
+    //
+    // The strides to be used for rhs are written into rstrides (0 along the axes
+    // of length < 2). When false is returned, rstrides is only partially filled.
+
+    rstrides[ULAB_MAX_DIMS - 1] = rhs->strides[ULAB_MAX_DIMS - 1];
+
+    // the axes are processed from the last one towards the first
+    for(uint8_t n = 0; n < ULAB_MAX_DIMS; n++) {
+        const uint8_t axis = ULAB_MAX_DIMS - 1 - n;
+
+        if((lhs->shape[axis] != rhs->shape[axis]) && (rhs->shape[axis] > 1)) {
+            return false;
+        }
+        rstrides[axis] = (rhs->shape[axis] < 2) ? 0 : rhs->strides[axis];
+    }
+    return true;
+}
+#endif
+
+#if NDARRAY_IS_SLICEABLE
+static size_t slice_length(mp_bound_slice_t slice) {
+    // Number of elements selected by a bound slice. The correction term turns
+    // the truncating division into a rounding-up one for both step directions;
+    // a negative result means that the slice is empty.
+    const ssize_t correction = (slice.step > 0) ? -1 : 1;
+    const ssize_t len = (ssize_t)(slice.stop - slice.start + (slice.step + correction)) / slice.step;
+    return (len < 0) ? 0 : (size_t)len;
+}
+
+static mp_bound_slice_t generate_slice(mp_int_t n, mp_obj_t index) {
+    // Converts an index into a bound slice over an axis of length n.
+    // Slice objects are resolved by mp_obj_slice_indices, an integer k is turned
+    // into the slice k:k+1:1 (negative values count from the end).
+    // Raises an IndexError for integers out of range, and for any other index type.
+    mp_bound_slice_t slice;
+
+    if(mp_obj_is_type(index, &mp_type_slice)) {
+        mp_obj_slice_indices(index, n, &slice);
+        return slice;
+    }
+    if(!mp_obj_is_int(index)) {
+        mp_raise_msg(&mp_type_IndexError, MP_ERROR_TEXT("indices must be integers, slices, or Boolean lists"));
+    }
+
+    mp_int_t position = mp_obj_get_int(index);
+    if(position < 0) {
+        position += n;
+    }
+    if((position < 0) || (position >= n)) {
+        mp_raise_msg(&mp_type_IndexError, MP_ERROR_TEXT("index is out of bounds"));
+    }
+    slice.start = position;
+    slice.stop = position + 1;
+    slice.step = 1;
+    return slice;
+}
+
+static ndarray_obj_t *ndarray_view_from_slices(ndarray_obj_t *ndarray, mp_obj_tuple_t *tuple) {
+    // Creates a view of `ndarray` from a tuple of indices. The n-th item of the tuple
+    // applies to the n-th used axis of the array: an integer selects a single position and
+    // removes the axis, every other item is converted into a slice by generate_slice.
+    // Axes without a corresponding item are taken over unchanged.
+    // Raises an IndexError, if an integer index is out of bounds.
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    int32_t *strides = m_new0(int32_t, ULAB_MAX_DIMS);
+
+    uint8_t ndim = ndarray->ndim;
+
+    for(uint8_t i=0; i < ndim; i++) {
+        // copy from the end
+        shape[ULAB_MAX_DIMS - 1 - i] = ndarray->shape[ULAB_MAX_DIMS  - 1 - i];
+        strides[ULAB_MAX_DIMS - 1 - i] = ndarray->strides[ULAB_MAX_DIMS  - 1 - i];
+    }
+    // accumulated distance (in bytes) of the first element of the view from the beginning of the data
+    int32_t offset = 0;
+    for(uint8_t i=0; i  < tuple->len; i++) {
+        if(mp_obj_is_int(tuple->items[i])) {
+            // if item is an int, the dimension will first be reduced ...
+            ndim--;
+            int32_t k = mp_obj_get_int(tuple->items[i]);
+            if(k < 0) {
+                // negative indices count from the end of the axis
+                k += ndarray->shape[ULAB_MAX_DIMS - ndarray->ndim + i];
+            }
+            if((k >= (int32_t)ndarray->shape[ULAB_MAX_DIMS - ndarray->ndim + i]) || (k < 0)) {
+                mp_raise_msg(&mp_type_IndexError, MP_ERROR_TEXT("index is out of bounds"));
+            }
+            offset += ndarray->strides[ULAB_MAX_DIMS - ndarray->ndim + i] * k;
+            // ... and then we have to shift the shapes to the right
+            // (the entries of the axes processed so far overwrite the slot of the removed axis)
+            for(uint8_t j=0; j < i; j++) {
+                shape[ULAB_MAX_DIMS - ndarray->ndim + i - j] = shape[ULAB_MAX_DIMS - ndarray->ndim + i - j - 1];
+                strides[ULAB_MAX_DIMS - ndarray->ndim + i - j] = strides[ULAB_MAX_DIMS - ndarray->ndim + i - j - 1];
+            }
+        } else {
+            // the slice is bound to the current length of the axis; the new stride is the
+            // original one multiplied by the step, and the start position moves the offset
+            mp_bound_slice_t slice = generate_slice(shape[ULAB_MAX_DIMS - ndarray->ndim + i], tuple->items[i]);
+            shape[ULAB_MAX_DIMS - ndarray->ndim + i] = slice_length(slice);
+            offset += ndarray->strides[ULAB_MAX_DIMS - ndarray->ndim + i] * (int32_t)slice.start;
+            strides[ULAB_MAX_DIMS - ndarray->ndim + i] = (int32_t)slice.step * ndarray->strides[ULAB_MAX_DIMS - ndarray->ndim + i];
+        }
+    }
+    return ndarray_new_view(ndarray, ndim, shape, strides, offset);
+}
+
+void ndarray_assign_view(ndarray_obj_t *view, ndarray_obj_t *values) {
+    // Copies `values` into the memory that `view` points to, broadcasting the
+    // values, if necessary. The values are first converted to the dtype of the view.
+    // Nothing happens, if `values` is empty.
+    // Raises a ValueError, if the two shapes cannot be broadcast together.
+    if(values->len == 0) {
+        return;
+    }
+    uint8_t ndim = 0;
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    int32_t *lstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+    int32_t *rstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+    if(!ndarray_can_broadcast(view, values, &ndim, shape, lstrides, rstrides)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("operands could not be broadcast together"));
+    } else {
+
+        ndarray_obj_t *ndarray = ndarray_copy_view_convert_type(values, view->dtype);
+        // re-calculate rstrides, since the copy operation might have changed the directions of the strides
+        // (ndim and shape are re-computed as a side effect; neither is used below)
+        ndarray_can_broadcast(view, ndarray, &ndim, shape, lstrides, rstrides);
+        uint8_t *rarray = (uint8_t *)ndarray->array;
+
+
+        uint8_t *larray = (uint8_t *)view->array;
+
+        // One do-while loop per compiled-in dimension; the loop bounds are taken from
+        // the shape of the view, and both pointers follow the broadcast strides.
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        memcpy(larray, rarray, view->itemsize);
+                        larray += lstrides[ULAB_MAX_DIMS - 1];
+                        rarray += rstrides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l <  view->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    // rewind the innermost axis on both sides, and step along the next one
+                    larray -= lstrides[ULAB_MAX_DIMS - 1] * view->shape[ULAB_MAX_DIMS-1];
+                    larray += lstrides[ULAB_MAX_DIMS - 2];
+                    rarray -= rstrides[ULAB_MAX_DIMS - 1] * view->shape[ULAB_MAX_DIMS-1];
+                    rarray += rstrides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k <  view->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                larray -= lstrides[ULAB_MAX_DIMS - 2] * view->shape[ULAB_MAX_DIMS-2];
+                larray += lstrides[ULAB_MAX_DIMS - 3];
+                rarray -= rstrides[ULAB_MAX_DIMS - 2] * view->shape[ULAB_MAX_DIMS-2];
+                rarray += rstrides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j <  view->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            larray -= lstrides[ULAB_MAX_DIMS - 3] * view->shape[ULAB_MAX_DIMS-3];
+            larray += lstrides[ULAB_MAX_DIMS - 4];
+            rarray -= rstrides[ULAB_MAX_DIMS - 3] * view->shape[ULAB_MAX_DIMS-3];
+            rarray += rstrides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i <  view->shape[ULAB_MAX_DIMS - 4]);
+        #endif
+    }
+
+    // the scratch buffers are released explicitly
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(int32_t, lstrides, ULAB_MAX_DIMS);
+    m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+
+    return;
+}
+
+static mp_obj_t ndarray_from_boolean_index(ndarray_obj_t *ndarray, ndarray_obj_t *index) {
+    // returns a 1D array, indexed by a Boolean array: the result holds those
+    // elements of `ndarray`, for which the corresponding entry of `index` is true
+    // Raises a ValueError, if the lengths of the array and the index differ.
+    if(ndarray->len != index->len) {
+        mp_raise_ValueError(MP_ERROR_TEXT("array and index length must be equal"));
+    }
+    // the strides are kept in their signed type, so that stepping backwards
+    // through a reversed view is ordinary pointer arithmetic
+    const int32_t istride = index->strides[ULAB_MAX_DIMS - 1];
+    const int32_t astride = ndarray->strides[ULAB_MAX_DIMS - 1];
+
+    // first we have to find out how many trues there are
+    uint8_t *iarray = (uint8_t *)index->array;
+    size_t count = 0;
+    for(size_t i=0; i < index->len; i++) {
+        count += *iarray;
+        iarray += istride;
+    }
+    ndarray_obj_t *results = ndarray_new_linear_array(count, ndarray->dtype);
+    uint8_t *rarray = (uint8_t *)results->array;
+    uint8_t *array = (uint8_t *)ndarray->array;
+    // re-wind the index array, and copy the selected elements
+    iarray = (uint8_t *)index->array;
+    for(size_t i=0; i < index->len; i++) {
+        if(*iarray) {
+            memcpy(rarray, array, results->itemsize);
+            rarray += results->itemsize;
+        }
+        array += astride;
+        iarray += istride;
+    }
+    return MP_OBJ_FROM_PTR(results);
+}
+
+static mp_obj_t ndarray_assign_from_boolean_index(ndarray_obj_t *ndarray, ndarray_obj_t *index, ndarray_obj_t *values) {
+    // assigns values to a Boolean-indexed array
+    //
+    // ndarray: the array that is modified in place, and that is also returned
+    // index:   the Boolean index array
+    // values:  the values to be written at the positions, where the index is true
+    //
+    // Raises a TypeError, if complex values are to be assigned to a non-complex array.
+
+    // first we have to find out how many trues there are
+    uint8_t *iarray = (uint8_t *)index->array;
+    size_t istride = index->strides[ULAB_MAX_DIMS - 1];
+    size_t count = 0;
+    for(size_t i=0; i < index->len; i++) {
+        count += *iarray;
+        iarray += istride;
+    }
+    // re-wind the index array
+    iarray = index->array;
+    uint8_t *varray = (uint8_t *)values->array;
+    size_t vstride;
+
+    if(count == values->len) {
+        // there are as many values as true indices
+        vstride = values->strides[ULAB_MAX_DIMS - 1];
+    } else {
+        // there is a single value
+        // NOTE(review): this branch is taken for every length that differs from the
+        // number of trues, not only for a length of 1 -- confirm that this is intended
+        vstride = 0;
+    }
+
+    #if ULAB_SUPPORTS_COMPLEX
+    if(values->dtype == NDARRAY_COMPLEX) {
+        if(ndarray->dtype != NDARRAY_COMPLEX) {
+            mp_raise_TypeError(MP_ERROR_TEXT("cannot convert complex to dtype"));
+        } else {
+            // complex to complex: the elements can be copied as raw memory
+            uint8_t *array = (uint8_t *)ndarray->array;
+            for(size_t i = 0; i < ndarray->len; i++) {
+                if(*iarray) {
+                    memcpy(array, varray, ndarray->itemsize);
+                    varray += vstride;
+                }
+                array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                iarray += istride;
+            } while(0);
+            // NOTE(review): the `while(0);` above is a separate, empty statement following the for loop
+            return MP_OBJ_FROM_PTR(ndarray);
+        }
+    }
+    #endif
+
+    // the stride of the target, expressed in elements instead of bytes
+    int32_t lstrides = ndarray->strides[ULAB_MAX_DIMS - 1] / ndarray->itemsize;
+
+    // dispatch on the (target dtype, value dtype) pair; the copy itself is
+    // done by the BOOLEAN_ASSIGNMENT_LOOP macro, which is defined elsewhere
+    if(ndarray->dtype == NDARRAY_UINT8) {
+        if(values->dtype == NDARRAY_UINT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint8_t, uint8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint8_t, int8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_UINT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint8_t, uint16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint8_t, int16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_FLOAT) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint8_t, mp_float_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        }
+    } else if(ndarray->dtype == NDARRAY_INT8) {
+        if(values->dtype == NDARRAY_UINT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(int8_t, uint8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(int8_t, int8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_UINT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(int8_t, uint16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(int8_t, int16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_FLOAT) {
+            BOOLEAN_ASSIGNMENT_LOOP(int8_t, mp_float_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        }
+    } else if(ndarray->dtype == NDARRAY_UINT16) {
+        if(values->dtype == NDARRAY_UINT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint16_t, uint8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint16_t, int8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_UINT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint16_t, uint16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint16_t, int16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_FLOAT) {
+            BOOLEAN_ASSIGNMENT_LOOP(uint16_t, mp_float_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        }
+    } else if(ndarray->dtype == NDARRAY_INT16) {
+        if(values->dtype == NDARRAY_UINT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(int16_t, uint8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(int16_t, int8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_UINT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(int16_t, uint16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(int16_t, int16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_FLOAT) {
+            BOOLEAN_ASSIGNMENT_LOOP(int16_t, mp_float_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        }
+    } else {
+        // every other target dtype is written through mp_float_t pointers
+        #if ULAB_SUPPORTS_COMPLEX
+        if(ndarray->dtype == NDARRAY_COMPLEX) {
+            // a complex element is two floats wide, hence the float stride is doubled
+            lstrides *= 2;
+        }
+        #endif
+        if(values->dtype == NDARRAY_UINT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, uint8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT8) {
+            BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, int8_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_UINT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, uint16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_INT16) {
+            BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, int16_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        } else if(values->dtype == NDARRAY_FLOAT) {
+            BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, mp_float_t, ndarray, lstrides, iarray, istride, varray, vstride);
+        }
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+static mp_obj_t ndarray_get_slice(ndarray_obj_t *ndarray, mp_obj_t index, ndarray_obj_t *values) {
+    // Common implementation of reading from (values == NULL), and assigning to
+    // (values != NULL) an indexed array.
+    //
+    // Supported indices are 1D Boolean ndarrays, integers, slices, and tuples of
+    // integers/slices. A read returns the selected element or view; an assignment,
+    // and an index of any other type, results in None.
+    // Raises NotImplementedError for ndarray indices that are not 1D Boolean arrays,
+    // and IndexError, if the tuple holds more indices than the array has dimensions.
+    if(mp_obj_is_type(index, &ulab_ndarray_type)) {
+        ndarray_obj_t *nindex = MP_OBJ_TO_PTR(index);
+        if((nindex->ndim > 1) || (nindex->boolean == false)) {
+            mp_raise_NotImplementedError(MP_ERROR_TEXT("operation is implemented for 1D Boolean arrays only"));
+        }
+        if(values == NULL) { // return value(s)
+            return ndarray_from_boolean_index(ndarray, nindex);
+        }
+        // assign value(s)
+        ndarray_assign_from_boolean_index(ndarray, nindex, values);
+        return mp_const_none;
+    }
+
+    mp_obj_tuple_t *tuple;
+    if(mp_obj_is_type(index, &mp_type_tuple)) {
+        tuple = MP_OBJ_TO_PTR(index);
+        if(tuple->len > ndarray->ndim) {
+            mp_raise_msg(&mp_type_IndexError, MP_ERROR_TEXT("too many indices"));
+        }
+    } else if(mp_obj_is_int(index) || mp_obj_is_type(index, &mp_type_slice)) {
+        // a single index is wrapped into a one-element tuple; mp_obj_new_tuple copies
+        // the item into the new tuple, so no separate scratch buffer is required
+        tuple = MP_OBJ_TO_PTR(mp_obj_new_tuple(1, &index));
+    } else {
+        // unsupported index type
+        return mp_const_none;
+    }
+
+    ndarray_obj_t *view = ndarray_view_from_slices(ndarray, tuple);
+    if(values != NULL) { // assign value(s)
+        ndarray_assign_view(view, values);
+        return mp_const_none;
+    }
+    // return value(s)
+    // if the view has been reduced to nothing, return a single value
+    if(view->ndim == 0) {
+        return ndarray_get_item(view, view->array);
+    }
+    return MP_OBJ_FROM_PTR(view);
+}
+
+mp_obj_t ndarray_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
+    // subscription handler: MP_OBJ_NULL requests a deletion (not supported),
+    // MP_OBJ_SENTINEL requests a read, and everything else is an assignment
+    if(value == MP_OBJ_NULL) {
+        mp_raise_ValueError(MP_ERROR_TEXT("cannot delete array elements"));
+    }
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+
+    // a NULL pointer tells ndarray_get_slice to return value(s); for an
+    // assignment, the value must be an ndarray, or a scalar
+    ndarray_obj_t *values = NULL;
+    if(value != MP_OBJ_SENTINEL) {
+        values = ndarray_from_mp_obj(value, 0);
+    }
+    return ndarray_get_slice(self, index, values);
+}
+#endif /* NDARRAY_IS_SLICEABLE */
+
+#if NDARRAY_IS_ITERABLE
+
+// ndarray iterator: hands the array and the caller-supplied iterator buffer
+// over to ndarray_new_ndarray_iterator and returns the resulting object
+mp_obj_t ndarray_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) {
+    mp_obj_t iterator = ndarray_new_ndarray_iterator(o_in, iter_buf);
+    return iterator;
+}
+
+// State of an iteration over an ndarray. Instances are laid out in the
+// mp_obj_iter_buf_t passed to ndarray_new_ndarray_iterator.
+typedef struct _mp_obj_ndarray_it_t {
+    mp_obj_base_t base;     // object header; type is set to mp_type_polymorph_iter
+    mp_fun_1_t iternext;    // step function, set to ndarray_iternext
+    mp_obj_t ndarray;       // the array that is being iterated over
+    size_t cur;             // index of the next item along the array's leading axis
+} mp_obj_ndarray_it_t;
+
+// Produces the next item of an ndarray iteration.
+//
+// The iteration runs along the leading axis, shape[ULAB_MAX_DIMS - ndim].
+// For a linear array the item is the scalar at the current position; for a
+// higher-dimensional array it is a view with one dimension less.
+// MP_OBJ_STOP_ITERATION is returned once the axis is exhausted.
+mp_obj_t ndarray_iternext(mp_obj_t self_in) {
+    mp_obj_ndarray_it_t *it = MP_OBJ_TO_PTR(self_in);
+    ndarray_obj_t *source = MP_OBJ_TO_PTR(it->ndarray);
+
+    if(it->cur >= source->shape[ULAB_MAX_DIMS - source->ndim]) {
+        return MP_OBJ_STOP_ITERATION;
+    }
+
+    // remember the current position, and advance the iterator
+    size_t position = it->cur++;
+
+    // separating the 1D-only build out saves 50 bytes
+    #if ULAB_MAX_DIMS == 1
+    uint8_t *item = (uint8_t *)source->array + position * source->strides[0];
+    return ndarray_get_item(source, item);
+    #else
+    if(source->ndim == 1) { // linear array: return a single value
+        uint8_t *item = (uint8_t *)source->array + position * source->strides[ULAB_MAX_DIMS - 1];
+        return ndarray_get_item(source, item);
+    }
+    // tensor: return the reduced view
+    size_t offset = position * source->strides[ULAB_MAX_DIMS - source->ndim];
+    return MP_OBJ_FROM_PTR(ndarray_new_view(source, source->ndim-1, source->shape, source->strides, offset));
+    #endif
+}
+
+// Sets up an iterator over ndarray inside the caller-provided iter_buf, and
+// returns it as an object. The iterator starts at position 0, and is stepped
+// by ndarray_iternext.
+mp_obj_t ndarray_new_ndarray_iterator(mp_obj_t ndarray, mp_obj_iter_buf_t *iter_buf) {
+    // the iterator state lives in iter_buf, so it has to fit in there
+    assert(sizeof(mp_obj_ndarray_it_t) <= sizeof(mp_obj_iter_buf_t));
+    mp_obj_ndarray_it_t *it = (mp_obj_ndarray_it_t *)iter_buf;
+    it->cur = 0;
+    it->ndarray = ndarray;
+    it->iternext = ndarray_iternext;
+    it->base.type = &mp_type_polymorph_iter;
+    return MP_OBJ_FROM_PTR(it);
+}
+#endif /* NDARRAY_IS_ITERABLE */
+
+#if NDARRAY_HAS_FLATTEN
+// Returns a new linear array that holds a copy of the elements of the array
+// in pos_args[0].
+//
+// Keyword-only argument:
+//   order: 'C' (default) or 'F'; any other value raises a ValueError.
+//
+// The result is allocated with ndarray_new_linear_array(self->len, self->dtype),
+// and is filled one element (self->itemsize bytes) at a time, while the source
+// is walked by means of its strides. The number of nested loops that are
+// compiled in depends on ULAB_MAX_DIMS.
+mp_obj_t ndarray_flatten(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_order, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_QSTR(MP_QSTR_C)} },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    // pos_args[0] is the array itself, hence, it is skipped during parsing
+    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(pos_args[0]);
+    GET_STR_DATA_LEN(args[0].u_obj, order, len);
+    // only the single-character strings "C", and "F" are accepted
+    if((len != 1) || ((memcmp(order, "C", 1) != 0) && (memcmp(order, "F", 1) != 0))) {
+        mp_raise_ValueError(MP_ERROR_TEXT("flattening order must be either 'C', or 'F'"));
+    }
+
+    uint8_t *sarray = (uint8_t *)self->array;
+    ndarray_obj_t *ndarray = ndarray_new_linear_array(self->len, self->dtype);
+    uint8_t *array = (uint8_t *)ndarray->array;
+
+    if(memcmp(order, "C", 1) == 0) { // C-type ordering
+        // the last axis (ULAB_MAX_DIMS - 1) is traversed in the innermost loop
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        memcpy(array, sarray, self->itemsize);
+                        array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                        sarray += self->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l <  self->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    // rewind the innermost axis, and step once along the next one
+                    sarray -= self->strides[ULAB_MAX_DIMS - 1] * self->shape[ULAB_MAX_DIMS-1];
+                    sarray += self->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k <  self->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                sarray -= self->strides[ULAB_MAX_DIMS - 2] * self->shape[ULAB_MAX_DIMS-2];
+                sarray += self->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j <  self->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            sarray -= self->strides[ULAB_MAX_DIMS - 3] * self->shape[ULAB_MAX_DIMS-3];
+            sarray += self->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i <  self->shape[ULAB_MAX_DIMS - 4]);
+        #endif
+    } else { // 'F', Fortran-type ordering
+        // here axis 0 is traversed in the innermost loop, and axis 3 in the outermost
+        // NOTE(review): the axes are addressed by the absolute indices 0..3, and not
+        // relative to ULAB_MAX_DIMS - confirm the behaviour for ndim < ULAB_MAX_DIMS
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        memcpy(array, sarray, self->itemsize);
+                        array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                        sarray += self->strides[0];
+                        l++;
+                    } while(l < self->shape[0]);
+                #if ULAB_MAX_DIMS > 1
+                    // rewind axis 0, and step once along axis 1
+                    sarray -= self->strides[0] * self->shape[0];
+                    sarray += self->strides[1];
+                    k++;
+                } while(k < self->shape[1]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                sarray -= self->strides[1] * self->shape[1];
+                sarray += self->strides[2];
+                j++;
+            } while(j < self->shape[2]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            sarray -= self->strides[2] * self->shape[2];
+            sarray += self->strides[3];
+            i++;
+        } while(i < self->shape[3]);
+        #endif
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(ndarray_flatten_obj, 1, ndarray_flatten);
+#endif
+
+#if NDARRAY_HAS_ITEMSIZE
+// Returns the itemsize field of the array as a small integer object.
+mp_obj_t ndarray_itemsize(mp_obj_t self_in) {
+    const ndarray_obj_t *array = MP_OBJ_TO_PTR(self_in);
+    return MP_OBJ_NEW_SMALL_INT(array->itemsize);
+}
+#endif
+
+#if NDARRAY_HAS_SHAPE
+// Returns the shape of the array as a tuple of integers.
+//
+// The tuple holds the last MAX(1, ndim) entries of the shape array, so that
+// at least one length is always reported.
+mp_obj_t ndarray_shape(mp_obj_t self_in) {
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    uint8_t count = MAX(1, self->ndim);
+    mp_obj_t *lengths = m_new(mp_obj_t, count);
+    // the used axes occupy the tail of self->shape
+    uint8_t first_axis = ULAB_MAX_DIMS - count;
+    for(uint8_t k = 0; k < count; k++) {
+        lengths[k] = mp_obj_new_int(self->shape[first_axis + k]);
+    }
+    mp_obj_t result = mp_obj_new_tuple(count, lengths);
+    // the temporary item buffer is released once the tuple has been created
+    m_del(mp_obj_t, lengths, count);
+    return result;
+}
+#endif
+
+#if NDARRAY_HAS_SIZE
+// Returns the len field of the array as an integer object.
+mp_obj_t ndarray_size(mp_obj_t self_in) {
+    const ndarray_obj_t *array = MP_OBJ_TO_PTR(self_in);
+    return mp_obj_new_int(array->len);
+}
+#endif
+
+#if NDARRAY_HAS_STRIDES
+// Returns the strides of the array as a tuple of ndim integers.
+mp_obj_t ndarray_strides(mp_obj_t self_in) {
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    mp_obj_t *steps = m_new(mp_obj_t, self->ndim);
+    int8_t axis = 0;
+    while(axis < self->ndim) {
+        // the used axes occupy the tail of self->strides
+        steps[axis] = mp_obj_new_int(self->strides[ULAB_MAX_DIMS - self->ndim + axis]);
+        axis++;
+    }
+    mp_obj_t result = mp_obj_new_tuple(self->ndim, steps);
+    // the temporary item buffer is released once the tuple has been created
+    m_del(mp_obj_t, steps, self->ndim);
+    return result;
+}
+#endif
+
+#if NDARRAY_HAS_TOBYTES
+// Wraps the data of a dense array in a bytearray object.
+//
+// In contrast to numpy, no copy is made: the bytearray is created by reference
+// to the array's data pointer. Arrays that are not dense raise a ValueError,
+// because their data cannot be described by a single contiguous range.
+mp_obj_t ndarray_tobytes(mp_obj_t self_in) {
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    bool dense = ndarray_is_dense(self);
+    if(!dense) {
+        mp_raise_ValueError(MP_ERROR_TEXT("tobytes can be invoked for dense arrays only"));
+    }
+    size_t nbytes = self->itemsize * self->len;
+    return mp_obj_new_bytearray_by_ref(nbytes, self->array);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_tobytes_obj, ndarray_tobytes);
+#endif
+
+#if NDARRAY_HAS_TOLIST
+// Converts the last dim axes of self, beginning at the data position array,
+// into a (nested) list.
+//
+// With dim == 1, the list holds the scalar items of the axis; otherwise each
+// entry is the list produced by the recursion with dim - 1.
+static mp_obj_t ndarray_recursive_list(ndarray_obj_t *self, uint8_t *array, uint8_t dim) {
+    size_t count = self->shape[ULAB_MAX_DIMS - dim];
+    int32_t step = self->strides[ULAB_MAX_DIMS - dim];
+
+    mp_obj_list_t *result = MP_OBJ_TO_PTR(mp_obj_new_list(count, NULL));
+    uint8_t *cursor = array;
+    for(size_t n = 0; n < count; n++, cursor += step) {
+        result->items[n] = (dim == 1) ? ndarray_get_item(self, cursor)
+                                      : ndarray_recursive_list(self, cursor, dim-1);
+    }
+    return MP_OBJ_FROM_PTR(result);
+}
+
+// Returns the content of the array as a (nested) list. The conversion starts
+// at the beginning of the data, and covers all ndim axes.
+mp_obj_t ndarray_tolist(mp_obj_t self_in) {
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    return ndarray_recursive_list(self, (uint8_t *)self->array, self->ndim);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_tolist_obj, ndarray_tolist);
+#endif
+
+// Binary operations
+// Turns a micropython object into an ndarray.
+//
+// - integers in [-32767, 32767] become a one-element integer array, whose dtype
+//   is chosen from the value, and, for non-negative values, from other_type:
+//   a signed other_type (int8/int16) yields a signed dtype, everything else
+//   (including other_type == 0) an unsigned one
+// - integers outside of that range, and floats become a one-element float array
+// - Booleans become a one-element Boolean array
+// - an ndarray is returned as is (no copy)
+// - complex numbers become a one-element complex array, if ULAB_SUPPORTS_COMPLEX
+// - everything else is passed to ndarray_from_iterable with a float dtype
+ndarray_obj_t *ndarray_from_mp_obj(mp_obj_t obj, uint8_t other_type) {
+    if(mp_obj_is_int(obj)) {
+        int32_t ivalue = mp_obj_get_int(obj);
+        if((ivalue >= -32767) && (ivalue <= 32767)) {
+            uint8_t dtype;
+            if(ivalue < 0) {
+                dtype = (ivalue > -128) ? NDARRAY_INT8 : NDARRAY_INT16;
+            } else if((other_type == NDARRAY_INT8) || (other_type == NDARRAY_INT16)) {
+                dtype = (ivalue < 128) ? NDARRAY_INT8 : NDARRAY_INT16;
+            } else { // other_type = 0 is also included here
+                dtype = (ivalue < 256) ? NDARRAY_UINT8 : NDARRAY_UINT16;
+            }
+            ndarray_obj_t *integer = ndarray_new_linear_array(1, dtype);
+            ndarray_set_value(dtype, integer->array, 0, obj);
+            return integer;
+        }
+        // the integer value does not fit the ulab integer types chosen above, so move on to float
+        ndarray_obj_t *wide = ndarray_new_linear_array(1, NDARRAY_FLOAT);
+        ((mp_float_t *)wide->array)[0] = (mp_float_t)ivalue;
+        return wide;
+    }
+
+    if(mp_obj_is_float(obj)) {
+        ndarray_obj_t *real = ndarray_new_linear_array(1, NDARRAY_FLOAT);
+        ((mp_float_t *)real->array)[0] = mp_obj_get_float(obj);
+        return real;
+    }
+
+    if(mp_obj_is_bool(obj)) {
+        ndarray_obj_t *flag = ndarray_new_linear_array(1, NDARRAY_BOOL);
+        // only True is written; for False, the freshly created array is left untouched
+        if(obj == mp_const_true) {
+            *(uint8_t *)flag->array = 1;
+        }
+        return flag;
+    }
+
+    if(mp_obj_is_type(obj, &ulab_ndarray_type)) {
+        return MP_OBJ_TO_PTR(obj);
+    }
+
+    #if ULAB_SUPPORTS_COMPLEX
+    if(mp_obj_is_type(obj, &mp_type_complex)) {
+        ndarray_obj_t *cplx = ndarray_new_linear_array(1, NDARRAY_COMPLEX);
+        mp_float_t *parts = (mp_float_t *)cplx->array;
+        // real part first, imaginary part second
+        mp_obj_get_complex(obj, &parts[0], &parts[1]);
+        return cplx;
+    }
+    #endif
+
+    // assume that the input is an iterable (raises an exception, if it is not the case)
+    return ndarray_from_iterable(obj, NDARRAY_FLOAT);
+}
+
+#if NDARRAY_HAS_BINARY_OPS || NDARRAY_HAS_INPLACE_OPS
+// Entry point of all binary, and in-place operators of the ndarray type.
+//
+// _op:  the operator code
+// lobj: the first operand, as handed over by the interpreter
+// robj: the second operand, as handed over by the interpreter
+//
+// Both operands are converted with ndarray_from_mp_obj. For the reflected
+// operators (MP_BINARY_OP_REVERSE_*) the operands are swapped, and the
+// operator is replaced by its direct counterpart.
+//
+// Returns the result of the operation, or MP_OBJ_NULL, if the operator is not
+// supported. If at least one of the operands is empty, an empty array of the
+// appropriate dtype is returned (mp_const_none for operators not listed in
+// that branch). Raises a ValueError, if the operands cannot be broadcast together.
+mp_obj_t ndarray_binary_op(mp_binary_op_t _op, mp_obj_t lobj, mp_obj_t robj) {
+    // TODO: implement in-place operators
+    // if the ndarray stands on the right hand side of the expression, simply swap the operands
+    ndarray_obj_t *lhs, *rhs;
+    mp_binary_op_t op = _op;
+    if((op == MP_BINARY_OP_REVERSE_ADD) || (op == MP_BINARY_OP_REVERSE_MULTIPLY) ||
+        (op == MP_BINARY_OP_REVERSE_POWER) || (op == MP_BINARY_OP_REVERSE_SUBTRACT) ||
+        (op == MP_BINARY_OP_REVERSE_TRUE_DIVIDE)) {
+        lhs = ndarray_from_mp_obj(robj, 0);
+        rhs = ndarray_from_mp_obj(lobj, lhs->dtype);
+    } else {
+        lhs = ndarray_from_mp_obj(lobj, 0);
+        rhs = ndarray_from_mp_obj(robj, lhs->dtype);
+    }
+    // with the operands swapped, the reflected operators become the direct ones
+    if(op == MP_BINARY_OP_REVERSE_ADD) {
+        op = MP_BINARY_OP_ADD;
+    } else if(op == MP_BINARY_OP_REVERSE_MULTIPLY) {
+        op = MP_BINARY_OP_MULTIPLY;
+    } else if(op == MP_BINARY_OP_REVERSE_POWER) {
+        op = MP_BINARY_OP_POWER;
+    } else if(op == MP_BINARY_OP_REVERSE_SUBTRACT) {
+        op = MP_BINARY_OP_SUBTRACT;
+    } else if(op == MP_BINARY_OP_REVERSE_TRUE_DIVIDE) {
+        op = MP_BINARY_OP_TRUE_DIVIDE;
+    }
+
+    // scratch space for the broadcast shape, and strides
+    uint8_t ndim = 0;
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    int32_t *lstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+    int32_t *rstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+    uint8_t broadcastable;
+    if((op == MP_BINARY_OP_INPLACE_ADD) || (op == MP_BINARY_OP_INPLACE_MULTIPLY) || (op == MP_BINARY_OP_INPLACE_POWER) ||
+        (op == MP_BINARY_OP_INPLACE_SUBTRACT) || (op == MP_BINARY_OP_INPLACE_TRUE_DIVIDE)) {
+        broadcastable = ndarray_can_broadcast_inplace(lhs, rhs, rstrides);
+    } else {
+        broadcastable = ndarray_can_broadcast(lhs, rhs, &ndim, shape, lstrides, rstrides);
+    }
+    if(!broadcastable) {
+        // the scratch space has to be released before the exception is raised,
+        // because execution does not continue after mp_raise_ValueError
+        m_del(size_t, shape, ULAB_MAX_DIMS);
+        m_del(int32_t, lstrides, ULAB_MAX_DIMS);
+        m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+        mp_raise_ValueError(MP_ERROR_TEXT("operands could not be broadcast together"));
+    }
+    // the empty arrays have to be treated separately
+    uint8_t dtype = NDARRAY_INT16;
+    ndarray_obj_t *nd;
+    if((lhs->len == 0) || (rhs->len == 0)) {
+        // none of the results below makes use of the broadcast shape, or strides
+        m_del(size_t, shape, ULAB_MAX_DIMS);
+        m_del(int32_t, lstrides, ULAB_MAX_DIMS);
+        m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+        switch(op) {
+            case MP_BINARY_OP_INPLACE_ADD:
+            case MP_BINARY_OP_INPLACE_MULTIPLY:
+            case MP_BINARY_OP_INPLACE_SUBTRACT:
+            case MP_BINARY_OP_ADD:
+            case MP_BINARY_OP_MULTIPLY:
+            case MP_BINARY_OP_SUBTRACT:
+                // here we don't have to list those cases that result in an int16,
+                // because dtype is initialised with that NDARRAY_INT16
+                if(lhs->dtype == rhs->dtype) {
+                    dtype = rhs->dtype;
+                } else if((lhs->dtype == NDARRAY_FLOAT) || (rhs->dtype == NDARRAY_FLOAT)) {
+                    dtype = NDARRAY_FLOAT;
+                } else if(((lhs->dtype == NDARRAY_UINT8) && (rhs->dtype == NDARRAY_UINT16)) ||
+                            ((lhs->dtype == NDARRAY_INT8) && (rhs->dtype == NDARRAY_UINT16)) ||
+                            ((rhs->dtype == NDARRAY_UINT8) && (lhs->dtype == NDARRAY_UINT16)) ||
+                            ((rhs->dtype == NDARRAY_INT8) && (lhs->dtype == NDARRAY_UINT16))) {
+                    dtype = NDARRAY_UINT16;
+                }
+                return MP_OBJ_FROM_PTR(ndarray_new_linear_array(0, dtype));
+                break;
+
+            case MP_BINARY_OP_INPLACE_POWER:
+            case MP_BINARY_OP_INPLACE_TRUE_DIVIDE:
+            case MP_BINARY_OP_POWER:
+            case MP_BINARY_OP_TRUE_DIVIDE:
+                return MP_OBJ_FROM_PTR(ndarray_new_linear_array(0, NDARRAY_FLOAT));
+                break;
+
+            case MP_BINARY_OP_LESS:
+            case MP_BINARY_OP_LESS_EQUAL:
+            case MP_BINARY_OP_MORE:
+            case MP_BINARY_OP_MORE_EQUAL:
+            case MP_BINARY_OP_EQUAL:
+            case MP_BINARY_OP_NOT_EQUAL:
+                // comparisons produce an (empty) Boolean array
+                nd = ndarray_new_linear_array(0, NDARRAY_UINT8);
+                nd->boolean = 1;
+                return MP_OBJ_FROM_PTR(nd);
+                break;
+
+            default:
+                return mp_const_none;
+                break;
+        }
+    }
+
+    switch(op) {
+        // first the in-place operators
+        #if NDARRAY_HAS_INPLACE_ADD
+        case MP_BINARY_OP_INPLACE_ADD:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_inplace_ams(lhs, rhs, rstrides, op);
+            break;
+        #endif
+        #if NDARRAY_HAS_INPLACE_MULTIPLY
+        case MP_BINARY_OP_INPLACE_MULTIPLY:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_inplace_ams(lhs, rhs, rstrides, op);
+            break;
+        #endif
+        #if NDARRAY_HAS_INPLACE_POWER
+        case MP_BINARY_OP_INPLACE_POWER:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_inplace_power(lhs, rhs, rstrides);
+            break;
+        #endif
+        #if NDARRAY_HAS_INPLACE_SUBTRACT
+        case MP_BINARY_OP_INPLACE_SUBTRACT:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_inplace_ams(lhs, rhs, rstrides, op);
+            break;
+        #endif
+        #if NDARRAY_HAS_INPLACE_TRUE_DIVIDE
+        case MP_BINARY_OP_INPLACE_TRUE_DIVIDE:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_inplace_divide(lhs, rhs, rstrides);
+            break;
+        #endif
+        // end of in-place operators
+
+        #if NDARRAY_HAS_BINARY_OP_LESS
+        case MP_BINARY_OP_LESS:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            // here we simply swap the operands
+            return ndarray_binary_more(rhs, lhs, ndim, shape, rstrides, lstrides, MP_BINARY_OP_MORE);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_LESS_EQUAL
+        case MP_BINARY_OP_LESS_EQUAL:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            // here we simply swap the operands
+            return ndarray_binary_more(rhs, lhs, ndim, shape, rstrides, lstrides, MP_BINARY_OP_MORE_EQUAL);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_EQUAL
+        case MP_BINARY_OP_EQUAL:
+            return ndarray_binary_equality(lhs, rhs, ndim, shape, lstrides, rstrides, MP_BINARY_OP_EQUAL);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_NOT_EQUAL
+        case MP_BINARY_OP_NOT_EQUAL:
+            return ndarray_binary_equality(lhs, rhs, ndim, shape, lstrides, rstrides, MP_BINARY_OP_NOT_EQUAL);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_ADD
+        case MP_BINARY_OP_ADD:
+            return ndarray_binary_add(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_MULTIPLY
+        case MP_BINARY_OP_MULTIPLY:
+            return ndarray_binary_multiply(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_MORE
+        case MP_BINARY_OP_MORE:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_binary_more(lhs, rhs, ndim, shape, lstrides, rstrides, MP_BINARY_OP_MORE);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_MORE_EQUAL
+        case MP_BINARY_OP_MORE_EQUAL:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_binary_more(lhs, rhs, ndim, shape, lstrides, rstrides, MP_BINARY_OP_MORE_EQUAL);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_SUBTRACT
+        case MP_BINARY_OP_SUBTRACT:
+            return ndarray_binary_subtract(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_TRUE_DIVIDE
+        case MP_BINARY_OP_TRUE_DIVIDE:
+            return ndarray_binary_true_divide(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_POWER
+        case MP_BINARY_OP_POWER:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_binary_power(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_OR | NDARRAY_HAS_BINARY_OP_XOR | NDARRAY_HAS_BINARY_OP_AND
+        case MP_BINARY_OP_OR:
+        case MP_BINARY_OP_XOR:
+        case MP_BINARY_OP_AND:
+            return ndarray_binary_logical(lhs, rhs, ndim, shape, lstrides, rstrides, op);
+            break;
+        #endif
+        #if NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE
+        case MP_BINARY_OP_FLOOR_DIVIDE:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_binary_floor_divide(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        default:
+            return MP_OBJ_NULL; // op not supported
+            break;
+    }
+    return MP_OBJ_NULL;
+}
+#endif /* NDARRAY_HAS_BINARY_OPS || NDARRAY_HAS_INPLACE_OPS */
+
+#if NDARRAY_HAS_UNARY_OPS
+// Entry point of the unary operators of the ndarray type.
+//
+// op:      the operator code
+// self_in: the ndarray operand
+//
+// abs, invert, negative, and positive return a new array that is created
+// from the operand (ndarray_copy_view, or, for the absolute value of complex
+// arrays, carray_abs); len returns the length of the leading axis.
+// Operators that are not compiled in, or not known, return MP_OBJ_NULL.
+// Inverting a float (or complex) array raises a ValueError.
+mp_obj_t ndarray_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    ndarray_obj_t *ndarray = NULL;
+
+    switch (op) {
+        #if NDARRAY_HAS_UNARY_OP_ABS
+        case MP_UNARY_OP_ABS:
+            #if ULAB_SUPPORTS_COMPLEX
+            if(self->dtype == NDARRAY_COMPLEX) {
+                // the absolute value of a complex array is a float array of the same shape
+                int32_t *strides = strides_from_shape(self->shape, NDARRAY_FLOAT);
+                ndarray_obj_t *target = ndarray_new_ndarray(self->ndim, self->shape, strides, NDARRAY_FLOAT, NULL);
+                ndarray = MP_OBJ_TO_PTR(carray_abs(self, target));
+            } else {
+            #endif
+                ndarray = ndarray_copy_view(self);
+                // if Boolean, NDARRAY_UINT8, or NDARRAY_UINT16, there is nothing to do
+                if(self->dtype == NDARRAY_INT8) {
+                    int8_t *array = (int8_t *)ndarray->array;
+                    for(size_t i=0; i < self->len; i++, array++) {
+                        if(*array < 0) *array = -(*array);
+                    }
+                } else if(self->dtype == NDARRAY_INT16) {
+                    int16_t *array = (int16_t *)ndarray->array;
+                    for(size_t i=0; i < self->len; i++, array++) {
+                        if(*array < 0) *array = -(*array);
+                    }
+                } else if(self->dtype == NDARRAY_FLOAT) {
+                    // the dtype has to be checked explicitly: the buffer of the unsigned,
+                    // and Boolean arrays must not be re-interpreted as floats, because
+                    // that would read (and possibly write) beyond the end of the data
+                    mp_float_t *array = (mp_float_t *)ndarray->array;
+                    for(size_t i=0; i < self->len; i++, array++) {
+                        if(*array < 0) *array = -(*array);
+                    }
+                }
+            #if ULAB_SUPPORTS_COMPLEX
+            }
+            #endif
+            return MP_OBJ_FROM_PTR(ndarray);
+            break;
+        #endif
+        #if NDARRAY_HAS_UNARY_OP_INVERT
+        case MP_UNARY_OP_INVERT:
+            #if ULAB_SUPPORTS_COMPLEX
+            if(self->dtype == NDARRAY_FLOAT || self->dtype == NDARRAY_COMPLEX) {
+            #else
+            if(self->dtype == NDARRAY_FLOAT) {
+            #endif
+                mp_raise_ValueError(MP_ERROR_TEXT("operation is not supported for given type"));
+            }
+            // we can invert the content byte by byte, no need to distinguish between different dtypes
+            ndarray = ndarray_copy_view(self); // from this point, this is a dense copy
+            uint8_t *array = (uint8_t *)ndarray->array;
+            if(ndarray->boolean) {
+                // Booleans are flipped in their lowest bit only
+                for(size_t i=0; i < ndarray->len; i++, array++) *array = *array ^ 0x01;
+            } else {
+                uint8_t itemsize = ulab_binary_get_size(self->dtype);
+                for(size_t i=0; i < ndarray->len*itemsize; i++, array++) *array ^= 0xFF;
+            }
+            return MP_OBJ_FROM_PTR(ndarray);
+            break;
+        #endif
+        #if NDARRAY_HAS_UNARY_OP_LEN
+        case MP_UNARY_OP_LEN:
+            // the length is that of the leading axis
+            return mp_obj_new_int(self->shape[ULAB_MAX_DIMS - self->ndim]);
+            break;
+        #endif
+        #if NDARRAY_HAS_UNARY_OP_NEGATIVE
+        case MP_UNARY_OP_NEGATIVE:
+            ndarray = ndarray_copy_view(self); // from this point, this is a dense copy
+            if(self->dtype == NDARRAY_UINT8) {
+                uint8_t *array = (uint8_t *)ndarray->array;
+                for(size_t i=0; i < self->len; i++, array++) *array = -(*array);
+            } else if(self->dtype == NDARRAY_INT8) {
+                int8_t *array = (int8_t *)ndarray->array;
+                for(size_t i=0; i < self->len; i++, array++) *array = -(*array);
+            } else if(self->dtype == NDARRAY_UINT16) {
+                uint16_t *array = (uint16_t *)ndarray->array;
+                for(size_t i=0; i < self->len; i++, array++) *array = -(*array);
+            } else if(self->dtype == NDARRAY_INT16) {
+                int16_t *array = (int16_t *)ndarray->array;
+                for(size_t i=0; i < self->len; i++, array++) *array = -(*array);
+            } else {
+                mp_float_t *array = (mp_float_t *)ndarray->array;
+                size_t len = self->len;
+                #if ULAB_SUPPORTS_COMPLEX
+                // a complex item consists of two floats, both of which are negated
+                if(self->dtype == NDARRAY_COMPLEX) {
+                    len *= 2;
+                }
+                #endif
+                for(size_t i=0; i < len; i++, array++) *array = -(*array);
+            }
+            return MP_OBJ_FROM_PTR(ndarray);
+            break;
+        #endif
+        #if NDARRAY_HAS_UNARY_OP_POSITIVE
+        case MP_UNARY_OP_POSITIVE:
+            return MP_OBJ_FROM_PTR(ndarray_copy_view(self));
+        #endif
+
+        default:
+            return MP_OBJ_NULL; // operator not supported
+            break;
+    }
+}
+#endif /* NDARRAY_HAS_UNARY_OPS */
+
+#if NDARRAY_HAS_TRANSPOSE
+// Returns the transpose of the array, i.e., a view with the order of the
+// axes reversed. One-dimensional arrays are returned unchanged.
+mp_obj_t ndarray_transpose(mp_obj_t self_in) {
+    #if ULAB_MAX_DIMS == 1
+        return self_in;
+    #else
+    // TODO: check, what happens to the offset here, if we have a view
+    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    if(self->ndim == 1) {
+        return self_in;
+    }
+    // shape, and strides are stored right-aligned, and are indexed up to
+    // ULAB_MAX_DIMS - 1 below, so the buffers must hold ULAB_MAX_DIMS entries,
+    // even if the array has fewer dimensions; unused entries are zeroed
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    int32_t *strides = m_new0(int32_t, ULAB_MAX_DIMS);
+    for(uint8_t i=0; i < self->ndim; i++) {
+        // the first used axis of the source becomes the last axis of the result
+        shape[ULAB_MAX_DIMS - 1 - i] = self->shape[ULAB_MAX_DIMS - self->ndim + i];
+        strides[ULAB_MAX_DIMS - 1 - i] = self->strides[ULAB_MAX_DIMS - self->ndim + i];
+    }
+    // TODO: I am not sure ndarray_new_view is OK here...
+    // should be deep copy...
+    ndarray_obj_t *ndarray = ndarray_new_view(self, self->ndim, shape, strides, 0);
+    return MP_OBJ_FROM_PTR(ndarray);
+    #endif
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_transpose_obj, ndarray_transpose);
+#endif /* NDARRAY_HAS_TRANSPOSE */
+
+#if ULAB_MAX_DIMS > 1
+#if NDARRAY_HAS_RESHAPE
+// Implementation of reshape.
+//
+// oin:     the ndarray to be reshaped
+// _shape:  the new shape, either an integer, or a tuple of integers; at most
+//          one entry can be negative, in which case the length of that axis is
+//          calculated from the length of the array, and the remaining axes
+// inplace: if true, the shape, strides, and the number of dimensions of oin
+//          itself are overwritten, and oin is returned; this is possible for
+//          dense arrays only. Otherwise, a new array is returned: a view for
+//          dense sources, and a dense copy for everything else
+//          (ndarray_reshape calls this function with inplace == 0)
+//
+// Raises a TypeError, if _shape is neither an integer, nor a tuple, and a
+// ValueError, if the shape has too many dimensions, has more than one unknown
+// dimension, is incompatible with the length of the array, or if an in-place
+// reshape is requested on an array that is not dense.
+mp_obj_t ndarray_reshape_core(mp_obj_t oin, mp_obj_t _shape, bool inplace) {
+    ndarray_obj_t *source = MP_OBJ_TO_PTR(oin);
+    if(!mp_obj_is_type(_shape, &mp_type_tuple) && !mp_obj_is_int(_shape)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("shape must be integer or tuple of integers"));
+    }
+
+    mp_obj_tuple_t *shape;
+
+    if(mp_obj_is_int(_shape)) {
+        // a single integer is treated as a tuple of length 1
+        mp_obj_t *items = m_new(mp_obj_t, 1);
+        items[0] = _shape;
+        shape = MP_OBJ_TO_PTR(mp_obj_new_tuple(1, items));
+    } else { // at this point it's certain that _shape is a tuple
+        shape = MP_OBJ_TO_PTR(_shape);
+    }
+
+    if(shape->len > ULAB_MAX_DIMS) {
+        mp_raise_ValueError(MP_ERROR_TEXT("maximum number of dimensions is " MP_STRINGIFY(ULAB_MAX_DIMS)));
+    }
+
+    size_t new_length = 1;
+    size_t *new_shape = m_new0(size_t, ULAB_MAX_DIMS);
+    uint8_t unknown_dim = 0;
+    uint8_t unknown_index = 0;
+
+    // the shape is stored right-aligned, hence, the tuple is processed from its end
+    for(uint8_t i = 0; i < shape->len; i++) {
+        int32_t ax_len = mp_obj_get_int(shape->items[shape->len - i - 1]);
+        if(ax_len >= 0) {
+            new_shape[ULAB_MAX_DIMS - i - 1] = (size_t)ax_len;
+            new_length *= new_shape[ULAB_MAX_DIMS - i - 1];
+        } else {
+            // negative entry: the length of this axis is worked out below
+            unknown_dim++;
+            unknown_index = ULAB_MAX_DIMS - i - 1;
+        }
+    }
+
+    if(unknown_dim > 1) {
+        mp_raise_ValueError(MP_ERROR_TEXT("can only specify one unknown dimension"));
+    } else if(unknown_dim == 1) {
+        // the unknown length can be calculated only, if the product of the known
+        // lengths is not zero, and divides the length of the array without remainder
+        if((new_length == 0) || (source->len % new_length != 0)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("cannot reshape array"));
+        }
+        new_shape[unknown_index] = source->len / new_length;
+        new_length = source->len;
+    }
+
+    if(source->len != new_length) {
+        mp_raise_ValueError(MP_ERROR_TEXT("cannot reshape array"));
+    }
+
+    ndarray_obj_t *ndarray;
+    if(ndarray_is_dense(source)) {
+        int32_t *new_strides = strides_from_shape(new_shape, source->dtype);
+        if(inplace) {
+            for(uint8_t i = 0; i < ULAB_MAX_DIMS; i++) {
+                source->shape[i] = new_shape[i];
+                source->strides[i] = new_strides[i];
+            }
+            // the number of dimensions has to follow the new shape, otherwise
+            // the right-aligned shape, and strides would be read at the wrong offset
+            source->ndim = shape->len;
+            return oin;
+        } else {
+            ndarray = ndarray_new_view(source, shape->len, new_shape, new_strides, 0);
+        }
+    } else {
+        if(inplace) {
+            mp_raise_ValueError(MP_ERROR_TEXT("cannot assign new shape"));
+        }
+        // sources that are not dense are copied into a new dense array
+        ndarray = ndarray_new_dense_ndarray(shape->len, new_shape, source->dtype);
+        ndarray_copy_array(source, ndarray, 0);
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+// reshape method: delegates to ndarray_reshape_core with in-place mode
+// switched off, so the array in oin keeps its own shape
+mp_obj_t ndarray_reshape(mp_obj_t oin, mp_obj_t _shape) {
+    mp_obj_t reshaped = ndarray_reshape_core(oin, _shape, 0);
+    return reshaped;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(ndarray_reshape_obj, ndarray_reshape);
+#endif /* NDARRAY_HAS_RESHAPE */
+#endif /* ULAB_MAX_DIMS > 1 */
+
+#if ULAB_NUMPY_HAS_NDINFO
+mp_obj_t ndarray_info(mp_obj_t obj_in) {
+    if(!mp_obj_is_type(obj_in, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("function is defined for ndarrays only"));
+    }
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(obj_in);
+    mp_printf(MP_PYTHON_PRINTER, "class: ndarray\n");
+    mp_printf(MP_PYTHON_PRINTER, "shape: (");
+    if(ndarray->ndim == 1) {
+        mp_printf(MP_PYTHON_PRINTER, "%d,", ndarray->shape[ULAB_MAX_DIMS-1]);
+    } else {
+        for(uint8_t i=0; i < ndarray->ndim-1; i++) mp_printf(MP_PYTHON_PRINTER, "%d, ", ndarray->shape[i]);
+        mp_printf(MP_PYTHON_PRINTER, "%d", ndarray->shape[ULAB_MAX_DIMS-1]);
+    }
+    mp_printf(MP_PYTHON_PRINTER, ")\n");
+    mp_printf(MP_PYTHON_PRINTER, "strides: (");
+    if(ndarray->ndim == 1) {
+        mp_printf(MP_PYTHON_PRINTER, "%d,", ndarray->strides[ULAB_MAX_DIMS-1]);
+    } else {
+        for(uint8_t i=0; i < ndarray->ndim-1; i++) mp_printf(MP_PYTHON_PRINTER, "%d, ", ndarray->strides[i]);
+        mp_printf(MP_PYTHON_PRINTER, "%d", ndarray->strides[ULAB_MAX_DIMS-1]);
+    }
+    mp_printf(MP_PYTHON_PRINTER, ")\n");
+    mp_printf(MP_PYTHON_PRINTER, "itemsize: %d\n", ndarray->itemsize);
+    mp_printf(MP_PYTHON_PRINTER, "data pointer: 0x%p\n", ndarray->array);
+    mp_printf(MP_PYTHON_PRINTER, "type: ");
+    if(ndarray->boolean) {
+        mp_printf(MP_PYTHON_PRINTER, "bool\n");
+    } else if(ndarray->dtype == NDARRAY_UINT8) {
+        mp_printf(MP_PYTHON_PRINTER, "uint8\n");
+    } else if(ndarray->dtype == NDARRAY_INT8) {
+        mp_printf(MP_PYTHON_PRINTER, "int8\n");
+    } else if(ndarray->dtype == NDARRAY_UINT16) {
+        mp_printf(MP_PYTHON_PRINTER, "uint16\n");
+    } else if(ndarray->dtype == NDARRAY_INT16) {
+        mp_printf(MP_PYTHON_PRINTER, "int16\n");
+    } else if(ndarray->dtype == NDARRAY_FLOAT) {
+        mp_printf(MP_PYTHON_PRINTER, "float\n");
+    }
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_info_obj, ndarray_info);
+#endif
+
+// (the get_buffer protocol returns 0 for success, 1 for failure)
+// Buffer protocol handler: describes the data of a dense array in bufinfo
+// (length in bytes, data pointer, and the dtype as the typecode), and returns 0.
+// Arrays that are not dense are refused with a return value of 1, and
+// bufinfo is left untouched. The flags argument is not inspected.
+mp_int_t ndarray_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags) {
+    ndarray_obj_t *array = MP_OBJ_TO_PTR(self_in);
+    if(ndarray_is_dense(array)) {
+        bufinfo->typecode = array->dtype;
+        bufinfo->buf = array->array;
+        bufinfo->len = array->itemsize * array->len;
+        return 0;
+    }
+    return 1;
+}
diff --git a/tulip/shared/ulab/code/ndarray.h b/tulip/shared/ulab/code/ndarray.h
new file mode 100644
index 000000000..3e82b3856
--- /dev/null
+++ b/tulip/shared/ulab/code/ndarray.h
@@ -0,0 +1,712 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+ *               2020 Jeff Epler for Adafruit Industries
+*/
+
+#ifndef _NDARRAY_
+#define _NDARRAY_
+
+#include "py/objarray.h"
+#include "py/binary.h"
+#include "py/objstr.h"
+#include "py/objlist.h"
+
+#include "ulab.h"
+
+#ifndef MP_PI
+#define MP_PI MICROPY_FLOAT_CONST(3.14159265358979323846) // pi; only defined here if nothing defined it earlier
+#endif
+#ifndef MP_E
+#define MP_E MICROPY_FLOAT_CONST(2.71828182845904523536) // Euler's number; same fallback scheme as MP_PI
+#endif
+
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#define FLOAT_TYPECODE 'f' // value of NDARRAY_FLOAT when MICROPY_FLOAT_IMPL is MICROPY_FLOAT_IMPL_FLOAT
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#define FLOAT_TYPECODE 'd' // value of NDARRAY_FLOAT when MICROPY_FLOAT_IMPL is MICROPY_FLOAT_IMPL_DOUBLE
+#endif // NOTE(review): FLOAT_TYPECODE stays undefined for any other MICROPY_FLOAT_IMPL, which breaks enum NDARRAY_TYPE
+
+#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B
+
+// For object representations A and B a Python float object is allocated as a
+// concrete object in a struct, with the first entry pointing to &mp_type_float.
+// Constant float objects are a struct in ROM and are referenced via their pointer.
+
+// Use ULAB_DEFINE_FLOAT_CONST to define a constant float object.
+// id is the name of the constant, num is its floating point value (the only value argument used for A/B; C uses hex32, D uses hex64).
+// hex32 is computed as: hex(int.from_bytes(array.array('f', [num]), 'little'))
+// hex64 is computed as: hex(int.from_bytes(array.array('d', [num]), 'little'))
+
+// Use ULAB_REFERENCE_FLOAT_CONST to reference a constant float object in code.
+
+#define ULAB_DEFINE_FLOAT_CONST(id, num, hex32, hex64) \
+    const mp_obj_float_t id##_obj = {{&mp_type_float}, (num)}
+
+#define ULAB_REFERENCE_FLOAT_CONST(id) MP_ROM_PTR(&id##_obj)
+
+// this typedef is lifted from objfloat.c, because mp_obj_float_t is not exposed
+typedef struct _mp_obj_float_t {
+    mp_obj_base_t base;
+    mp_float_t value; // initialised from num by ULAB_DEFINE_FLOAT_CONST
+} mp_obj_float_t;
+
+#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
+
+// For object representation C a Python float object is stored directly in the
+// mp_obj_t value.
+
+// See above for how to use ULAB_DEFINE_FLOAT_CONST and ULAB_REFERENCE_FLOAT_CONST.
+
+#define ULAB_DEFINE_FLOAT_CONST(id, num, hex32, hex64) \
+    enum { \
+        id = (((((uint32_t)hex32) & ~3) | 2) + 0x80800000) \
+    }
+
+#define ULAB_REFERENCE_FLOAT_CONST(id) ((mp_obj_t)(id))
+
+#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
+
+// For object representation D (nan-boxing) a Python float object is stored
+// directly in the mp_obj_t value.
+
+// See above for how to use ULAB_DEFINE_FLOAT_CONST and ULAB_REFERENCE_FLOAT_CONST.
+
+#define ULAB_DEFINE_FLOAT_CONST(id, num, hex32, hex64) \
+    const uint64_t id = (((uint64_t)hex64) + 0x8004000000000000ULL)
+
+#define ULAB_REFERENCE_FLOAT_CONST(id) {id}
+
+#endif /* MICROPY_OBJ_REPR */
+
+#if defined(MICROPY_VERSION_MAJOR) && MICROPY_VERSION_MAJOR == 1 && MICROPY_VERSION_MINOR == 11
+typedef struct _mp_obj_slice_t { // local definition of the slice object layout, compiled only for MicroPython 1.11
+    mp_obj_base_t base;
+    mp_obj_t start;
+    mp_obj_t stop;
+    mp_obj_t step;
+} mp_obj_slice_t;
+#define MP_ERROR_TEXT(x) x // pass-through: the message text is used unchanged on this version
+#endif /* MicroPython 1.11 compatibility */
+
+#if !defined(MP_OBJ_TYPE_GET_SLOT)
+#if defined(MP_TYPE_FLAG_EXTENDED)
+// Provide MP_OBJ_TYPE_{HAS,GET}_SLOT for CircuitPython.
+#define MP_OBJ_TYPE_HAS_SLOT(t, f) (mp_type_get_##f##_slot(t) != NULL)
+#define MP_OBJ_TYPE_GET_SLOT(t, f) mp_type_get_##f##_slot(t)
+#else
+// Provide MP_OBJ_TYPE_{HAS,GET}_SLOT for older revisions of MicroPython.
+#define MP_OBJ_TYPE_HAS_SLOT(t, f) ((t)->f != NULL)
+#define MP_OBJ_TYPE_GET_SLOT(t, f) (t)->f
+
+// Also allow CircuitPython-style mp_obj_type_t definitions.
+#define MP_TYPE_FLAG_EXTENDED (0)
+#define MP_TYPE_EXTENDED_FIELDS(...) __VA_ARGS__
+#endif /* defined(MP_TYPE_FLAG_EXTENDED) */
+#endif /* !defined(MP_OBJ_TYPE_GET_SLOT) */
+
+#define ndarray_set_value(a, b, c, d) mp_binary_set_val_array(a, b, c, d) // plain alias: forwards all four arguments unchanged
+void ndarray_set_complex_value(void *, size_t , mp_obj_t );
+
+#define NDARRAY_NUMERIC   0
+#define NDARRAY_BOOLEAN   1 // presumably the value stored in ndarray_obj_t.boolean for bool arrays - TODO confirm
+
+#define NDARRAY_NDARRAY_TYPE    1
+#define NDARRAY_ITERABLE_TYPE   2
+
+extern const mp_obj_type_t ulab_ndarray_type; // declared here, defined in another translation unit
+
+enum NDARRAY_TYPE { // dtype codes; every value is a single typecode character
+    NDARRAY_BOOL = '?', // this must never be assigned to the dtype!
+    NDARRAY_UINT8 = 'B', // printed as "uint8" by ndarray_info()
+    NDARRAY_INT8 = 'b', // printed as "int8" by ndarray_info()
+    NDARRAY_UINT16 = 'H', // printed as "uint16" by ndarray_info()
+    NDARRAY_INT16 = 'h', // printed as "int16" by ndarray_info()
+    #if ULAB_SUPPORTS_COMPLEX
+        NDARRAY_COMPLEX = 'c', // only present when ULAB_SUPPORTS_COMPLEX is enabled
+    #endif
+    NDARRAY_FLOAT = FLOAT_TYPECODE, // 'f' or 'd', depending on MICROPY_FLOAT_IMPL
+};
+
+typedef struct _ndarray_obj_t { // in-memory representation of a ulab ndarray
+    mp_obj_base_t base;
+    uint8_t dtype; // element type code (see enum NDARRAY_TYPE); ndarray_get_buffer() reports it as the typecode
+    uint8_t itemsize; // size of one element; ndarray_get_buffer() reports itemsize * len as the buffer length
+    uint8_t boolean; // non-zero marks the array as boolean (ndarray_info() then prints "bool")
+    uint8_t ndim; // number of dimensions in use
+    size_t len; // element count; multiplied by itemsize to get the buffer length
+    size_t shape[ULAB_MAX_DIMS]; // the innermost axis lives at index ULAB_MAX_DIMS - 1
+    int32_t strides[ULAB_MAX_DIMS]; // per-axis pointer increments, same index layout as shape; presumably in bytes - TODO confirm
+    void *array; // data pointer, handed out by ndarray_get_buffer()
+    void *origin; // NOTE(review): not used in this header; presumably the base of the allocation behind array - confirm
+} ndarray_obj_t;
+
+#if ULAB_HAS_DTYPE_OBJECT
+extern const mp_obj_type_t ulab_dtype_type;
+
+typedef struct _dtype_obj_t { // dtype object, compiled in only when ULAB_HAS_DTYPE_OBJECT is set
+    mp_obj_base_t base;
+    uint8_t dtype; // presumably one of the NDARRAY_TYPE codes - TODO confirm against ndarray_dtype_make_new
+} dtype_obj_t;
+
+void ndarray_dtype_print(const mp_print_t *, mp_obj_t , mp_print_kind_t );
+
+mp_obj_t ndarray_dtype_make_new(const mp_obj_type_t *, size_t , size_t , const mp_obj_t *);
+#endif /* ULAB_HAS_DTYPE_OBJECT */
+
+extern const mp_obj_type_t ndarray_flatiter_type;
+
+mp_obj_t ndarray_new_ndarray_iterator(mp_obj_t , mp_obj_iter_buf_t *);
+
+mp_obj_t ndarray_get_item(ndarray_obj_t *, void *);
+mp_float_t ndarray_get_float_value(void *, uint8_t );
+mp_float_t ndarray_get_float_index(void *, uint8_t , size_t );
+bool ndarray_object_is_array_like(mp_obj_t );
+void fill_array_iterable(mp_float_t *, mp_obj_t );
+size_t *ndarray_shape_vector(size_t , size_t , size_t , size_t );
+
+void ndarray_print(const mp_print_t *, mp_obj_t , mp_print_kind_t );
+
+#if ULAB_HAS_PRINTOPTIONS
+mp_obj_t ndarray_set_printoptions(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(ndarray_set_printoptions_obj);
+
+mp_obj_t ndarray_get_printoptions(void);
+MP_DECLARE_CONST_FUN_OBJ_0(ndarray_get_printoptions_obj);
+#endif
+
+void ndarray_assign_elements(ndarray_obj_t *, mp_obj_t , uint8_t , size_t *);
+size_t *ndarray_contract_shape(ndarray_obj_t *, uint8_t );
+int32_t *ndarray_contract_strides(ndarray_obj_t *, uint8_t );
+
+ndarray_obj_t *ndarray_from_iterable(mp_obj_t , uint8_t );
+ndarray_obj_t *ndarray_new_dense_ndarray(uint8_t , size_t *, uint8_t );
+ndarray_obj_t *ndarray_new_ndarray_from_tuple(mp_obj_tuple_t *, uint8_t );
+ndarray_obj_t *ndarray_new_ndarray(uint8_t , size_t *, int32_t *, uint8_t , uint8_t *);
+ndarray_obj_t *ndarray_new_linear_array(size_t , uint8_t );
+ndarray_obj_t *ndarray_new_view(ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t );
+bool ndarray_is_dense(ndarray_obj_t *);
+ndarray_obj_t *ndarray_copy_view(ndarray_obj_t *);
+ndarray_obj_t *ndarray_copy_view_convert_type(ndarray_obj_t *, uint8_t );
+void ndarray_copy_array(ndarray_obj_t *, ndarray_obj_t *, uint8_t );
+
+MP_DECLARE_CONST_FUN_OBJ_KW(ndarray_array_constructor_obj);
+mp_obj_t ndarray_make_new(const mp_obj_type_t *, size_t , size_t , const mp_obj_t *);
+mp_obj_t ndarray_subscr(mp_obj_t , mp_obj_t , mp_obj_t );
+mp_obj_t ndarray_getiter(mp_obj_t , mp_obj_iter_buf_t *);
+bool ndarray_can_broadcast(ndarray_obj_t *, ndarray_obj_t *, uint8_t *, size_t *, int32_t *, int32_t *);
+bool ndarray_can_broadcast_inplace(ndarray_obj_t *, ndarray_obj_t *, int32_t *);
+mp_obj_t ndarray_binary_op(mp_binary_op_t , mp_obj_t , mp_obj_t );
+mp_obj_t ndarray_unary_op(mp_unary_op_t , mp_obj_t );
+
+size_t *ndarray_new_coords(uint8_t );
+void ndarray_rewind_array(uint8_t , uint8_t *, size_t *, int32_t *, size_t *);
+
+// various ndarray methods
+#if NDARRAY_HAS_BYTESWAP
+mp_obj_t ndarray_byteswap(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(ndarray_byteswap_obj);
+#endif
+
+#if NDARRAY_HAS_COPY
+mp_obj_t ndarray_copy(mp_obj_t );
+MP_DECLARE_CONST_FUN_OBJ_1(ndarray_copy_obj);
+#endif
+
+#if NDARRAY_HAS_FLATTEN
+mp_obj_t ndarray_flatten(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(ndarray_flatten_obj);
+#endif
+
+#if NDARRAY_HAS_DTYPE
+mp_obj_t ndarray_dtype(mp_obj_t );
+#endif
+
+#if NDARRAY_HAS_ITEMSIZE
+mp_obj_t ndarray_itemsize(mp_obj_t );
+#endif
+
+#if NDARRAY_HAS_SIZE
+mp_obj_t ndarray_size(mp_obj_t );
+#endif
+
+#if NDARRAY_HAS_SHAPE
+mp_obj_t ndarray_shape(mp_obj_t );
+#endif
+
+#if NDARRAY_HAS_STRIDES
+mp_obj_t ndarray_strides(mp_obj_t );
+#endif
+
+#if NDARRAY_HAS_RESHAPE
+mp_obj_t ndarray_reshape_core(mp_obj_t , mp_obj_t , bool );
+mp_obj_t ndarray_reshape(mp_obj_t , mp_obj_t );
+MP_DECLARE_CONST_FUN_OBJ_2(ndarray_reshape_obj);
+#endif
+
+#if NDARRAY_HAS_TOBYTES
+mp_obj_t ndarray_tobytes(mp_obj_t );
+MP_DECLARE_CONST_FUN_OBJ_1(ndarray_tobytes_obj);
+#endif
+
+#if NDARRAY_HAS_TOLIST
+mp_obj_t ndarray_tolist(mp_obj_t );
+MP_DECLARE_CONST_FUN_OBJ_1(ndarray_tolist_obj);
+#endif
+
+#if NDARRAY_HAS_TRANSPOSE
+mp_obj_t ndarray_transpose(mp_obj_t );
+MP_DECLARE_CONST_FUN_OBJ_1(ndarray_transpose_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_NDINFO
+mp_obj_t ndarray_info(mp_obj_t );
+MP_DECLARE_CONST_FUN_OBJ_1(ndarray_info_obj);
+#endif
+
+mp_int_t ndarray_get_buffer(mp_obj_t , mp_buffer_info_t *, mp_uint_t );
+//void ndarray_attributes(mp_obj_t , qstr , mp_obj_t *);
+
+ndarray_obj_t *ndarray_from_mp_obj(mp_obj_t , uint8_t );
+
+
+#define BOOLEAN_ASSIGNMENT_LOOP(type_left, type_right, ndarray, lstrides, iarray, istride, varray, vstride) /* masked assignment over (ndarray)->len positions; declares array in the caller's scope and advances iarray and varray */\
+    type_left *array = (type_left *)(ndarray)->array;\
+    for(size_t i=0; i < (ndarray)->len; i++) {\
+        if(*(iarray)) { /* non-zero mask entry: store the next value, cast from type_right to type_left */\
+            *array = (type_left)(*((type_right *)(varray)));\
+            (varray) += (vstride); /* a value is consumed only when the mask entry was set */\
+        }\
+        array += (lstrides);\
+        (iarray) += (istride);\
+    } while(0) /* together with the caller's semicolon this is just an empty statement after the for loop */
+
+#if ULAB_HAS_FUNCTION_ITERATOR
+#define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* function-iterator variant: writes left OPERATOR right into (results)->array; declares array, lcoords and rcoords in the caller's scope */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t *lcoords = ndarray_new_coords((results)->ndim);\
+    size_t *rcoords = ndarray_new_coords((results)->ndim);\
+    for(size_t i=0; i < (results)->len/(results)->shape[ULAB_MAX_DIMS -1]; i++) { /* one pass per run along the last axis */\
+        size_t l = 0;\
+        do {\
+            *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        ndarray_rewind_array((results)->ndim, (larray), (results)->shape, (lstrides), lcoords); /* repositions larray for the next run */\
+        ndarray_rewind_array((results)->ndim, (rarray), (results)->shape, (rstrides), rcoords); /* repositions rarray for the next run */\
+    } while(0)
+
+#define INPLACE_LOOP(results, type_left, type_right, larray, rarray, rstrides, OPERATOR) /* function-iterator variant: applies the compound assignment OPERATOR to the left operand in place, stepping it by (results)->strides; declares lcoords and rcoords */\
+    size_t *lcoords = ndarray_new_coords((results)->ndim);\
+    size_t *rcoords = ndarray_new_coords((results)->ndim);\
+    for(size_t i=0; i < (results)->len/(results)->shape[ULAB_MAX_DIMS -1]; i++) { /* one pass per run along the last axis */\
+        size_t l = 0;\
+        do {\
+            *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+            (larray) += (results)->strides[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        ndarray_rewind_array((results)->ndim, (larray), (results)->shape, (results)->strides, lcoords);\
+        ndarray_rewind_array((results)->ndim, (rarray), (results)->shape, (rstrides), rcoords);\
+    } while(0)
+
+#define EQUALITY_LOOP(results, array, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* function-iterator variant: stores 1 or 0 through the caller-supplied array pointer for each left OPERATOR right comparison; declares lcoords and rcoords */\
+    size_t *lcoords = ndarray_new_coords((results)->ndim);\
+    size_t *rcoords = ndarray_new_coords((results)->ndim);\
+    for(size_t i=0; i < (results)->len/(results)->shape[ULAB_MAX_DIMS -1]; i++) { /* one pass per run along the last axis */\
+        size_t l = 0;\
+        do {\
+            *(array)++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? 1 : 0;\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        ndarray_rewind_array((results)->ndim, (larray), (results)->shape, (lstrides), lcoords);\
+        ndarray_rewind_array((results)->ndim, (rarray), (results)->shape, (rstrides), rcoords);\
+    } while(0)
+
+#define POWER_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) /* function-iterator variant: writes pow(left, right) into (results)->array; declares array, lcoords and rcoords in the caller's scope */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t *lcoords = ndarray_new_coords((results)->ndim);\
+    size_t *rcoords = ndarray_new_coords((results)->ndim);\
+    for(size_t i=0; i < (results)->len/(results)->shape[ULAB_MAX_DIMS -1]; i++) { /* one pass per run along the last axis */\
+        size_t l = 0;\
+        do {\
+            *array++ = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        ndarray_rewind_array((results)->ndim, (larray), (results)->shape, (lstrides), lcoords);\
+        ndarray_rewind_array((results)->ndim, (rarray), (results)->shape, (rstrides), rcoords);\
+    } while(0)
+
+#else
+
+#if ULAB_MAX_DIMS == 1
+#define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 1D variant: writes left OPERATOR right into (results)->array; declares array and l in the caller's scope and advances larray and rarray */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do { /* NOTE(review): do/while runs at least once, even for a zero-length axis - confirm callers never pass one */\
+        *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+
+#define INPLACE_LOOP(results, type_left, type_right, larray, rarray, rstrides, OPERATOR) /* 1D variant: applies the compound assignment OPERATOR to the left operand in place; declares l in the caller's scope */\
+    size_t l = 0;\
+    do {\
+        *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+        (larray) += (results)->strides[ULAB_MAX_DIMS - 1]; /* the left operand is stepped with the strides of results */\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+
+#define EQUALITY_LOOP(results, array, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 1D variant: stores 1 or 0 through the caller-supplied array pointer for each comparison; declares l in the caller's scope */\
+    size_t l = 0;\
+    do {\
+        *(array)++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? 1 : 0;\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+
+#define POWER_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) /* 1D variant: writes pow(left, right) into (results)->array; declares array and l in the caller's scope and advances larray and rarray */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do {\
+        *array++ = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+
+#endif /* ULAB_MAX_DIMS == 1 */
+
+#if ULAB_MAX_DIMS == 2
+#define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 2D variant: writes left OPERATOR right into (results)->array; declares array and k in the caller's scope */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the outer axis */\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+
+#define INPLACE_LOOP(results, type_left, type_right, larray, rarray, rstrides, OPERATOR) /* 2D variant: applies the compound assignment OPERATOR to the left operand in place, stepping it by (results)->strides; declares k in the caller's scope */\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+            (larray) += (results)->strides[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        (larray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+        (larray) += (results)->strides[ULAB_MAX_DIMS - 2]; /* ... then take one step along the outer axis */\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+
+#define EQUALITY_LOOP(results, array, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 2D variant: stores 1 or 0 through the caller-supplied array pointer for each comparison; declares k in the caller's scope */\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            *(array)++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? 1 : 0;\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the outer axis */\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+
+#define POWER_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) /* 2D variant: writes pow(left, right) into (results)->array; declares array and k in the caller's scope */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            *array++ = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the outer axis */\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+#define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 3D variant: writes left OPERATOR right into (results)->array; declares array and j in the caller's scope */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the next axis out */\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+
+#define INPLACE_LOOP(results, type_left, type_right, larray, rarray, rstrides, OPERATOR) /* 3D variant: applies the compound assignment OPERATOR to the left operand in place, stepping it by (results)->strides; declares j in the caller's scope */\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+                (larray) += (results)->strides[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+            (larray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+            (larray) += (results)->strides[ULAB_MAX_DIMS - 2]; /* ... then take one step along the next axis out */\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (results)->strides[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (larray) += (results)->strides[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+
+#define EQUALITY_LOOP(results, array, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 3D variant: stores 1 or 0 through the caller-supplied array pointer for each comparison; declares j in the caller's scope */\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                *(array)++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? 1 : 0;\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the next axis out */\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+
+#define POWER_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) /* 3D variant: writes pow(left, right) into (results)->array; declares array and j in the caller's scope */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                *array++ = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the next axis out */\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+#define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 4D variant: writes left OPERATOR right into (results)->array; declares array and i in the caller's scope */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t i = 0;\
+    do {\
+        size_t j = 0;\
+        do {\
+            size_t k = 0;\
+            do {\
+                size_t l = 0;\
+                do {\
+                    *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+                    (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                    (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the next axis out */\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+
+#define INPLACE_LOOP(results, type_left, type_right, larray, rarray, rstrides, OPERATOR) /* 4D variant: applies the compound assignment OPERATOR to the left operand in place, stepping it by (results)->strides; declares i in the caller's scope */\
+    size_t i = 0;\
+    do {\
+        size_t j = 0;\
+        do {\
+            size_t k = 0;\
+            do {\
+                size_t l = 0;\
+                do {\
+                    *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\
+                    (larray) += (results)->strides[ULAB_MAX_DIMS - 1];\
+                    (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+                (larray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+                (larray) += (results)->strides[ULAB_MAX_DIMS - 2]; /* ... then take one step along the next axis out */\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (results)->strides[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (larray) += (results)->strides[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (results)->strides[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (larray) += (results)->strides[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+
+#define EQUALITY_LOOP(results, array, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 4D variant: stores 1 or 0 through the caller-supplied array pointer for each comparison; declares i in the caller's scope */\
+    size_t i = 0;\
+    do {\
+        size_t j = 0;\
+        do {\
+            size_t k = 0;\
+            do {\
+                size_t l = 0;\
+                do {\
+                    *(array)++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? 1 : 0;\
+                    (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                    (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the next axis out */\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+
+#define POWER_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) /* 4D variant: writes pow(left, right) into (results)->array; declares array and i in the caller's scope */\
+    type_out *array = (type_out *)(results)->array;\
+    size_t i = 0;\
+    do {\
+        size_t j = 0;\
+        do {\
+            size_t k = 0;\
+            do {\
+                size_t l = 0;\
+                do {\
+                    *array++ = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+                    (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                    (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* undo the inner-axis steps ... */\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... then take one step along the next axis out */\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+
+#endif /* ULAB_MAX_DIMS == 4 */
+#endif /* ULAB_HAS_FUNCTION_ITERATOR */
+
+#endif
diff --git a/tulip/shared/ulab/code/ndarray_operators.c b/tulip/shared/ulab/code/ndarray_operators.c
new file mode 100644
index 000000000..e8de4d48f
--- /dev/null
+++ b/tulip/shared/ulab/code/ndarray_operators.c
@@ -0,0 +1,1123 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+*/
+
+
+#include <math.h>
+
+#include "py/runtime.h"
+#include "py/objtuple.h"
+#include "ndarray.h"
+#include "ndarray_operators.h"
+#include "ulab.h"
+#include "ulab_tools.h"
+#include "numpy/carray/carray.h"
+
+/*
+    This file contains the actual implementations of the various
+    ndarray operators.
+
+    These are the upcasting rules of the binary operators
+
+    - if complex is supported, and if one of the operands is complex, the result is always complex
+    - if both operands are real and one of them is a float, then the result is also a float
+    - operation on identical types preserves type
+
+    uint8 + int8 => int16
+    uint8 + int16 => int16
+    uint8 + uint16 => uint16
+    int8 + int16 => int16
+    int8 + uint16 => int16
+    uint16 + int16 => float
+*/
+
+#if NDARRAY_HAS_BINARY_OP_EQUAL | NDARRAY_HAS_BINARY_OP_NOT_EQUAL
+mp_obj_t ndarray_binary_equality(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                            uint8_t ndim, size_t *shape,  int32_t *lstrides, int32_t *rstrides, mp_binary_op_t op) {
+    /* Element-wise == / != of lhs and rhs (selected by op); the result is a new dense uint8 array flagged as boolean. */
+    #if ULAB_SUPPORTS_COMPLEX
+    if((lhs->dtype == NDARRAY_COMPLEX) || (rhs->dtype == NDARRAY_COMPLEX))  {
+        return carray_binary_equal_not_equal(lhs, rhs, ndim, shape, lstrides, rstrides, op);
+    }
+    #endif
+    /* NOTE: results is allocated before the dtype dispatch, so the swapped-operand returns below leave it unused. */
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+    results->boolean = 1;
+    uint8_t *array = (uint8_t *)results->array;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    /* Only dtype pairs with lhs at or before rhs in the order uint8, int8, uint16, int16, float get a loop here. */
+    #if NDARRAY_HAS_BINARY_OP_EQUAL
+    if(op == MP_BINARY_OP_EQUAL) {
+        if(lhs->dtype == NDARRAY_UINT8) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, uint8_t, int8_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, uint8_t, int16_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
+            } /* no else: any other rhs dtype runs no loop and leaves results as allocated */
+        } else if(lhs->dtype == NDARRAY_INT8) {
+            if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, int8_t, int8_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, int8_t, uint16_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, int8_t, int16_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
+            } else { /* remaining pairs: retry through ndarray_binary_op with the operands exchanged (== and != are symmetric) */
+                return ndarray_binary_op(op, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+            }
+        } else if(lhs->dtype == NDARRAY_UINT16) {
+            if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, uint16_t, int16_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
+            } else {
+                return ndarray_binary_op(op, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+            }
+        } else if(lhs->dtype == NDARRAY_INT16) {
+            if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, int16_t, int16_t, larray, lstrides, rarray, rstrides, ==);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
+            } else {
+                return ndarray_binary_op(op, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+            }
+        } else if(lhs->dtype == NDARRAY_FLOAT) {
+            if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
+            } else {
+                return ndarray_binary_op(op, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+            }
+        }
+    }
+    #endif /* NDARRAY_HAS_BINARY_OP_EQUAL */
+    /* The != dispatch below mirrors the == dispatch above, dtype pair for dtype pair. */
+    #if NDARRAY_HAS_BINARY_OP_NOT_EQUAL
+    if(op == MP_BINARY_OP_NOT_EQUAL) {
+        if(lhs->dtype == NDARRAY_UINT8) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, uint8_t, int8_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, uint8_t, int16_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
+            }
+        } else if(lhs->dtype == NDARRAY_INT8) {
+            if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, int8_t, int8_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, int8_t, uint16_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, int8_t, int16_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
+            } else {
+                return ndarray_binary_op(op, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+            }
+        } else if(lhs->dtype == NDARRAY_UINT16) {
+            if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, uint16_t, int16_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
+            } else {
+                return ndarray_binary_op(op, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+            }
+        } else if(lhs->dtype == NDARRAY_INT16) {
+            if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, int16_t, int16_t, larray, lstrides, rarray, rstrides, !=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
+            } else {
+                return ndarray_binary_op(op, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+            }
+        } else if(lhs->dtype == NDARRAY_FLOAT) {
+            if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
+            } else {
+                return ndarray_binary_op(op, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+            }
+        }
+    }
+    #endif /* NDARRAY_HAS_BINARY_OP_NOT_EQUAL */
+    /* If op matched neither enabled branch, results is returned exactly as allocated. */
+    return MP_OBJ_FROM_PTR(results);
+}
+#endif /* NDARRAY_HAS_BINARY_OP_EQUAL | NDARRAY_HAS_BINARY_OP_NOT_EQUAL */
+
+#if NDARRAY_HAS_BINARY_OP_ADD
+mp_obj_t ndarray_binary_add(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                        uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    /* Element-wise lhs + rhs; a new dense array is allocated with the result dtype chosen per dtype pair below. */
+    #if ULAB_SUPPORTS_COMPLEX
+    if((lhs->dtype == NDARRAY_COMPLEX) || (rhs->dtype == NDARRAY_COMPLEX))  {
+        return carray_binary_add(lhs, rhs, ndim, shape, lstrides, rstrides);
+    }
+    #endif
+    /* results stays NULL unless one of the branches below allocates it. */
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    /* Only dtype pairs with lhs at or before rhs in the order uint8, int8, uint16, int16, float get a loop here. */
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
+        } /* no else: any other rhs dtype leaves results NULL */
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_UINT16) { /* int8 with uint16 is stored as int16 */
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
+        } else { /* remaining pairs: retry through ndarray_binary_op with the operands exchanged (addition commutes) */
+            return ndarray_binary_op(MP_BINARY_OP_ADD, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_INT16) { /* mixed uint16/int16 is promoted to float */
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
+        } else {
+            return ndarray_binary_op(MP_BINARY_OP_ADD, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, +);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
+        } else {
+            return ndarray_binary_op(MP_BINARY_OP_ADD, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
+        } else {
+            return ndarray_binary_op(MP_BINARY_OP_ADD, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+        }
+    }
+    /* NOTE(review): if no branch above matched, results is still NULL and a NULL pointer is wrapped here - confirm this is unreachable. */
+    return MP_OBJ_FROM_PTR(results);
+}
+#endif /* NDARRAY_HAS_BINARY_OP_ADD */
+
+#if NDARRAY_HAS_BINARY_OP_MULTIPLY
+mp_obj_t ndarray_binary_multiply(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    /* Element-wise lhs * rhs; a new dense array is allocated with the result dtype chosen per dtype pair below. */
+    #if ULAB_SUPPORTS_COMPLEX
+    if((lhs->dtype == NDARRAY_COMPLEX) || (rhs->dtype == NDARRAY_COMPLEX))  {
+        return carray_binary_multiply(lhs, rhs, ndim, shape, lstrides, rstrides);
+    }
+    #endif
+    /* results stays NULL unless one of the branches below allocates it. */
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    /* Only dtype pairs with lhs at or before rhs in the order uint8, int8, uint16, int16, float get a loop here. */
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
+        } /* no else: any other rhs dtype leaves results NULL */
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_UINT16) { /* int8 with uint16 is stored as int16 */
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
+        } else { /* remaining pairs: retry through ndarray_binary_op with the operands exchanged (multiplication commutes) */
+            return ndarray_binary_op(MP_BINARY_OP_MULTIPLY, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_INT16) { /* mixed uint16/int16 is promoted to float */
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
+        } else {
+            return ndarray_binary_op(MP_BINARY_OP_MULTIPLY, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, *);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
+        } else {
+            return ndarray_binary_op(MP_BINARY_OP_MULTIPLY, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
+        } else {
+            return ndarray_binary_op(MP_BINARY_OP_MULTIPLY, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+        }
+    }
+    /* NOTE(review): if no branch above matched, results is still NULL and a NULL pointer is wrapped here - confirm this is unreachable. */
+    return MP_OBJ_FROM_PTR(results);
+}
+#endif /* NDARRAY_HAS_BINARY_OP_MULTIPLY */
+
+#if NDARRAY_HAS_BINARY_OP_MORE | NDARRAY_HAS_BINARY_OP_MORE_EQUAL | NDARRAY_HAS_BINARY_OP_LESS | NDARRAY_HAS_BINARY_OP_LESS_EQUAL
+mp_obj_t ndarray_binary_more(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides, mp_binary_op_t op) {
+    /* Element-wise lhs > rhs (MP_BINARY_OP_MORE) or lhs >= rhs (MP_BINARY_OP_MORE_EQUAL); returns a new dense uint8 array flagged as boolean. */
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+    results->boolean = 1;
+    uint8_t *array = (uint8_t *)results->array;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    /* Every (lhs, rhs) dtype pair has its own loop: ordering comparisons are not symmetric, so no operand swap is done here. */
+    #if NDARRAY_HAS_BINARY_OP_MORE | NDARRAY_HAS_BINARY_OP_LESS
+    if(op == MP_BINARY_OP_MORE) {
+        if(lhs->dtype == NDARRAY_UINT8) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, uint8_t, int8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, uint8_t, int16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
+            }
+        } else if(lhs->dtype == NDARRAY_INT8) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, int8_t, uint8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, int8_t, int8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, int8_t, uint16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, int8_t, int16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
+            }
+        } else if(lhs->dtype == NDARRAY_UINT16) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, uint16_t, int8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, uint16_t, int16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
+            }
+        } else if(lhs->dtype == NDARRAY_INT16) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, int16_t, uint8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, int16_t, int8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, int16_t, uint16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, int16_t, int16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_FLOAT) { /* lhs is int16: it must be read as signed, or negative values compare as large positives */
+                EQUALITY_LOOP(results, array, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
+            }
+        } else if(lhs->dtype == NDARRAY_FLOAT) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, >);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
+            }
+        }
+    }
+    #endif /* NDARRAY_HAS_BINARY_OP_MORE | NDARRAY_HAS_BINARY_OP_LESS*/
+    #if NDARRAY_HAS_BINARY_OP_MORE_EQUAL | NDARRAY_HAS_BINARY_OP_LESS_EQUAL
+    if(op == MP_BINARY_OP_MORE_EQUAL) {
+        if(lhs->dtype == NDARRAY_UINT8) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, uint8_t, int8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, uint8_t, int16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
+            }
+        } else if(lhs->dtype == NDARRAY_INT8) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, int8_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, int8_t, int8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, int8_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, int8_t, int16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
+            }
+        } else if(lhs->dtype == NDARRAY_UINT16) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, uint16_t, int8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, uint16_t, int16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
+            }
+        } else if(lhs->dtype == NDARRAY_INT16) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, int16_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, int16_t, int8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, int16_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, int16_t, int16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) { /* lhs is int16: it must be read as signed, or negative values compare as large positives */
+                EQUALITY_LOOP(results, array, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
+            }
+        } else if(lhs->dtype == NDARRAY_FLOAT) {
+            if(rhs->dtype == NDARRAY_UINT8) {
+                EQUALITY_LOOP(results, array, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                EQUALITY_LOOP(results, array, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                EQUALITY_LOOP(results, array, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                EQUALITY_LOOP(results, array, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, >=);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                EQUALITY_LOOP(results, array, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
+            }
+        }
+    }
+    #endif /* NDARRAY_HAS_BINARY_OP_MORE_EQUAL | NDARRAY_HAS_BINARY_OP_LESS_EQUAL */
+    /* Any other op returns results as allocated (presumably < and <= arrive here already rewritten as > / >= by the caller - confirm). */
+    return MP_OBJ_FROM_PTR(results);
+}
+#endif /* NDARRAY_HAS_BINARY_OP_MORE | NDARRAY_HAS_BINARY_OP_MORE_EQUAL | NDARRAY_HAS_BINARY_OP_LESS | NDARRAY_HAS_BINARY_OP_LESS_EQUAL */
+
+#if NDARRAY_HAS_BINARY_OP_SUBTRACT
+mp_obj_t ndarray_binary_subtract(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+
+    #if ULAB_SUPPORTS_COMPLEX
+    if((lhs->dtype == NDARRAY_COMPLEX) || (rhs->dtype == NDARRAY_COMPLEX))  {
+        return carray_binary_subtract(lhs, rhs, ndim, shape, lstrides, rstrides);
+    }
+    #endif
+
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
+        }
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, -);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
+        }
+    }
+
+    return MP_OBJ_FROM_PTR(results);
+}
+#endif /* NDARRAY_HAS_BINARY_OP_SUBTRACT */
+
+#if NDARRAY_HAS_BINARY_OP_TRUE_DIVIDE
+mp_obj_t ndarray_binary_true_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise lhs / rhs. ndim/shape size the result, lstrides/rstrides are handed to the loop macros; the new array is returned as an mp_obj_t.
+    #if ULAB_SUPPORTS_COMPLEX
+    if((lhs->dtype == NDARRAY_COMPLEX) || (rhs->dtype == NDARRAY_COMPLEX))  {
+        return carray_binary_divide(lhs, rhs, ndim, shape, lstrides, rstrides);
+    }
+    #endif
+
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT); // the result dtype is float for every input pair
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    // Two compile-time variants follow: one generic loop driven by per-dtype getter functions, or one typed BINARY_LOOP per (lhs, rhs) dtype pair.
+    #if NDARRAY_BINARY_USES_FUN_POINTER
+    mp_float_t (*get_lhs)(void *) = ndarray_get_float_function(lhs->dtype);
+    mp_float_t (*get_rhs)(void *) = ndarray_get_float_function(rhs->dtype);
+
+    uint8_t *array = (uint8_t *)results->array;
+    void (*set_result)(void *, mp_float_t ) = ndarray_set_float_function(NDARRAY_FLOAT); // not passed below; presumably the macro refers to it by name (TODO confirm)
+
+    // Note that lvalue and rvalue are local variables in the macro itself
+    FUNC_POINTER_LOOP(results, array, get_lhs, get_rhs, larray, lstrides, rarray, rstrides, lvalue/rvalue);
+
+    #else
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            BINARY_LOOP(results, mp_float_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            BINARY_LOOP(results, mp_float_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            BINARY_LOOP(results, mp_float_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            BINARY_LOOP(results, mp_float_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            BINARY_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
+        }
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            BINARY_LOOP(results, mp_float_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            BINARY_LOOP(results, mp_float_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            BINARY_LOOP(results, mp_float_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            BINARY_LOOP(results, mp_float_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            BINARY_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            BINARY_LOOP(results, mp_float_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            BINARY_LOOP(results, mp_float_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            BINARY_LOOP(results, mp_float_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            BINARY_LOOP(results, mp_float_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            BINARY_LOOP(results, mp_float_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            BINARY_LOOP(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            BINARY_LOOP(results, mp_float_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            BINARY_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            BINARY_LOOP(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            BINARY_LOOP(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            BINARY_LOOP(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            BINARY_LOOP(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, /);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            BINARY_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
+        }
+    }
+    #endif /* NDARRAY_BINARY_USES_FUN_POINTER */
+
+    return MP_OBJ_FROM_PTR(results);
+}
+#endif /* NDARRAY_HAS_BINARY_OP_TRUE_DIVIDE */
+
+#if NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE
+mp_obj_t ndarray_binary_floor_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs, 
+                                                    uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise floor division of lhs by rhs. ndim/shape size the result, lstrides/rstrides are handed to the loop macros; the result dtype is picked per (lhs, rhs) dtype pair below.
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    // results stays NULL (and is returned as such) when a dtype is not one of the five handled here; each branch uses the _UINT, plain or _FLOAT loop macro with the (result, lhs, rhs) C types.
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            FLOOR_DIVIDE_LOOP_UINT(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP_UINT(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            FLOOR_DIVIDE_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16); // NOTE(review): subtract uses INT16 for int8/uint16; UINT16 cannot hold a negative quotient - confirm
+            FLOOR_DIVIDE_LOOP(results, uint16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP_UINT(results, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP(results, uint16_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP_UINT(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides); // lhs is NDARRAY_INT16: read it as int16_t so negative values keep their sign
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+        if(rhs->dtype == NDARRAY_UINT8) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    }
+
+    return MP_OBJ_FROM_PTR(results);
+
+}
+#endif /* NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE */
+
+#if NDARRAY_HAS_BINARY_OP_POWER
+mp_obj_t ndarray_binary_power(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise lhs ** rhs. ndim/shape size the result, lstrides/rstrides are handed to the loop macros; the new array is returned as an mp_obj_t.
+    // Note that numpy upcasts the results to int64, if the inputs are of integer type,
+    // while we always return a float array.
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    // Two compile-time variants follow: one generic loop driven by per-dtype getter functions, or one typed POWER_LOOP per (lhs, rhs) dtype pair.
+    #if NDARRAY_BINARY_USES_FUN_POINTER
+    mp_float_t (*get_lhs)(void *) = ndarray_get_float_function(lhs->dtype);
+    mp_float_t (*get_rhs)(void *) = ndarray_get_float_function(rhs->dtype);
+
+    uint8_t *array = (uint8_t *)results->array;
+    void (*set_result)(void *, mp_float_t ) = ndarray_set_float_function(NDARRAY_FLOAT); // not passed below; presumably the macro refers to it by name (TODO confirm)
+
+    // Note that lvalue and rvalue are local variables in the macro itself
+    FUNC_POINTER_LOOP(results, array, get_lhs, get_rhs, larray, lstrides, rarray, rstrides, MICROPY_FLOAT_C_FUN(pow)(lvalue, rvalue));
+
+    #else
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            POWER_LOOP(results, mp_float_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            POWER_LOOP(results, mp_float_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            POWER_LOOP(results, mp_float_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            POWER_LOOP(results, mp_float_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            POWER_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            POWER_LOOP(results, mp_float_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            POWER_LOOP(results, mp_float_t, int8_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            POWER_LOOP(results, mp_float_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            POWER_LOOP(results, mp_float_t, int8_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            POWER_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            POWER_LOOP(results, mp_float_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            POWER_LOOP(results, mp_float_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            POWER_LOOP(results, mp_float_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            POWER_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            POWER_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            POWER_LOOP(results, mp_float_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            POWER_LOOP(results, mp_float_t, int16_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            POWER_LOOP(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            POWER_LOOP(results, mp_float_t, int16_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            POWER_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides); // lhs is NDARRAY_INT16: read it as int16_t so negative bases keep their sign
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            POWER_LOOP(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            POWER_LOOP(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            POWER_LOOP(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            POWER_LOOP(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            POWER_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    }
+    #endif /* NDARRAY_BINARY_USES_FUN_POINTER */
+
+    return MP_OBJ_FROM_PTR(results);
+}
+#endif /* NDARRAY_HAS_BINARY_OP_POWER */
+
+#if NDARRAY_HAS_BINARY_OP_OR | NDARRAY_HAS_BINARY_OP_XOR | NDARRAY_HAS_BINARY_OP_AND
+mp_obj_t ndarray_binary_logical(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                            uint8_t ndim, size_t *shape,  int32_t *lstrides, int32_t *rstrides, mp_binary_op_t op) {
+    // Element-wise bitwise ^, | or & (selected by op) on integer arrays. Float and complex inputs raise TypeError; any other op returns MP_OBJ_NULL.
+    #if ULAB_SUPPORTS_COMPLEX
+    if((lhs->dtype == NDARRAY_COMPLEX) || (rhs->dtype == NDARRAY_COMPLEX) || (lhs->dtype == NDARRAY_FLOAT) || (rhs->dtype == NDARRAY_FLOAT))  {
+        mp_raise_TypeError(MP_ERROR_TEXT("operation not supported for the input types"));
+    }
+    #else    
+    if((lhs->dtype == NDARRAY_FLOAT) || (rhs->dtype == NDARRAY_FLOAT)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("operation not supported for the input types"));
+    }
+    #endif
+
+    // bail out, if both inputs are of 16-bit types, but differ in sign;
+    // numpy promotes the result to int32
+    if(((lhs->dtype == NDARRAY_INT16) && (rhs->dtype == NDARRAY_UINT16)) || 
+        ((lhs->dtype == NDARRAY_UINT16) && (rhs->dtype == NDARRAY_INT16))) {
+        mp_raise_TypeError(MP_ERROR_TEXT("dtype of int32 is not supported"));
+    }
+
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+
+    // Each case handles only some (lhs, rhs) dtype orders directly; for the others the operands are swapped and sent through ndarray_binary_op again.
+    switch(op) {
+        case MP_BINARY_OP_XOR:
+            if(lhs->dtype == NDARRAY_UINT8) {
+                if(rhs->dtype == NDARRAY_UINT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+                    if(lhs->boolean & rhs->boolean) { // both inputs flagged boolean: flag the result as boolean too
+                        results->boolean = 1;
+                    }
+                    BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, ^);
+                } else if(rhs->dtype == NDARRAY_INT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, ^);
+                } else if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+                    BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, ^);
+                } else if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, ^);
+                }
+            } else if(lhs->dtype == NDARRAY_INT8) {
+                if(rhs->dtype == NDARRAY_INT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+                    BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, ^);
+                } else if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, ^);
+                } else if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, ^);
+                } else { // remaining rhs dtypes: swap the operands and re-dispatch
+                    return ndarray_binary_op(MP_BINARY_OP_XOR, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            } else if(lhs->dtype == NDARRAY_UINT16) {
+                if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+                    BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, ^);
+                } else if(rhs->dtype == NDARRAY_INT16) { // NOTE(review): looks unreachable, the uint16/int16 mix is rejected above - confirm
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+                    BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, ^);
+                } else {
+                    return ndarray_binary_op(MP_BINARY_OP_XOR, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            } else if(lhs->dtype == NDARRAY_INT16) {
+                if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, ^);
+                } else {
+                    return ndarray_binary_op(MP_BINARY_OP_XOR, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            }
+            break;
+
+        case MP_BINARY_OP_OR:
+            if(lhs->dtype == NDARRAY_UINT8) {
+                if(rhs->dtype == NDARRAY_UINT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+                    if(lhs->boolean & rhs->boolean) { // both inputs flagged boolean: flag the result as boolean too
+                        results->boolean = 1;
+                    }
+                    BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, |);
+                } else if(rhs->dtype == NDARRAY_INT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, |);
+                } else if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+                    BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, |);
+                } else if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, |);
+                }
+            } else if(lhs->dtype == NDARRAY_INT8) {
+                if(rhs->dtype == NDARRAY_INT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+                    BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, |);
+                } else if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, |);
+                } else if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, |);
+                } else { // remaining rhs dtypes: swap the operands and re-dispatch
+                    return ndarray_binary_op(MP_BINARY_OP_OR, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            } else if(lhs->dtype == NDARRAY_UINT16) {
+                if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+                    BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, |);
+                } else if(rhs->dtype == NDARRAY_INT16) { // NOTE(review): looks unreachable, the uint16/int16 mix is rejected above - confirm
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+                    BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, |);
+                } else {
+                    return ndarray_binary_op(MP_BINARY_OP_OR, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            } else if(lhs->dtype == NDARRAY_INT16) {
+                if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, |);
+                } else {
+                    return ndarray_binary_op(MP_BINARY_OP_OR, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            }
+            break;
+
+            case MP_BINARY_OP_AND:
+            if(lhs->dtype == NDARRAY_UINT8) {
+                if(rhs->dtype == NDARRAY_UINT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+                    if(lhs->boolean & rhs->boolean) { // both inputs flagged boolean: flag the result as boolean too
+                        results->boolean = 1;
+                    }
+                    BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, &);
+                } else if(rhs->dtype == NDARRAY_INT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, &);
+                } else if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+                    BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, &);
+                } else if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, &);
+                }
+            } else if(lhs->dtype == NDARRAY_INT8) {
+                if(rhs->dtype == NDARRAY_INT8) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+                    BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, &);
+                } else if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, &);
+                } else if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, &);
+                } else { // remaining rhs dtypes: swap the operands and re-dispatch
+                    return ndarray_binary_op(MP_BINARY_OP_AND, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            } else if(lhs->dtype == NDARRAY_UINT16) {
+                if(rhs->dtype == NDARRAY_UINT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+                    BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, &);
+                } else if(rhs->dtype == NDARRAY_INT16) { // NOTE(review): looks unreachable, the uint16/int16 mix is rejected above - confirm
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+                    BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, &);
+                } else {
+                    return ndarray_binary_op(MP_BINARY_OP_AND, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            } else if(lhs->dtype == NDARRAY_INT16) {
+                if(rhs->dtype == NDARRAY_INT16) {
+                    results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+                    BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, &);
+                } else {
+                    return ndarray_binary_op(MP_BINARY_OP_AND, MP_OBJ_FROM_PTR(rhs), MP_OBJ_FROM_PTR(lhs));
+                }
+            }
+            break;
+        default:
+            return MP_OBJ_NULL; // op not supported
+            break;
+    }
+    return MP_OBJ_FROM_PTR(results);
+}
+
+#endif /* NDARRAY_HAS_BINARY_OP_OR | NDARRAY_HAS_BINARY_OP_XOR | NDARRAY_HAS_BINARY_OP_AND */
+
+#if NDARRAY_HAS_INPLACE_ADD || NDARRAY_HAS_INPLACE_MULTIPLY || NDARRAY_HAS_INPLACE_SUBTRACT
+mp_obj_t ndarray_inplace_ams(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rstrides, uint8_t optype) {
+    // In-place add / multiply / subtract: applies +=, *= or -= (selected by optype) to lhs and returns lhs. A float rhs with a non-float lhs raises TypeError.
+    if((lhs->dtype != NDARRAY_FLOAT) && (rhs->dtype == NDARRAY_FLOAT)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("cannot cast output with casting rule"));
+    }
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+
+    #if NDARRAY_HAS_INPLACE_ADD
+    if(optype == MP_BINARY_OP_INPLACE_ADD) {
+        UNWRAP_INPLACE_OPERATOR(lhs, larray, rarray, rstrides, +=); // the macro also reads the local 'rhs' by name, it is not a macro parameter
+    }
+    #endif
+    #if NDARRAY_HAS_INPLACE_MULTIPLY
+    if(optype == MP_BINARY_OP_INPLACE_MULTIPLY) {
+        UNWRAP_INPLACE_OPERATOR(lhs, larray, rarray, rstrides, *=);
+    }
+    #endif
+    #if NDARRAY_HAS_INPLACE_SUBTRACT
+    if(optype == MP_BINARY_OP_INPLACE_SUBTRACT) {
+        UNWRAP_INPLACE_OPERATOR(lhs, larray, rarray, rstrides, -=);
+    }
+    #endif
+
+    return MP_OBJ_FROM_PTR(lhs);
+}
+#endif /* NDARRAY_HAS_INPLACE_ADD || NDARRAY_HAS_INPLACE_MULTIPLY || NDARRAY_HAS_INPLACE_SUBTRACT */
+
+#if NDARRAY_HAS_INPLACE_TRUE_DIVIDE
+mp_obj_t ndarray_inplace_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rstrides) {
+    // In-place lhs /= rhs. lhs must already be NDARRAY_FLOAT (TypeError otherwise); rstrides is handed to INPLACE_LOOP. Returns lhs.
+    if((lhs->dtype != NDARRAY_FLOAT)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("results cannot be cast to specified type"));
+    }
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    // lhs is float past the guard above, so only the rhs dtype selects the loop instantiation.
+    if(rhs->dtype == NDARRAY_UINT8) {
+        INPLACE_LOOP(lhs, mp_float_t, uint8_t, larray, rarray, rstrides, /=);
+    } else if(rhs->dtype == NDARRAY_INT8) {
+        INPLACE_LOOP(lhs, mp_float_t, int8_t, larray, rarray, rstrides, /=);
+    } else if(rhs->dtype == NDARRAY_UINT16) {
+        INPLACE_LOOP(lhs, mp_float_t, uint16_t, larray, rarray, rstrides, /=);
+    } else if(rhs->dtype == NDARRAY_INT16) {
+        INPLACE_LOOP(lhs, mp_float_t, int16_t, larray, rarray, rstrides, /=);
+    } else if(rhs->dtype == NDARRAY_FLOAT) { // test rhs, not lhs: an rhs of any other dtype must not be read as mp_float_t
+        INPLACE_LOOP(lhs, mp_float_t, mp_float_t, larray, rarray, rstrides, /=);
+    }
+    return MP_OBJ_FROM_PTR(lhs);
+}
+#endif /* NDARRAY_HAS_INPLACE_TRUE_DIVIDE */
+
+#if NDARRAY_HAS_INPLACE_POWER
+mp_obj_t ndarray_inplace_power(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rstrides) {
+    // In-place lhs **= rhs. lhs must already be NDARRAY_FLOAT (TypeError otherwise); rstrides is handed to INPLACE_POWER. Returns lhs.
+    if((lhs->dtype != NDARRAY_FLOAT)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("results cannot be cast to specified type"));
+    }
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    // lhs is float past the guard above, so only the rhs dtype selects the loop instantiation.
+    if(rhs->dtype == NDARRAY_UINT8) {
+        INPLACE_POWER(lhs, mp_float_t, uint8_t, larray, rarray, rstrides);
+    } else if(rhs->dtype == NDARRAY_INT8) {
+        INPLACE_POWER(lhs, mp_float_t, int8_t, larray, rarray, rstrides);
+    } else if(rhs->dtype == NDARRAY_UINT16) {
+        INPLACE_POWER(lhs, mp_float_t, uint16_t, larray, rarray, rstrides);
+    } else if(rhs->dtype == NDARRAY_INT16) {
+        INPLACE_POWER(lhs, mp_float_t, int16_t, larray, rarray, rstrides);
+    } else if(rhs->dtype == NDARRAY_FLOAT) { // test rhs, not lhs: an rhs of any other dtype must not be read as mp_float_t
+        INPLACE_POWER(lhs, mp_float_t, mp_float_t, larray, rarray, rstrides);
+    }
+    return MP_OBJ_FROM_PTR(lhs);
+}
+#endif /* NDARRAY_HAS_INPLACE_POWER */
diff --git a/tulip/shared/ulab/code/ndarray_operators.h b/tulip/shared/ulab/code/ndarray_operators.h
new file mode 100644
index 000000000..f0f3c89d8
--- /dev/null
+++ b/tulip/shared/ulab/code/ndarray_operators.h
@@ -0,0 +1,539 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2023 Zoltán Vörös
+*/
+
+#include "ndarray.h"
+
+// Binary operators: each takes the two operands, followed by a uint8_t, a size_t * and two
+// int32_t * arguments; the equality, comparison and logical variants also take the
+// mp_binary_op_t that selects the concrete operation.
+// NOTE(review): the unnamed arguments are presumably ndim, the broadcast shape, and the
+// left/right strides -- confirm against the definitions in ndarray_operators.c
+mp_obj_t ndarray_binary_equality(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *,  int32_t *, int32_t *, mp_binary_op_t );
+mp_obj_t ndarray_binary_add(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t ndarray_binary_multiply(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t ndarray_binary_more(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *, mp_binary_op_t );
+mp_obj_t ndarray_binary_power(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t ndarray_binary_subtract(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t ndarray_binary_true_divide(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t ndarray_binary_logical(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *,  int32_t *, int32_t *, mp_binary_op_t );
+mp_obj_t ndarray_binary_floor_divide(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+
+// In-place operators: arguments are the left operand, the right operand, and an int32_t *
+// (the strides of the right operand in ndarray_inplace_power).
+// NOTE(review): the trailing uint8_t of ndarray_inplace_ams presumably selects the
+// operator (add/multiply/subtract) -- confirm against the definition
+mp_obj_t ndarray_inplace_ams(ndarray_obj_t *, ndarray_obj_t *, int32_t *, uint8_t );
+mp_obj_t ndarray_inplace_power(ndarray_obj_t *, ndarray_obj_t *, int32_t *);
+mp_obj_t ndarray_inplace_divide(ndarray_obj_t *, ndarray_obj_t *, int32_t *);
+
+/*
+ * Selects the INPLACE_LOOP instantiation whose C element types match the dtypes of the
+ * two operands, and forwards larray, rarray, rstrides and OPERATOR to it.
+ *
+ * For the integer lhs dtypes, every rhs dtype other than uint8/int8/uint16 is read as
+ * int16_t; for a float lhs, every rhs dtype other than the four integer ones is read as
+ * mp_float_t. An lhs dtype outside the five listed here expands to no operation.
+ *
+ * NOTE: rhs is not a parameter of this macro; a variable with that name must be in
+ * scope wherever the macro is expanded.
+ */
+#define UNWRAP_INPLACE_OPERATOR(lhs, larray, rarray, rstrides, OPERATOR)\
+({\
+    if((lhs)->dtype == NDARRAY_UINT8) {\
+        if(rhs->dtype == NDARRAY_UINT8) {\
+            INPLACE_LOOP((lhs), uint8_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_INT8) {\
+            INPLACE_LOOP((lhs), uint8_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_UINT16) {\
+            INPLACE_LOOP((lhs), uint8_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else {\
+            INPLACE_LOOP((lhs), uint8_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        }\
+    } else if((lhs)->dtype == NDARRAY_INT8) {\
+        if(rhs->dtype == NDARRAY_UINT8) {\
+            INPLACE_LOOP((lhs), int8_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_INT8) {\
+            INPLACE_LOOP((lhs), int8_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_UINT16) {\
+            INPLACE_LOOP((lhs), int8_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else {\
+            INPLACE_LOOP((lhs), int8_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        }\
+    } else if((lhs)->dtype == NDARRAY_UINT16) {\
+        if(rhs->dtype == NDARRAY_UINT8) {\
+            INPLACE_LOOP((lhs), uint16_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_INT8) {\
+            INPLACE_LOOP((lhs), uint16_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_UINT16) {\
+            INPLACE_LOOP((lhs), uint16_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else {\
+            INPLACE_LOOP((lhs), uint16_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        }\
+    } else if((lhs)->dtype == NDARRAY_INT16) {\
+        if(rhs->dtype == NDARRAY_UINT8) {\
+            INPLACE_LOOP((lhs), int16_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_INT8) {\
+            INPLACE_LOOP((lhs), int16_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_UINT16) {\
+            INPLACE_LOOP((lhs), int16_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else {\
+            INPLACE_LOOP((lhs), int16_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        }\
+    } else if((lhs)->dtype == NDARRAY_FLOAT) {\
+        if(rhs->dtype == NDARRAY_UINT8) {\
+            INPLACE_LOOP((lhs), mp_float_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_INT8) {\
+            INPLACE_LOOP((lhs), mp_float_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_UINT16) {\
+            INPLACE_LOOP((lhs), mp_float_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else if(rhs->dtype == NDARRAY_INT16) {\
+            INPLACE_LOOP((lhs), mp_float_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
+        } else {\
+            INPLACE_LOOP((lhs), mp_float_t, mp_float_t, (larray), (rarray), (rstrides), OPERATOR);\
+        }\
+    }\
+})
+
+#if ULAB_MAX_DIMS == 1
+/*
+ * In-place power along the last axis (ULAB_MAX_DIMS == 1 variant): every element read
+ * through larray as type_left is overwritten with MICROPY_FLOAT_C_FUN(pow)(element, exponent),
+ * where the exponent is read through rarray as type_right.
+ * larray is stepped by results->strides, rarray by rstrides; both pointers are modified
+ * by the macro. The do/while body runs at least once, whatever the axis length is.
+ */
+#define INPLACE_POWER(results, type_left, type_right, larray, rarray, rstrides)\
+({  size_t l = 0;\
+    do {\
+        *((type_left *)(larray)) = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+        (larray) += (results)->strides[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+
+/*
+ * Generic binary loop along the last axis (ULAB_MAX_DIMS == 1 variant).
+ * For every element, get_lhs and get_rhs fetch the operands into the locals lvalue and
+ * rvalue (both mp_float_t); OPERATION is an expression that may refer to these two names,
+ * and its value is stored at array through set_result. array then advances by
+ * results->itemsize, larray by lstrides, and rarray by rstrides.
+ *
+ * NOTE: set_result is not a parameter of this macro; a callable with that name must be
+ * in scope wherever the macro is expanded.
+ */
+#define FUNC_POINTER_LOOP(results, array, get_lhs, get_rhs, larray, lstrides, rarray, rstrides, OPERATION)\
+({  size_t l = 0;\
+    do {\
+        mp_float_t lvalue = (get_lhs)((larray));\
+        mp_float_t rvalue = (get_rhs)((rarray));\
+        (set_result)((array), OPERATION);\
+        (array) += (results)->itemsize;\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+#endif /* ULAB_MAX_DIMS == 1 */
+
+#if ULAB_MAX_DIMS == 2
+/*
+ * In-place power over the last two axes (ULAB_MAX_DIMS == 2 variant): every element read
+ * through larray as type_left is overwritten with MICROPY_FLOAT_C_FUN(pow)(element, exponent),
+ * where the exponent is read through rarray as type_right.
+ * After each row, both pointers are rewound by the distance covered along the last axis
+ * and moved on by one stride of the outer axis. larray uses results->strides, rarray
+ * uses rstrides. Each do/while body runs at least once.
+ */
+#define INPLACE_POWER(results, type_left, type_right, larray, rarray, rstrides)\
+({  size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            *((type_left *)(larray)) = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+            (larray) += (results)->strides[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        (larray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (larray) += (results)->strides[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+})
+
+/*
+ * Generic binary loop over the last two axes (ULAB_MAX_DIMS == 2 variant).
+ * For every element, get_lhs and get_rhs fetch the operands into the locals lvalue and
+ * rvalue (both mp_float_t); OPERATION is an expression that may refer to these two names,
+ * and its value is stored at array through set_result. array advances by
+ * results->itemsize per element; larray and rarray are stepped, and rewound after each
+ * row, with lstrides and rstrides, respectively.
+ *
+ * NOTE: set_result is not a parameter of this macro; a callable with that name must be
+ * in scope wherever the macro is expanded.
+ */
+#define FUNC_POINTER_LOOP(results, array, get_lhs, get_rhs, larray, lstrides, rarray, rstrides, OPERATION)\
+({  size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            mp_float_t lvalue = (get_lhs)((larray));\
+            mp_float_t rvalue = (get_rhs)((rarray));\
+            (set_result)((array), OPERATION);\
+            (array) += (results)->itemsize;\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+})
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+/*
+ * In-place power over the last three axes (ULAB_MAX_DIMS == 3 variant): every element read
+ * through larray as type_left is overwritten with MICROPY_FLOAT_C_FUN(pow)(element, exponent),
+ * where the exponent is read through rarray as type_right.
+ * After each inner loop, both pointers are rewound by the distance covered along that
+ * axis and moved on by one stride of the next outer axis. larray uses results->strides,
+ * rarray uses rstrides. Each do/while body runs at least once.
+ */
+#define INPLACE_POWER(results, type_left, type_right, larray, rarray, rstrides)\
+({  size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                *((type_left *)(larray)) = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+                (larray) += (results)->strides[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+            (larray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (larray) += (results)->strides[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (results)->strides[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (larray) += (results)->strides[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+})
+
+
+/*
+ * Generic binary loop over the last three axes (ULAB_MAX_DIMS == 3 variant).
+ * For every element, get_lhs and get_rhs fetch the operands into the locals lvalue and
+ * rvalue (both mp_float_t); OPERATION is an expression that may refer to these two names,
+ * and its value is stored at array through set_result. array advances by
+ * results->itemsize per element.
+ *
+ * larray is stepped and rewound exclusively with lstrides, rarray exclusively with
+ * rstrides: the strides of results describe the output array, and must not be used for
+ * walking the operands, whose strides can differ from those of the output.
+ *
+ * NOTE: set_result is not a parameter of this macro; a callable with that name must be
+ * in scope wherever the macro is expanded.
+ */
+#define FUNC_POINTER_LOOP(results, array, get_lhs, get_rhs, larray, lstrides, rarray, rstrides, OPERATION)\
+({  size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                mp_float_t lvalue = (get_lhs)((larray));\
+                mp_float_t rvalue = (get_rhs)((rarray));\
+                (set_result)((array), OPERATION);\
+                (array) += (results)->itemsize;\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+})
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+/*
+ * In-place power over four axes (ULAB_MAX_DIMS == 4 variant): every element read
+ * through larray as type_left is overwritten with MICROPY_FLOAT_C_FUN(pow)(element, exponent),
+ * where the exponent is read through rarray as type_right.
+ * After each inner loop, both pointers are rewound by the distance covered along that
+ * axis and moved on by one stride of the next outer axis. larray uses results->strides,
+ * rarray uses rstrides. Each do/while body runs at least once.
+ */
+#define INPLACE_POWER(results, type_left, type_right, larray, rarray, rstrides)\
+({  size_t i = 0;\
+    do {\
+        size_t j = 0;\
+        do {\
+            size_t k = 0;\
+            do {\
+                size_t l = 0;\
+                do {\
+                    *((type_left *)(larray)) = MICROPY_FLOAT_C_FUN(pow)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+                    (larray) += (results)->strides[ULAB_MAX_DIMS - 1];\
+                    (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+                (larray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (larray) += (results)->strides[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (results)->strides[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (larray) += (results)->strides[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (results)->strides[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (larray) += (results)->strides[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+})
+
+/*
+ * Generic binary loop over four axes (ULAB_MAX_DIMS == 4 variant).
+ * For every element, get_lhs and get_rhs fetch the operands into the locals lvalue and
+ * rvalue (both mp_float_t); OPERATION is an expression that may refer to these two names,
+ * and its value is stored at array through set_result. array advances by
+ * results->itemsize per element.
+ *
+ * larray is stepped and rewound exclusively with lstrides, rarray exclusively with
+ * rstrides: the strides of results describe the output array, and must not be used for
+ * walking the operands, whose strides can differ from those of the output.
+ *
+ * NOTE: set_result is not a parameter of this macro; a callable with that name must be
+ * in scope wherever the macro is expanded.
+ */
+#define FUNC_POINTER_LOOP(results, array, get_lhs, get_rhs, larray, lstrides, rarray, rstrides, OPERATION)\
+({  size_t i = 0;\
+    do {\
+        size_t j = 0;\
+        do {\
+            size_t k = 0;\
+            do {\
+                size_t l = 0;\
+                do {\
+                    mp_float_t lvalue = (get_lhs)((larray));\
+                    mp_float_t rvalue = (get_rhs)((rarray));\
+                    (set_result)((array), OPERATION);\
+                    (array) += (results)->itemsize;\
+                    (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                    (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+})
+#endif /* ULAB_MAX_DIMS == 4 */
+
+/*
+ * Divides one row element by element with the plain C `/` operator: the left operand is
+ * read through larray as type_left, the right one through rarray as type_right, and the
+ * quotient is written to *array, which is then incremented.
+ * larray and rarray advance by the last entry of lstrides and rstrides; the number of
+ * iterations is the last entry of results->shape, but at least one.
+ * NOTE(review): a zero divisor is not checked for here -- confirm that callers rule it out.
+ */
+#define FLOOR_DIVIDE_UINT1(results, array, type_left, type_right, larray, lstrides, rarray, rstrides)\
+({\
+    size_t l = 0;\
+    do {\
+        *(array)++ = *((type_left *)(larray)) / *((type_right *)(rarray));\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+
+/*
+ * Floor division of one row of signed integer operands; the quotients are written
+ * consecutively through array.
+ *
+ * C integer division truncates towards zero, so, when the operands have opposite signs,
+ * the numerator is first moved away from zero by |denom| - 1, which turns the truncated
+ * quotient into the floored one. Both operands are converted to int16_t beforehand.
+ *
+ * The denominator is fetched anew for every element, because rarray advances together
+ * with larray; reading it only once would divide the whole row by its first divisor.
+ *
+ * NOTE(review): a zero denominator is not checked for here -- confirm that callers rule it out.
+ */
+#define FLOOR_DIVIDE1(results, array, type_left, type_right, larray, lstrides, rarray, rstrides)\
+({\
+    size_t l = 0;\
+    int16_t num;\
+    int16_t denom;\
+    do {\
+        num = (int16_t)*((type_left *)(larray));\
+        denom = (int16_t)*((type_right *)(rarray));\
+        if(num >= 0) {\
+            if(denom < 0) {\
+                num += -denom - 1;\
+            }\
+        } else {\
+            if(denom >= 0) {\
+                num += -denom + 1;\
+            }\
+        }\
+        *(array)++ =  num / denom;\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+
+/*
+ * Floor division of one row via floating point: the quotient of the element read through
+ * larray (as type_left) and the element read through rarray (as type_right) is passed to
+ * MICROPY_FLOAT_C_FUN(floor), and the result is written to *array, which is then incremented.
+ * larray and rarray advance by the last entry of lstrides and rstrides; the number of
+ * iterations is the last entry of results->shape, but at least one.
+ */
+#define FLOOR_DIVIDE_FLOAT1(results, array, type_left, type_right, larray, lstrides, rarray, rstrides)\
+({\
+    size_t l = 0;\
+    do {\
+        *(array)++ = MICROPY_FLOAT_C_FUN(floor)(*((type_left *)(larray)) / *((type_right *)(rarray)));\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+
+#if ULAB_MAX_DIMS == 1
+/*
+ * Floor-division drivers for ULAB_MAX_DIMS == 1. Each one declares a local output
+ * pointer `array` of type_out that starts at results->array, and hands the single row
+ * over to the matching row macro. The three variants differ only in that row macro.
+ */
+
+/* Row macro: FLOOR_DIVIDE_UINT1 (plain C division). */
+#define FLOOR_DIVIDE_LOOP_UINT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    FLOOR_DIVIDE_UINT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+} while(0)
+
+/* Row macro: FLOOR_DIVIDE1 (signed integer operands). */
+#define FLOOR_DIVIDE_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    FLOOR_DIVIDE1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+} while(0)
+
+/* Row macro: FLOOR_DIVIDE_FLOAT1 (division followed by floor). */
+#define FLOOR_DIVIDE_LOOP_FLOAT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    FLOOR_DIVIDE_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 1 */
+
+#if ULAB_MAX_DIMS == 2
+/*
+ * Floor-division drivers for ULAB_MAX_DIMS == 2. Each one declares a local output
+ * pointer `array` of type_out that starts at results->array, and calls the matching row
+ * macro once per row. After a row, larray and rarray are rewound by the distance covered
+ * along the last axis, and moved on by one stride of the outer axis; `array` is only ever
+ * incremented by the row macro. The three variants differ only in the row macro.
+ * The row macros declare their own counter `l` in a nested statement expression; that
+ * one shadows the counter declared here without modifying it.
+ */
+
+/* Row macro: FLOOR_DIVIDE_UINT1 (plain C division). */
+#define FLOOR_DIVIDE_LOOP_UINT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do {\
+        FLOOR_DIVIDE_UINT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+/* Row macro: FLOOR_DIVIDE1 (signed integer operands). */
+#define FLOOR_DIVIDE_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do {\
+        FLOOR_DIVIDE1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+/* Row macro: FLOOR_DIVIDE_FLOAT1 (division followed by floor). */
+#define FLOOR_DIVIDE_LOOP_FLOAT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do {\
+        FLOOR_DIVIDE_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+/*
+ * Floor-division drivers for ULAB_MAX_DIMS == 3. Each one declares a local output
+ * pointer `array` of type_out that starts at results->array, and calls the matching row
+ * macro once per row. After each inner loop, larray and rarray are rewound by the
+ * distance covered along that axis, and moved on by one stride of the next outer axis;
+ * `array` is only ever incremented by the row macro. The three variants differ only in
+ * the row macro.
+ * The row macros declare their own counter `l` in a nested statement expression; that
+ * one shadows the counter declared here without modifying it.
+ */
+
+/* Row macro: FLOOR_DIVIDE_UINT1 (plain C division). */
+#define FLOOR_DIVIDE_LOOP_UINT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+  type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            FLOOR_DIVIDE_UINT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+/* Row macro: FLOOR_DIVIDE1 (signed integer operands). */
+#define FLOOR_DIVIDE_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            FLOOR_DIVIDE1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+/* Row macro: FLOOR_DIVIDE_FLOAT1 (division followed by floor). */
+#define FLOOR_DIVIDE_LOOP_FLOAT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            FLOOR_DIVIDE_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+/*
+ * Floor-division drivers for ULAB_MAX_DIMS == 4. Each one declares a local output
+ * pointer `array` of type_out that starts at results->array, and calls the matching row
+ * macro once per row. After each inner loop, larray and rarray are rewound by the
+ * distance covered along that axis, and moved on by one stride of the next outer axis;
+ * `array` is only ever incremented by the row macro. The three variants differ only in
+ * the row macro.
+ * The row macros declare their own counter `l` in a nested statement expression; that
+ * one shadows the counter declared here without modifying it.
+ */
+
+/* Row macro: FLOOR_DIVIDE_UINT1 (plain C division). */
+#define FLOOR_DIVIDE_LOOP_UINT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                FLOOR_DIVIDE_UINT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+
+/* Row macro: FLOOR_DIVIDE1 (signed integer operands). */
+#define FLOOR_DIVIDE_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                FLOOR_DIVIDE1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+
+/* Row macro: FLOOR_DIVIDE_FLOAT1 (division followed by floor). */
+#define FLOOR_DIVIDE_LOOP_FLOAT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                FLOOR_DIVIDE_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+
+#endif /* ULAB_MAX_DIMS == 4 */
diff --git a/tulip/shared/ulab/code/ndarray_properties.c b/tulip/shared/ulab/code/ndarray_properties.c
new file mode 100644
index 000000000..aa2971589
--- /dev/null
+++ b/tulip/shared/ulab/code/ndarray_properties.c
@@ -0,0 +1,99 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021 Zoltán Vörös
+ *
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+
+#include "ulab.h"
+#include "ndarray.h"
+#include "numpy/ndarray/ndarray_iter.h"
+#if ULAB_SUPPORTS_COMPLEX
+#include "numpy/carray/carray.h"
+#endif
+
+/*
+ * Attribute handler for the properties of an ndarray.
+ *
+ * self_in - the ndarray whose attribute is accessed
+ * attr    - qstr of the attribute name
+ * dest    - two-slot in/out array of the MicroPython attr protocol
+ *
+ * Load (dest[0] == MP_OBJ_NULL on entry): dest[0] receives the value of the property,
+ * if the property is compiled in; for any other name, dest[1] is set to
+ * MP_OBJ_SENTINEL, so that the lookup is forwarded to the locals dict.
+ *
+ * Store (dest[0] != MP_OBJ_NULL and dest[1] != NULL on entry): only `shape` can be
+ * assigned to, and only if reshaping is compiled in; the array is then reshaped via
+ * ndarray_reshape_core, and dest[0] is reset to MP_OBJ_NULL. For any other name the
+ * function returns with dest untouched.
+ */
+void ndarray_properties_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
+    if (dest[0] == MP_OBJ_NULL) {
+        switch(attr) {
+            #if NDARRAY_HAS_DTYPE
+            case MP_QSTR_dtype:
+                dest[0] = ndarray_dtype(self_in);
+                break;
+            #endif
+            #if NDARRAY_HAS_FLATITER
+            case MP_QSTR_flat:
+                dest[0] = ndarray_flatiter_make_new(self_in);
+                break;
+            #endif
+            #if NDARRAY_HAS_ITEMSIZE
+            case MP_QSTR_itemsize:
+                dest[0] = ndarray_itemsize(self_in);
+                break;
+            #endif
+            #if NDARRAY_HAS_SHAPE
+            case MP_QSTR_shape:
+                dest[0] = ndarray_shape(self_in);
+                break;
+            #endif
+            #if NDARRAY_HAS_SIZE
+            case MP_QSTR_size:
+                dest[0] = ndarray_size(self_in);
+                break;
+            #endif
+            #if NDARRAY_HAS_STRIDES
+            case MP_QSTR_strides:
+                dest[0] = ndarray_strides(self_in);
+                break;
+            #endif
+            #if NDARRAY_HAS_TRANSPOSE
+            case MP_QSTR_T:
+                dest[0] = ndarray_transpose(self_in);
+                break;
+            #endif
+            #if ULAB_SUPPORTS_COMPLEX
+            #if ULAB_NUMPY_HAS_IMAG
+            case MP_QSTR_imag:
+                dest[0] = carray_imag(self_in);
+                break;
+            #endif
+            // `real` has its own feature switch; it must not depend on the one for `imag`
+            #if ULAB_NUMPY_HAS_REAL
+            case MP_QSTR_real:
+                dest[0] = carray_real(self_in);
+                break;
+            #endif
+            #endif /* ULAB_SUPPORTS_COMPLEX */
+            default:
+                // forward to locals dict
+                dest[1] = MP_OBJ_SENTINEL;
+                break;
+        }
+    } else {
+        if(dest[1]) {
+            switch(attr) {
+                #if ULAB_MAX_DIMS > 1
+                #if NDARRAY_HAS_RESHAPE
+                case MP_QSTR_shape:
+                    ndarray_reshape_core(self_in, dest[1], 1);
+                    break;
+                #endif
+                #endif
+                default:
+                    // not a writable property: leave dest as it is
+                    return;
+                    break;
+            }
+            // reached only after a successful store
+            dest[0] = MP_OBJ_NULL;
+        }
+    }
+}
diff --git a/tulip/shared/ulab/code/ndarray_properties.h b/tulip/shared/ulab/code/ndarray_properties.h
new file mode 100644
index 000000000..3e0b9a40b
--- /dev/null
+++ b/tulip/shared/ulab/code/ndarray_properties.h
@@ -0,0 +1,51 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020-2021 Zoltán Vörös
+*/
+
+#ifndef _NDARRAY_PROPERTIES_
+#define _NDARRAY_PROPERTIES_
+
+#include "py/runtime.h"
+#include "py/binary.h"
+#include "py/obj.h"
+#include "py/objarray.h"
+
+#include "ulab.h"
+#include "ndarray.h"
+#include "numpy/ndarray/ndarray_iter.h"
+
+// Attribute handler of the ndarray properties; defined in ndarray_properties.c.
+void ndarray_properties_attr(mp_obj_t , qstr , mp_obj_t *);
+
+// One-argument function objects wrapping the property getters. Each object is compiled in
+// only if the matching NDARRAY_HAS_* switch is non-zero.
+// NOTE(review): MP_DEFINE_CONST_FUN_OBJ_1 presumably emits a definition, not a declaration,
+// in which case this header can be included from a single translation unit only -- confirm.
+#if NDARRAY_HAS_DTYPE
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_dtype_obj, ndarray_dtype);
+#endif
+
+#if NDARRAY_HAS_FLATITER
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_flatiter_make_new_obj, ndarray_flatiter_make_new);
+#endif
+
+#if NDARRAY_HAS_ITEMSIZE
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_itemsize_obj, ndarray_itemsize);
+#endif
+
+#if NDARRAY_HAS_SHAPE
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_shape_obj, ndarray_shape);
+#endif
+
+#if NDARRAY_HAS_SIZE
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_size_obj, ndarray_size);
+#endif
+
+#if NDARRAY_HAS_STRIDES
+MP_DEFINE_CONST_FUN_OBJ_1(ndarray_strides_obj, ndarray_strides);
+#endif
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/approx.c b/tulip/shared/ulab/code/numpy/approx.c
new file mode 100644
index 000000000..a268bb1d3
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/approx.c
@@ -0,0 +1,227 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+ *               2020 Diego Elio Pettenò
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../ulab.h"
+#include "../ulab_tools.h"
+#include "carray/carray_tools.h"
+#include "approx.h"
+
+//| """Numerical approximation methods"""
+//|
+
+ULAB_DEFINE_FLOAT_CONST(approx_trapz_dx, MICROPY_FLOAT_CONST(1.0), 0x3f800000UL, 0x3ff0000000000000ULL); // default dx (1.0) used by trapz; the two hex words are 1.0 encoded as IEEE-754 single and double
+
+#if ULAB_NUMPY_HAS_INTERP
+//| def interp(
+//|     x: ulab.numpy.ndarray,
+//|     xp: ulab.numpy.ndarray,
+//|     fp: ulab.numpy.ndarray,
+//|     *,
+//|     left: Optional[_float] = None,
+//|     right: Optional[_float] = None
+//| ) -> ulab.numpy.ndarray:
+//|     """
+//|     :param ulab.numpy.ndarray x: The x-coordinates at which to evaluate the interpolated values.
+//|     :param ulab.numpy.ndarray xp: The x-coordinates of the data points, must be increasing
+//|     :param ulab.numpy.ndarray fp: The y-coordinates of the data points, same length as xp
+//|     :param left: Value to return for ``x < xp[0]``, default is ``fp[0]``.
+//|     :param right: Value to return for ``x > xp[-1]``, default is ``fp[-1]``.
+//|
+//|     Returns the one-dimensional piecewise linear interpolant to a function with given discrete data points (xp, fp), evaluated at x."""
+//|     ...
+//|
+
+static mp_obj_t approx_interp(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_left, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
+        { MP_QSTR_right, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
+    };
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    // interp(x, xp, fp, *, left=None, right=None): piecewise-linear interpolation of the points (xp, fp), evaluated at x
+    ndarray_obj_t *x = ndarray_from_mp_obj(args[0].u_obj, 0);
+    ndarray_obj_t *xp = ndarray_from_mp_obj(args[1].u_obj, 0); // xp must hold an increasing sequence of independent values
+    ndarray_obj_t *fp = ndarray_from_mp_obj(args[2].u_obj, 0);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(x->dtype)
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(xp->dtype)
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(fp->dtype)
+    if((xp->ndim != 1) || (fp->ndim != 1) || (xp->len < 2) || (fp->len < 2) || (xp->len != fp->len)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("interp is defined for 1D iterables of equal length"));
+    }
+    // the result is a new float array holding one value per element of x
+    ndarray_obj_t *y = ndarray_new_linear_array(x->len, NDARRAY_FLOAT);
+    mp_float_t left_value, right_value;
+    uint8_t *xparray = (uint8_t *)xp->array;
+    // read the first and the last xp value; together they bound the interpolation range
+    mp_float_t xp_left = ndarray_get_float_value(xparray, xp->dtype);
+    xparray += (xp->len-1) * xp->strides[ULAB_MAX_DIMS - 1];
+    mp_float_t xp_right = ndarray_get_float_value(xparray, xp->dtype);
+
+    uint8_t *fparray = (uint8_t *)fp->array;
+    // out-of-range fill values: the left/right keywords if given, otherwise the first/last element of fp
+    if(args[3].u_obj == mp_const_none) {
+        left_value = ndarray_get_float_value(fparray, fp->dtype);
+    } else {
+        left_value = mp_obj_get_float(args[3].u_obj);
+    }
+    if(args[4].u_obj == mp_const_none) {
+        fparray += (fp->len-1) * fp->strides[ULAB_MAX_DIMS - 1];
+        right_value = ndarray_get_float_value(fparray, fp->dtype);
+    } else {
+        right_value = mp_obj_get_float(args[4].u_obj);
+    }
+    // rewind both cursors to the first element; they were moved to the last element above
+    xparray = xp->array;
+    fparray = fp->array;
+
+    uint8_t *xarray = (uint8_t *)x->array;
+    mp_float_t *yarray = (mp_float_t *)y->array;
+    uint8_t *temp;
+    // NOTE(review): x->ndim is never checked and only the last-axis stride of x is used below -- confirm x is meant to be 1D
+    for(size_t i=0; i < x->len; i++, yarray++) {
+        mp_float_t x_value = ndarray_get_float_value(xarray, x->dtype);
+        xarray += x->strides[ULAB_MAX_DIMS - 1];
+        if(x_value < xp_left) {
+            *yarray = left_value;
+        } else if(x_value > xp_right) {
+            *yarray = right_value;
+        } else { // do the binary search here
+            mp_float_t xp_left_, xp_right_;
+            mp_float_t fp_left, fp_right;
+            size_t left_index = 0, right_index = xp->len - 1, middle_index;
+            while(right_index - left_index > 1) { // halve [left_index, right_index] until the two indices are adjacent
+                middle_index = left_index + (right_index - left_index) / 2;
+                temp = xparray + middle_index * xp->strides[ULAB_MAX_DIMS - 1];
+                mp_float_t xp_middle = ndarray_get_float_value(temp, xp->dtype);
+                if(x_value <= xp_middle) {
+                    right_index = middle_index;
+                } else {
+                    left_index = middle_index;
+                }
+            }
+            temp = xparray + left_index * xp->strides[ULAB_MAX_DIMS - 1];
+            xp_left_ = ndarray_get_float_value(temp, xp->dtype);
+
+            temp = xparray + right_index * xp->strides[ULAB_MAX_DIMS - 1];
+            xp_right_ = ndarray_get_float_value(temp, xp->dtype);
+
+            temp = fparray + left_index * fp->strides[ULAB_MAX_DIMS - 1];
+            fp_left = ndarray_get_float_value(temp, fp->dtype);
+
+            temp = fparray + right_index * fp->strides[ULAB_MAX_DIMS - 1];
+            fp_right = ndarray_get_float_value(temp, fp->dtype);
+            // straight line through the two bracketing points; NOTE(review): divides by zero if the two xp values are equal
+            *yarray = fp_left + (x_value - xp_left_) * (fp_right - fp_left) / (xp_right_ - xp_left_);
+        }
+    }
+    return MP_OBJ_FROM_PTR(y);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(approx_interp_obj, 2, approx_interp);
+#endif
+
+#if ULAB_NUMPY_HAS_TRAPZ
+//| def trapz(y: ulab.numpy.ndarray, x: Optional[ulab.numpy.ndarray] = None, dx: _float = 1.0) -> _float:
+//|     """
+//|     :param 1D ulab.numpy.ndarray y: the values of the dependent variable
+//|     :param 1D ulab.numpy.ndarray x: optional, the coordinates of the independent variable. Defaults to uniformly spaced values.
+//|     :param float dx: the spacing between sample points, if x=None
+//|
+//|     Returns the integral of y(x) using the trapezoidal rule.
+//|     """
+//|     ...
+//|
+
+static mp_obj_t approx_trapz(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_x, MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_dx, MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(approx_trapz_dx)} },
+    };
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    // trapz(y, x=None, dx=1.0): trapezoidal-rule integral of a 1D, non-complex y; the result is returned as a float
+    ndarray_obj_t *y = ndarray_from_mp_obj(args[0].u_obj, 0);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(y->dtype)
+    ndarray_obj_t *x;
+    mp_float_t mean = MICROPY_FLOAT_CONST(0.0);
+    if(y->len < 2) {
+        return mp_obj_new_float(mean); // fewer than two samples: there is no interval, so the integral is 0.0
+    }
+    if((y->ndim != 1)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("trapz is defined for 1D iterables"));
+    }
+    // y (and x, if given) are read through dtype-specific getters and walked by their last-axis byte stride
+    mp_float_t (*funcy)(void *) = ndarray_get_float_function(y->dtype);
+    uint8_t *yarray = (uint8_t *)y->array;
+
+    size_t count = 1;
+    mp_float_t y1, y2, m;
+    // `mean` is the running mean of the per-interval terms; it is scaled by (len - 1) / 2 in the return statement
+    if(args[1].u_obj != mp_const_none) {
+        x = ndarray_from_mp_obj(args[1].u_obj, 0); // x must hold an increasing sequence of independent values
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(x->dtype)
+        if((x->ndim != 1) || (y->len != x->len)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("trapz is defined for 1D arrays of equal length"));
+        }
+
+        mp_float_t (*funcx)(void *) = ndarray_get_float_function(x->dtype);
+        uint8_t *xarray = (uint8_t *)x->array;
+        mp_float_t x1, x2;
+
+        y1 = funcy(yarray);
+        yarray += y->strides[ULAB_MAX_DIMS - 1];
+        x1 = funcx(xarray);
+        xarray += x->strides[ULAB_MAX_DIMS - 1];
+        // explicit abscissae: each interval contributes (x2 - x1) * (y2 + y1)
+        for(size_t i=1; i < y->len; i++) {
+            y2 = funcy(yarray);
+            yarray += y->strides[ULAB_MAX_DIMS - 1];
+            x2 = funcx(xarray);
+            xarray += x->strides[ULAB_MAX_DIMS - 1];
+            mp_float_t value = (x2 - x1) * (y2 + y1);
+            m = mean + (value - mean) / (mp_float_t)count;
+            mean = m;
+            x1 = x2;
+            y1 = y2;
+            count++;
+        }
+    } else {
+        mp_float_t dx = mp_obj_get_float(args[2].u_obj);
+        y1 = funcy(yarray);
+        yarray += y->strides[ULAB_MAX_DIMS - 1];
+        for(size_t i=1; i < y->len; i++) {
+            y2 = funcy(yarray); // follow the stride: indexing y->array densely would misread strided views such as y[::2]
+            yarray += y->strides[ULAB_MAX_DIMS - 1];
+            mp_float_t value = (y2 + y1);
+            m = mean + (value - mean) / (mp_float_t)count;
+            mean = m;
+            y1 = y2;
+            count++;
+        }
+        mean *= dx;
+    }
+    return mp_obj_new_float(MICROPY_FLOAT_CONST(0.5)*mean*(y->len-1));
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(approx_trapz_obj, 1, approx_trapz);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/approx.h b/tulip/shared/ulab/code/numpy/approx.h
new file mode 100644
index 000000000..487a98b58
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/approx.h
@@ -0,0 +1,29 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+*/
+
+#ifndef _APPROX_
+#define _APPROX_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+#define     APPROX_EPS          MICROPY_FLOAT_CONST(1.0e-4) // NOTE(review): none of the APPROX_* constants are referenced in approx.c -- confirm they are still needed
+#define     APPROX_NONZDELTA    MICROPY_FLOAT_CONST(0.05)
+#define     APPROX_ZDELTA       MICROPY_FLOAT_CONST(0.00025)
+#define     APPROX_ALPHA        MICROPY_FLOAT_CONST(1.0)
+#define     APPROX_BETA         MICROPY_FLOAT_CONST(2.0)
+#define     APPROX_GAMMA        MICROPY_FLOAT_CONST(0.5)
+#define     APPROX_DELTA        MICROPY_FLOAT_CONST(0.5)
+// function objects defined in approx.c, each one only when its ULAB_NUMPY_HAS_INTERP / ULAB_NUMPY_HAS_TRAPZ switch is enabled
+MP_DECLARE_CONST_FUN_OBJ_KW(approx_interp_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(approx_trapz_obj);
+
+#endif  /* _APPROX_ */
diff --git a/tulip/shared/ulab/code/numpy/bitwise.c b/tulip/shared/ulab/code/numpy/bitwise.c
new file mode 100644
index 000000000..0aa5bac99
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/bitwise.c
@@ -0,0 +1,431 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023 Zoltán Vörös
+ *
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+
+#include "bitwise.h"
+
+
+#if ULAB_NUMPY_HAS_BITWISE_AND
+ndarray_obj_t *bitwise_bitwise_and_loop(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise lhs & rhs; returns a newly allocated dense array (results stays NULL if no dtype branch matches).
+    // AND is commutative, so a dtype pair that is not listed below is handled by calling again with the operands swapped.
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    // each branch allocates the result dtype chosen for the pair, then BINARY_LOOP applies the operator
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, &);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, &);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, &);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, &);
+        } 
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, &);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, &);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, &);
+        } else {
+            return bitwise_bitwise_and_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, &);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, &);
+        } else {
+            return bitwise_bitwise_and_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, &);
+        } else {
+            return bitwise_bitwise_and_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    }
+    return results;
+}
+#endif
+
+#if ULAB_NUMPY_HAS_BITWISE_OR
+ndarray_obj_t *bitwise_bitwise_or_loop(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise lhs | rhs; returns a newly allocated dense array (results stays NULL if no dtype branch matches).
+    // OR is commutative, so a dtype pair that is not listed below is handled by calling again with the operands swapped.
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    // each branch allocates the result dtype chosen for the pair, then BINARY_LOOP applies the operator
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, |);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, |);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, |);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, |);
+        } 
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, |);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, |);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, |);
+        } else {
+            return bitwise_bitwise_or_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, |);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, |);
+        } else {
+            return bitwise_bitwise_or_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, |);
+        } else {
+            return bitwise_bitwise_or_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    }
+    return results;
+}
+#endif
+
+
+#if ULAB_NUMPY_HAS_BITWISE_XOR
+ndarray_obj_t *bitwise_bitwise_xor_loop(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise lhs ^ rhs; returns a newly allocated dense array (results stays NULL if no dtype branch matches).
+    // XOR is commutative, so a dtype pair that is not listed below is handled by calling again with the operands swapped.
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    // each branch allocates the result dtype chosen for the pair, then BINARY_LOOP applies the operator
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, ^);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, ^);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, ^);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, ^);
+        } 
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, ^);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, ^);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, ^);
+        } else {
+            return bitwise_bitwise_xor_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, ^);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, ^);
+        } else {
+            return bitwise_bitwise_xor_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, ^);
+        } else {
+            return bitwise_bitwise_xor_loop(rhs, lhs, ndim, shape, rstrides, lstrides);
+        }
+    }
+    return results;
+}
+#endif
+
+#if ULAB_NUMPY_HAS_LEFT_SHIFT
+ndarray_obj_t *bitwise_left_shift_loop(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise lhs << rhs into a newly allocated dense array; a shift is not commutative, so every dtype pair is listed explicitly.
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, <<);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, <<);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, <<);
+        } else {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, <<);
+        } 
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, <<);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, <<);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, <<);
+        } else {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, <<);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, <<);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16); // same result dtype as the int8/uint16 pair above
+            BINARY_LOOP(results, uint16_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, <<); // lhs holds 16-bit elements and must be read as uint16_t
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, <<);
+        } else {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, <<);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, <<);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, <<);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, <<); // element types follow the operands: int16 lhs, uint16 rhs, int16 result
+        } else {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, <<);
+        }
+    }
+    return results;
+}
+#endif
+
+#if ULAB_NUMPY_HAS_RIGHT_SHIFT
+ndarray_obj_t *bitwise_right_shift_loop(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise lhs >> rhs into a newly allocated dense array; a shift is not commutative, so every dtype pair is listed explicitly.
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, >>);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, >>);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, >>);
+        } else {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, >>);
+        } 
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, >>);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, >>);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, >>);
+        } else {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, >>);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, >>);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16); // same result dtype as the int8/uint16 pair above
+            BINARY_LOOP(results, uint16_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, >>); // lhs holds 16-bit elements and must be read as uint16_t
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, >>);
+        } else {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, >>);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, >>);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, >>);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, >>); // element types follow the operands: int16 lhs, uint16 rhs, int16 result
+        } else {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, >>);
+        }
+    }
+    return results;
+}
+#endif
+
+mp_obj_t bitwise_binary_operators(mp_obj_t x1, mp_obj_t x2, uint8_t optype) {
+    ndarray_obj_t *lhs = ndarray_from_mp_obj(x1, 0);
+    ndarray_obj_t *rhs = ndarray_from_mp_obj(x2, 0);
+    // Shared driver for the bitwise functions: reject float (and complex) operands, broadcast, then dispatch on optype.
+    #if ULAB_SUPPORTS_COMPLEX
+    if((lhs->dtype == NDARRAY_FLOAT) || (rhs->dtype == NDARRAY_FLOAT) || (lhs->dtype == NDARRAY_COMPLEX) || (rhs->dtype == NDARRAY_COMPLEX)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("not supported for input types"));
+    }
+    #else
+    if((lhs->dtype == NDARRAY_FLOAT) || (rhs->dtype == NDARRAY_FLOAT)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("not supported for input types"));
+    }
+    #endif
+    // scratch buffers handed to ndarray_can_broadcast; they are released again on both exit paths
+    uint8_t ndim = 0;
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+    int32_t *lstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+    int32_t *rstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+    
+    if(!ndarray_can_broadcast(lhs, rhs, &ndim, shape, lstrides, rstrides)) {
+        m_del(size_t, shape, ULAB_MAX_DIMS);
+        m_del(int32_t, lstrides, ULAB_MAX_DIMS);
+        m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+        mp_raise_ValueError(MP_ERROR_TEXT("operands could not be broadcast together"));
+    }
+
+    ndarray_obj_t *results = NULL;
+    // only the operations enabled at compile time have a case; anything else leaves results as NULL
+    switch(optype) {
+        #if ULAB_NUMPY_HAS_BITWISE_AND
+        case BITWISE_AND:
+            results = bitwise_bitwise_and_loop(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if ULAB_NUMPY_HAS_BITWISE_OR
+        case BITWISE_OR:
+            results = bitwise_bitwise_or_loop(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if ULAB_NUMPY_HAS_BITWISE_XOR
+        case BITWISE_XOR:
+            results = bitwise_bitwise_xor_loop(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if ULAB_NUMPY_HAS_LEFT_SHIFT
+        case BITWISE_LEFT_SHIFT:
+            results = bitwise_left_shift_loop(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        #if ULAB_NUMPY_HAS_RIGHT_SHIFT
+        case BITWISE_RIGHT_SHIFT:
+            results = bitwise_right_shift_loop(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
+        default:
+            break; 
+    }
+
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(int32_t, lstrides, ULAB_MAX_DIMS);
+    m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+    // the return type is the object handle itself (mp_obj_t, not mp_obj_t *), which is what the callers pass on
+    return MP_OBJ_FROM_PTR(results);
+}
+
+#if ULAB_NUMPY_HAS_BITWISE_AND
+mp_obj_t bitwise_bitwise_and(mp_obj_t x1, mp_obj_t x2) { // element-wise x1 & x2; type checks and broadcasting happen in bitwise_binary_operators
+    return bitwise_binary_operators(x1, x2, BITWISE_AND);
+}
+// two-argument function object wrapping bitwise_bitwise_and
+MP_DEFINE_CONST_FUN_OBJ_2(bitwise_bitwise_and_obj, bitwise_bitwise_and);
+#endif
+
+#if ULAB_NUMPY_HAS_BITWISE_OR
+mp_obj_t bitwise_bitwise_or(mp_obj_t x1, mp_obj_t x2) { // element-wise x1 | x2; type checks and broadcasting happen in bitwise_binary_operators
+    return bitwise_binary_operators(x1, x2, BITWISE_OR);
+}
+// two-argument function object wrapping bitwise_bitwise_or
+MP_DEFINE_CONST_FUN_OBJ_2(bitwise_bitwise_or_obj, bitwise_bitwise_or);
+#endif
+
+#if ULAB_NUMPY_HAS_BITWISE_XOR
+mp_obj_t bitwise_bitwise_xor(mp_obj_t x1, mp_obj_t x2) { // element-wise x1 ^ x2; type checks and broadcasting happen in bitwise_binary_operators
+    return bitwise_binary_operators(x1, x2, BITWISE_XOR);
+}
+// two-argument function object wrapping bitwise_bitwise_xor
+MP_DEFINE_CONST_FUN_OBJ_2(bitwise_bitwise_xor_obj, bitwise_bitwise_xor);
+#endif
+
+#if ULAB_NUMPY_HAS_LEFT_SHIFT
+mp_obj_t bitwise_left_shift(mp_obj_t x1, mp_obj_t x2) { // element-wise x1 << x2; type checks and broadcasting happen in bitwise_binary_operators
+    return bitwise_binary_operators(x1, x2, BITWISE_LEFT_SHIFT);
+}
+// two-argument function object wrapping bitwise_left_shift (note: the object name has no bitwise_ prefix)
+MP_DEFINE_CONST_FUN_OBJ_2(left_shift_obj, bitwise_left_shift);
+#endif
+
+#if ULAB_NUMPY_HAS_RIGHT_SHIFT
+mp_obj_t bitwise_right_shift(mp_obj_t x1, mp_obj_t x2) { // element-wise x1 >> x2; type checks and broadcasting happen in bitwise_binary_operators
+    return bitwise_binary_operators(x1, x2, BITWISE_RIGHT_SHIFT);
+}
+// two-argument function object wrapping bitwise_right_shift (note: the object name has no bitwise_ prefix)
+MP_DEFINE_CONST_FUN_OBJ_2(right_shift_obj, bitwise_right_shift);
+#endif
\ No newline at end of file
diff --git a/tulip/shared/ulab/code/numpy/bitwise.h b/tulip/shared/ulab/code/numpy/bitwise.h
new file mode 100644
index 000000000..bddd8b4a3
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/bitwise.h
@@ -0,0 +1,32 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2023 Zoltán Vörös
+*/
+
+#ifndef _BITWISE_
+#define _BITWISE_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+enum BITWISE_FUNCTION_TYPE { // operation selector handed to bitwise_binary_operators() in bitwise.c
+    BITWISE_AND, // passed by bitwise_bitwise_and()
+    BITWISE_OR, // passed by bitwise_bitwise_or()
+    BITWISE_XOR, // passed by bitwise_bitwise_xor()
+    BITWISE_LEFT_SHIFT, // passed by bitwise_left_shift(); dispatched to bitwise_left_shift_loop()
+    BITWISE_RIGHT_SHIFT,   // passed by bitwise_right_shift(); dispatched to bitwise_right_shift_loop()
+};
+
+MP_DECLARE_CONST_FUN_OBJ_2(bitwise_bitwise_and_obj); // all five are declared unconditionally, while each definition in bitwise.c sits behind its own ULAB_NUMPY_HAS_* switch
+MP_DECLARE_CONST_FUN_OBJ_2(bitwise_bitwise_or_obj); // defined under ULAB_NUMPY_HAS_BITWISE_OR
+MP_DECLARE_CONST_FUN_OBJ_2(bitwise_bitwise_xor_obj); // defined under ULAB_NUMPY_HAS_BITWISE_XOR
+MP_DECLARE_CONST_FUN_OBJ_2(left_shift_obj); // defined under ULAB_NUMPY_HAS_LEFT_SHIFT
+MP_DECLARE_CONST_FUN_OBJ_2(right_shift_obj); // defined under ULAB_NUMPY_HAS_RIGHT_SHIFT
+
+#endif  /* _BITWISE_ */
diff --git a/tulip/shared/ulab/code/numpy/carray/carray.c b/tulip/shared/ulab/code/numpy/carray/carray.c
new file mode 100644
index 000000000..bf0d71662
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/carray/carray.c
@@ -0,0 +1,834 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021-2022 Zoltán Vörös
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/objint.h"
+#include "py/runtime.h"
+#include "py/builtin.h"
+#include "py/misc.h"
+
+#include "../../ulab.h"
+#include "../../ndarray.h"
+#include "../../ulab_tools.h"
+#include "carray.h"
+
+#if ULAB_SUPPORTS_COMPLEX
+
+//| import builtins
+//|
+//| import ulab.numpy
+
+//| def real(val: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|     """
+//|     Return the real part of the argument, which must be an ndarray
+//|     (real or complex); any other input raises NotImplementedError."""
+//|     ...
+//|
+
+mp_obj_t carray_real(mp_obj_t _source) {
+    // Real part of an ndarray, returned as a new dense array; real input keeps
+    // its dtype, while complex input is reduced to NDARRAY_FLOAT.
+    if(!mp_obj_is_type(_source, &ulab_ndarray_type)) {
+        mp_raise_NotImplementedError(MP_ERROR_TEXT("function is implemented for ndarrays only"));
+    }
+    ndarray_obj_t *input = MP_OBJ_TO_PTR(_source);
+    // only the dtype of the output depends on whether the input is complex
+    uint8_t out_dtype = input->dtype;
+    if(out_dtype == NDARRAY_COMPLEX) {
+        out_dtype = NDARRAY_FLOAT;
+    }
+    ndarray_obj_t *output = ndarray_new_dense_ndarray(input->ndim, input->shape, out_dtype);
+    // offset 0 (presumably the start of each item, i.e. the real float - confirm in ndarray_copy_array)
+    ndarray_copy_array(input, output, 0);
+    return MP_OBJ_FROM_PTR(output);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(carray_real_obj, carray_real);
+
+//| def imag(val: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|     """
+//|     Return the imaginary part of the argument, which must be an ndarray
+//|     (real or complex); any other input raises NotImplementedError."""
+//|     ...
+//|
+
+mp_obj_t carray_imag(mp_obj_t _source) {
+    // Imaginary part of an ndarray, returned as a new dense array.
+    if(!mp_obj_is_type(_source, &ulab_ndarray_type)) {
+        mp_raise_NotImplementedError(MP_ERROR_TEXT("function is implemented for ndarrays only"));
+    }
+    ndarray_obj_t *input = MP_OBJ_TO_PTR(_source);
+    if(input->dtype == NDARRAY_COMPLEX) {
+        ndarray_obj_t *output = ndarray_new_dense_ndarray(input->ndim, input->shape, NDARRAY_FLOAT);
+        // half an item as the offset (presumably the position of the imaginary float - confirm)
+        ndarray_copy_array(input, output, input->itemsize / 2);
+        return MP_OBJ_FROM_PTR(output);
+    }
+    // real input: the fresh same-dtype array is returned as allocated (presumably zero-filled - confirm)
+    ndarray_obj_t *blank = ndarray_new_dense_ndarray(input->ndim, input->shape, input->dtype);
+    return MP_OBJ_FROM_PTR(blank);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(carray_imag_obj, carray_imag);
+
+#if ULAB_NUMPY_HAS_CONJUGATE
+
+//| def conjugate(
+//|     val: builtins.complex | ulab.numpy.ndarray
+//| ) -> builtins.complex | ulab.numpy.ndarray:
+//|     """
+//|     Return the conjugate of the complex argument, which can be
+//|     either an ndarray, or a scalar."""
+//|     ...
+//|
+mp_obj_t carray_conjugate(mp_obj_t _source) {
+    // Complex conjugate of an ndarray or of a scalar; real-valued input keeps its values.
+    if(!mp_obj_is_type(_source, &ulab_ndarray_type)) {
+        if(mp_obj_is_type(_source, &mp_type_complex)) {
+            mp_float_t re, im;
+            mp_obj_get_complex(_source, &re, &im);
+            return mp_obj_new_complex(re, im * MICROPY_FLOAT_CONST(-1.0));
+        }
+        if(mp_obj_is_int(_source) || mp_obj_is_float(_source)) {
+            return _source;
+        }
+        mp_raise_TypeError(MP_ERROR_TEXT("input must be an ndarray, or a scalar"));
+        // this should never happen
+        return mp_const_none;
+    }
+
+    // array input: the sign flips are applied to a dense copy, not to the source itself
+    ndarray_obj_t *input = MP_OBJ_TO_PTR(_source);
+    ndarray_obj_t *output = ndarray_new_dense_ndarray(input->ndim, input->shape, input->dtype);
+    ndarray_copy_array(input, output, 0);
+    if(input->dtype == NDARRAY_COMPLEX) {
+        // the imaginary parts sit at the odd float positions of the dense copy
+        mp_float_t *parts = (mp_float_t *)output->array;
+        for(size_t i = 0; i < output->len; i++) {
+            parts[2 * i + 1] *= MICROPY_FLOAT_CONST(-1.0);
+        }
+    }
+    return MP_OBJ_FROM_PTR(output);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(carray_conjugate_obj, carray_conjugate);
+#endif
+
+#if ULAB_NUMPY_HAS_SORT_COMPLEX
+//| def sort_complex(a: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|     """
+//|     .. param: a
+//|       a one-dimensional ndarray
+//|
+//|     Sort a complex array using the real part first, then the imaginary part.
+//|     Always returns a sorted complex array, even if the input was real."""
+//|     ...
+//|
+
+static void carray_sort_complex_(mp_float_t *array, size_t len) {
+    // array is assumed to be a floating vector containing the real and imaginary parts
+    // of a complex array at alternating positions as
+    // array[0] = real[0]
+    // array[1] = imag[0]
+    // array[2] = real[1]
+    // array[3] = imag[1]
+    // In-place heap sort of len (real, imag) pairs, ascending by real part, ties broken by imaginary part; len must be > 0.
+    mp_float_t real, imag;
+    size_t c, q = len, p, r = len >> 1; // r: next node to sift down while the heap is built; q: size of the still unsorted heap
+    for (;;) {
+        if (r > 0) { // phase 1: build the max-heap, from the last parent node down to the root
+            r--;
+            real = array[2 * r];
+            imag = array[2 * r + 1];
+        } else { // phase 2: move the current maximum (the root) behind the shrinking heap
+            q--; // unsigned: this would wrap around for len == 0, hence the precondition
+            if(q == 0) {
+                break;
+            }
+            real = array[2 * q];
+            imag = array[2 * q + 1];
+            array[2 * q] = array[0];
+            array[2 * q + 1] = array[1];
+        }
+        p = r; // sift the held (real, imag) pair down, starting at node r
+        c = r + r + 1; // left child of p
+        while (c < q) {
+            if(c + 1 < q) { // a right child exists: continue with the larger of the two children
+                if((array[2 * (c+1)] > array[2 * c]) ||
+                    ((array[2 * (c+1)] == array[2 * c]) && (array[2 * (c+1) + 1] > array[2 * c + 1]))) {
+                    c++;
+                }
+            }
+            if((array[2 * c] > real) || // the child is larger than the held pair: pull it up and descend
+                ((array[2 * c] == real) && (array[2 * c + 1] > imag))) {
+                array[2 * p] = array[2 * c]; // real part
+                array[2 * p + 1] = array[2 * c + 1]; // imag part
+                p = c;
+                c = p + p + 1;
+            } else {
+                break;
+            }
+        }
+        array[2 * p] = real; // final slot of the held pair
+        array[2 * p + 1] = imag;
+    }
+}
+
+mp_obj_t carray_sort_complex(mp_obj_t _source) {
+    // Sorted complex version of a one-dimensional ndarray; anything else raises TypeError.
+    if(!mp_obj_is_type(_source, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input must be a 1D ndarray"));
+    }
+    ndarray_obj_t *input = MP_OBJ_TO_PTR(_source);
+    if(input->ndim != 1) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input must be a 1D ndarray"));
+    }
+    // the sort runs in place on the complex-typed array handed back by the conversion helper
+    ndarray_obj_t *sorted = ndarray_copy_view_convert_type(input, NDARRAY_COMPLEX);
+    // the sort helper must not be entered with zero length, hence the guard
+    if(sorted->len > 0) {
+        carray_sort_complex_((mp_float_t *)sorted->array, sorted->len);
+    }
+
+    return MP_OBJ_FROM_PTR(sorted);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(carray_sort_complex_obj, carray_sort_complex);
+#endif
+
+//| def abs(a: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|     """
+//|     .. param: a
+//|       a complex ndarray (not restricted to one dimension)
+//|
+//|     Return the absolute value of each element of a complex ndarray."""
+//|     ...
+//|
+
+mp_obj_t carray_abs(ndarray_obj_t *source, ndarray_obj_t *target) {
+    // writes the absolute value of every element of the complex source into target (filled sequentially as floats), and returns target
+    uint8_t *sarray = (uint8_t *)source->array; // byte pointer, since the strides below are added to it directly
+    mp_float_t *tarray = (mp_float_t *)target->array;
+    uint8_t itemsize = mp_binary_get_size('@', NDARRAY_FLOAT, NULL); // size of one float: the imaginary part is read this many bytes after the real part
+    // one do-while level per compiled-in dimension; the last axis is the innermost loop
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    mp_float_t rvalue = *(mp_float_t *)sarray;
+                    mp_float_t ivalue = *(mp_float_t *)(sarray + itemsize);
+                    *tarray++ = MICROPY_FLOAT_C_FUN(sqrt)(rvalue * rvalue + ivalue * ivalue); // sqrt(re^2 + im^2)
+                    sarray += source->strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1]; // rewind the axis just traversed ...
+                sarray += source->strides[ULAB_MAX_DIMS - 2]; // ... and step once along the next outer axis
+                k++;
+            } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+            sarray += source->strides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+        sarray += source->strides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+    return MP_OBJ_FROM_PTR(target);
+}
+
+static void carray_copy_part(uint8_t *tarray, uint8_t *sarray, size_t *shape, int32_t *strides) {
+    // copies the real or imaginary part of an array
+    // into the respective part of a dense complex array
+    uint8_t sz = sizeof(mp_float_t); // bytes copied per element: exactly one float
+    // which part is copied is decided by the caller, by offsetting tarray/sarray before the call; strides are byte strides of the source
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    memcpy(tarray, sarray, sz);
+                    tarray += 2 * sz; // next item of the dense target, i.e. one (real, imag) pair further
+                    sarray += strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l < shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                sarray -= strides[ULAB_MAX_DIMS - 1] * shape[ULAB_MAX_DIMS-1]; // rewind the axis just traversed ...
+                sarray += strides[ULAB_MAX_DIMS - 2]; // ... and step once along the next outer axis
+                k++;
+            } while(k < shape[ULAB_MAX_DIMS - 2]);
+            #endif /* ULAB_MAX_DIMS > 1 */
+        #if ULAB_MAX_DIMS > 2
+            sarray -= strides[ULAB_MAX_DIMS - 2] * shape[ULAB_MAX_DIMS-2];
+            sarray += strides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < shape[ULAB_MAX_DIMS - 3]);
+        #endif /* ULAB_MAX_DIMS > 2 */
+    #if ULAB_MAX_DIMS > 3
+        sarray -= strides[ULAB_MAX_DIMS - 3] * shape[ULAB_MAX_DIMS-3];
+        sarray += strides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < shape[ULAB_MAX_DIMS - 4]);
+    #endif /* ULAB_MAX_DIMS > 3 */
+}
+
+mp_obj_t carray_binary_equal_not_equal(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides, mp_binary_op_t op) {
+    // Element-wise == / != for operand pairs with at least one complex member; returns a dense uint8 array flagged as boolean.
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+    results->boolean = 1;
+    uint8_t *array = (uint8_t *)results->array;
+    // for !=, start from all ones, so that the XOR applied on a match clears the entry
+    if(op == MP_BINARY_OP_NOT_EQUAL) {
+        memset(array, 1, results->len);
+    }
+
+    if((lhs->dtype == NDARRAY_COMPLEX) && (rhs->dtype == NDARRAY_COMPLEX)) {
+        mp_float_t *larray = (mp_float_t *)lhs->array;
+        mp_float_t *rarray = (mp_float_t *)rhs->array;
+        // NOTE(review): presumably converts the byte strides to units of mp_float_t, to match the float pointers - confirm in ulab_tools
+        ulab_rescale_float_strides(lstrides);
+        ulab_rescale_float_strides(rstrides);
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        if((larray[0] == rarray[0]) && (larray[1] == rarray[1])) { // real and imaginary parts both match
+                            *array ^= 0x01; // toggle: 1 -> 0 for !=, and 0 -> 1 for == (assuming the result starts zero-filled)
+                        }
+                        array++;
+                        larray += lstrides[ULAB_MAX_DIMS - 1];
+                        rarray += rstrides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < results->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    larray -= lstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1]; // rewind the axis just traversed ...
+                    larray += lstrides[ULAB_MAX_DIMS - 2]; // ... and step once along the next outer axis
+                    rarray -= rstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1];
+                    rarray += rstrides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < results->shape[ULAB_MAX_DIMS - 2]);
+                #endif /* ULAB_MAX_DIMS > 1 */
+            #if ULAB_MAX_DIMS > 2
+                larray -= lstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                larray += lstrides[ULAB_MAX_DIMS - 3];
+                rarray -= rstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                rarray += rstrides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < results->shape[ULAB_MAX_DIMS - 3]);
+            #endif /* ULAB_MAX_DIMS > 2 */
+        #if ULAB_MAX_DIMS > 3
+            larray -= lstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            larray += lstrides[ULAB_MAX_DIMS - 4];
+            rarray -= rstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            rarray += rstrides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < results->shape[ULAB_MAX_DIMS - 4]);
+        #endif /* ULAB_MAX_DIMS > 3 */
+    } else { // only one of the operands is complex
+        mp_float_t *larray = (mp_float_t *)lhs->array;
+        uint8_t *rarray = (uint8_t *)rhs->array;
+
+        // align the complex array to the left
+        uint8_t rdtype = rhs->dtype;
+        int32_t *lstrides_ = lstrides;
+        int32_t *rstrides_ = rstrides;
+
+        if(rhs->dtype == NDARRAY_COMPLEX) { // swap roles; harmless here, because ==/!= are symmetric
+            larray = (mp_float_t *)rhs->array;
+            rarray = (uint8_t *)lhs->array;
+            lstrides_ = rstrides;
+            rstrides_ = lstrides;
+            rdtype = lhs->dtype;
+        }
+
+        ulab_rescale_float_strides(lstrides_); // only the complex side is rescaled; the other side keeps its strides
+        // dispatch on the dtype of the non-complex operand; the loop itself is the BINARY_LOOP_COMPLEX_EQUAL macro (not defined in this function)
+        if(rdtype == NDARRAY_UINT8) {
+            BINARY_LOOP_COMPLEX_EQUAL(results, array, uint8_t, larray, lstrides_, rarray, rstrides_);
+        } else if(rdtype == NDARRAY_INT8) {
+            BINARY_LOOP_COMPLEX_EQUAL(results, array, int8_t, larray, lstrides_, rarray, rstrides_);
+        } else if(rdtype == NDARRAY_UINT16) {
+            BINARY_LOOP_COMPLEX_EQUAL(results, array, uint16_t, larray, lstrides_, rarray, rstrides_);
+        } else if(rdtype == NDARRAY_INT16) {
+            BINARY_LOOP_COMPLEX_EQUAL(results, array, int16_t, larray, lstrides_, rarray, rstrides_);
+        } else if(rdtype == NDARRAY_FLOAT) {
+            BINARY_LOOP_COMPLEX_EQUAL(results, array, mp_float_t, larray, lstrides_, rarray, rstrides_);
+        }
+    }
+    return MP_OBJ_FROM_PTR(results);
+}
+
+mp_obj_t carray_binary_add(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise sum of two operands with at least one complex member; returns a new dense complex array.
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_COMPLEX);
+    mp_float_t *resarray = (mp_float_t *)results->array;
+
+    if((lhs->dtype == NDARRAY_COMPLEX) && (rhs->dtype == NDARRAY_COMPLEX)) {
+        mp_float_t *larray = (mp_float_t *)lhs->array;
+        mp_float_t *rarray = (mp_float_t *)rhs->array;
+        // NOTE(review): presumably converts the byte strides to units of mp_float_t, to match the float pointers - confirm in ulab_tools
+        ulab_rescale_float_strides(lstrides);
+        ulab_rescale_float_strides(rstrides);
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        // real part
+                        *resarray++ = larray[0] + rarray[0];
+                        // imaginary part
+                        *resarray++ = larray[1] + rarray[1];
+                        larray += lstrides[ULAB_MAX_DIMS - 1];
+                        rarray += rstrides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < results->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    larray -= lstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1]; // rewind the axis just traversed ...
+                    larray += lstrides[ULAB_MAX_DIMS - 2]; // ... and step once along the next outer axis
+                    rarray -= rstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1];
+                    rarray += rstrides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < results->shape[ULAB_MAX_DIMS - 2]);
+                #endif /* ULAB_MAX_DIMS > 1 */
+            #if ULAB_MAX_DIMS > 2
+                larray -= lstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                larray += lstrides[ULAB_MAX_DIMS - 3];
+                rarray -= rstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                rarray += rstrides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < results->shape[ULAB_MAX_DIMS - 3]);
+            #endif /* ULAB_MAX_DIMS > 2 */
+        #if ULAB_MAX_DIMS > 3
+            larray -= lstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            larray += lstrides[ULAB_MAX_DIMS - 4];
+            rarray -= rstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            rarray += rstrides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < results->shape[ULAB_MAX_DIMS - 4]);
+        #endif /* ULAB_MAX_DIMS > 3 */
+    } else { // only one of the operands is complex
+        uint8_t *larray = (uint8_t *)lhs->array; // byte pointers: no stride rescaling happens in this branch
+        uint8_t *rarray = (uint8_t *)rhs->array;
+
+        // align the complex array to the left
+        uint8_t rdtype = rhs->dtype;
+        int32_t *lstrides_ = lstrides;
+        int32_t *rstrides_ = rstrides;
+
+        if(rhs->dtype == NDARRAY_COMPLEX) { // swap roles; harmless here, because addition is commutative
+            larray = (uint8_t *)rhs->array;
+            rarray = (uint8_t *)lhs->array;
+            lstrides_ = rstrides;
+            rstrides_ = lstrides;
+            rdtype = lhs->dtype;
+        }
+        // first pass: BINARY_LOOP_COMPLEX (defined elsewhere) with +, typed by the non-complex operand; other dtypes are skipped
+        if(rdtype == NDARRAY_UINT8) {
+            BINARY_LOOP_COMPLEX(results, resarray, uint8_t, larray, lstrides_, rarray, rstrides_, +);
+        } else if(rdtype == NDARRAY_INT8) {
+            BINARY_LOOP_COMPLEX(results, resarray, int8_t, larray, lstrides_, rarray, rstrides_, +);
+        } else if(rdtype == NDARRAY_UINT16) {
+            BINARY_LOOP_COMPLEX(results, resarray, uint16_t, larray, lstrides_, rarray, rstrides_, +);
+        } else if(rdtype == NDARRAY_INT16) {
+            BINARY_LOOP_COMPLEX(results, resarray, int16_t, larray, lstrides_, rarray, rstrides_, +);
+        } else if(rdtype == NDARRAY_FLOAT) {
+            BINARY_LOOP_COMPLEX(results, resarray, mp_float_t, larray, lstrides_, rarray, rstrides_, +);
+        }
+
+        // simply copy the imaginary part
+        uint8_t *tarray = (uint8_t *)results->array;
+        tarray += sizeof(mp_float_t); // second float of the first result item
+
+        if(lhs->dtype == NDARRAY_COMPLEX) { // from here on, rarray/rstrides are reused for the complex operand
+            rarray = (uint8_t *)lhs->array;
+            rstrides = lstrides;
+        } else {
+            rarray = (uint8_t *)rhs->array;
+        }
+        rarray += sizeof(mp_float_t); // second float of the first source item
+        carray_copy_part(tarray, rarray, results->shape, rstrides);
+    }
+    return MP_OBJ_FROM_PTR(results);
+}
+
+static void carray_binary_multiply_(ndarray_obj_t *results, mp_float_t *resarray, uint8_t *larray, uint8_t *rarray,
+                            int32_t *lstrides, int32_t *rstrides, uint8_t rdtype) {
+    // Runs the BINARY_LOOP_COMPLEX macro (defined elsewhere) with the * operator, typed by rdtype; any other dtype is silently skipped.
+    if(rdtype == NDARRAY_UINT8) {
+        BINARY_LOOP_COMPLEX(results, resarray, uint8_t, larray, lstrides, rarray, rstrides, *);
+    } else if(rdtype == NDARRAY_INT8) {
+        BINARY_LOOP_COMPLEX(results, resarray, int8_t, larray, lstrides, rarray, rstrides, *);
+    } else if(rdtype == NDARRAY_UINT16) {
+        BINARY_LOOP_COMPLEX(results, resarray, uint16_t, larray, lstrides, rarray, rstrides, *);
+    } else if(rdtype == NDARRAY_INT16) {
+        BINARY_LOOP_COMPLEX(results, resarray, int16_t, larray, lstrides, rarray, rstrides, *);
+    } else if(rdtype == NDARRAY_FLOAT) {
+        BINARY_LOOP_COMPLEX(results, resarray, mp_float_t, larray, lstrides, rarray, rstrides, *);
+    }
+}
+
+mp_obj_t carray_binary_multiply(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise product of two operands with at least one complex member; returns a new dense complex array.
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_COMPLEX);
+    mp_float_t *resarray = (mp_float_t *)results->array;
+
+    if((lhs->dtype == NDARRAY_COMPLEX) && (rhs->dtype == NDARRAY_COMPLEX)) {
+        mp_float_t *larray = (mp_float_t *)lhs->array;
+        mp_float_t *rarray = (mp_float_t *)rhs->array;
+        // NOTE(review): presumably converts the byte strides to units of mp_float_t, to match the float pointers - confirm in ulab_tools
+        ulab_rescale_float_strides(lstrides);
+        ulab_rescale_float_strides(rstrides);
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do { // (a + bi)(c + di) = (ac - bd) + (ad + bc)i
+                        // real part
+                        *resarray++ = larray[0] * rarray[0] - larray[1] * rarray[1];
+                        // imaginary part
+                        *resarray++ = larray[0] * rarray[1] + larray[1] * rarray[0];
+                        larray += lstrides[ULAB_MAX_DIMS - 1];
+                        rarray += rstrides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < results->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    larray -= lstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1]; // rewind the axis just traversed ...
+                    larray += lstrides[ULAB_MAX_DIMS - 2]; // ... and step once along the next outer axis
+                    rarray -= rstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1];
+                    rarray += rstrides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < results->shape[ULAB_MAX_DIMS - 2]);
+                #endif /* ULAB_MAX_DIMS > 1 */
+            #if ULAB_MAX_DIMS > 2
+                larray -= lstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                larray += lstrides[ULAB_MAX_DIMS - 3];
+                rarray -= rstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                rarray += rstrides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < results->shape[ULAB_MAX_DIMS - 3]);
+            #endif /* ULAB_MAX_DIMS > 2 */
+        #if ULAB_MAX_DIMS > 3
+            larray -= lstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            larray += lstrides[ULAB_MAX_DIMS - 4];
+            rarray -= rstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            rarray += rstrides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < results->shape[ULAB_MAX_DIMS - 4]);
+        #endif /* ULAB_MAX_DIMS > 3 */
+    } else { // only one of the operands is complex
+        // both result parts are "part of the complex operand times the other operand", so one helper is run twice
+        uint8_t *larray = (uint8_t *)lhs->array;
+        uint8_t *rarray = (uint8_t *)rhs->array;
+        uint8_t *lo = larray, *ro = rarray; // lo: start of the complex operand, ro: start of the other one
+        int32_t *left_strides = lstrides;
+        int32_t *right_strides = rstrides;
+        uint8_t rdtype = rhs->dtype;
+
+        // align the complex array to the left
+        if(rhs->dtype == NDARRAY_COMPLEX) { // swap roles; harmless here, because multiplication is commutative
+            lo = (uint8_t *)rhs->array;
+            ro = (uint8_t *)lhs->array;
+            rdtype = lhs->dtype;
+            left_strides = rstrides;
+            right_strides = lstrides;
+        }
+
+        larray = lo;
+        rarray = ro;
+        // real part
+        carray_binary_multiply_(results, resarray, larray, rarray, left_strides, right_strides, rdtype);
+
+        larray = lo + sizeof(mp_float_t); // second float of the first complex item
+        rarray = ro;
+        resarray = (mp_float_t *)results->array;
+        resarray++; // second float of the first result item
+        // imaginary part
+        carray_binary_multiply_(results, resarray, larray, rarray, left_strides, right_strides, rdtype);
+    }
+    return MP_OBJ_FROM_PTR(results);
+}
+
+mp_obj_t carray_binary_subtract(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+    // Element-wise lhs - rhs for operands with at least one complex member; returns a new dense complex array.
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_COMPLEX);
+    mp_float_t *resarray = (mp_float_t *)results->array;
+
+    if((lhs->dtype == NDARRAY_COMPLEX) && (rhs->dtype == NDARRAY_COMPLEX)) {
+        mp_float_t *larray = (mp_float_t *)lhs->array;
+        mp_float_t *rarray = (mp_float_t *)rhs->array;
+        // NOTE(review): presumably converts the byte strides to units of mp_float_t, to match the float pointers - confirm in ulab_tools
+        ulab_rescale_float_strides(lstrides);
+        ulab_rescale_float_strides(rstrides);
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        // real part
+                        *resarray++ = larray[0] - rarray[0];
+                        // imaginary part
+                        *resarray++ = larray[1] - rarray[1];
+                        larray += lstrides[ULAB_MAX_DIMS - 1];
+                        rarray += rstrides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < results->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    larray -= lstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1]; // rewind the axis just traversed ...
+                    larray += lstrides[ULAB_MAX_DIMS - 2]; // ... and step once along the next outer axis
+                    rarray -= rstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1];
+                    rarray += rstrides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < results->shape[ULAB_MAX_DIMS - 2]);
+                #endif /* ULAB_MAX_DIMS > 1 */
+            #if ULAB_MAX_DIMS > 2
+                larray -= lstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                larray += lstrides[ULAB_MAX_DIMS - 3];
+                rarray -= rstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                rarray += rstrides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < results->shape[ULAB_MAX_DIMS - 3]);
+            #endif /* ULAB_MAX_DIMS > 2 */
+        #if ULAB_MAX_DIMS > 3
+            larray -= lstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            larray += lstrides[ULAB_MAX_DIMS - 4];
+            rarray -= rstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            rarray += rstrides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < results->shape[ULAB_MAX_DIMS - 4]);
+        #endif /* ULAB_MAX_DIMS > 3 */
+    } else { // only one operand is complex; the operands are not swapped here, as subtraction is not commutative
+        uint8_t *larray = (uint8_t *)lhs->array;
+        if(lhs->dtype == NDARRAY_COMPLEX) { // complex - real
+            uint8_t *rarray = (uint8_t *)rhs->array;
+            if(rhs->dtype == NDARRAY_UINT8) {
+                BINARY_LOOP_COMPLEX(results, resarray, uint8_t, larray, lstrides, rarray, rstrides, -);
+            } else if(rhs->dtype == NDARRAY_INT8) {
+                BINARY_LOOP_COMPLEX(results, resarray, int8_t, larray, lstrides, rarray, rstrides, -);
+            } else if(rhs->dtype == NDARRAY_UINT16) {
+                BINARY_LOOP_COMPLEX(results, resarray, uint16_t, larray, lstrides, rarray, rstrides, -);
+            } else if(rhs->dtype == NDARRAY_INT16) {
+                BINARY_LOOP_COMPLEX(results, resarray, int16_t, larray, lstrides, rarray, rstrides, -);
+            } else if(rhs->dtype == NDARRAY_FLOAT) {
+                BINARY_LOOP_COMPLEX(results, resarray, mp_float_t, larray, lstrides, rarray, rstrides, -);
+            }
+            // copy the imaginary part
+            uint8_t *tarray = (uint8_t *)results->array;
+            tarray += sizeof(mp_float_t); // second float of the first result item
+
+            larray = (uint8_t *)lhs->array;
+            larray += sizeof(mp_float_t); // second float of the first lhs item
+
+            carray_copy_part(tarray, larray, results->shape, lstrides);
+        } else if(rhs->dtype == NDARRAY_COMPLEX) { // real - complex
+            mp_float_t *rarray = (mp_float_t *)rhs->array;
+            ulab_rescale_float_strides(rstrides); // only the complex side is rescaled; lhs is still walked through a byte pointer
+            // the loop is the BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT macro (defined elsewhere), typed by the dtype of the real lhs
+            if(lhs->dtype == NDARRAY_UINT8) {
+                BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT(results, resarray, uint8_t, larray, lstrides, rarray, rstrides);
+            } else if(lhs->dtype == NDARRAY_INT8) {
+                BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT(results, resarray, int8_t, larray, lstrides, rarray, rstrides);
+            } else if(lhs->dtype == NDARRAY_UINT16) {
+                BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT(results, resarray, uint16_t, larray, lstrides, rarray, rstrides);
+            } else if(lhs->dtype == NDARRAY_INT16) {
+                BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT(results, resarray, int16_t, larray, lstrides, rarray, rstrides);
+            } else if(lhs->dtype == NDARRAY_FLOAT) {
+                BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT(results, resarray, mp_float_t, larray, lstrides, rarray, rstrides);
+            }
+        }
+    }
+
+    return MP_OBJ_FROM_PTR(results);
+}
+
+static void carray_binary_left_divide_(ndarray_obj_t *results, mp_float_t *resarray, uint8_t *larray, uint8_t *rarray,
+                            int32_t *lstrides, int32_t *rstrides, uint8_t rdtype) {
+    // Runs the BINARY_LOOP_COMPLEX macro (defined elsewhere) with the / operator, typed by rdtype; any other dtype is silently skipped.
+    if(rdtype == NDARRAY_UINT8) {
+        BINARY_LOOP_COMPLEX(results, resarray, uint8_t, larray, lstrides, rarray, rstrides, /);
+    } else if(rdtype == NDARRAY_INT8) {
+        BINARY_LOOP_COMPLEX(results, resarray, int8_t, larray, lstrides, rarray, rstrides, /);
+    } else if(rdtype == NDARRAY_UINT16) {
+        BINARY_LOOP_COMPLEX(results, resarray, uint16_t, larray, lstrides, rarray, rstrides, /);
+    } else if(rdtype == NDARRAY_INT16) {
+        BINARY_LOOP_COMPLEX(results, resarray, int16_t, larray, lstrides, rarray, rstrides, /);
+    } else if(rdtype == NDARRAY_FLOAT) {
+        BINARY_LOOP_COMPLEX(results, resarray, mp_float_t, larray, lstrides, rarray, rstrides, /);
+    }
+}
+
+mp_obj_t carray_binary_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) { // element-wise lhs / rhs, returned as a new NDARRAY_COMPLEX array
+    // ndim and shape only size the result; every loop below iterates over results->shape
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_COMPLEX);
+    mp_float_t *resarray = (mp_float_t *)results->array; // result is filled as (real, imaginary) float pairs
+
+    if((lhs->dtype == NDARRAY_COMPLEX) && (rhs->dtype == NDARRAY_COMPLEX)) {
+        mp_float_t *larray = (mp_float_t *)lhs->array;
+        mp_float_t *rarray = (mp_float_t *)rhs->array;
+        // NOTE(review): presumably converts the strides to units of mp_float_t, since larray/rarray are float pointers in this branch; confirm in ulab_rescale_float_strides
+        ulab_rescale_float_strides(lstrides);
+        ulab_rescale_float_strides(rstrides);
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        // (a + bi) / (c + di) =
+                        // (ac + bd) / (c^2 + d^2) + i (bc - ad) / (c^2 + d^2)
+                        // denominator
+                        mp_float_t denom = rarray[0] * rarray[0] + rarray[1] * rarray[1]; // no zero check: a zero divisor is divided by as-is
+
+                        // real part
+                        *resarray++ = (larray[0] * rarray[0] + larray[1] * rarray[1]) / denom;
+                        // imaginary part
+                        *resarray++ = (larray[1] * rarray[0] - larray[0] * rarray[1]) / denom;
+                        larray += lstrides[ULAB_MAX_DIMS - 1];
+                        rarray += rstrides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < results->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    larray -= lstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1]; // rewind the axis just traversed ...
+                    larray += lstrides[ULAB_MAX_DIMS - 2]; // ... and step along the next outer axis
+                    rarray -= rstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1];
+                    rarray += rstrides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < results->shape[ULAB_MAX_DIMS - 2]);
+                #endif /* ULAB_MAX_DIMS > 1 */
+            #if ULAB_MAX_DIMS > 2
+                larray -= lstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                larray += lstrides[ULAB_MAX_DIMS - 3];
+                rarray -= rstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+                rarray += rstrides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < results->shape[ULAB_MAX_DIMS - 3]);
+            #endif /* ULAB_MAX_DIMS > 2 */
+        #if ULAB_MAX_DIMS > 3
+            larray -= lstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            larray += lstrides[ULAB_MAX_DIMS - 4];
+            rarray -= rstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+            rarray += rstrides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < results->shape[ULAB_MAX_DIMS - 4]);
+        #endif /* ULAB_MAX_DIMS > 3 */
+    } else { // not both complex; NOTE(review): assumes the caller guarantees that at least one operand is complex
+        uint8_t *larray = (uint8_t *)lhs->array; // byte pointers: the strides are used unscaled in this branch
+        uint8_t *rarray = (uint8_t *)rhs->array;
+        if(lhs->dtype == NDARRAY_COMPLEX) { // complex / real: each component is divided by the same real divisor
+            // real part
+            carray_binary_left_divide_(results, resarray, larray, rarray, lstrides, rstrides, rhs->dtype);
+            // imaginary part
+            resarray = (mp_float_t *)results->array;
+            resarray++; // second pass writes the odd (imaginary) slots of the result
+            larray = (uint8_t *)lhs->array;
+            larray += sizeof(mp_float_t); // start one float further into lhs, i.e. at its imaginary component
+            rarray = (uint8_t *)rhs->array;
+            carray_binary_left_divide_(results, resarray, larray, rarray, lstrides, rstrides, rhs->dtype);
+        } else { // real / complex: dispatch on the real lhs dtype; the macro reads rhs as float pairs
+            if(lhs->dtype == NDARRAY_UINT8) {
+                BINARY_LOOP_COMPLEX_RIGHT_DIVIDE(results, resarray, uint8_t, larray, lstrides, rarray, rstrides);
+            } else if(lhs->dtype == NDARRAY_INT8) {
+                BINARY_LOOP_COMPLEX_RIGHT_DIVIDE(results, resarray, int8_t, larray, lstrides, rarray, rstrides);
+            } else if(lhs->dtype == NDARRAY_UINT16) {
+                BINARY_LOOP_COMPLEX_RIGHT_DIVIDE(results, resarray, uint16_t, larray, lstrides, rarray, rstrides);
+            } else if(lhs->dtype == NDARRAY_INT16) {
+                BINARY_LOOP_COMPLEX_RIGHT_DIVIDE(results, resarray, int16_t, larray, lstrides, rarray, rstrides);
+            } else if(lhs->dtype == NDARRAY_FLOAT) {
+                BINARY_LOOP_COMPLEX_RIGHT_DIVIDE(results, resarray, mp_float_t, larray, lstrides, rarray, rstrides);
+            }
+        }
+    }
+
+    return MP_OBJ_FROM_PTR(results);
+}
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/carray/carray.h b/tulip/shared/ulab/code/numpy/carray/carray.h
new file mode 100644
index 000000000..8ca5de2dd
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/carray/carray.h
@@ -0,0 +1,237 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021-2022 Zoltán Vörös
+*/
+
+#ifndef _CARRAY_
+#define _CARRAY_
+
+MP_DECLARE_CONST_FUN_OBJ_1(carray_real_obj); // function objects declared with the one-argument MicroPython macro
+MP_DECLARE_CONST_FUN_OBJ_1(carray_imag_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(carray_conjugate_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(carray_sort_complex_obj);
+
+
+mp_obj_t carray_imag(mp_obj_t );
+mp_obj_t carray_real(mp_obj_t );
+
+mp_obj_t carray_abs(ndarray_obj_t *, ndarray_obj_t *);
+mp_obj_t carray_binary_add(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *); // binary operators; presumably the same argument order as carray_binary_divide - TODO confirm
+mp_obj_t carray_binary_multiply(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t carray_binary_subtract(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t carray_binary_divide(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *); // arguments: lhs, rhs, ndim, shape, lstrides, rstrides
+mp_obj_t carray_binary_equal_not_equal(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *, mp_binary_op_t );
+
+#define BINARY_LOOP_COMPLEX1(results, resarray, type_right, larray, lstrides, rarray, rstrides, OPERATOR)\
+    size_t l = 0; /* innermost loop along the last axis; declares a local, so expand at most once per scope */\
+    do {\
+        *(resarray) = *((mp_float_t *)(larray)) OPERATOR *((type_right *)(rarray)); /* float read at larray OPERATOR type_right value read at rarray */\
+        (resarray) += 2; /* skip the neighbouring float: only every second slot of resarray is written */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+
+#define BINARY_LOOP_COMPLEX2(results, resarray, type_right, larray, lstrides, rarray, rstrides, OPERATOR)\
+    size_t k = 0; /* repeats BINARY_LOOP_COMPLEX1 along the second-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX1((results), (resarray), type_right, (larray), (lstrides), (rarray), (rstrides), OPERATOR);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1]; /* rewind the axis just traversed ... */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... and step along the next outer axis */\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+
+#define BINARY_LOOP_COMPLEX3(results, resarray, type_right, larray, lstrides, rarray, rstrides, OPERATOR)\
+    size_t j = 0; /* repeats BINARY_LOOP_COMPLEX2 along the third-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX2((results), (resarray), type_right, (larray), (lstrides), (rarray), (rstrides), OPERATOR);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+
+#define BINARY_LOOP_COMPLEX4(results, resarray, type_right, larray, lstrides, rarray, rstrides, OPERATOR)\
+    size_t i = 0; /* repeats BINARY_LOOP_COMPLEX3 along the fourth-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX3((results), (resarray), type_right, (larray), (lstrides), (rarray), (rstrides), OPERATOR);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+
+#define BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT1(results, resarray, type_left, larray, lstrides, rarray, rstrides)\
+    size_t l = 0; /* innermost loop: real lhs minus complex rhs; rarray is indexed as [0], [1], presumably (real, imaginary) */\
+    do {\
+        *(resarray)++ = *((type_left *)(larray)) - (rarray)[0]; /* first output slot: lhs value minus rarray[0] */\
+        *(resarray)++ = -(rarray)[1]; /* second output slot: rarray[1] negated, lhs contributes nothing */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+
+#define BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT2(results, resarray, type_left, larray, lstrides, rarray, rstrides)\
+    size_t k = 0; /* repeats the ...SUBTRACT1 loop along the second-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT1((results), (resarray), type_left, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* rewind the axis just traversed ... */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... and step along the next outer axis */\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+
+#define BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT3(results, resarray, type_left, larray, lstrides, rarray, rstrides)\
+    size_t j = 0; /* repeats the ...SUBTRACT2 loop along the third-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT2((results), (resarray), type_left, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+
+#define BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT4(results, resarray, type_left, larray, lstrides, rarray, rstrides)\
+    size_t i = 0; /* repeats the ...SUBTRACT3 loop along the fourth-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT3((results), (resarray), type_left, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+
+#define BINARY_LOOP_COMPLEX_RIGHT_DIVIDE1(results, resarray, type_left, larray, lstrides, rarray, rstrides)\
+    size_t l = 0; /* innermost loop: real lhs divided by complex rhs, a / (c + di) = a (c - di) / (c^2 + d^2) */\
+    do {\
+        mp_float_t *c = (mp_float_t *)(rarray); /* c[0], c[1]: the two floats of the current rhs element */\
+        mp_float_t denom = c[0] * c[0] + c[1] * c[1]; /* no zero check is made before dividing */\
+        mp_float_t a = *((type_left *)(larray)) / denom;\
+        *(resarray)++ = a * c[0];\
+        *(resarray)++ = -a * c[1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1]; /* stride applies to rarray in its own pointer type, not to the float view c */\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+
+#define BINARY_LOOP_COMPLEX_RIGHT_DIVIDE2(results, resarray, type_left, larray, lstrides, rarray, rstrides)\
+    size_t k = 0; /* repeats the ...RIGHT_DIVIDE1 loop along the second-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_RIGHT_DIVIDE1((results), (resarray), type_left, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1]; /* rewind the axis just traversed ... */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... and step along the next outer axis */\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+
+#define BINARY_LOOP_COMPLEX_RIGHT_DIVIDE3(results, resarray, type_left, larray, lstrides, rarray, rstrides)\
+    size_t j = 0; /* repeats the ...RIGHT_DIVIDE2 loop along the third-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_RIGHT_DIVIDE2((results), (resarray), type_left, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+
+#define BINARY_LOOP_COMPLEX_RIGHT_DIVIDE4(results, resarray, type_left, larray, lstrides, rarray, rstrides)\
+    size_t i = 0; /* repeats the ...RIGHT_DIVIDE3 loop along the fourth-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_RIGHT_DIVIDE3((results), (resarray), type_left, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+
+
+#define BINARY_LOOP_COMPLEX_EQUAL1(results, array, type_right, larray, lstrides, rarray, rstrides)\
+    size_t l = 0; /* innermost loop: compares a two-component lhs element with a single type_right value */\
+    do {\
+        if((*(larray) == *((type_right *)(rarray))) && ((larray)[1] == MICROPY_FLOAT_CONST(0.0))) { /* first component matches rhs and second component is zero */\
+            *(array) ^= 0x01; /* toggles the lowest bit of the output byte, so the outcome depends on its prior value */\
+        }\
+        (array)++;\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+
+#define BINARY_LOOP_COMPLEX_EQUAL2(results, array, type_right, larray, lstrides, rarray, rstrides)\
+    size_t k = 0; /* repeats the ...EQUAL1 loop along the second-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_EQUAL1((results), (array), type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1]; /* rewind the axis just traversed ... */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2]; /* ... and step along the next outer axis */\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\
+
+#define BINARY_LOOP_COMPLEX_EQUAL3(results, array, type_right, larray, lstrides, rarray, rstrides)\
+    size_t j = 0; /* repeats the ...EQUAL2 loop along the third-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_EQUAL2((results), (array), type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\
+
+#define BINARY_LOOP_COMPLEX_EQUAL4(results, array, type_right, larray, lstrides, rarray, rstrides)\
+    size_t i = 0; /* repeats the ...EQUAL3 loop along the fourth-to-last axis */\
+    do {\
+        BINARY_LOOP_COMPLEX_EQUAL3((results), (array), type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
+
+#if ULAB_MAX_DIMS == 1 // the unsuffixed macro names alias the loop nest whose depth equals ULAB_MAX_DIMS
+#define BINARY_LOOP_COMPLEX BINARY_LOOP_COMPLEX1
+#define BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT1
+#define BINARY_LOOP_COMPLEX_RIGHT_DIVIDE BINARY_LOOP_COMPLEX_RIGHT_DIVIDE1
+#define BINARY_LOOP_COMPLEX_EQUAL BINARY_LOOP_COMPLEX_EQUAL1
+#endif /* ULAB_MAX_DIMS == 1 */
+// values of ULAB_MAX_DIMS other than 1, 2, 3 or 4 leave the unsuffixed names undefined
+#if ULAB_MAX_DIMS == 2
+#define BINARY_LOOP_COMPLEX BINARY_LOOP_COMPLEX2
+#define BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT2
+#define BINARY_LOOP_COMPLEX_RIGHT_DIVIDE BINARY_LOOP_COMPLEX_RIGHT_DIVIDE2
+#define BINARY_LOOP_COMPLEX_EQUAL BINARY_LOOP_COMPLEX_EQUAL2
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+#define BINARY_LOOP_COMPLEX BINARY_LOOP_COMPLEX3
+#define BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT3
+#define BINARY_LOOP_COMPLEX_RIGHT_DIVIDE BINARY_LOOP_COMPLEX_RIGHT_DIVIDE3
+#define BINARY_LOOP_COMPLEX_EQUAL BINARY_LOOP_COMPLEX_EQUAL3
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+#define BINARY_LOOP_COMPLEX BINARY_LOOP_COMPLEX4
+#define BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT BINARY_LOOP_COMPLEX_REVERSED_SUBTRACT4
+#define BINARY_LOOP_COMPLEX_RIGHT_DIVIDE BINARY_LOOP_COMPLEX_RIGHT_DIVIDE4
+#define BINARY_LOOP_COMPLEX_EQUAL BINARY_LOOP_COMPLEX_EQUAL4
+#endif /* ULAB_MAX_DIMS == 4 */
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/carray/carray_tools.c b/tulip/shared/ulab/code/numpy/carray/carray_tools.c
new file mode 100644
index 000000000..d8c7b183e
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/carray/carray_tools.c
@@ -0,0 +1,28 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Zoltán Vörös
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../../ulab.h"
+#include "../../ndarray.h"
+
+#if ULAB_SUPPORTS_COMPLEX
+
+void raise_complex_NotImplementedError(void) { // single shared place that raises the "complex not supported" error
+    mp_raise_NotImplementedError(MP_ERROR_TEXT("not implemented for complex dtype"));
+}
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/carray/carray_tools.h b/tulip/shared/ulab/code/numpy/carray/carray_tools.h
new file mode 100644
index 000000000..3ac79b5f4
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/carray/carray_tools.h
@@ -0,0 +1,25 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Zoltán Vörös
+*/
+
+#ifndef _CARRAY_TOOLS_
+#define _CARRAY_TOOLS_
+
+void raise_complex_NotImplementedError(void); // declared unconditionally; the definition in carray_tools.c is compiled only when ULAB_SUPPORTS_COMPLEX is set
+
+#if ULAB_SUPPORTS_COMPLEX
+    #define NOT_IMPLEMENTED_FOR_COMPLEX() raise_complex_NotImplementedError(); // expands to a full statement, semicolon included
+    #define COMPLEX_DTYPE_NOT_IMPLEMENTED(dtype) if((dtype) == NDARRAY_COMPLEX) raise_complex_NotImplementedError(); // unbraced if: never follow an invocation with else
+#else
+    #define NOT_IMPLEMENTED_FOR_COMPLEX() // do nothing
+    #define COMPLEX_DTYPE_NOT_IMPLEMENTED(dtype) // do nothing
+#endif
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/compare.c b/tulip/shared/ulab/code/numpy/compare.c
new file mode 100644
index 000000000..b2762e411
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/compare.c
@@ -0,0 +1,677 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+ *               2020 Jeff Epler for Adafruit Industries
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../ulab.h"
+#include "../ndarray_operators.h"
+#include "../ulab_tools.h"
+#include "carray/carray_tools.h"
+#include "compare.h"
+
+static mp_obj_t compare_function(mp_obj_t x1, mp_obj_t x2, uint8_t op) { // element-wise comparison of x1 and x2; op is one of the COMPARE_* codes
+    ndarray_obj_t *lhs = ndarray_from_mp_obj(x1, 0);
+    ndarray_obj_t *rhs = ndarray_from_mp_obj(x2, 0);
+    #if ULAB_SUPPORTS_COMPLEX
+    if((lhs->dtype == NDARRAY_COMPLEX) || (rhs->dtype == NDARRAY_COMPLEX)) {
+        NOT_IMPLEMENTED_FOR_COMPLEX()
+    }
+    #endif
+    uint8_t ndim = 0;
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS); // scratch buffers filled in by ndarray_can_broadcast
+    int32_t *lstrides = m_new(int32_t, ULAB_MAX_DIMS);
+    int32_t *rstrides = m_new(int32_t, ULAB_MAX_DIMS);
+    if(!ndarray_can_broadcast(lhs, rhs, &ndim, shape, lstrides, rstrides)) {
+        m_del(size_t, shape, ULAB_MAX_DIMS); // release first: mp_raise_ValueError does not return
+        m_del(int32_t, lstrides, ULAB_MAX_DIMS);
+        m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+        mp_raise_ValueError(MP_ERROR_TEXT("operands could not be broadcast together"));
+    }
+
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+
+    if(op == COMPARE_EQUAL) { // (in)equality is delegated to the generic ndarray operator
+        return ndarray_binary_equality(lhs, rhs, ndim, shape, lstrides, rstrides, MP_BINARY_OP_EQUAL);
+    } else if(op == COMPARE_NOT_EQUAL) {
+        return ndarray_binary_equality(lhs, rhs, ndim, shape, lstrides, rstrides, MP_BINARY_OP_NOT_EQUAL);
+    }
+    // These are the upcasting rules
+    // float always becomes float
+    // operation on identical types preserves type
+    // uint8 + int8 => int16
+    // uint8 + int16 => int16
+    // uint8 + uint16 => uint16
+    // int8 + int16 => int16
+    // int8 + uint16 => uint16
+    // uint16 + int16 => float
+    // The parameters of RUN_COMPARE_LOOP are
+    // typecode of result, type_out, type_left, type_right, lhs operand, rhs operand, operator
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            RUN_COMPARE_LOOP(NDARRAY_UINT8, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            RUN_COMPARE_LOOP(NDARRAY_UINT16, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        }
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            RUN_COMPARE_LOOP(NDARRAY_INT8, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            RUN_COMPARE_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            RUN_COMPARE_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            RUN_COMPARE_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        }
+    }
+    return mp_const_none; // we should never reach this point
+}
+
+#if ULAB_NUMPY_HAS_EQUAL | ULAB_NUMPY_HAS_NOTEQUAL
+static mp_obj_t compare_equal_helper(mp_obj_t x1, mp_obj_t x2, uint8_t comptype) {
+    // The comparison always runs on arrays; a pair of scalars is unwrapped afterwards.
+    mp_obj_t outcome = compare_function(x1, x2, comptype);
+    if(!(mp_obj_is_int(x1) || mp_obj_is_float(x1)) || !(mp_obj_is_int(x2) || mp_obj_is_float(x2))) {
+        // at least one operand is not a plain number: hand the result back untouched
+        return outcome;
+    }
+    // both operands are scalars, so return the first item the result yields
+    mp_obj_iter_buf_t scratch;
+    return mp_iternext(mp_getiter(outcome, &scratch));
+}
+#endif
+
+#if ULAB_NUMPY_HAS_CLIP
+//| def clip(
+//|     a: _ScalarOrArrayLike,
+//|     a_min: _ScalarOrArrayLike,
+//|     a_max: _ScalarOrArrayLike,
+//| ) -> _ScalarOrNdArray:
+//|     """
+//|     Clips (limits) the values in an array.
+//|
+//|     :param a: Scalar or array containing elements to clip.
+//|     :param a_min: Minimum value, it will be broadcast against ``a``.
+//|     :param a_max: Maximum value, it will be broadcast against ``a``.
+//|     :return:
+//|         A scalar or array with the elements of ``a``, but where
+//|         values < ``a_min`` are replaced with ``a_min``, and those
+//|         > ``a_max`` with ``a_max``.
+//|     """
+//|     ...
+mp_obj_t compare_clip(mp_obj_t x1, mp_obj_t x2, mp_obj_t x3) {
+    // Design note: a dedicated single-pass loop in RUN_COMPARE_LOOP would cost roughly
+    // 2 kB of extra firmware, yet could not halve the run time, because both comparisons
+    // still have to be evaluated. Chaining two compare_function calls instead adds
+    // only about 140 bytes.
+    if(!(mp_obj_is_int(x1) || mp_obj_is_float(x1))) { // assume ndarrays
+        // inner pass: COMPARE_MINIMUM against x3; outer pass: COMPARE_MAXIMUM against x2
+        mp_obj_t capped = compare_function(x1, x3, COMPARE_MINIMUM);
+        return compare_function(x2, capped, COMPARE_MAXIMUM);
+    }
+    // scalar x1: compare as floats, but return the caller's original objects
+    const mp_float_t value = mp_obj_get_float(x1);
+    const mp_float_t lower = mp_obj_get_float(x2);
+    const mp_float_t upper = mp_obj_get_float(x3);
+    if(value < lower) {
+        return x2;
+    }
+    // the lower bound is checked first, exactly one of x1, x2, x3 is returned
+    return (value > upper) ? x3 : x1;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_3(compare_clip_obj, compare_clip);
+#endif
+
+#if ULAB_NUMPY_HAS_EQUAL
+//| def equal(x: _ScalarOrArrayLike, y: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|     """
+//|     Returns ``x == y`` element-wise.
+//|
+//|     :param x, y:
+//|         Input scalar or array. If ``x.shape != y.shape`` they must
+//|         be broadcastable to a common shape (which becomes the
+//|         shape of the output.)
+//|     :return:
+//|         A boolean scalar or array with the element-wise result of ``x == y``.
+//|     """
+//|     ...
+mp_obj_t compare_equal(mp_obj_t x1, mp_obj_t x2) { // thin wrapper: forwards to compare_equal_helper with COMPARE_EQUAL
+    return compare_equal_helper(x1, x2, COMPARE_EQUAL);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(compare_equal_obj, compare_equal); // function object for the two-argument wrapper above
+#endif
+
+#if ULAB_NUMPY_HAS_NOTEQUAL
+//| def not_equal(
+//|     x: _ScalarOrArrayLike,
+//|     y: _ScalarOrArrayLike,
+//| ) -> Union[_bool, ulab.numpy.ndarray]:
+//|     """
+//|     Returns ``x != y`` element-wise.
+//|
+//|     :param x, y:
+//|         Input scalar or array. If ``x.shape != y.shape`` they must
+//|         be broadcastable to a common shape (which becomes the
+//|         shape of the output.)
+//|     :return:
+//|         A boolean scalar or array with the element-wise result of ``x != y``.
+//|     """
+//|     ...
+mp_obj_t compare_not_equal(mp_obj_t x1, mp_obj_t x2) { // thin wrapper: forwards to compare_equal_helper with COMPARE_NOT_EQUAL
+    return compare_equal_helper(x1, x2, COMPARE_NOT_EQUAL);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(compare_not_equal_obj, compare_not_equal); // function object for the two-argument wrapper above
+#endif
+
+#if ULAB_NUMPY_HAS_ISFINITE | ULAB_NUMPY_HAS_ISINF
+static mp_obj_t compare_isinf_isfinite(mp_obj_t _x, uint8_t mask) { // shared implementation behind isinf and isfinite
+    // mask tells which caller is being served: isinf passes mask = 1,
+    // isfinite passes mask = 0
+    if(mp_obj_is_int(_x)) { // an integer scalar is never infinite
+        if(mask) {
+            return mp_const_false;
+        } else {
+            return mp_const_true;
+        }
+    } else if(mp_obj_is_float(_x)) {
+        mp_float_t x = mp_obj_get_float(_x);
+        if(isnan(x)) { // NaN is reported as False by both callers
+            return mp_const_false;
+        }
+        if(mask) { // called from isinf
+            return isinf(x) ? mp_const_true : mp_const_false;
+        } else { // called from isfinite
+            return isinf(x) ? mp_const_false : mp_const_true;
+        }
+    } else if(mp_obj_is_type(_x, &ulab_ndarray_type)) {
+        ndarray_obj_t *x = MP_OBJ_TO_PTR(_x);
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(x->dtype)
+        ndarray_obj_t *results = ndarray_new_dense_ndarray(x->ndim, x->shape, NDARRAY_BOOL);
+        // At this point, results is all False
+        uint8_t *rarray = (uint8_t *)results->array;
+        if(x->dtype != NDARRAY_FLOAT) {
+            // int types can never be infinite...
+            if(!mask) {
+                // ...so flip all values in the array, if the function was called from isfinite
+                memset(rarray, 1, results->len);
+            }
+            return MP_OBJ_FROM_PTR(results);
+        }
+        uint8_t *xarray = (uint8_t *)x->array; // byte pointer, advanced by x->strides below
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        mp_float_t value = *(mp_float_t *)xarray;
+                        if(isnan(value)) {
+                            *rarray++ = 0; // NaN counts as neither infinite nor finite
+                        } else {
+                            *rarray++ = isinf(value) ? mask : 1 - mask; // infinite: mask; any other value: the opposite
+                        }
+                        xarray += x->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < x->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    xarray -= x->strides[ULAB_MAX_DIMS - 1] * x->shape[ULAB_MAX_DIMS-1]; // rewind the axis just traversed ...
+                    xarray += x->strides[ULAB_MAX_DIMS - 2]; // ... and step along the next outer axis
+                    k++;
+                } while(k < x->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                xarray -= x->strides[ULAB_MAX_DIMS - 2] * x->shape[ULAB_MAX_DIMS-2];
+                xarray += x->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < x->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            xarray -= x->strides[ULAB_MAX_DIMS - 3] * x->shape[ULAB_MAX_DIMS-3];
+            xarray += x->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < x->shape[ULAB_MAX_DIMS - 4]);
+        #endif
+
+        return MP_OBJ_FROM_PTR(results);
+    } else {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong input type"));
+    }
+    return mp_const_none; // every branch above returns or raises; NOTE(review): presumably kept only to satisfy the compiler
+}
+#endif
+
+#if ULAB_NUMPY_HAS_ISFINITE
+//| def isfinite(x: _ScalarOrNdArray) -> Union[_bool, ulab.numpy.ndarray]:
+//|     """
+//|     Tests element-wise for finiteness (i.e., it should not be infinity or a NaN).
+//|
+//|     :param x: Input scalar or ndarray.
+//|     :return:
+//|         A boolean scalar or array with True where ``x`` is finite, and
+//|         False otherwise.
+//|     """
+//|     ...
+mp_obj_t compare_isfinite(mp_obj_t _x) { // isfinite(): thin wrapper around the shared isinf/isfinite helper
+    return compare_isinf_isfinite(_x, 0); // mask = 0: on the float-array path inf and NaN map to 0, every other value to 1
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(compare_isfinite_obj, compare_isfinite);
+#endif
+
+#if ULAB_NUMPY_HAS_ISINF
+//| def isinf(x: _ScalarOrNdArray) -> Union[_bool, ulab.numpy.ndarray]:
+//|     """
+//|     Tests element-wise for positive or negative infinity.
+//|
+//|     :param x: Input scalar or ndarray.
+//|     :return:
+//|         A boolean scalar or array with True where ``x`` is positive or
+//|         negative infinity, and False otherwise.
+//|     """
+//|     ...
+mp_obj_t compare_isinf(mp_obj_t _x) { // isinf(): thin wrapper around the shared isinf/isfinite helper
+    return compare_isinf_isfinite(_x, 1); // mask = 1: on the float-array path inf maps to 1, NaN and every other value to 0
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(compare_isinf_obj, compare_isinf);
+#endif
+
+#if ULAB_NUMPY_HAS_MAXIMUM
+//| def maximum(x1: _ScalarOrArrayLike, x2: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|     """
+//|     Returns the element-wise maximum.
+//|
+//|     :param x1, x2:
+//|         Input scalar or array. If ``x1.shape != x2.shape`` they must
+//|         be broadcastable to a common shape (which becomes the
+//|         shape of the output.)
+//|     :return:
+//|         A scalar or array with the element-wise maximum of ``x1`` and ``x2``.
+//|     """
+//|     ...
+mp_obj_t compare_maximum(mp_obj_t x1, mp_obj_t x2) {
+    // the comparison itself is delegated to compare_function
+    mp_obj_t outcome = compare_function(x1, x2, COMPARE_MAXIMUM);
+    // unless both inputs are plain numbers, the outcome is handed back unchanged
+    if(!(mp_obj_is_int(x1) || mp_obj_is_float(x1)) || !(mp_obj_is_int(x2) || mp_obj_is_float(x2))) return outcome;
+    // maximum(3, 4): the outcome is treated as an ndarray, and its element 0 is returned as a number
+    ndarray_obj_t *holder = MP_OBJ_TO_PTR(outcome);
+    return mp_binary_get_val_array(holder->dtype, holder->array, 0);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(compare_maximum_obj, compare_maximum);
+#endif
+
+#if ULAB_NUMPY_HAS_MINIMUM
+
+//| def minimum(x1: _ScalarOrArrayLike, x2: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|     """
+//|     Returns the element-wise minimum.
+//|
+//|     :param x1, x2:
+//|         Input scalar or array. If ``x1.shape != x2.shape`` they must
+//|         be broadcastable to a common shape (which becomes the
+//|         shape of the output.)
+//|     :return:
+//|         A scalar or array with the element-wise minimum of ``x1`` and ``x2``.
+//|     """
+//|     ...
+mp_obj_t compare_minimum(mp_obj_t x1, mp_obj_t x2) {
+    // the comparison itself is delegated to compare_function
+    mp_obj_t outcome = compare_function(x1, x2, COMPARE_MINIMUM);
+    // unless both inputs are plain numbers, the outcome is handed back unchanged
+    if(!(mp_obj_is_int(x1) || mp_obj_is_float(x1)) || !(mp_obj_is_int(x2) || mp_obj_is_float(x2))) return outcome;
+    // minimum(3, 4): the outcome is treated as an ndarray, and its element 0 is returned as a number
+    ndarray_obj_t *holder = MP_OBJ_TO_PTR(outcome);
+    return mp_binary_get_val_array(holder->dtype, holder->array, 0);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(compare_minimum_obj, compare_minimum);
+#endif
+
+#if ULAB_NUMPY_HAS_NONZERO
+
+//| def nonzero(x: _ScalarOrArrayLike) -> ulab.numpy.ndarray:
+//|     """
+//|     Returns the indices of elements that are non-zero.
+//|
+//|     :param x:
+//|         Input scalar or array. If ``x`` is a scalar, it is treated
+//|         as a single-element 1-d array.
+//|     :return:
+//|         An array of indices that are non-zero.
+//|     """
+//|     ...
+mp_obj_t compare_nonzero(mp_obj_t x) {
+    // Returns a tuple with one uint16 index array per dimension of the comparison result, listing the positions where x != 0.
+    ndarray_obj_t *ndarray_x = ndarray_from_mp_obj(x, 0);
+    // since ndarray_new_linear_array calls m_new0, the content of zero is a single zero
+    ndarray_obj_t *zero = ndarray_new_linear_array(1, NDARRAY_UINT8);
+    uint8_t ndim = 0;
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+    int32_t *x_strides = m_new(int32_t, ULAB_MAX_DIMS);
+    int32_t *zero_strides = m_new(int32_t, ULAB_MAX_DIMS);
+    // we don't actually have to inspect the outcome of ndarray_can_broadcast,
+    // because the right hand side is a linear array with a single element
+    ndarray_can_broadcast(ndarray_x, zero, &ndim, shape, x_strides, zero_strides);
+
+    // equal_obj is a Boolean ndarray
+    mp_obj_t equal_obj = ndarray_binary_equality(ndarray_x, zero, ndim, shape, x_strides, zero_strides, MP_BINARY_OP_NOT_EQUAL);
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(equal_obj);
+
+    // these are no longer needed, get rid of them
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(int32_t, x_strides, ULAB_MAX_DIMS);
+    m_del(int32_t, zero_strides, ULAB_MAX_DIMS);
+
+    uint8_t *array = (uint8_t *)ndarray->array;
+    uint8_t *origin = (uint8_t *)ndarray->array;
+    // First, count the number of Trues. The counter has to be a size_t: a 16-bit counter wraps
+    // beyond 65535 hits, the index arrays become too short, and the second pass overruns them.
+    size_t count = 0;
+    size_t indices[ULAB_MAX_DIMS];
+
+    #if ULAB_MAX_DIMS > 3
+    indices[3] = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        indices[2] = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            indices[1] = 0;
+            do {
+            #endif
+                indices[0] = 0;
+                do {
+                    if(*array != 0) {
+                        count++;
+                    }
+                    array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                    indices[0]++;
+                } while(indices[0] < ndarray->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+                array += ndarray->strides[ULAB_MAX_DIMS - 2];
+                indices[1]++;
+            } while(indices[1] < ndarray->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            array -= ndarray->strides[ULAB_MAX_DIMS - 2] * ndarray->shape[ULAB_MAX_DIMS-2];
+            array += ndarray->strides[ULAB_MAX_DIMS - 3];
+            indices[2]++;
+        } while(indices[2] < ndarray->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        array -= ndarray->strides[ULAB_MAX_DIMS - 3] * ndarray->shape[ULAB_MAX_DIMS-3];
+        array += ndarray->strides[ULAB_MAX_DIMS - 4];
+        indices[3]++;
+    } while(indices[3] < ndarray->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+
+    mp_obj_t *items = m_new(mp_obj_t, ndarray->ndim);
+    uint16_t *arrays[ULAB_MAX_DIMS];
+    // one uint16 index array per dimension, each with room for count entries
+    for(uint8_t i = 0; i < ndarray->ndim; i++) {
+        ndarray_obj_t *item_array = ndarray_new_linear_array(count, NDARRAY_UINT16);
+        uint16_t *iarray = (uint16_t *)item_array->array;
+        arrays[ULAB_MAX_DIMS - 1 - i] = iarray;
+        items[ndarray->ndim - 1 - i] = MP_OBJ_FROM_PTR(item_array);
+    }
+    array = origin;
+    count = 0;
+    // second pass over the same elements: record the multi-index of every hit
+    #if ULAB_MAX_DIMS > 3
+    indices[3] = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        indices[2] = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            indices[1] = 0;
+            do {
+            #endif
+                indices[0] = 0;
+                do {
+                    if(*array != 0) {
+                        for(uint8_t d = 0; d < ndarray->ndim; d++) {
+                            arrays[ULAB_MAX_DIMS - 1 - d][count] = indices[d]; // stored as uint16_t: an index above 65535 is truncated
+                        }
+                        count++;
+                    }
+                    array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                    indices[0]++;
+                } while(indices[0] < ndarray->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+                array += ndarray->strides[ULAB_MAX_DIMS - 2];
+                indices[1]++;
+            } while(indices[1] < ndarray->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            array -= ndarray->strides[ULAB_MAX_DIMS - 2] * ndarray->shape[ULAB_MAX_DIMS-2];
+            array += ndarray->strides[ULAB_MAX_DIMS - 3];
+            indices[2]++;
+        } while(indices[2] < ndarray->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        array -= ndarray->strides[ULAB_MAX_DIMS - 3] * ndarray->shape[ULAB_MAX_DIMS-3];
+        array += ndarray->strides[ULAB_MAX_DIMS - 4];
+        indices[3]++;
+    } while(indices[3] < ndarray->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+
+    return mp_obj_new_tuple(ndarray->ndim, items);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(compare_nonzero_obj, compare_nonzero);
+#endif /* ULAB_NUMPY_HAS_NONZERO */
+
+#if ULAB_NUMPY_HAS_WHERE
+
+//| def where(
+//|     condition: _ScalarOrArrayLike,
+//|     x: _ScalarOrArrayLike,
+//|     y: _ScalarOrArrayLike,
+//|     ) -> ulab.numpy.ndarray:
+//|     """
+//|     Returns elements from ``x`` or ``y`` depending on ``condition``.
+//|
+//|     :param condition:
+//|         Input scalar or array. If an element (or scalar) is truthy,
+//|         the corresponding element from ``x`` is chosen, otherwise
+//|         ``y`` is used. ``condition``, ``x`` and ``y`` must also be
+//|         broadcastable to the same shape (which becomes the output
+//|         shape.)
+//|     :param x, y:
+//|         Input scalar or array.
+//|     :return:
+//|         An array with elements from ``x`` when ``condition`` is
+//|         truthy, and ``y`` elsewhere.
+//|     """
+//|     ...
+mp_obj_t compare_where(mp_obj_t _condition, mp_obj_t _x, mp_obj_t _y) {
+    // Builds a new dense array taking each element from x where condition is non-zero, and from y elsewhere (dtype: upcast of x and y).
+    // this implementation will work with ndarrays, and scalars only
+    ndarray_obj_t *c = ndarray_from_mp_obj(_condition, 0);
+    ndarray_obj_t *x = ndarray_from_mp_obj(_x, 0);
+    ndarray_obj_t *y = ndarray_from_mp_obj(_y, 0);
+
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(c->dtype)
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(x->dtype)
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(y->dtype)
+
+    int32_t *cstrides = m_new(int32_t, ULAB_MAX_DIMS);
+    int32_t *xstrides = m_new(int32_t, ULAB_MAX_DIMS);
+    int32_t *ystrides = m_new(int32_t, ULAB_MAX_DIMS);
+    size_t *oshape = m_new(size_t, ULAB_MAX_DIMS);
+
+    uint8_t ndim;
+
+    // establish the broadcasting conditions first: if every pair of the
+    // three arrays can be broadcast together, then all three can be, too
+    // (the strides and shape of the last ndim axes are recomputed below)
+    if(!ndarray_can_broadcast(c, x, &ndim, oshape, cstrides, xstrides) ||
+        !ndarray_can_broadcast(c, y, &ndim, oshape, cstrides, ystrides) ||
+        !ndarray_can_broadcast(x, y, &ndim, oshape, xstrides, ystrides)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("operands could not be broadcast together"));
+    }
+
+    ndim = MAX(MAX(c->ndim, x->ndim), y->ndim);
+
+    for(uint8_t i = 1; i <= ndim; i++) {
+        cstrides[ULAB_MAX_DIMS - i] = c->shape[ULAB_MAX_DIMS - i] < 2 ? 0 : c->strides[ULAB_MAX_DIMS - i];
+        xstrides[ULAB_MAX_DIMS - i] = x->shape[ULAB_MAX_DIMS - i] < 2 ? 0 : x->strides[ULAB_MAX_DIMS - i];
+        ystrides[ULAB_MAX_DIMS - i] = y->shape[ULAB_MAX_DIMS - i] < 2 ? 0 : y->strides[ULAB_MAX_DIMS - i];
+        oshape[ULAB_MAX_DIMS - i] = MAX(MAX(c->shape[ULAB_MAX_DIMS - i], x->shape[ULAB_MAX_DIMS - i]), y->shape[ULAB_MAX_DIMS - i]);
+    }
+
+    uint8_t out_dtype = ndarray_upcast_dtype(x->dtype, y->dtype);
+    ndarray_obj_t *out = ndarray_new_dense_ndarray(ndim, oshape, out_dtype);
+
+    mp_float_t (*cfunc)(void *) = ndarray_get_float_function(c->dtype);
+    mp_float_t (*xfunc)(void *) = ndarray_get_float_function(x->dtype);
+    mp_float_t (*yfunc)(void *) = ndarray_get_float_function(y->dtype);
+    mp_float_t (*ofunc)(void *, mp_float_t ) = ndarray_set_float_function(out->dtype);
+
+    uint8_t *oarray = (uint8_t *)out->array;
+    uint8_t *carray = (uint8_t *)c->array;
+    uint8_t *xarray = (uint8_t *)x->array;
+    uint8_t *yarray = (uint8_t *)y->array;
+
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    mp_float_t value;
+                    mp_float_t cvalue = cfunc(carray);
+                    if(cvalue != MICROPY_FLOAT_CONST(0.0)) {
+                        value = xfunc(xarray);
+                    } else {
+                        value = yfunc(yarray);
+                    }
+                    ofunc(oarray, value);
+                    oarray += out->itemsize;
+                    carray += cstrides[ULAB_MAX_DIMS - 1];
+                    xarray += xstrides[ULAB_MAX_DIMS - 1];
+                    yarray += ystrides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l < out->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                carray -= cstrides[ULAB_MAX_DIMS - 1] * c->shape[ULAB_MAX_DIMS-1];
+                carray += cstrides[ULAB_MAX_DIMS - 2];
+                xarray -= xstrides[ULAB_MAX_DIMS - 1] * x->shape[ULAB_MAX_DIMS-1];
+                xarray += xstrides[ULAB_MAX_DIMS - 2];
+                yarray -= ystrides[ULAB_MAX_DIMS - 1] * y->shape[ULAB_MAX_DIMS-1];
+                yarray += ystrides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k < out->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            carray -= cstrides[ULAB_MAX_DIMS - 2] * c->shape[ULAB_MAX_DIMS-2];
+            carray += cstrides[ULAB_MAX_DIMS - 3];
+            xarray -= xstrides[ULAB_MAX_DIMS - 2] * x->shape[ULAB_MAX_DIMS-2];
+            xarray += xstrides[ULAB_MAX_DIMS - 3];
+            yarray -= ystrides[ULAB_MAX_DIMS - 2] * y->shape[ULAB_MAX_DIMS-2];
+            yarray += ystrides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < out->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        carray -= cstrides[ULAB_MAX_DIMS - 3] * c->shape[ULAB_MAX_DIMS-3];
+        carray += cstrides[ULAB_MAX_DIMS - 4];
+        xarray -= xstrides[ULAB_MAX_DIMS - 3] * x->shape[ULAB_MAX_DIMS-3];
+        xarray += xstrides[ULAB_MAX_DIMS - 4];
+        yarray -= ystrides[ULAB_MAX_DIMS - 3] * y->shape[ULAB_MAX_DIMS-3];
+        yarray += ystrides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < out->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+    return MP_OBJ_FROM_PTR(out);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_3(compare_where_obj, compare_where);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/compare.h b/tulip/shared/ulab/code/numpy/compare.h
new file mode 100644
index 000000000..de3d7e656
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/compare.h
@@ -0,0 +1,151 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+*/
+
+#ifndef _COMPARE_
+#define _COMPARE_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+enum COMPARE_FUNCTION_TYPE { // operation selectors, e.g. the op argument of RUN_COMPARE_LOOP
+    COMPARE_EQUAL,
+    COMPARE_NOT_EQUAL,
+    COMPARE_MINIMUM, // RUN_COMPARE_LOOP runs COMPARE_LOOP with the < operator
+    COMPARE_MAXIMUM, // RUN_COMPARE_LOOP runs COMPARE_LOOP with the > operator
+    COMPARE_CLIP,
+};
+
+MP_DECLARE_CONST_FUN_OBJ_3(compare_clip_obj); // function objects of the compare module; the macro suffix is the argument count
+MP_DECLARE_CONST_FUN_OBJ_2(compare_equal_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(compare_isfinite_obj); // single argument, in line with MP_DEFINE_CONST_FUN_OBJ_1 in compare.c
+MP_DECLARE_CONST_FUN_OBJ_1(compare_isinf_obj); // single argument, in line with MP_DEFINE_CONST_FUN_OBJ_1 in compare.c
+MP_DECLARE_CONST_FUN_OBJ_2(compare_minimum_obj);
+MP_DECLARE_CONST_FUN_OBJ_2(compare_maximum_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(compare_nonzero_obj);
+MP_DECLARE_CONST_FUN_OBJ_2(compare_not_equal_obj);
+MP_DECLARE_CONST_FUN_OBJ_3(compare_where_obj);
+
+#if ULAB_MAX_DIMS == 1
+#define COMPARE_LOOP(results, array, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 1D: per element, store left if (left OPERATOR right), otherwise right */\
+    size_t l = 0; /* position along the last axis */\
+    do {\
+        *((type_out *)(array)) = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? (type_out)(*((type_left *)(larray))) : (type_out)(*((type_right *)(rarray)));\
+        (array) += (results)->strides[ULAB_MAX_DIMS - 1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l <  (results)->shape[ULAB_MAX_DIMS - 1]);\
+    return MP_OBJ_FROM_PTR(results); /* NB: this returns from the function that expands the macro */\
+
+#endif // ULAB_MAX_DIMS == 1
+
+#if ULAB_MAX_DIMS == 2
+#define COMPARE_LOOP(results, array, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 2D: per element, store left if (left OPERATOR right), otherwise right */\
+    size_t k = 0; /* position along the second-to-last axis */\
+    do {\
+        size_t l = 0; /* position along the last axis */\
+        do {\
+            *((type_out *)(array)) = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? (type_out)(*((type_left *)(larray))) : (type_out)(*((type_right *)(rarray)));\
+            (array) += (results)->strides[ULAB_MAX_DIMS - 1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l <  (results)->shape[ULAB_MAX_DIMS - 1]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* rewind the last axis, then step along the next one */\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k <  (results)->shape[ULAB_MAX_DIMS - 2]);\
+    return MP_OBJ_FROM_PTR(results); /* NB: this returns from the function that expands the macro */\
+
+#endif // ULAB_MAX_DIMS == 2
+
+#if ULAB_MAX_DIMS == 3
+#define COMPARE_LOOP(results, array, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 3D: per element, store left if (left OPERATOR right), otherwise right */\
+    size_t j = 0; /* position along the third-to-last axis */\
+    do {\
+        size_t k = 0; /* position along the second-to-last axis */\
+        do {\
+            size_t l = 0; /* position along the last axis */\
+            do {\
+                *((type_out *)(array)) = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? (type_out)(*((type_left *)(larray))) : (type_out)(*((type_right *)(rarray)));\
+                (array) += (results)->strides[ULAB_MAX_DIMS - 1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l <  (results)->shape[ULAB_MAX_DIMS - 1]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* rewind the last axis, then step along the next one */\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k <  (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j <  (results)->shape[ULAB_MAX_DIMS - 3]);\
+    return MP_OBJ_FROM_PTR(results); /* NB: this returns from the function that expands the macro */\
+
+#endif // ULAB_MAX_DIMS == 3
+
+#if ULAB_MAX_DIMS == 4
+#define COMPARE_LOOP(results, array, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR) /* 4D: per element, store left if (left OPERATOR right), otherwise right */\
+    size_t i = 0; /* position along the first of the four axes */\
+    do {\
+        size_t j = 0; /* position along the third-to-last axis */\
+        do {\
+            size_t k = 0; /* position along the second-to-last axis */\
+            do {\
+                size_t l = 0; /* position along the last axis */\
+                do {\
+                    *((type_out *)(array)) = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray)) ? (type_out)(*((type_left *)(larray))) : (type_out)(*((type_right *)(rarray)));\
+                    (array) += (results)->strides[ULAB_MAX_DIMS - 1];\
+                    (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+                    (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l <  (results)->shape[ULAB_MAX_DIMS - 1]);\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1]; /* rewind the last axis, then step along the next one */\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k <  (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j <  (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS-3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i <  (results)->shape[ULAB_MAX_DIMS - 4]);\
+    return MP_OBJ_FROM_PTR(results); /* NB: this returns from the function that expands the macro */\
+
+#endif // ULAB_MAX_DIMS == 4
+
+#define RUN_COMPARE_LOOP(dtype, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, ndim, shape, op) do { /* allocates the output array, then runs COMPARE_LOOP with the operator selected by op */\
+    ndarray_obj_t *results = ndarray_new_dense_ndarray((ndim), (shape), (dtype));\
+    uint8_t *array = (uint8_t *)results->array; /* write cursor into the output buffer */\
+    if((op) == COMPARE_MINIMUM) { /* left < right keeps left, i.e., the smaller value */\
+        COMPARE_LOOP(results, array, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, <);\
+    }\
+    if((op) == COMPARE_MAXIMUM) { /* left > right keeps left, i.e., the larger value */\
+        COMPARE_LOOP(results, array, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, >);\
+    }\
+} while(0) /* COMPARE_LOOP ends in a return statement, so this point is reached only for other values of op */
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/create.c b/tulip/shared/ulab/code/numpy/create.c
new file mode 100644
index 000000000..ad957ce7a
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/create.c
@@ -0,0 +1,1079 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2019-2024 Zoltán Vörös
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+
+#include "create.h"
+#include "../ulab.h"
+#include "../ulab_tools.h"
+
+#if ULAB_NUMPY_HAS_ONES | ULAB_NUMPY_HAS_ZEROS | ULAB_NUMPY_HAS_FULL | ULAB_NUMPY_HAS_EMPTY
+static mp_obj_t create_zeros_ones_full(mp_obj_t oshape, uint8_t dtype, mp_obj_t value) {
+    // Creates a new ndarray of the given shape (an integer, or a tuple/list of at most
+    // ULAB_MAX_DIMS integers) and dtype; unless value is None, every element is set to value.
+    if(!mp_obj_is_int(oshape) && !mp_obj_is_type(oshape, &mp_type_tuple) && !mp_obj_is_type(oshape, &mp_type_list)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input argument must be an integer, a tuple, or a list"));
+    }
+    ndarray_obj_t *ndarray = NULL;
+    if(mp_obj_is_int(oshape)) {
+        size_t n = mp_obj_get_int(oshape);
+        ndarray = ndarray_new_linear_array(n, dtype);
+    } else if(mp_obj_is_type(oshape, &mp_type_tuple) || mp_obj_is_type(oshape, &mp_type_list)) {
+        // keep the full length: narrowing it to uint8_t before the check would let a sequence of
+        // e.g. 257 items pass as a single dimension, and the loop below would write past shape
+        size_t len = (size_t)mp_obj_get_int(mp_obj_len_maybe(oshape));
+        if(len > ULAB_MAX_DIMS) {
+            mp_raise_TypeError(MP_ERROR_TEXT("too many dimensions"));
+        }
+        size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+        size_t i = 0;
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t item, iterable = mp_getiter(oshape, &iter_buf);
+        while((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION){
+            // the axes are right-aligned in the ULAB_MAX_DIMS long shape array
+            shape[ULAB_MAX_DIMS - len + i] = (size_t)mp_obj_get_int(item);
+            i++;
+        }
+        ndarray = ndarray_new_dense_ndarray((uint8_t)len, shape, dtype);
+    }
+    if(value != mp_const_none) {
+        if(dtype == NDARRAY_BOOL) {
+            dtype = NDARRAY_UINT8; // Booleans are written through the uint8 setter, as 0 or 1
+            value = mp_obj_new_int(mp_obj_is_true(value) ? 1 : 0);
+        }
+        for(size_t i=0; i < ndarray->len; i++) {
+            #if ULAB_SUPPORTS_COMPLEX
+            if(dtype == NDARRAY_COMPLEX) {
+                ndarray_set_complex_value(ndarray->array, i, value);
+            } else {
+                ndarray_set_value(dtype, ndarray->array, i, value);
+            }
+            #else
+            ndarray_set_value(dtype, ndarray->array, i, value);
+            #endif
+        }
+    }
+    // if zeros calls the function, we don't have to do anything
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+#endif
+
+#if ULAB_NUMPY_HAS_ARANGE | ULAB_NUMPY_HAS_LINSPACE
+static ndarray_obj_t *create_linspace_arange(mp_float_t start, mp_float_t step, mp_float_t stop, size_t len, uint8_t dtype) {
+    mp_float_t value = start; // running value; NOTE(review): presumably also picked up by name inside ARANGE_LOOP (defined elsewhere) - confirm before renaming
+    // Fills a new linear array of len elements of the given dtype; the per-dtype work is done by ARANGE_LOOP.
+    ndarray_obj_t *ndarray = ndarray_new_linear_array(len, dtype);
+    if(ndarray->boolean == NDARRAY_BOOLEAN) {
+        uint8_t *array = (uint8_t *)ndarray->array;
+        for(size_t i=0; i < len; i++, value += step) {
+            *array++ = value == MICROPY_FLOAT_CONST(0.0) ? 0 : 1; // Boolean output: 0 only where the running value is exactly 0.0
+        }
+    } else if(dtype == NDARRAY_UINT8) {
+        ARANGE_LOOP(uint8_t, ndarray, len, step, stop);
+    } else if(dtype == NDARRAY_INT8) {
+        ARANGE_LOOP(int8_t, ndarray, len, step, stop);
+    } else if(dtype == NDARRAY_UINT16) {
+        ARANGE_LOOP(uint16_t, ndarray, len, step, stop);
+    } else if(dtype == NDARRAY_INT16) {
+        ARANGE_LOOP(int16_t, ndarray, len, step, stop);
+    } else { // every remaining dtype is filled as mp_float_t
+        ARANGE_LOOP(mp_float_t, ndarray, len, step, stop);
+    }
+    return ndarray;
+}
+#endif
+
+#if ULAB_NUMPY_HAS_ARANGE
+//| @overload
+//| def arange(stop: _float, *, dtype: _DType = ulab.numpy.float) -> ulab.numpy.ndarray: ...
+//| @overload
+//| def arange(start: _float, stop: _float, step: _float = 1, *, dtype: _DType = ulab.numpy.float) -> ulab.numpy.ndarray:
+//|     """
+//|     .. param: start
+//|       First value in the array, optional, defaults to 0
+//|     .. param: stop
+//|       End of the interval; the array stops before reaching this value
+//|     .. param: step
+//|       Difference between consecutive elements, optional, defaults to 1.0
+//|     .. param: dtype
+//|       Type of values in the array
+//|
+//|     Return a new 1-D array with elements ranging from ``start`` to ``stop``, with step size ``step``."""
+//|     ...
+//|
+
+mp_obj_t create_arange(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // the positional slots are unnamed, because their meaning depends on how many of them are given
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if((n_args < 1) || (n_args > 3)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong number of arguments"));
+    }
+    // arange(stop), arange(start, stop), or arange(start, stop, step)
+    mp_float_t start = MICROPY_FLOAT_CONST(0.0);
+    mp_float_t step = MICROPY_FLOAT_CONST(1.0);
+    mp_float_t stop;
+    if(n_args == 1) {
+        stop = mp_obj_get_float(args[0].u_obj);
+    } else {
+        start = mp_obj_get_float(args[0].u_obj);
+        stop = mp_obj_get_float(args[1].u_obj);
+        if(n_args == 3) {
+            step = mp_obj_get_float(args[2].u_obj);
+        }
+    }
+
+    // int16 is the default only if every positional value is an integer, and none exceeds 32768 in magnitude
+    uint8_t dtype = NDARRAY_INT16;
+    for(size_t i = 0; i < n_args; i++) {
+        if(!mp_obj_is_int(args[i].u_obj)) dtype = NDARRAY_FLOAT;
+    }
+    if((MICROPY_FLOAT_C_FUN(fabs)(start) > 32768) || (MICROPY_FLOAT_C_FUN(fabs)(stop) > 32768) || (MICROPY_FLOAT_C_FUN(fabs)(step) > 32768)) {
+        dtype = NDARRAY_FLOAT;
+    }
+    // an explicit dtype keyword overrides the guess
+    if(args[3].u_obj != mp_const_none) {
+        dtype = (uint8_t)mp_obj_get_int(args[3].u_obj);
+    }
+
+    // bail out, if the range cannot be constructed
+    if(step == MICROPY_FLOAT_CONST(0.0)) {
+        mp_raise_msg(&mp_type_ZeroDivisionError, MP_ERROR_TEXT("divide by zero"));
+    }
+    if(!isfinite(start) || !isfinite(stop) || !isfinite(step)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("arange: cannot compute length"));
+    }
+
+    // a step that does not lead from start towards stop gives an empty array
+    if((stop - start)/step <= 0) {
+        return MP_OBJ_FROM_PTR(ndarray_new_linear_array(0, dtype));
+    }
+    size_t len = (size_t)(MICROPY_FLOAT_C_FUN(ceil)((stop - start) / step));
+    return MP_OBJ_FROM_PTR(create_linspace_arange(start, step, start + (len - 1) * step, len, dtype));
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_arange_obj, 1, create_arange);
+#endif
+
+
+#if ULAB_NUMPY_HAS_ASARRAY
+mp_obj_t create_asarray(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // asarray(a, *, dtype=None): a is handed back as-is, whenever no conversion is called for
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t source = args[0].u_obj;
+    mp_obj_t wanted = args[1].u_obj;
+
+    // work out the requested dtype code; 0 means that the caller did not ask for any
+    uint8_t _dtype = 0;
+    #if ULAB_HAS_DTYPE_OBJECT
+    if(mp_obj_is_type(wanted, &ulab_dtype_type)) {
+        dtype_obj_t *dtype = MP_OBJ_TO_PTR(wanted);
+        _dtype = dtype->dtype;
+    } else
+    #endif
+    if(wanted != mp_const_none) {
+        // this must be an integer defined as a class constant (ulab.numpy.uint8 etc.)
+        _dtype = mp_obj_get_int(wanted);
+    }
+
+    // scalars pass through untouched
+    if(ulab_tools_mp_obj_is_scalar(source)) {
+        return source;
+    }
+
+    // an ndarray is converted only if a different dtype was requested
+    if(mp_obj_is_type(source, &ulab_ndarray_type)) {
+        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(source);
+        if((_dtype != 0) && (_dtype != ndarray->dtype)) {
+            return MP_OBJ_FROM_PTR(ndarray_copy_view_convert_type(ndarray, _dtype));
+        }
+        return source;
+    }
+
+    // everything else has to be array-like, and is converted with float as the default dtype
+    if(!ndarray_object_is_array_like(source)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong input type"));
+    }
+    if(_dtype == 0) {
+        _dtype = NDARRAY_FLOAT;
+    }
+    return MP_OBJ_FROM_PTR(ndarray_from_iterable(source, _dtype));
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_asarray_obj, 1, create_asarray);
+#endif
+
+#if ULAB_NUMPY_HAS_CONCATENATE
+//| def concatenate(arrays: Tuple[ulab.numpy.ndarray], *, axis: int = 0) -> ulab.numpy.ndarray:
+//|     """
+//|     .. param: arrays
+//|       tuple of ndarrays
+//|     .. param: axis
+//|       axis along which the arrays will be joined
+//|
+//|     Join a sequence of arrays along an existing axis."""
+//|     ...
+//|
+
+mp_obj_t create_concatenate(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // concatenate(arrays, *, axis=0): join a tuple of ndarrays along an existing axis.
+    //
+    // Raises TypeError, if the first argument is not a tuple, and ValueError, if
+    // the tuple is empty, contains anything other than ndarrays, the axis is out
+    // of range, or the dtypes/ranks/shapes of the arrays do not match.
+    // Returns a new dense ndarray holding copies of all input elements.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = 0 } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!mp_obj_is_type(args[0].u_obj, &mp_type_tuple)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a tuple of ndarrays"));
+    }
+    mp_obj_tuple_t *ndarrays = MP_OBJ_TO_PTR(args[0].u_obj);
+
+    // dtype and rank are taken from items[0], so an empty tuple must be rejected
+    if(ndarrays->len == 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("need at least one array to concatenate"));
+    }
+
+    // the tuple length is a size_t: an 8-bit counter would wrap beyond 255 entries
+    for(size_t i = 0; i < ndarrays->len; i++) {
+        if(!mp_obj_is_type(ndarrays->items[i], &ulab_ndarray_type)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("only ndarrays can be concatenated"));
+        }
+    }
+
+    // the first array is the reference for dtype, rank, and shape
+    ndarray_obj_t *_ndarray = MP_OBJ_TO_PTR(ndarrays->items[0]);
+    uint8_t dtype = _ndarray->dtype;
+    uint8_t ndim = _ndarray->ndim;
+
+    // validate the axis at full integer width, before narrowing it
+    mp_int_t _axis = args[1].u_int;
+    if(_axis < 0) {
+        _axis += ndim;
+    }
+    if((_axis < 0) || (_axis >= ndim)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("wrong axis specified"));
+    }
+    // shift axis: shapes and strides are stored right-aligned in ULAB_MAX_DIMS slots
+    uint8_t axis = (uint8_t)(ULAB_MAX_DIMS - ndim + _axis);
+
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    for(uint8_t j=0; j < ULAB_MAX_DIMS; j++) {
+        shape[j] = _ndarray->shape[j];
+    }
+
+    for(size_t i=1; i < ndarrays->len; i++) {
+        _ndarray = MP_OBJ_TO_PTR(ndarrays->items[i]);
+        // check, whether the arrays are compatible
+        if((dtype != _ndarray->dtype) || (ndim != _ndarray->ndim)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("input arrays are not compatible"));
+        }
+        for(uint8_t j=0; j < ULAB_MAX_DIMS; j++) {
+            if(j == axis) {
+                // lengths add up along the concatenation axis
+                shape[j] += _ndarray->shape[j];
+            } else {
+                if(shape[j] != _ndarray->shape[j]) {
+                    mp_raise_ValueError(MP_ERROR_TEXT("input arrays are not compatible"));
+                }
+            }
+        }
+    }
+
+    ndarray_obj_t *target = ndarray_new_dense_ndarray(ndim, shape, dtype);
+    // tpos marks the position in the target, where the next source begins
+    uint8_t *tpos = (uint8_t *)target->array;
+    uint8_t *tarray;
+
+    for(size_t p=0; p < ndarrays->len; p++) {
+        // reset the pointer along the axis
+        ndarray_obj_t *source = MP_OBJ_TO_PTR(ndarrays->items[p]);
+        if(source->len == 0) {
+            // the do-while loops below always copy at least one element,
+            // so an empty source has to be skipped explicitly
+            continue;
+        }
+        uint8_t *sarray = (uint8_t *)source->array;
+        tarray = tpos;
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        memcpy(tarray, sarray, source->itemsize);
+                        tarray += target->strides[ULAB_MAX_DIMS - 1];
+                        sarray += source->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    tarray -= target->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                    tarray += target->strides[ULAB_MAX_DIMS - 2];
+                    sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                    sarray += source->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                tarray -= target->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                tarray += target->strides[ULAB_MAX_DIMS - 3];
+                sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                sarray += source->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            tarray -= target->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+            tarray += target->strides[ULAB_MAX_DIMS - 4];
+            sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+            sarray += source->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+        #endif
+        if(p < ndarrays->len - 1) {
+            // advance the write position by the extent of this source along the axis
+            tpos += target->strides[axis] * source->shape[axis];
+        }
+    }
+    return MP_OBJ_FROM_PTR(target);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_concatenate_obj, 1, create_concatenate);
+#endif
+
+#if ULAB_MAX_DIMS > 1
+#if ULAB_NUMPY_HAS_DIAG
+//| def diag(a: ulab.numpy.ndarray, *, k: int = 0) -> ulab.numpy.ndarray:
+//|     """
+//|     .. param: a
+//|       an ndarray
+//|     .. param: k
+//|       Offset of the diagonal from the main diagonal. Can be positive or negative.
+//|
+//|     Return specified diagonals."""
+//|     ...
+//|
+
+mp_obj_t create_diag(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // diag(a, *, k=0)
+    //
+    // For a 2-D input, return the k-th diagonal as a new 1-D array; for a 1-D
+    // input, return a new square 2-D array with the input on the k-th diagonal.
+    // Positive k selects a diagonal above the main one, negative k one below it.
+    // Raises ValueError for inputs of higher rank (when ULAB_MAX_DIMS > 2).
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_k, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = 0 } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    ndarray_obj_t *source = ndarray_from_iterable(args[0].u_obj, NDARRAY_FLOAT);
+    ndarray_obj_t *target = NULL;
+
+    int32_t k = args[1].u_int;
+    size_t k_abs = k >= 0 ? (size_t)k : (size_t)(-k);
+    if(source->ndim == 2) { // return the diagonal
+        size_t rows = source->shape[ULAB_MAX_DIMS - 2];
+        size_t cols = source->shape[ULAB_MAX_DIMS - 1];
+        size_t len;
+        // The diagonal ends as soon as it leaves the matrix through either
+        // edge, so its length is limited by both dimensions. Without the
+        // second limit, the copy loop runs past the end of a non-square input.
+        if(k >= 0) {
+            len = (k_abs <= cols) ? cols - k_abs : 0;
+            if(len > rows) {
+                len = rows;
+            }
+        } else {
+            len = (k_abs <= rows) ? rows - k_abs : 0;
+            if(len > cols) {
+                len = cols;
+            }
+        }
+        target = ndarray_new_linear_array(len, source->dtype);
+
+        if(len == 0) {
+            return MP_OBJ_FROM_PTR(target);
+        }
+
+        uint8_t *sarray = (uint8_t *)source->array;
+        uint8_t *tarray = (uint8_t *)target->array;
+        // move to the first element of the requested diagonal
+        if(k >= 0) {
+            sarray += source->strides[ULAB_MAX_DIMS - 1] * k;
+        } else {
+            sarray += source->strides[ULAB_MAX_DIMS - 2] * k_abs;
+        }
+        for(size_t i=0; i < len; i++) {
+            memcpy(tarray, sarray, source->itemsize);
+            // one step down and one step to the right
+            sarray += (source->strides[ULAB_MAX_DIMS - 1] + source->strides[ULAB_MAX_DIMS - 2]);
+            tarray += target->itemsize;
+        }
+    } else if(source->ndim == 1) { // return a rank-2 tensor with the prescribed diagonal
+        size_t len = source->len + k_abs;
+        target = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, len, len), source->dtype);
+        uint8_t *sarray = (uint8_t *)source->array;
+        uint8_t *tarray = (uint8_t *)target->array;
+
+        if(k < 0) {
+            // start k_abs rows below the top-left corner
+            tarray += len * k_abs * target->itemsize;
+        } else {
+            // start k_abs columns to the right of the top-left corner
+            tarray += k_abs * target->itemsize;
+        }
+        for(size_t i = 0; i < source->len; i++) {
+            memcpy(tarray, sarray, source->itemsize);
+            sarray += source->strides[ULAB_MAX_DIMS - 1];
+            // a full row plus one element: the next position on the diagonal
+            tarray += (len + 1) * target->itemsize;
+        }
+    }
+    #if ULAB_MAX_DIMS > 2
+    else {
+        mp_raise_ValueError(MP_ERROR_TEXT("input must be 1- or 2-d"));
+    }
+    #endif
+
+    return MP_OBJ_FROM_PTR(target);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_diag_obj, 1, create_diag);
+#endif /* ULAB_NUMPY_HAS_DIAG */
+
+#if ULAB_NUMPY_HAS_EMPTY
+// This function is bound in numpy.c to numpy.zeros(), and is simply an alias for that
+
+//| def empty(shape: Union[int, Tuple[int, ...]], *, dtype: _DType = ulab.numpy.float) -> ulab.numpy.ndarray:
+//|    """
+//|    .. param: shape
+//|       Shape of the array, either an integer (for a 1-D array) or a tuple of 2 integers (for a 2-D array)
+//|    .. param: dtype
+//|       Type of values in the array
+//|
+//|    Return a new array of the given shape with all elements set to 0. An alias for numpy.zeros."""
+//|    ...
+//|
+#endif
+
+#if ULAB_NUMPY_HAS_EYE
+//| def eye(size: int, *, M: Optional[int] = None, k: int = 0, dtype: _DType = ulab.numpy.float) -> ulab.numpy.ndarray:
+//|     """Return a new 2-D array with ``size`` rows and ``M`` columns (``M`` defaults to ``size``),
+//|        with the diagonal elements set to 1 and the other elements set to 0.
+//|        If k is given, the diagonal is shifted by the specified amount."""
+//|     ...
+//|
+
+mp_obj_t create_eye(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // eye(size, *, M=None, k=0, dtype=float)
+    //
+    // Return a new size x M array (M defaults to size) that is zero everywhere,
+    // except on the k-th diagonal, which is set to 1. Positive k shifts the
+    // diagonal to the right, negative k shifts it downwards.
+    // Raises ValueError, if size or M is negative.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_INT, { .u_int = 0 } },
+        { MP_QSTR_M, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_k, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = 0 } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_int_t rows = args[0].u_int;
+    mp_int_t cols = (args[1].u_obj == mp_const_none) ? rows : mp_obj_get_int(args[1].u_obj);
+    // a negative value would wrap around to a huge size_t below
+    if((rows < 0) || (cols < 0)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("negative dimensions are not allowed"));
+    }
+    size_t n = (size_t)rows, m = (size_t)cols;
+
+    mp_int_t offset = args[2].u_int;
+    size_t k = offset > 0 ? (size_t)offset : (size_t)(-offset);
+    uint8_t dtype = args[3].u_int;
+
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, n, m), dtype);
+    if(dtype == NDARRAY_BOOL) {
+        // the array was created as boolean above; the ones are written as uint8
+        dtype = NDARRAY_UINT8;
+    }
+    mp_obj_t one = mp_obj_new_int(1);
+
+    // The diagonal starts in the first row for k >= 0, and in the first column
+    // for k < 0. Both indices must be bounded: with only one of them checked,
+    // a non-square array is written beyond its last row or column.
+    size_t row = (offset < 0) ? k : 0;
+    size_t col = (offset < 0) ? 0 : k;
+    for(; (row < n) && (col < m); row++, col++) {
+        ndarray_set_value(dtype, ndarray->array, row * m + col, one);
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_eye_obj, 1, create_eye);
+#endif /* ULAB_NUMPY_HAS_EYE */
+#endif /* ULAB_MAX_DIMS > 1 */
+
+#if ULAB_NUMPY_HAS_FULL
+//| def full(shape: Union[int, Tuple[int, ...]], fill_value: Union[_float, _bool], *, dtype: _DType = ulab.numpy.float) -> ulab.numpy.ndarray:
+//|    """
+//|    .. param: shape
+//|       Shape of the array, either an integer (for a 1-D array) or a tuple of integers (for tensors of higher rank)
+//|    .. param: fill_value
+//|       scalar, the value with which the array is filled
+//|    .. param: dtype
+//|       Type of values in the array
+//|
+//|    Return a new array of the given shape with all elements set to ``fill_value``."""
+//|    ...
+//|
+
+mp_obj_t create_full(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // full(shape, fill_value, *, dtype=float): argument parsing only, the array
+    // itself is produced by create_zeros_ones_full.
+    enum { ARG_shape, ARG_fill_value, ARG_dtype };
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_obj = MP_OBJ_NULL } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_obj = MP_OBJ_NULL } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    // the type code is narrowed to 8 bits before it is handed on
+    uint8_t typecode = parsed[ARG_dtype].u_int;
+    mp_obj_t shape = parsed[ARG_shape].u_obj;
+    mp_obj_t fill_value = parsed[ARG_fill_value].u_obj;
+
+    return create_zeros_ones_full(shape, typecode, fill_value);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_full_obj, 0, create_full);
+#endif
+
+
+#if ULAB_NUMPY_HAS_LINSPACE
+//| def linspace(
+//|     start: _float,
+//|     stop: _float,
+//|     *,
+//|     dtype: _DType = ulab.numpy.float,
+//|     num: int = 50,
+//|     endpoint: _bool = True,
+//|     retstep: _bool = False
+//| ) -> ulab.numpy.ndarray:
+//|     """
+//|     .. param: start
+//|       First value in the array
+//|     .. param: stop
+//|       Final value in the array
+//|     .. param int: num
+//|       Count of values in the array.
+//|     .. param: dtype
+//|       Type of values in the array
+//|     .. param bool: endpoint
+//|       Whether the ``stop`` value is included.  Note that even when
+//|       endpoint=True, the exact ``stop`` value may not be included due to the
+//|       inaccuracy of floating point arithmetic.
+//|     .. param bool: retstep
+//|       If True, return (`samples`, `step`), where `step` is the spacing between samples.
+//|
+//|     Return a new 1-D array with ``num`` elements ranging from ``start`` to ``stop`` linearly."""
+//|     ...
+//|
+
+mp_obj_t create_linspace(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // linspace(start, stop, num=50, *, endpoint=True, retstep=False, dtype=float)
+    //
+    // Return a 1-D array of num evenly spaced values beginning at start. If
+    // endpoint is true, the last value is stop, otherwise stop is excluded.
+    // If retstep is true, a (samples, step) tuple is returned instead.
+    // If either limit is complex (and complex support is compiled in), the
+    // result is a complex array, and dtype is not consulted.
+    // Raises ValueError, if num is smaller than 2.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_num, MP_ARG_INT, { .u_int = 50 } },
+        { MP_QSTR_endpoint, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_TRUE } },
+        { MP_QSTR_retstep, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_FALSE } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(args[2].u_int < 2) {
+        mp_raise_ValueError(MP_ERROR_TEXT("number of points must be at least 2"));
+    }
+    size_t len = (size_t)args[2].u_int;
+    // Evaluate the flags by truthiness: an identity comparison with True/False
+    // would misread endpoint=1 as "exclude the endpoint", and retstep=0 as
+    // "return the step".
+    bool endpoint = mp_obj_is_true(args[3].u_obj);
+    bool retstep = mp_obj_is_true(args[4].u_obj);
+    mp_float_t start, step, stop;
+
+    ndarray_obj_t *ndarray = NULL;
+
+    #if ULAB_SUPPORTS_COMPLEX
+    mp_float_t step_real, step_imag;
+    bool complex_out = false;
+
+    if(mp_obj_is_type(args[0].u_obj, &mp_type_complex) || mp_obj_is_type(args[1].u_obj, &mp_type_complex)) {
+        complex_out = true;
+        ndarray = ndarray_new_linear_array(len, NDARRAY_COMPLEX);
+        mp_float_t *array = (mp_float_t *)ndarray->array;
+        mp_float_t start_real, start_imag;
+        mp_float_t stop_real, stop_imag;
+
+        mp_obj_get_complex(args[0].u_obj, &start_real, &start_imag);
+        mp_obj_get_complex(args[1].u_obj, &stop_real, &stop_imag);
+        // len >= 2 is guaranteed above, so neither divisor can be zero
+        if(endpoint) {
+            step_real = (stop_real - start_real) / (len - 1);
+            step_imag = (stop_imag - start_imag) / (len - 1);
+        } else {
+            step_real = (stop_real - start_real) / len;
+            step_imag = (stop_imag - start_imag) / len;
+        }
+
+        // each element is stored as a (real, imaginary) pair of floats
+        for(size_t i = 0; i < len; i++) {
+            *array++ = start_real;
+            *array++ = start_imag;
+            start_real += step_real;
+            start_imag += step_imag;
+        }
+    } else {
+    #endif
+        start = mp_obj_get_float(args[0].u_obj);
+        stop = mp_obj_get_float(args[1].u_obj);
+
+        uint8_t typecode = args[5].u_int;
+
+        if(endpoint) {
+            step = (stop - start) / (len - 1);
+        } else {
+            step = (stop - start) / len;
+            // the last generated sample lies one step before the excluded stop
+            stop = start + step * (len - 1);
+        }
+
+        ndarray = create_linspace_arange(start, step, stop, len, typecode);
+    #if ULAB_SUPPORTS_COMPLEX
+    }
+    #endif
+
+    if(!retstep) {
+        return MP_OBJ_FROM_PTR(ndarray);
+    } else {
+        mp_obj_t tuple[2];
+        tuple[0] = MP_OBJ_FROM_PTR(ndarray);
+        #if ULAB_SUPPORTS_COMPLEX
+        if(complex_out) {
+            tuple[1] = mp_obj_new_complex(step_real, step_imag);
+        } else {
+            tuple[1] = mp_obj_new_float(step);
+        }
+        #else /* ULAB_SUPPORTS_COMPLEX */
+        tuple[1] = mp_obj_new_float(step);
+        #endif
+
+        return mp_obj_new_tuple(2, tuple);
+    }
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_linspace_obj, 2, create_linspace);
+#endif
+
+#if ULAB_NUMPY_HAS_LOGSPACE
+//| def logspace(
+//|     start: _float,
+//|     stop: _float,
+//|     *,
+//|     dtype: _DType = ulab.numpy.float,
+//|     num: int = 50,
+//|     endpoint: _bool = True,
+//|     base: _float = 10.0
+//| ) -> ulab.numpy.ndarray:
+//|     """
+//|     .. param: start
+//|       First value in the array
+//|     .. param: stop
+//|       Final value in the array
+//|     .. param int: num
+//|       Count of values in the array. Defaults to 50.
+//|     .. param: base
+//|       The base of the log space. The step size between the elements in
+//|       ``ln(samples) / ln(base)`` (or ``log_base(samples)``) is uniform. Defaults to 10.0.
+//|     .. param: dtype
+//|       Type of values in the array
+//|     .. param bool: endpoint
+//|       Whether the ``stop`` value is included.  Note that even when
+//|       endpoint=True, the exact ``stop`` value may not be included due to the
+//|       inaccuracy of floating point arithmetic. Defaults to True.
+//|
+//|     Return a new 1-D array with ``num`` evenly spaced elements on a log scale.
+//|     The sequence starts at ``base ** start``, and ends with ``base ** stop``."""
+//|     ...
+//|
+
+ULAB_DEFINE_FLOAT_CONST(const_ten, MICROPY_FLOAT_CONST(10.0), 0x41200000UL, 0x4024000000000000ULL);
+
+mp_obj_t create_logspace(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // logspace(start, stop, num=50, *, base=10.0, endpoint=True, dtype=float)
+    //
+    // Return a 1-D array of num values forming a geometric sequence that begins
+    // at base ** start. If endpoint is true, the exponent of the last value is
+    // stop, otherwise stop is excluded.
+    // Raises ValueError, if num is smaller than 2.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_num, MP_ARG_INT, { .u_int = 50 } },
+        { MP_QSTR_base, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(const_ten) } },
+        { MP_QSTR_endpoint, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_TRUE } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(args[2].u_int < 2) {
+        mp_raise_ValueError(MP_ERROR_TEXT("number of points must be at least 2"));
+    }
+    size_t len = (size_t)args[2].u_int;
+    mp_float_t start, step, quotient;
+    start = mp_obj_get_float(args[0].u_obj);
+    uint8_t dtype = args[5].u_int;
+    mp_float_t base = mp_obj_get_float(args[3].u_obj);
+    mp_float_t stop = mp_obj_get_float(args[1].u_obj);
+    // Evaluate endpoint by truthiness: an identity comparison with True would
+    // treat endpoint=1 as "exclude the endpoint".
+    // len >= 2 is guaranteed above, so the divisor cannot be zero.
+    if(mp_obj_is_true(args[4].u_obj)) {
+        step = (stop - start) / (len - 1);
+    } else {
+        step = (stop - start) / len;
+    }
+    // consecutive samples differ by the constant factor base ** step
+    quotient = MICROPY_FLOAT_C_FUN(pow)(base, step);
+    ndarray_obj_t *ndarray = ndarray_new_linear_array(len, dtype);
+
+    mp_float_t value = MICROPY_FLOAT_C_FUN(pow)(base, start);
+    if(ndarray->dtype == NDARRAY_UINT8) {
+        uint8_t *array = (uint8_t *)ndarray->array;
+        if(ndarray->boolean) {
+            // every element of a boolean result is set to 1
+            memset(array, 1, len);
+        } else {
+            for(size_t i=0; i < len; i++, value *= quotient) *array++ = (uint8_t)value;
+        }
+    } else if(ndarray->dtype == NDARRAY_INT8) {
+        int8_t *array = (int8_t *)ndarray->array;
+        for(size_t i=0; i < len; i++, value *= quotient) *array++ = (int8_t)value;
+    } else if(ndarray->dtype == NDARRAY_UINT16) {
+        uint16_t *array = (uint16_t *)ndarray->array;
+        for(size_t i=0; i < len; i++, value *= quotient) *array++ = (uint16_t)value;
+    } else if(ndarray->dtype == NDARRAY_INT16) {
+        int16_t *array = (int16_t *)ndarray->array;
+        for(size_t i=0; i < len; i++, value *= quotient) *array++ = (int16_t)value;
+    } else {
+        // every remaining dtype is filled as an array of floats
+        mp_float_t *array = (mp_float_t *)ndarray->array;
+        for(size_t i=0; i < len; i++, value *= quotient) *array++ = value;
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_logspace_obj, 2, create_logspace);
+#endif
+
+#if ULAB_NUMPY_HAS_ONES
+//| def ones(shape: Union[int, Tuple[int, ...]], *, dtype: _DType = ulab.numpy.float) -> ulab.numpy.ndarray:
+//|    """
+//|    .. param: shape
+//|       Shape of the array, either an integer (for a 1-D array) or a tuple of 2 integers (for a 2-D array)
+//|    .. param: dtype
+//|       Type of values in the array
+//|
+//|    Return a new array of the given shape with all elements set to 1."""
+//|    ...
+//|
+
+mp_obj_t create_ones(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // ones(shape, *, dtype=float): argument parsing only, the array itself is
+    // produced by create_zeros_ones_full with the integer 1 as the fill value.
+    enum { ARG_shape, ARG_dtype };
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_obj = MP_OBJ_NULL } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    // the type code is narrowed to 8 bits before it is handed on
+    uint8_t typecode = parsed[ARG_dtype].u_int;
+    mp_obj_t fill_value = mp_obj_new_int(1);
+
+    return create_zeros_ones_full(parsed[ARG_shape].u_obj, typecode, fill_value);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_ones_obj, 0, create_ones);
+#endif
+
+#if ULAB_NUMPY_HAS_TAKE
+//| def take(
+//|    a: ulab.numpy.ndarray,
+//|    indices: _ArrayLike,
+//|    axis: Optional[int] = None,
+//|    out: Optional[ulab.numpy.ndarray] = None,
+//|    mode: Optional[str] = None) -> ulab.numpy.ndarray:
+//|    """
+//|    .. param: a
+//|       The source array.
+//|    .. param: indices
+//|       The indices of the values to extract.
+//|    .. param: axis
+//|       The axis over which to select values. By default, the flattened input array is used.
+//|    .. param: out
+//|       If provided, the result will be placed in this array. It should be of the appropriate shape and dtype.
+//|    .. param: mode
+//|       Specifies how out-of-bounds indices will behave.
+//|       - `raise`: raise an error (default)
+//|       - `wrap`: wrap around
+//|       - `clip`: clip to the range
+//|       `clip` mode means that all indices that are too large are replaced by the 
+//|       index that addresses the last element along that axis. Note that this disables 
+//|       indexing with negative numbers.
+//|    
+//|    Return a new array."""
+//|    ...
+//|
+
+// How create_take treats an index that falls outside of the selected axis
+enum CREATE_TAKE_MODE {
+    // negative indices count from the end; anything still out of range raises a ValueError
+    CREATE_TAKE_RAISE,
+    // the index is reduced modulo the length of the axis
+    CREATE_TAKE_WRAP,
+    // negative indices are rejected; too large indices are replaced by the last valid index
+    CREATE_TAKE_CLIP,
+};
+
+mp_obj_t create_take(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // take(a, indices, *, axis=None, out=None, mode=None)
+    //
+    // Copy the elements of a that are selected by indices into a new array (or
+    // into out, if supplied). With axis=None the indices address the flattened
+    // array, and the result is 1-D; otherwise whole sub-arrays are picked along
+    // the given axis. mode is one of "raise" (default), "wrap" or "clip".
+    //
+    // Raises TypeError, if a is not an ndarray, or indices has no length, and
+    // ValueError for a bad axis, an unknown mode string, or an index that is
+    // not acceptable in the selected mode.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_obj = MP_OBJ_NULL } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_obj = MP_OBJ_NULL } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_out, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_mode, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input is not an array"));
+    }
+
+    ndarray_obj_t *a = MP_OBJ_TO_PTR(args[0].u_obj);
+    int8_t axis_index = 0;
+    int32_t axis_len;
+    uint8_t mode = CREATE_TAKE_RAISE;
+    uint8_t ndim;
+
+    // axis keyword argument
+    if(args[2].u_obj == mp_const_none) {
+        // work with the flattened array
+        axis_len = a->len;
+        ndim = 1;
+    } else { // i.e., axis is an integer
+        // TODO: this pops up at quite a few places, write it as a function
+        // validate at full integer width, before narrowing to the 8-bit slot index
+        mp_int_t axis = mp_obj_get_int(args[2].u_obj);
+        ndim = a->ndim;
+        if(axis < 0) axis += a->ndim;
+        if((axis < 0) || (axis > a->ndim - 1)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("index out of range"));
+        }
+        // shapes and strides are stored right-aligned in ULAB_MAX_DIMS slots
+        axis_index = (int8_t)(ULAB_MAX_DIMS - a->ndim + axis);
+        axis_len = (int32_t)a->shape[axis_index];
+    }
+
+    // mode keyword argument
+    if(mp_obj_is_str(args[4].u_obj)) {
+        size_t mode_len;
+        const char *_mode = mp_obj_str_get_data(args[4].u_obj, &mode_len);
+        // compare the length first, so that memcmp never reads beyond a short
+        // string, and a mere prefix match ("raiseX") is not accepted
+        if((mode_len == 5) && (memcmp(_mode, "raise", 5) == 0)) {
+            mode = CREATE_TAKE_RAISE;
+        } else if((mode_len == 4) && (memcmp(_mode, "wrap", 4) == 0)) {
+            mode = CREATE_TAKE_WRAP;
+        } else if((mode_len == 4) && (memcmp(_mode, "clip", 4) == 0)) {
+            mode = CREATE_TAKE_CLIP;
+        } else {
+            mp_raise_ValueError(MP_ERROR_TEXT("mode should be raise, wrap or clip"));
+        }
+    }
+
+    // mp_obj_len raises a TypeError for objects without a length, whereas
+    // mp_obj_len_maybe would hand a null object to mp_obj_get_int
+    size_t indices_len = (size_t)mp_obj_get_int(mp_obj_len(args[1].u_obj));
+
+    // Nothing can be taken from an empty axis: wrapping would divide by zero,
+    // and clipping would produce the index -1.
+    if((indices_len > 0) && (axis_len == 0)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("index out of range"));
+    }
+
+    // zero-initialised, so that no slot is left undefined, should the iterator
+    // yield fewer items than the reported length
+    size_t *indices = m_new0(size_t, indices_len);
+
+    mp_obj_iter_buf_t buf;
+    mp_obj_t item, iterable = mp_getiter(args[1].u_obj, &buf);
+
+    size_t z = 0;
+    // never store more items than were allocated above
+    while((z < indices_len) && ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION)) {
+        int32_t index = mp_obj_get_int(item);
+        if(mode == CREATE_TAKE_RAISE) {
+            if(index < 0) {
+                index += axis_len;
+            }
+            if((index < 0) || (index > axis_len - 1)) {
+                m_del(size_t, indices, indices_len);
+                mp_raise_ValueError(MP_ERROR_TEXT("index out of range"));
+            }
+        } else if(mode == CREATE_TAKE_WRAP) {
+            index %= axis_len;
+            if(index < 0) {
+                // the C remainder keeps the sign of the dividend
+                index += axis_len;
+            }
+        } else { // mode == CREATE_TAKE_CLIP
+            if(index < 0) {
+                m_del(size_t, indices, indices_len);
+                mp_raise_ValueError(MP_ERROR_TEXT("index must not be negative"));
+            }
+            if(index > axis_len - 1) {
+                index = axis_len - 1;
+            }
+        }
+        indices[z++] = (size_t)index;
+    }
+
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    if(args[2].u_obj == mp_const_none) { // flattened array
+        shape[ULAB_MAX_DIMS - 1] = indices_len;
+    } else {
+        // same shape as the input, except along the axis of the selection
+        for(uint8_t i = 0; i < ULAB_MAX_DIMS; i++) {
+            shape[i] = a->shape[i];
+            if(i == axis_index) {
+                shape[i] = indices_len;
+            }
+        }
+    }
+
+    ndarray_obj_t *out = NULL;
+    if(args[3].u_obj == mp_const_none) {
+        // no output was supplied
+        out = ndarray_new_dense_ndarray(ndim, shape, a->dtype);
+    } else {
+        // TODO: deal with last argument being false!
+        out = ulab_tools_inspect_out(args[3].u_obj, a->dtype, ndim, shape, true);
+    }
+
+    #if ULAB_MAX_DIMS > 1 // we can save the hassle, if there is only one possible dimension
+    if((args[2].u_obj == mp_const_none) || (a->ndim == 1)) { // flattened array
+    #endif
+        uint8_t *out_array = (uint8_t *)out->array;
+        for(size_t x = 0; x < indices_len; x++) {
+            uint8_t *a_array = (uint8_t *)a->array;
+            // Unravel the flat index, beginning with the fastest-running (last)
+            // axis: the remainder of the division is the coordinate along that
+            // axis, the quotient is carried over to the next axis to the left.
+            size_t remainder = indices[x];
+            for(uint8_t q = ULAB_MAX_DIMS; q > ULAB_MAX_DIMS - a->ndim; q--) {
+                size_t coordinate = remainder % a->shape[q - 1];
+                remainder /= a->shape[q - 1];
+                a_array += (int32_t)coordinate * a->strides[q - 1];
+            }
+            // NOTE: for floats and complexes, this might be
+            // better with memcpy(out_array, a_array, a->itemsize)
+            for(uint8_t p = 0; p < a->itemsize; p++) {
+                out_array[p] = a_array[p];
+            }
+            out_array += a->itemsize;
+        }
+    #if ULAB_MAX_DIMS > 1
+    } else {
+        // move the axis shape/stride to the leftmost position:
+        SWAP(size_t, a->shape[0], a->shape[axis_index]);
+        SWAP(size_t, out->shape[0], out->shape[axis_index]);
+        SWAP(int32_t, a->strides[0], a->strides[axis_index]);
+        SWAP(int32_t, out->strides[0], out->strides[axis_index]);
+
+        for(size_t x = 0; x < indices_len; x++) {
+            uint8_t *a_array = (uint8_t *)a->array;
+            uint8_t *out_array = (uint8_t *)out->array;
+            // position both pointers on the sub-array that belongs to this index
+            a_array += indices[x] * a->strides[0];
+            out_array += x * out->strides[0];
+
+            #if ULAB_MAX_DIMS > 3
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 2
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        // NOTE: for floats and complexes, this might be
+                        // better with memcpy(out_array, a_array, a->itemsize)
+                        for(uint8_t p = 0; p < a->itemsize; p++) {
+                            out_array[p] = a_array[p];
+                        }
+                        out_array += out->strides[ULAB_MAX_DIMS - 1];
+                        a_array += a->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < a->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 2
+                    out_array -= out->strides[ULAB_MAX_DIMS - 1] * out->shape[ULAB_MAX_DIMS - 1];
+                    out_array += out->strides[ULAB_MAX_DIMS - 2];
+                    a_array -= a->strides[ULAB_MAX_DIMS - 1] * a->shape[ULAB_MAX_DIMS - 1];
+                    a_array += a->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < a->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 3
+                out_array -= out->strides[ULAB_MAX_DIMS - 2] * out->shape[ULAB_MAX_DIMS - 2];
+                out_array += out->strides[ULAB_MAX_DIMS - 3];
+                a_array -= a->strides[ULAB_MAX_DIMS - 2] * a->shape[ULAB_MAX_DIMS - 2];
+                a_array += a->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < a->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        }
+
+        // revert back to the original order
+        SWAP(size_t, a->shape[0], a->shape[axis_index]);
+        SWAP(size_t, out->shape[0], out->shape[axis_index]);
+        SWAP(int32_t, a->strides[0], a->strides[axis_index]);
+        SWAP(int32_t, out->strides[0], out->strides[axis_index]);
+    }
+    #endif /* ULAB_MAX_DIMS > 1 */
+    m_del(size_t, indices, indices_len);
+    return MP_OBJ_FROM_PTR(out);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_take_obj, 2, create_take);
+#endif /* ULAB_NUMPY_HAS_TAKE */
+
+#if ULAB_NUMPY_HAS_ZEROS
+//| def zeros(shape: Union[int, Tuple[int, ...]], *, dtype: _DType = ulab.numpy.float) -> ulab.numpy.ndarray:
+//|    """
+//|    .. param: shape
+//|       Shape of the array, either an integer (for a 1-D array) or a tuple of 2 integers (for a 2-D array)
+//|    .. param: dtype
+//|       Type of values in the array
+//|
+//|    Return a new array of the given shape with all elements set to 0."""
+//|    ...
+//|
+
+mp_obj_t create_zeros(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // zeros(shape, *, dtype=float): argument parsing only, the array itself is
+    // produced by create_zeros_ones_full, which receives None as the fill value.
+    enum { ARG_shape, ARG_dtype };
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_obj = MP_OBJ_NULL } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    // the type code is narrowed to 8 bits before it is handed on
+    uint8_t typecode = parsed[ARG_dtype].u_int;
+
+    return create_zeros_ones_full(parsed[ARG_shape].u_obj, typecode, mp_const_none);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_zeros_obj, 0, create_zeros);
+#endif
+
+#if ULAB_NUMPY_HAS_FROMBUFFER
+mp_obj_t create_frombuffer(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // frombuffer(buffer, *, dtype=float, count=-1, offset=0)
+    //
+    // Build a 1-D array on top of the memory of an object that supports the
+    // buffer protocol: the data pointer of the buffer (advanced by offset bytes)
+    // is handed to ndarray_new_ndarray. A positive count limits the number of
+    // elements. Returns None, if the first argument does not supply a buffer.
+    // Raises ValueError, if offset lies beyond the buffer, the remaining bytes
+    // are not a whole number of elements, or count exceeds what is available.
+    enum { ARG_buffer, ARG_dtype, ARG_count, ARG_offset };
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_INT(NDARRAY_FLOAT) } },
+        { MP_QSTR_count, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_INT(-1) } },
+        { MP_QSTR_offset, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_INT(0) } },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    uint8_t typecode = mp_obj_get_int(parsed[ARG_dtype].u_obj);
+    // a negative offset becomes a huge unsigned number, and fails the range check below
+    size_t skip = mp_obj_get_int(parsed[ARG_offset].u_obj);
+
+    mp_buffer_info_t view;
+    if(!mp_get_buffer(parsed[ARG_buffer].u_obj, &view, MP_BUFFER_READ)) {
+        return mp_const_none;
+    }
+
+    size_t itemsize = ulab_binary_get_size(typecode);
+
+    if(skip > view.len) {
+        mp_raise_ValueError(MP_ERROR_TEXT("offset must be non-negative and no greater than buffer length"));
+    }
+    size_t available = view.len - skip;
+    size_t items = available / itemsize;
+    if(items * itemsize != available) {
+        mp_raise_ValueError(MP_ERROR_TEXT("buffer size must be a multiple of element size"));
+    }
+
+    // zero and negative counts mean "everything that is available"
+    mp_int_t requested = mp_obj_get_int(parsed[ARG_count].u_obj);
+    if(requested > 0) {
+        if(items < (size_t)requested) {
+            mp_raise_ValueError(MP_ERROR_TEXT("buffer is smaller than requested size"));
+        }
+        items = (size_t)requested;
+    }
+
+    uint8_t *bytes = view.buf;
+    return ndarray_new_ndarray(1, ndarray_shape_vector(0, 0, 0, items), NULL, typecode, bytes + skip);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(create_frombuffer_obj, 1, create_frombuffer);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/create.h b/tulip/shared/ulab/code/numpy/create.h
new file mode 100644
index 000000000..ffa7a4406
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/create.h
@@ -0,0 +1,89 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2019-2021 Zoltán Vörös
+*/
+
+#ifndef _CREATE_
+#define _CREATE_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+#if ULAB_NUMPY_HAS_ARANGE
+mp_obj_t create_arange(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_arange_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_ASARRAY
+mp_obj_t create_asarray(size_t , const mp_obj_t *, mp_map_t *); // prototype of the function behind create_asarray_obj
+MP_DECLARE_CONST_FUN_OBJ_KW(create_asarray_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_CONCATENATE
+mp_obj_t create_concatenate(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_concatenate_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_DIAG
+mp_obj_t create_diag(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_diag_obj);
+#endif
+
+#if ULAB_MAX_DIMS > 1
+#if ULAB_NUMPY_HAS_EYE
+mp_obj_t create_eye(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_eye_obj);
+#endif
+#endif
+
+#if ULAB_NUMPY_HAS_FULL
+mp_obj_t create_full(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_full_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_LINSPACE
+mp_obj_t create_linspace(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_linspace_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_LOGSPACE
+mp_obj_t create_logspace(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_logspace_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_ONES
+mp_obj_t create_ones(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_ones_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_TAKE
+mp_obj_t create_take(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_take_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_ZEROS
+mp_obj_t create_zeros(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_zeros_obj);
+#endif
+
+#if ULAB_NUMPY_HAS_FROMBUFFER
+mp_obj_t create_frombuffer(size_t , const mp_obj_t *, mp_map_t *);
+MP_DECLARE_CONST_FUN_OBJ_KW(create_frombuffer_obj);
+#endif
+
+#define ARANGE_LOOP(type_, ndarray, len, step, stop) \
+({\
+    type_ *array = (type_ *)(ndarray)->array;\
+    for (size_t i = 0; i < (len) - 1; i++, (value) += (step)) {\
+        *array++ = (type_)(value);\
+    }\
+    *array = (type_)(stop);\
+})
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/fft/fft.c b/tulip/shared/ulab/code/numpy/fft/fft.c
new file mode 100644
index 000000000..d4cab9e5b
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/fft/fft.c
@@ -0,0 +1,105 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2024 Zoltán Vörös
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/runtime.h"
+#include "py/builtin.h"
+#include "py/binary.h"
+#include "py/obj.h"
+#include "py/objarray.h"
+
+#include "../carray/carray_tools.h"
+#include "fft.h"
+
+//| """Frequency-domain functions"""
+//|
+//| import ulab.numpy
+//| import ulab.utils
+
+
+//| def fft(r: ulab.numpy.ndarray, c: Optional[ulab.numpy.ndarray] = None) -> Tuple[ulab.numpy.ndarray, ulab.numpy.ndarray]:
+//|     """
+//|     :param ulab.numpy.ndarray r: A 1-dimension array of values whose size is a power of 2
+//|     :param ulab.numpy.ndarray c: An optional 1-dimension array of values whose size is a power of 2, giving the imaginary part of the value
+//|     :return tuple (r, c): The real and imaginary parts of the FFT
+//|
+//|     Perform a Fast Fourier Transform from the time domain into the frequency domain
+//|
+//|     See also `ulab.utils.spectrogram`, which computes the magnitude of the fft,
+//|     rather than separately returning its real and imaginary parts."""
+//|     ...
+//|
+#if ULAB_SUPPORTS_COMPLEX & ULAB_FFT_IS_NUMPY_COMPATIBLE
+static mp_obj_t fft_fft(mp_obj_t arg) {
+    return fft_fft_ifft(arg, FFT_FFT);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(fft_fft_obj, fft_fft);
+#else
+static mp_obj_t fft_fft(size_t n_args, const mp_obj_t *args) {
+    if(n_args == 2) {
+        return fft_fft_ifft(n_args, args[0], args[1], FFT_FFT);
+    } else {
+        return fft_fft_ifft(n_args, args[0], mp_const_none, FFT_FFT);
+    }
+}
+
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(fft_fft_obj, 1, 2, fft_fft);
+#endif
+
+//| def ifft(r: ulab.numpy.ndarray, c: Optional[ulab.numpy.ndarray] = None) -> Tuple[ulab.numpy.ndarray, ulab.numpy.ndarray]:
+//|     """
+//|     :param ulab.numpy.ndarray r: A 1-dimension array of values whose size is a power of 2
+//|     :param ulab.numpy.ndarray c: An optional 1-dimension array of values whose size is a power of 2, giving the imaginary part of the value
+//|     :return tuple (r, c): The real and imaginary parts of the inverse FFT
+//|
+//|     Perform an Inverse Fast Fourier Transform from the frequency domain into the time domain"""
+//|     ...
+//|
+
+#if ULAB_SUPPORTS_COMPLEX & ULAB_FFT_IS_NUMPY_COMPATIBLE
+static mp_obj_t fft_ifft(mp_obj_t arg) {
+    return fft_fft_ifft(arg, FFT_IFFT);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(fft_ifft_obj, fft_ifft);
+#else
+static mp_obj_t fft_ifft(size_t n_args, const mp_obj_t *args) {
+    NOT_IMPLEMENTED_FOR_COMPLEX()
+    if(n_args == 2) {
+        return fft_fft_ifft(n_args, args[0], args[1], FFT_IFFT);
+    } else {
+        return fft_fft_ifft(n_args, args[0], mp_const_none, FFT_IFFT);
+    }
+}
+
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(fft_ifft_obj, 1, 2, fft_ifft);
+#endif
+
+static const mp_rom_map_elem_t ulab_fft_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_fft) },
+    { MP_ROM_QSTR(MP_QSTR_fft), MP_ROM_PTR(&fft_fft_obj) },
+    { MP_ROM_QSTR(MP_QSTR_ifft), MP_ROM_PTR(&fft_ifft_obj) },
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_fft_globals, ulab_fft_globals_table);
+
+const mp_obj_module_t ulab_fft_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_fft_globals,
+};
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_numpy_dot_fft, ulab_fft_module);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/fft/fft.h b/tulip/shared/ulab/code/numpy/fft/fft.h
new file mode 100644
index 000000000..1e50a8da5
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/fft/fft.h
@@ -0,0 +1,30 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+*/
+
+#ifndef _FFT_
+#define _FFT_
+
+#include "../../ulab.h"
+#include "../../ulab_tools.h"
+#include "../../ndarray.h"
+#include "fft_tools.h"
+
+extern const mp_obj_module_t ulab_fft_module;
+
+#if ULAB_SUPPORTS_COMPLEX & ULAB_FFT_IS_NUMPY_COMPATIBLE
+MP_DECLARE_CONST_FUN_OBJ_1(fft_fft_obj);  // one-argument form, matching MP_DEFINE_CONST_FUN_OBJ_1 in fft.c
+MP_DECLARE_CONST_FUN_OBJ_1(fft_ifft_obj); // one-argument form, matching MP_DEFINE_CONST_FUN_OBJ_1 in fft.c
+#else
+MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(fft_fft_obj);  // takes 1 or 2 arguments: real part and optional imaginary part
+MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(fft_ifft_obj); // takes 1 or 2 arguments: real part and optional imaginary part
+#endif
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/fft/fft_tools.c b/tulip/shared/ulab/code/numpy/fft/fft_tools.c
new file mode 100644
index 000000000..bc98b3d3b
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/fft/fft_tools.c
@@ -0,0 +1,266 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2024 Zoltán Vörös
+*/
+
+#include <math.h>
+#include <string.h>
+#include "py/runtime.h"
+
+#include "../../ndarray.h"
+#include "../../ulab_tools.h"
+#include "../carray/carray_tools.h"
+#include "fft_tools.h"
+
+#ifndef MP_PI
+#define MP_PI MICROPY_FLOAT_CONST(3.14159265358979323846)
+#endif
+#ifndef MP_E
+#define MP_E MICROPY_FLOAT_CONST(2.71828182845904523536)
+#endif
+
+/* Kernel implementation for the case, when ulab has no complex support
+
+ * The following function takes two arrays, namely, the real and imaginary
+ * parts of a complex array, and calculates the Fourier transform in place.
+ *
+ * The function is basically a modification of four1 from Numerical Recipes,
+ * has no dependencies beyond micropython itself (for the definition of mp_float_t),
+ * and can be used independent of ulab.
+ */
+
+#if ULAB_SUPPORTS_COMPLEX & ULAB_FFT_IS_NUMPY_COMPATIBLE
+/* Kernel implementation for the complex case. Data are contained in data as
+
+    data[0], data[1], data[2], data[3], .... , data[2n - 2], data[2n-1]
+    real[0], imag[0], real[1], imag[1], .... , real[n-1],    imag[n-1]
+
+    In general
+    real[i] = data[2i]
+    imag[i] = data[2i+1]
+
+*/
+void fft_kernel(mp_float_t *data, size_t n, int isign) {
+    size_t j, m, mmax, istep;
+    mp_float_t tempr, tempi;
+    mp_float_t wtemp, wr, wpr, wpi, wi, theta;
+
+    j = 0;
+    for(size_t i = 0; i < n; i++) {
+        if (j > i) {
+            SWAP(mp_float_t, data[2*i], data[2*j]);
+            SWAP(mp_float_t, data[2*i+1], data[2*j+1]);
+        }
+        m = n >> 1;
+        while (j >= m && m > 0) {
+            j -= m;
+            m >>= 1;
+        }
+        j += m;
+    }
+
+    mmax = 1;
+    while (n > mmax) {
+        istep = mmax << 1;
+        theta = MICROPY_FLOAT_CONST(-2.0)*isign*MP_PI/istep;
+        wtemp = MICROPY_FLOAT_C_FUN(sin)(MICROPY_FLOAT_CONST(0.5) * theta);
+        wpr = MICROPY_FLOAT_CONST(-2.0) * wtemp * wtemp;
+        wpi = MICROPY_FLOAT_C_FUN(sin)(theta);
+        wr = MICROPY_FLOAT_CONST(1.0);
+        wi = MICROPY_FLOAT_CONST(0.0);
+        for(m = 0; m < mmax; m++) {
+            for(size_t i = m; i < n; i += istep) {
+                j = i + mmax;
+                tempr = wr * data[2*j] - wi * data[2*j+1];
+                tempi = wr * data[2*j+1] + wi * data[2*j];
+                data[2*j] = data[2*i] - tempr;
+                data[2*j+1] = data[2*i+1] - tempi;
+                data[2*i] += tempr;
+                data[2*i+1] += tempi;
+            }
+            wtemp = wr;
+            wr = wr*wpr - wi*wpi + wr;
+            wi = wi*wpr + wtemp*wpi + wi;
+        }
+        mmax = istep;
+    }
+}
+
+/*
+ * The following function is a helper interface to the python side.
+ * It has been factored out from fft.c, so that the same argument parsing
+ * routine can be called from utils.spectrogram.
+ */
+mp_obj_t fft_fft_ifft(mp_obj_t data_in, uint8_t type) {
+    if(!mp_obj_is_type(data_in, &ulab_ndarray_type)) {
+        mp_raise_NotImplementedError(MP_ERROR_TEXT("FFT is defined for ndarrays only"));
+    }
+    ndarray_obj_t *in = MP_OBJ_TO_PTR(data_in);
+    #if ULAB_MAX_DIMS > 1
+    if(in->ndim != 1) {
+        mp_raise_TypeError(MP_ERROR_TEXT("FFT is implemented for linear arrays only"));
+    }
+    #endif
+    size_t len = in->len;
+    // Check if input is of length of power of 2
+    if((len & (len-1)) != 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("input array length must be power of 2"));
+    }
+
+    ndarray_obj_t *out = ndarray_new_linear_array(len, NDARRAY_COMPLEX);
+    mp_float_t *data = (mp_float_t *)out->array;
+    uint8_t *array = (uint8_t *)in->array;
+
+    if(in->dtype == NDARRAY_COMPLEX) {
+        uint8_t sz = 2 * sizeof(mp_float_t);
+        for(size_t i = 0; i < len; i++) {
+            memcpy(data, array, sz);
+            data += 2;
+            array += in->strides[ULAB_MAX_DIMS - 1];
+        }
+    } else {
+        mp_float_t (*func)(void *) = ndarray_get_float_function(in->dtype);
+        for(size_t i = 0; i < len; i++) {
+            // real part; the imaginary part is 0, no need to assign
+            *data = func(array);
+            data += 2;
+            array += in->strides[ULAB_MAX_DIMS - 1];
+        }
+    }
+    data -= 2 * len;
+
+    if(type == FFT_FFT) {
+        fft_kernel(data, len, 1);
+    } else { // inverse transform
+        fft_kernel(data, len, -1);
+        // TODO: numpy accepts the norm keyword argument
+        for(size_t i = 0; i < 2 * len; i++) {
+            *data++ /= len;
+        }
+    }
+    return MP_OBJ_FROM_PTR(out);
+}
+#else /* ULAB_SUPPORTS_COMPLEX & ULAB_FFT_IS_NUMPY_COMPATIBLE */
+void fft_kernel(mp_float_t *real, mp_float_t *imag, size_t n, int isign) {
+    size_t j, m, mmax, istep;
+    mp_float_t tempr, tempi;
+    mp_float_t wtemp, wr, wpr, wpi, wi, theta;
+
+    j = 0;
+    for(size_t i = 0; i < n; i++) {
+        if (j > i) {
+            SWAP(mp_float_t, real[i], real[j]);
+            SWAP(mp_float_t, imag[i], imag[j]);
+        }
+        m = n >> 1;
+        while (j >= m && m > 0) {
+            j -= m;
+            m >>= 1;
+        }
+        j += m;
+    }
+
+    mmax = 1;
+    while (n > mmax) {
+        istep = mmax << 1;
+        theta = MICROPY_FLOAT_CONST(-2.0)*isign*MP_PI/istep;
+        wtemp = MICROPY_FLOAT_C_FUN(sin)(MICROPY_FLOAT_CONST(0.5) * theta);
+        wpr = MICROPY_FLOAT_CONST(-2.0) * wtemp * wtemp;
+        wpi = MICROPY_FLOAT_C_FUN(sin)(theta);
+        wr = MICROPY_FLOAT_CONST(1.0);
+        wi = MICROPY_FLOAT_CONST(0.0);
+        for(m = 0; m < mmax; m++) {
+            for(size_t i = m; i < n; i += istep) {
+                j = i + mmax;
+                tempr = wr * real[j] - wi * imag[j];
+                tempi = wr * imag[j] + wi * real[j];
+                real[j] = real[i] - tempr;
+                imag[j] = imag[i] - tempi;
+                real[i] += tempr;
+                imag[i] += tempi;
+            }
+            wtemp = wr;
+            wr = wr*wpr - wi*wpi + wr;
+            wi = wi*wpr + wtemp*wpi + wi;
+        }
+        mmax = istep;
+    }
+}
+
+mp_obj_t fft_fft_ifft(size_t n_args, mp_obj_t arg_re, mp_obj_t arg_im, uint8_t type) {
+    if(!mp_obj_is_type(arg_re, &ulab_ndarray_type)) {
+        mp_raise_NotImplementedError(MP_ERROR_TEXT("FFT is defined for ndarrays only"));
+    }
+    if(n_args == 2) {
+        if(!mp_obj_is_type(arg_im, &ulab_ndarray_type)) {
+            mp_raise_NotImplementedError(MP_ERROR_TEXT("FFT is defined for ndarrays only"));
+        }
+    }
+    ndarray_obj_t *re = MP_OBJ_TO_PTR(arg_re);
+    #if ULAB_MAX_DIMS > 1
+    if(re->ndim != 1) {
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(re->dtype)
+        mp_raise_TypeError(MP_ERROR_TEXT("FFT is implemented for linear arrays only"));
+    }
+    #endif
+    size_t len = re->len;
+    // Check if input is of length of power of 2
+    if((len & (len-1)) != 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("input array length must be power of 2"));
+    }
+
+    ndarray_obj_t *out_re = ndarray_new_linear_array(len, NDARRAY_FLOAT);
+    mp_float_t *data_re = (mp_float_t *)out_re->array;
+
+    uint8_t *array = (uint8_t *)re->array;
+    mp_float_t (*func)(void *) = ndarray_get_float_function(re->dtype);
+
+    for(size_t i=0; i < len; i++) {
+        *data_re++ = func(array);
+        array += re->strides[ULAB_MAX_DIMS - 1];
+    }
+    data_re -= len;
+    ndarray_obj_t *out_im = ndarray_new_linear_array(len, NDARRAY_FLOAT);
+    mp_float_t *data_im = (mp_float_t *)out_im->array;
+
+    if(n_args == 2) {
+        ndarray_obj_t *im = MP_OBJ_TO_PTR(arg_im);
+        #if ULAB_MAX_DIMS > 1
+        if(im->ndim != 1) {
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(im->dtype)
+            mp_raise_TypeError(MP_ERROR_TEXT("FFT is implemented for linear arrays only"));
+        }
+        #endif
+        if (re->len != im->len) {
+            mp_raise_ValueError(MP_ERROR_TEXT("real and imaginary parts must be of equal length"));
+        }
+        array = (uint8_t *)im->array;
+        func = ndarray_get_float_function(im->dtype);
+        for(size_t i=0; i < len; i++) {
+           *data_im++ = func(array);
+           array += im->strides[ULAB_MAX_DIMS - 1];
+        }
+        data_im -= len;
+    }
+
+    if(type == FFT_FFT) {
+        fft_kernel(data_re, data_im, len, 1);
+    } else { // inverse transform
+        fft_kernel(data_re, data_im, len, -1);
+        // TODO: numpy accepts the norm keyword argument
+        for(size_t i=0; i < len; i++) {
+            *data_re++ /= len;
+            *data_im++ /= len;
+        }
+    }
+    mp_obj_t tuple[2];
+    tuple[0] = MP_OBJ_FROM_PTR(out_re);
+    tuple[1] = MP_OBJ_FROM_PTR(out_im);
+    return mp_obj_new_tuple(2, tuple);
+}
+#endif  /* ULAB_SUPPORTS_COMPLEX & ULAB_FFT_IS_NUMPY_COMPATIBLE */
diff --git a/tulip/shared/ulab/code/numpy/fft/fft_tools.h b/tulip/shared/ulab/code/numpy/fft/fft_tools.h
new file mode 100644
index 000000000..aa5982011
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/fft/fft_tools.h
@@ -0,0 +1,27 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+*/
+
+#ifndef _FFT_TOOLS_
+#define _FFT_TOOLS_
+
+enum FFT_TYPE {
+    FFT_FFT,
+    FFT_IFFT,
+};
+
+#if ULAB_SUPPORTS_COMPLEX & ULAB_FFT_IS_NUMPY_COMPATIBLE
+void fft_kernel(mp_float_t *, size_t , int );
+mp_obj_t fft_fft_ifft(mp_obj_t , uint8_t );
+#else
+void fft_kernel(mp_float_t *, mp_float_t *, size_t , int );
+mp_obj_t fft_fft_ifft(size_t , mp_obj_t , mp_obj_t , uint8_t );
+#endif /* ULAB_SUPPORTS_COMPLEX & ULAB_FFT_IS_NUMPY_COMPATIBLE */
+
+#endif /* _FFT_TOOLS_ */
diff --git a/tulip/shared/ulab/code/numpy/filter.c b/tulip/shared/ulab/code/numpy/filter.c
new file mode 100644
index 000000000..79c1740a5
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/filter.c
@@ -0,0 +1,132 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020-2021 Zoltán Vörös
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../ulab.h"
+#include "../scipy/signal/signal.h"
+#include "carray/carray_tools.h"
+#include "filter.h"
+
+#if ULAB_NUMPY_HAS_CONVOLVE
+
+mp_obj_t filter_convolve(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_a, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_v, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type) || !mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("convolve arguments must be ndarrays"));
+    }
+
+    ndarray_obj_t *a = MP_OBJ_TO_PTR(args[0].u_obj);
+    ndarray_obj_t *c = MP_OBJ_TO_PTR(args[1].u_obj);
+    // deal with linear arrays only
+    #if ULAB_MAX_DIMS > 1
+    if((a->ndim != 1) || (c->ndim != 1)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("convolve arguments must be linear arrays"));
+    }
+    #endif
+    size_t len_a = a->len;
+    size_t len_c = c->len;
+    if(len_a == 0 || len_c == 0) {
+        mp_raise_TypeError(MP_ERROR_TEXT("convolve arguments must not be empty"));
+    }
+
+    int len = len_a + len_c - 1; // convolve mode "full"
+    int32_t off = len_c - 1;
+    uint8_t dtype = NDARRAY_FLOAT;
+
+    #if ULAB_SUPPORTS_COMPLEX
+    if((a->dtype == NDARRAY_COMPLEX) || (c->dtype == NDARRAY_COMPLEX)) {
+        dtype = NDARRAY_COMPLEX;
+    }
+    #endif
+    ndarray_obj_t *ndarray = ndarray_new_linear_array(len, dtype);
+    mp_float_t *array = (mp_float_t *)ndarray->array;
+
+    uint8_t *aarray = (uint8_t *)a->array;
+    uint8_t *carray = (uint8_t *)c->array;
+
+    int32_t as = a->strides[ULAB_MAX_DIMS - 1] / a->itemsize;
+    int32_t cs = c->strides[ULAB_MAX_DIMS - 1] / c->itemsize;
+
+
+    #if ULAB_SUPPORTS_COMPLEX
+    if(dtype == NDARRAY_COMPLEX) {
+        mp_float_t a_real, a_imag;
+        mp_float_t c_real, c_imag = MICROPY_FLOAT_CONST(0.0);
+        for(int32_t k = -off; k < len-off; k++) {
+            mp_float_t accum_real = MICROPY_FLOAT_CONST(0.0);
+            mp_float_t accum_imag = MICROPY_FLOAT_CONST(0.0);
+
+            int32_t top_n = MIN(len_c, len_a - k);
+            int32_t bot_n = MAX(-k, 0);
+
+            for(int32_t n = bot_n; n < top_n; n++) {
+                int32_t idx_c = (len_c - n - 1) * cs;
+                int32_t idx_a = (n + k) * as;
+                if(a->dtype != NDARRAY_COMPLEX) {
+                    a_real = ndarray_get_float_index(aarray, a->dtype, idx_a);
+                    a_imag = MICROPY_FLOAT_CONST(0.0);
+                } else {
+                    a_real = ndarray_get_float_index(aarray, NDARRAY_FLOAT, 2 * idx_a);
+                    a_imag = ndarray_get_float_index(aarray, NDARRAY_FLOAT, 2 * idx_a + 1);
+                }
+
+                if(c->dtype != NDARRAY_COMPLEX) {
+                    c_real = ndarray_get_float_index(carray, c->dtype, idx_c);
+                    c_imag = MICROPY_FLOAT_CONST(0.0);
+                } else {
+                    c_real = ndarray_get_float_index(carray, NDARRAY_FLOAT, 2 * idx_c);
+                    c_imag = ndarray_get_float_index(carray, NDARRAY_FLOAT, 2 * idx_c + 1);
+                }
+                accum_real += a_real * c_real - a_imag * c_imag;
+                accum_imag += a_real * c_imag + a_imag * c_real;
+            }
+            *array++ = accum_real;
+            *array++ = accum_imag;
+        }
+        return MP_OBJ_FROM_PTR(ndarray);
+    }
+    #endif
+
+    for(int32_t k = -off; k < len-off; k++) {
+        mp_float_t accum = MICROPY_FLOAT_CONST(0.0);
+        int32_t top_n = MIN(len_c, len_a - k);
+        int32_t bot_n = MAX(-k, 0);
+        for(int32_t n = bot_n; n < top_n; n++) {
+            int32_t idx_c = (len_c - n - 1) * cs;
+            int32_t idx_a = (n + k) * as;
+            mp_float_t ai = ndarray_get_float_index(aarray, a->dtype, idx_a);
+            mp_float_t ci = ndarray_get_float_index(carray, c->dtype, idx_c);
+            accum += ai * ci;
+        }
+        *array++ = accum;
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(filter_convolve_obj, 2, filter_convolve);
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/filter.h b/tulip/shared/ulab/code/numpy/filter.h
new file mode 100644
index 000000000..d6d0f1723
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/filter.h
@@ -0,0 +1,20 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020-2021 Zoltán Vörös
+*/
+
+#ifndef _FILTER_
+#define _FILTER_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+MP_DECLARE_CONST_FUN_OBJ_KW(filter_convolve_obj);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/io/io.c b/tulip/shared/ulab/code/numpy/io/io.c
new file mode 100644
index 000000000..95e62adc0
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/io/io.c
@@ -0,0 +1,821 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Zoltán Vörös
+*/
+
+#include <math.h>
+#include <string.h>
+
+#include "py/builtin.h"
+#include "py/formatfloat.h"
+#include "py/obj.h"
+#include "py/parsenum.h"
+#include "py/runtime.h"
+#include "py/stream.h"
+#include "extmod/vfs.h"
+
+#include "../../ndarray.h"
+#include "../../ulab_tools.h"
+#include "io.h"
+
+#define ULAB_IO_BUFFER_SIZE         128
+#define ULAB_IO_CLIPBOARD_SIZE      32
+#define ULAB_IO_MAX_ROWS            65535
+
+#define ULAB_IO_NULL_ENDIAN         0
+#define ULAB_IO_LITTLE_ENDIAN       1
+#define ULAB_IO_BIG_ENDIAN          2
+
+#if ULAB_NUMPY_HAS_LOAD
+static void io_read_(mp_obj_t stream, const mp_stream_p_t *stream_p, char *buffer, const char *string, uint16_t len, int *error) {
+    size_t read = stream_p->read(stream, buffer, len, error);
+    bool fail = false;
+    if(read == len) {
+        if(string != NULL) {
+            if(memcmp(buffer, string, len) != 0) {
+                fail = true;
+            }
+        }
+    } else {
+        fail = true;
+    }
+    if(fail) {
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, error);
+        mp_raise_msg(&mp_type_RuntimeError, MP_ERROR_TEXT("corrupted file"));
+    }
+}
+
+static mp_obj_t io_load(mp_obj_t file) { // reads the .npy file named by `file` and returns its contents as an ndarray
+    if(!mp_obj_is_str(file)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong input type"));
+    }
+
+    int error;
+    char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);
+
+    // test for endianness: look at the first byte of x as it is laid out in memory
+    uint16_t x = 1;
+    int8_t native_endianness = (*(uint8_t *)&x == 1) ? ULAB_IO_LITTLE_ENDIAN : ULAB_IO_BIG_ENDIAN;
+
+    mp_obj_t open_args[2] = {
+        file,
+        MP_OBJ_NEW_QSTR(MP_QSTR_rb)
+    };
+
+    mp_obj_t stream = mp_builtin_open_obj.fun.kw(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *stream_p = mp_get_stream(stream);
+
+    // read header
+    // magic string
+    io_read_(stream, stream_p, buffer, "\x93NUMPY", 6, &error);
+    // simply discard the version number
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
+    // header length, represented as a little endian uint16 (0x76, 0x00)
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
+
+    uint16_t header_length = (uint8_t)buffer[1]; // cast: plain char may be signed, bytes >= 0x80 must not sign-extend
+    header_length <<= 8;
+    header_length += (uint8_t)buffer[0];
+
+    // beginning of the dictionary describing the array
+    io_read_(stream, stream_p, buffer, "{'descr': '", 11, &error);
+    uint8_t dtype;
+
+    io_read_(stream, stream_p, buffer, NULL, 1, &error);
+    uint8_t endianness = ULAB_IO_NULL_ENDIAN;
+    if(*buffer == '<') {
+        endianness = ULAB_IO_LITTLE_ENDIAN;
+    } else if(*buffer == '>') {
+        endianness = ULAB_IO_BIG_ENDIAN;
+    }
+
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
+    if(memcmp(buffer, "u1", 2) == 0) {
+        dtype = NDARRAY_UINT8;
+    } else if(memcmp(buffer, "i1", 2) == 0) {
+        dtype = NDARRAY_INT8;
+    } else if(memcmp(buffer, "u2", 2) == 0) {
+        dtype = NDARRAY_UINT16;
+    } else if(memcmp(buffer, "i2", 2) == 0) {
+        dtype = NDARRAY_INT16;
+    }
+    #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+    else if(memcmp(buffer, "f4", 2) == 0) {
+        dtype = NDARRAY_FLOAT;
+    }
+    #else
+    else if(memcmp(buffer, "f8", 2) == 0) {
+        dtype = NDARRAY_FLOAT;
+    }
+    #endif
+    #if ULAB_SUPPORTS_COMPLEX
+    #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+    else if(memcmp(buffer, "c8", 2) == 0) {
+        dtype = NDARRAY_COMPLEX;
+    }
+    #else
+    else if(memcmp(buffer, "c16", 3) == 0) {
+        dtype = NDARRAY_COMPLEX;
+    }
+    #endif
+    #endif /* ULAB_SUPPORTS_COMPLEX */
+    else {
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong dtype"));
+    }
+
+    io_read_(stream, stream_p, buffer, "', 'fortran_order': False, 'shape': (", 37, &error);
+
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+
+    uint16_t bytes_to_read = MIN(ULAB_IO_BUFFER_SIZE, header_length - 51);
+    // bytes_to_read is 128 at most. This should be enough to contain a
+    // maximum of 4 size_t numbers plus the delimiters
+    io_read_(stream, stream_p, buffer, NULL, bytes_to_read, &error);
+    char *needle = buffer;
+    uint8_t ndim = 0;
+
+    // find out the number of dimensions by counting the commas in the string
+    while(1) {
+        if(*needle == ',') {
+            ndim++;
+            if(needle[1] == ')') {
+                break;
+            }
+        } else if((*needle == ')') && (ndim > 0)) {
+            ndim++;
+            break;
+        }
+        needle++;
+    }
+
+    needle = buffer;
+    for(uint8_t i = 0; i < ndim; i++) {
+        size_t number = 0;
+        // trivial number parsing here
+        while(1) {
+            if((*needle == ' ') || (*needle == '\t')) {
+                needle++;
+            }
+            if((*needle > 47) && (*needle < 58)) {
+                number = number * 10 + (*needle - 48);
+            } else if((*needle == ',') || (*needle == ')')) {
+                break;
+            }
+            else {
+                stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+                mp_raise_msg(&mp_type_RuntimeError, MP_ERROR_TEXT("corrupted file"));
+            }
+            needle++;
+        }
+        needle++;
+        shape[ULAB_MAX_DIMS - ndim + i] = number;
+    }
+
+    for(uint16_t n, done = bytes_to_read + 51; done < header_length; done += n) { // strip the rest of the header
+        n = MIN(ULAB_IO_BUFFER_SIZE, header_length - done); // chunked, so that buffer is never overrun
+        io_read_(stream, stream_p, buffer, NULL, n, &error);
+    }
+
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, dtype);
+    char *array = (char *)ndarray->array;
+
+    size_t read = stream_p->read(stream, array, ndarray->len * ndarray->itemsize, &error);
+    if(read != ndarray->len * ndarray->itemsize) {
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+        mp_raise_msg(&mp_type_RuntimeError, MP_ERROR_TEXT("corrupted file"));
+    }
+
+    stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+    m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
+
+    // swap the bytes, if necessary
+    if((native_endianness != endianness) && (dtype != NDARRAY_UINT8) && (dtype != NDARRAY_INT8)) {
+        uint8_t sz = ndarray->itemsize;
+        char *tmpbuff = NULL;
+
+        #if ULAB_SUPPORTS_COMPLEX
+        if(dtype == NDARRAY_COMPLEX) {
+            // work with the floating point real and imaginary parts
+            sz /= 2;
+            tmpbuff = m_new(char, sz);
+            for(size_t i = 0; i < ndarray->len; i++) {
+                for(uint8_t k = 0; k < 2; k++) {
+                    tmpbuff += sz;
+                    for(uint8_t j = 0; j < sz; j++) {
+                        memcpy(--tmpbuff, array++, 1);
+                    }
+                    memcpy(array-sz, tmpbuff, sz);
+                }
+            }
+        } else {
+        #endif
+            tmpbuff = m_new(char, sz);
+            for(size_t i = 0; i < ndarray->len; i++) {
+                tmpbuff += sz;
+                for(uint8_t j = 0; j < sz; j++) {
+                    memcpy(--tmpbuff, array++, 1);
+                }
+                memcpy(array-sz, tmpbuff, sz);
+            }
+        #if ULAB_SUPPORTS_COMPLEX
+        }
+        #endif
+        m_del(char, tmpbuff, sz);
+    }
+
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(io_load_obj, io_load);
+#endif /* ULAB_NUMPY_HAS_LOAD */
+
+#if ULAB_NUMPY_HAS_LOADTXT
+static void io_assign_value(const char *clipboard, uint8_t len, ndarray_obj_t *ndarray, size_t *idx, uint8_t dtype) {
+    #if MICROPY_PY_BUILTINS_COMPLEX
+    mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
+    #else
+    mp_obj_t value = mp_parse_num_float(clipboard, len, false, NULL);
+    #endif
+    if(dtype != NDARRAY_FLOAT) {
+        mp_float_t _value = mp_obj_get_float(value);
+        value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value));
+    }
+    ndarray_set_value(dtype, ndarray->array, (*idx)++, value);
+}
+
+static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // loadtxt(file, delimiter=, comments=, max_rows=, usecols=, dtype=, skiprows=): reads the numbers
+    // of a text file in two passes (the first pass determines the shape, the second one fills in
+    // the values), and returns them in a dense ndarray. Raises ValueError, if no rows were found,
+    // or if a number does not fit into the clipboard, and OSError, if the stream cannot be read.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_delimiter, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_comments, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_max_rows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = -1 } },
+        { MP_QSTR_usecols, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
+        { MP_QSTR_skiprows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = 0 } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t open_args[2] = {
+        args[0].u_obj,
+        MP_OBJ_NEW_QSTR(MP_QSTR_r)
+    };
+
+    mp_obj_t stream = mp_builtin_open_obj.fun.kw(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *stream_p = mp_get_stream(stream);
+    char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);
+    int error;
+
+    char delimiter = ' ';
+    if(args[1].u_obj != mp_const_none) {
+        size_t _len;
+        delimiter = mp_obj_str_get_data(args[1].u_obj, &_len)[0]; // only the first character is used
+    }
+
+    char comment_char = '#';
+    if(args[2].u_obj != mp_const_none) {
+        size_t _len;
+        comment_char = mp_obj_str_get_data(args[2].u_obj, &_len)[0]; // only the first character is used
+    }
+
+    uint16_t skiprows = args[6].u_int;
+    uint16_t max_rows = ULAB_IO_MAX_ROWS;
+    if((args[3].u_int > 0) && (args[3].u_int < ULAB_IO_MAX_ROWS)) {
+        max_rows = args[3].u_int + skiprows;
+    }
+
+    uint16_t *cols = NULL;
+    uint8_t used_columns = 0;
+    if(args[4].u_obj != mp_const_none) {
+        if(mp_obj_is_int(args[4].u_obj)) {
+            used_columns = 1;
+            cols = m_new(uint16_t, used_columns);
+            cols[0] = (uint16_t)mp_obj_get_int(args[4].u_obj);
+        } else {
+            #if ULAB_MAX_DIMS == 1
+            mp_raise_ValueError(MP_ERROR_TEXT("usecols keyword must be specified"));
+            #else
+            // assume that the argument is an iterable
+            used_columns = (uint16_t)mp_obj_get_int(mp_obj_len(args[4].u_obj));
+            cols = m_new(uint16_t, used_columns);
+            mp_obj_iter_buf_t iter_buf;
+            mp_obj_t item, iterable = mp_getiter(args[4].u_obj, &iter_buf);
+            while((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+                *cols++ = (uint16_t)mp_obj_get_int(item);
+            }
+            cols -= used_columns;
+            #endif
+        }
+    }
+
+    uint8_t dtype = args[5].u_int;
+    // count the columns and rows
+    // we actually count only the rows and the items, and assume that
+    // the number of columns can be gotten by means of a simple division,
+    // i.e., that each row has the same number of columns
+    char *offset;
+    uint16_t rows = 0, items = 0, all_rows = 0;
+    mp_uint_t read; // the full return value of the stream's read, so that MP_STREAM_ERROR can be detected
+    uint8_t len = 0;
+
+    do {
+        read = stream_p->read(stream, buffer, ULAB_IO_BUFFER_SIZE - 1, &error);
+        if(read == MP_STREAM_ERROR) { mp_raise_OSError(error); }
+        buffer[read] = '\0';
+        offset = buffer;
+        while(*offset != '\0') {
+            if(*offset == comment_char) {
+                // clear the line till the end, or the buffer's end
+                while((*offset != '\0')) {
+                    offset++;
+                    if(*offset == '\n') {
+                        offset++;
+                        all_rows++;
+                        break;
+                    }
+                }
+            }
+
+            // catch whitespaces here: if these are not on a comment line, then they delimit a number
+            if(*offset == '\n') {
+                all_rows++;
+                if(all_rows > skiprows) {
+                    rows++;
+                    items++;
+                    len = 0;
+                }
+                if(all_rows == max_rows) {
+                    break;
+                }
+            }
+
+            if((*offset == ' ') || (*offset == '\t') || (*offset == '\v') ||
+                (*offset == '\f') || (*offset == '\r') || (*offset == delimiter)) {
+                offset++;
+                while((*offset == ' ') || (*offset == '\t') || (*offset == '\v') || (*offset == '\f') || (*offset == '\r')) {
+                    offset++;
+                }
+                if(len > 0) {
+                    if(all_rows >= skiprows) {
+                        items++;
+                    }
+                    len = 0;
+                }
+            } else {
+                offset++;
+                len++;
+            }
+        }
+    } while((read > 0) && (all_rows < max_rows));
+
+    if(rows == 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("empty file"));
+    }
+    uint16_t columns = items / rows;
+
+    if(columns < used_columns) {
+        mp_raise_ValueError(MP_ERROR_TEXT("usecols is too high"));
+    }
+
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+
+    #if ULAB_MAX_DIMS == 1
+    shape[0] = rows;
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, dtype);
+    #else
+    if(args[4].u_obj == mp_const_none) {
+        shape[ULAB_MAX_DIMS - 1] = columns;
+    } else {
+        shape[ULAB_MAX_DIMS - 1] = used_columns;
+    }
+    shape[ULAB_MAX_DIMS - 2] = rows;
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, dtype);
+    #endif
+
+    struct mp_stream_seek_t seek_s;
+    seek_s.offset = 0;
+    seek_s.whence = MP_SEEK_SET;
+    stream_p->ioctl(stream, MP_STREAM_SEEK, (mp_uint_t)(uintptr_t)&seek_s, &error);
+
+    char *clipboard = m_new(char, ULAB_IO_CLIPBOARD_SIZE);
+    char *clipboard_origin = clipboard;
+
+    rows = 0;
+    columns = 0;
+    len = 0;
+
+    size_t idx = 0;
+    do {
+        read = stream_p->read(stream, buffer, ULAB_IO_BUFFER_SIZE - 1, &error);
+        if(read == MP_STREAM_ERROR) { mp_raise_OSError(error); }
+        buffer[read] = '\0';
+        offset = buffer;
+
+        while(*offset != '\0') {
+            if(*offset == comment_char) {
+                // clear the line till the end, or the buffer's end
+                while((*offset != '\0')) {
+                    offset++;
+                    if(*offset == '\n') {
+                        rows++;
+                        offset++;
+                        break;
+                    }
+                }
+            }
+
+            if(rows == max_rows) {
+                break;
+            }
+
+            if((*offset == ' ') || (*offset == '\t') || (*offset == '\v') ||
+                (*offset == '\f') || (*offset == '\r') || (*offset == '\n') || (*offset == delimiter)) {
+                offset++;
+                while((*offset == ' ') || (*offset == '\t') || (*offset == '\v') ||
+                    (*offset == '\f') || (*offset == '\r') || (*offset == '\n')) {
+                    offset++;
+                }
+                if(len > 0) {
+                    clipboard = clipboard_origin;
+                    if(rows >= skiprows) {
+                        #if ULAB_MAX_DIMS == 1
+                        if(columns == (cols == NULL ? 0 : cols[0])) { // without usecols, the first column is read
+                            io_assign_value(clipboard, len, ndarray, &idx, dtype);
+                        }
+                        #else
+                        if(args[4].u_obj == mp_const_none) {
+                            io_assign_value(clipboard, len, ndarray, &idx, dtype);
+                        } else {
+                            for(uint8_t c = 0; c < used_columns; c++) {
+                                if(columns == cols[c]) {
+                                    io_assign_value(clipboard, len, ndarray, &idx, dtype);
+                                    break;
+                                }
+                            }
+                        }
+                        #endif
+                    }
+                    columns++;
+                    len = 0;
+
+                    if(offset[-1] == '\n') {
+                        columns = 0;
+                        rows++;
+                    }
+                }
+            } else {
+                if(clipboard - clipboard_origin >= ULAB_IO_CLIPBOARD_SIZE) { mp_raise_ValueError(MP_ERROR_TEXT("number is too long")); }
+                *clipboard++ = *offset++;
+                len++;
+            }
+        }
+    } while((read > 0) && (rows < max_rows));
+
+    stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
+    m_del(char, clipboard_origin, ULAB_IO_CLIPBOARD_SIZE); // clipboard itself might point into the middle of the block
+    m_del(uint16_t, cols, used_columns);
+
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(io_loadtxt_obj, 1, io_loadtxt);
+#endif /* ULAB_NUMPY_HAS_LOADTXT */
+
+
+#if ULAB_NUMPY_HAS_SAVE
+static uint8_t io_sprintf(char *buffer, const char *comma, size_t x) {
+    // Minimal sprintf replacement for size_t values, required on systems,
+    // where sprintf is not available: writes the decimal representation of x
+    // into buffer, immediately followed by the characters of comma (without
+    // the terminating '\0').
+    //
+    // Returns the number of characters written. The caller must make sure
+    // that buffer can hold all digits of x plus the characters of comma.
+
+    // find out, how many digits are required; x == 0 still needs one digit,
+    // and there is no upper limit on the magnitude of x
+    uint8_t digits = 1;
+    for(size_t rest = x / 10; rest > 0; rest /= 10) {
+        digits++;
+    }
+
+    // fill in the digits from the least significant one backwards
+    for(uint8_t i = digits; i > 0; i--) {
+        buffer[i - 1] = (char)('0' + (x % 10));
+        x /= 10;
+    }
+
+    // append the separator right after the last digit
+    uint8_t offset = digits;
+    while(*comma != '\0') {
+        buffer[offset++] = *comma++;
+    }
+    return offset;
+}
+
+static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
+    // save(file, ndarray): writes ndarray to the file named by file in the .npy layout (a header of
+    // ULAB_IO_BUFFER_SIZE bytes, followed by the raw elements); returns None, raises TypeError on wrong input
+    if(!mp_obj_is_str(file) || !mp_obj_is_type(ndarray_, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong input type"));
+    }
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(ndarray_);
+    int error;
+    char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);
+    uint8_t offset = 0;
+
+    // test for endianness: look at the first byte of x in memory (shifting the value cannot tell)
+    uint16_t x = 1;
+    int8_t native_endianness = *((uint8_t *)&x) == 1 ? '<' : '>';
+
+    mp_obj_t open_args[2] = {
+        file,
+        MP_OBJ_NEW_QSTR(MP_QSTR_wb)
+    };
+
+    mp_obj_t stream = mp_builtin_open_obj.fun.kw(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *stream_p = mp_get_stream(stream);
+
+    // write header;
+    // magic string + header length, which is always 128 - 10 = 118, represented as a little endian uint16 (0x76, 0x00)
+    // + beginning of the dictionary describing the array
+    memcpy(buffer, "\x93NUMPY\x01\x00\x76\x00{'descr': '", 21);
+    offset += 21;
+
+    buffer[offset] = native_endianness;
+    if((ndarray->dtype == NDARRAY_UINT8) || (ndarray->dtype == NDARRAY_INT8)) {
+        // for single-byte data, the endianness doesn't matter
+        buffer[offset] = '|';
+    }
+    offset++;
+    switch(ndarray->dtype) {
+        case NDARRAY_UINT8:
+            memcpy(buffer+offset, "u1", 2);
+            break;
+        case NDARRAY_INT8:
+            memcpy(buffer+offset, "i1", 2);
+            break;
+        case NDARRAY_UINT16:
+            memcpy(buffer+offset, "u2", 2);
+            break;
+        case NDARRAY_INT16:
+            memcpy(buffer+offset, "i2", 2);
+            break;
+        case NDARRAY_FLOAT:
+            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+            memcpy(buffer+offset, "f4", 2);
+            #else
+            memcpy(buffer+offset, "f8", 2);
+            #endif
+            break;
+        #if ULAB_SUPPORTS_COMPLEX
+        case NDARRAY_COMPLEX:
+            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+            memcpy(buffer+offset, "c8", 2);
+            #else
+            memcpy(buffer+offset, "c16", 3);
+            offset++;
+            #endif
+            break;
+        #endif
+    }
+
+    offset += 2;
+    memcpy(buffer+offset, "', 'fortran_order': False, 'shape': (", 37);
+    offset += 37;
+
+    if(ndarray->ndim == 1) {
+        offset += io_sprintf(buffer+offset, ",\0", ndarray->shape[ULAB_MAX_DIMS - 1]);
+    } else {
+        for(uint8_t i = ndarray->ndim; i > 1; i--) {
+            offset += io_sprintf(buffer+offset, ", \0", ndarray->shape[ULAB_MAX_DIMS - i]);
+        }
+        offset += io_sprintf(buffer+offset, "\0", ndarray->shape[ULAB_MAX_DIMS - 1]);
+    }
+    memcpy(buffer+offset, "), }", 4);
+    offset += 4;
+    // pad with space till the very end
+    memset(buffer+offset, 32, ULAB_IO_BUFFER_SIZE - offset - 1);
+    buffer[ULAB_IO_BUFFER_SIZE - 1] = '\n';
+    stream_p->write(stream, buffer, ULAB_IO_BUFFER_SIZE, &error);
+
+    // write the array data
+    uint8_t sz = ndarray->itemsize;
+    offset = 0;
+    uint8_t *array = (uint8_t *)ndarray->array;
+
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    memcpy(buffer+offset, array, sz);
+                    offset += sz;
+                    if(offset == ULAB_IO_BUFFER_SIZE) {
+                        stream_p->write(stream, buffer, offset, &error);
+                        offset = 0;
+                    }
+                    array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l <  ndarray->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+                array += ndarray->strides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k <  ndarray->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            array -= ndarray->strides[ULAB_MAX_DIMS - 2] * ndarray->shape[ULAB_MAX_DIMS-2];
+            array += ndarray->strides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j <  ndarray->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        array -= ndarray->strides[ULAB_MAX_DIMS - 3] * ndarray->shape[ULAB_MAX_DIMS-3];
+        array += ndarray->strides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i <  ndarray->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+
+    stream_p->write(stream, buffer, offset, &error);
+    stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+
+    m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(io_save_obj, io_save);
+#endif /* ULAB_NUMPY_HAS_SAVE */
+
+#if ULAB_NUMPY_HAS_SAVETXT
+static int8_t io_format_float(ndarray_obj_t *ndarray, mp_float_t (*func)(void *), uint8_t *array, char *buffer, const char *delimiter) {
+    // own implementation of float formatting for platforms that don't have sprintf:
+    // writes the scalar at array, followed by delimiter, into buffer (ULAB_IO_BUFFER_SIZE bytes),
+    // and returns the number of characters written; func converts a stored value to mp_float_t
+    int8_t offset = 0;
+
+    #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+        #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
+        const int precision = 6;
+        #else
+        const int precision = 7;
+        #endif
+    #else
+        const int precision = 16;
+    #endif
+
+    #if ULAB_SUPPORTS_COMPLEX
+    if(ndarray->dtype == NDARRAY_COMPLEX) {
+        mp_float_t real = func(array);
+        mp_float_t imag = func(array + ndarray->itemsize / 2);
+        offset = mp_format_float(real, buffer, ULAB_IO_BUFFER_SIZE, 'f', precision, '\0');
+        // the sign of the imaginary part is written explicitly, hence, only its magnitude is formatted
+        if(imag >= MICROPY_FLOAT_CONST(0.0)) {
+            buffer[offset++] = '+';
+        } else {
+            buffer[offset++] = '-';
+        }
+        offset += mp_format_float(MICROPY_FLOAT_C_FUN(fabs)(imag), &buffer[offset], ULAB_IO_BUFFER_SIZE - offset, 'f', precision, '\0');
+        buffer[offset++] = 'j';
+    } else
+    #endif
+    {
+        // mp_format_float returns the length without the closing '\0', so there is nothing to wind back
+        offset = mp_format_float(func(array), buffer, ULAB_IO_BUFFER_SIZE, 'f', precision, '\0');
+    }
+
+    while(*delimiter != '\0') {
+        buffer[offset++] = *delimiter++;
+    }
+
+    return offset;
+}
+
+static mp_obj_t io_savetxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // savetxt(file, ndarray, delimiter=, header=, footer=, comments=): writes a 1D or 2D array as text,
+    // one row per line; returns None, raises TypeError on wrong input, ValueError for more than 2 dimensions
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_delimiter, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_header, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_footer, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_comments, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!mp_obj_is_str(args[0].u_obj) || !mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong input type"));
+    }
+
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[1].u_obj);
+
+    #if ULAB_MAX_DIMS > 2
+    if(ndarray->ndim > 2) {
+        mp_raise_ValueError(MP_ERROR_TEXT("array has too many dimensions"));
+    }
+    #endif
+
+    mp_obj_t open_args[2] = {
+        args[0].u_obj,
+        MP_OBJ_NEW_QSTR(MP_QSTR_w)
+    };
+
+    mp_obj_t stream = mp_builtin_open_obj.fun.kw(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *stream_p = mp_get_stream(stream);
+
+    char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);
+    int error;
+
+    if(mp_obj_is_str(args[3].u_obj)) {
+        size_t _len;
+        if(mp_obj_is_str(args[5].u_obj)) {
+            const char *comments = mp_obj_str_get_data(args[5].u_obj, &_len);
+            stream_p->write(stream, comments, _len, &error);
+        } else {
+            stream_p->write(stream, "# ", 2, &error);
+        }
+        const char *header = mp_obj_str_get_data(args[3].u_obj, &_len);
+        stream_p->write(stream, header, _len, &error);
+        stream_p->write(stream, "\n", 1, &error);
+    }
+
+    uint8_t *array = (uint8_t *)ndarray->array;
+    mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+    // the delimiter is either a string literal, or the data of the user-supplied string,
+    // hence, no scratch memory has to be allocated for it; 1D arrays get one value per line
+    const char *delimiter = " ";
+    if(ndarray->ndim == 1) {
+        delimiter = "\n";
+    } else if(args[2].u_obj != mp_const_none) {
+        size_t delimiter_len;
+        delimiter = mp_obj_str_get_data(args[2].u_obj, &delimiter_len);
+    }
+
+    #if ULAB_MAX_DIMS > 1
+    size_t k = 0;
+    do {
+    #endif
+        size_t l = 0;
+        do {
+            int8_t chars = io_format_float(ndarray, func, array, buffer, l == ndarray->shape[ULAB_MAX_DIMS - 1] - 1 ? "\n" : delimiter);
+            if(chars > 0) {
+                stream_p->write(stream, buffer, chars, &error);
+            }
+            array += ndarray->strides[ULAB_MAX_DIMS - 1];
+            l++;
+        } while(l < ndarray->shape[ULAB_MAX_DIMS - 1]);
+    #if ULAB_MAX_DIMS > 1
+        array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+        array += ndarray->strides[ULAB_MAX_DIMS - 2];
+        k++;
+    } while(k < ndarray->shape[ULAB_MAX_DIMS - 2]);
+    #endif
+
+    if(mp_obj_is_str(args[4].u_obj)) {
+        size_t _len;
+        if(mp_obj_is_str(args[5].u_obj)) {
+            const char *comments = mp_obj_str_get_data(args[5].u_obj, &_len);
+            stream_p->write(stream, comments, _len, &error);
+        } else {
+            stream_p->write(stream, "# ", 2, &error);
+        }
+        const char *footer = mp_obj_str_get_data(args[4].u_obj, &_len);
+        stream_p->write(stream, footer, _len, &error);
+        stream_p->write(stream, "\n", 1, &error);
+    }
+
+    stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+    m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
+
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(io_savetxt_obj, 2, io_savetxt);
+#endif /* ULAB_NUMPY_HAS_SAVETXT */
diff --git a/tulip/shared/ulab/code/numpy/io/io.h b/tulip/shared/ulab/code/numpy/io/io.h
new file mode 100644
index 000000000..33f1b687b
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/io/io.h
@@ -0,0 +1,19 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Zoltán Vörös
+*/
+
+#ifndef _ULAB_IO_
+#define _ULAB_IO_
+
+MP_DECLARE_CONST_FUN_OBJ_1(io_load_obj); // defined only, if ULAB_NUMPY_HAS_LOAD is set
+MP_DECLARE_CONST_FUN_OBJ_KW(io_loadtxt_obj); // defined only, if ULAB_NUMPY_HAS_LOADTXT is set
+MP_DECLARE_CONST_FUN_OBJ_2(io_save_obj); // defined only, if ULAB_NUMPY_HAS_SAVE is set
+MP_DECLARE_CONST_FUN_OBJ_KW(io_savetxt_obj); // defined only, if ULAB_NUMPY_HAS_SAVETXT is set
+
+#endif /* _ULAB_IO_ */
diff --git a/tulip/shared/ulab/code/numpy/linalg/linalg.c b/tulip/shared/ulab/code/numpy/linalg/linalg.c
new file mode 100644
index 000000000..70b1d20b4
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/linalg/linalg.c
@@ -0,0 +1,542 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020 Roberto Colistete Jr.
+ *               2020 Taku Fukada
+ *
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../../ulab.h"
+#include "../../ulab_tools.h"
+#include "../carray/carray_tools.h"
+#include "linalg.h"
+
+#if ULAB_NUMPY_HAS_LINALG_MODULE
+//|
+//| import ulab.numpy
+//|
+//| """Linear algebra functions"""
+//|
+
+#if ULAB_MAX_DIMS > 1
+//| def cholesky(A: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|     """
+//|     :param ~ulab.numpy.ndarray A: a positive definite, symmetric square matrix
+//|     :return ~ulab.numpy.ndarray L: a square root matrix in the lower triangular form
+//|     :raises ValueError: If the input does not fulfill the necessary conditions
+//|
+//|     The returned matrix satisfies the equation m=LL*"""
+//|     ...
+//|
+
+static mp_obj_t linalg_cholesky(mp_obj_t oin) {
+    // Returns the lower triangular matrix L of the decomposition m = LL* of the square matrix oin;
+    // raises ValueError, if the input is not symmetric, or not positive definite.
+    ndarray_obj_t *source = tools_object_is_square(oin);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(source->dtype)
+    const size_t N = source->shape[ULAB_MAX_DIMS - 1];
+    ndarray_obj_t *L = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, N, N), NDARRAY_FLOAT);
+    mp_float_t *Larray = (mp_float_t *)L->array;
+    mp_float_t (*to_float)(void *) = ndarray_get_float_function(source->dtype);
+
+    // copy the entries of the input into the dense result as floats, one row at a time
+    uint8_t *row = (uint8_t *)source->array;
+    for(size_t i = 0; i < N; i++) {
+        uint8_t *item = row;
+        for(size_t j = 0; j < N; j++) {
+            Larray[i * N + j] = to_float(item);
+            item += source->strides[ULAB_MAX_DIMS - 1];
+        }
+        row += source->strides[ULAB_MAX_DIMS - 2];
+    }
+
+    // the matrix must be symmetric: entry (i, j) is compared to entry (j, i)
+    for(size_t i = 0; i < N; i++) {
+        for(size_t j = i + 1; j < N; j++) {
+            if(MICROPY_FLOAT_C_FUN(fabs)(Larray[i * N + j] - Larray[j * N + i]) > LINALG_EPSILON) {
+                mp_raise_ValueError(MP_ERROR_TEXT("input matrix is asymmetric"));
+            }
+        }
+    }
+
+    // this is actually not needed, but Cholesky in numpy returns the lower triangular matrix
+    for(size_t i = 0; i < N; i++) {
+        for(size_t j = i + 1; j < N; j++) {
+            Larray[i * N + j] = MICROPY_FLOAT_CONST(0.0);
+        }
+    }
+
+    for(size_t i = 0; i < N; i++) {
+        for(size_t j = 0; j <= i; j++) {
+            mp_float_t sum = Larray[i * N + j];
+            for(size_t k = 0; k < j; k++) {
+                sum -= Larray[i * N + k] * Larray[j * N + k];
+            }
+            if(i != j) {
+                Larray[i * N + j] = sum / Larray[j * N + j];
+                continue;
+            }
+            if(sum <= MICROPY_FLOAT_CONST(0.0)) {
+                mp_raise_ValueError(MP_ERROR_TEXT("matrix is not positive definite"));
+            }
+            Larray[i * N + i] = MICROPY_FLOAT_C_FUN(sqrt)(sum);
+        }
+    }
+    return MP_OBJ_FROM_PTR(L);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(linalg_cholesky_obj, linalg_cholesky);
+
+//| def det(m: ulab.numpy.ndarray) -> float:
+//|     """
+//|     :param: m, a square matrix
+//|     :return float: The determinant of the matrix
+//|
+//|     Computes the determinant of a square matrix"""
+//|     ...
+//|
+
+static mp_obj_t linalg_det(mp_obj_t oin) {
+    // Determinant of a square matrix: a float copy of the input is brought to diagonal form by row
+    // operations, and the product of the diagonal, times the sign of the row swaps, is returned.
+    ndarray_obj_t *source = tools_object_is_square(oin);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(source->dtype)
+    const size_t N = source->shape[ULAB_MAX_DIMS - 1];
+    mp_float_t *work = m_new(mp_float_t, N * N);
+
+    // fill the work space with the entries of the input, row by row
+    uint8_t *row = (uint8_t *)source->array;
+    for(size_t i = 0; i < N; i++) {
+        uint8_t *item = row;
+        for(size_t j = 0; j < N; j++) {
+            work[i * N + j] = ndarray_get_float_value(item, source->dtype);
+            item += source->strides[ULAB_MAX_DIMS - 1];
+        }
+        row += source->strides[ULAB_MAX_DIMS - 2];
+    }
+
+    mp_float_t det_sign = 1.0;
+    for(size_t m = 0; m < N - 1; m++) {
+        if(MICROPY_FLOAT_C_FUN(fabs)(work[m * (N + 1)]) < LINALG_EPSILON) {
+            // the pivot vanishes: find the first row below with a usable entry in column m
+            size_t other = m + 1;
+            while((other < N) && (MICROPY_FLOAT_C_FUN(fabs)(work[other * N + m]) < LINALG_EPSILON)) {
+                other++;
+            }
+            if(other == N) {
+                // no row can be swapped in, the determinant is returned as 0
+                m_del(mp_float_t, work, N * N);
+                return mp_obj_new_float(0.0);
+            }
+            // swapping two rows flips the sign of the determinant
+            for(size_t k = 0; k < N; k++) {
+                mp_float_t swapped = work[m * N + k];
+                work[m * N + k] = work[other * N + k];
+                work[other * N + k] = swapped;
+            }
+            det_sign = -det_sign;
+        }
+        // eliminate column m from all other rows
+        for(size_t n = 0; n < N; n++) {
+            if(n == m) {
+                continue;
+            }
+            mp_float_t factor = work[N * n + m] / work[m * (N + 1)];
+            for(size_t k = 0; k < N; k++) {
+                work[N * n + k] -= factor * work[N * m + k];
+            }
+        }
+    }
+    mp_float_t det = det_sign;
+    for(size_t m = 0; m < N; m++) {
+        det *= work[m * (N + 1)];
+    }
+    m_del(mp_float_t, work, N * N);
+    return mp_obj_new_float(det);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(linalg_det_obj, linalg_det);
+
+#endif
+
+#if ULAB_MAX_DIMS > 1
+//| def eig(m: ulab.numpy.ndarray) -> Tuple[ulab.numpy.ndarray, ulab.numpy.ndarray]:
+//|     """
+//|     :param m: a square matrix
+//|     :return tuple (eigenvalues, eigenvectors):
+//|
+//|     Computes the eigenvalues and eigenvectors of a square matrix"""
+//|     ...
+//|
+
+static mp_obj_t linalg_eig(mp_obj_t oin) {
+    // Returns the tuple (eigenvalues, eigenvectors) of a symmetric square matrix; the actual
+    // computation is carried out by linalg_jacobi_rotations on a float copy of the input.
+    // Raises ValueError, if the input is asymmetric, or if the iterations do not converge.
+    ndarray_obj_t *in = tools_object_is_square(oin);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(in->dtype)
+    const size_t S = in->shape[ULAB_MAX_DIMS - 1];
+    mp_float_t *array = m_new(mp_float_t, S * S);
+
+    // fill the work space with the entries of the input, row by row
+    uint8_t *row = (uint8_t *)in->array;
+    for(size_t i = 0; i < S; i++) {
+        uint8_t *item = row;
+        for(size_t j = 0; j < S; j++) {
+            array[i * S + j] = ndarray_get_float_value(item, in->dtype);
+            item += in->strides[ULAB_MAX_DIMS - 1];
+        }
+        row += in->strides[ULAB_MAX_DIMS - 2];
+    }
+
+    // make sure the matrix is symmetric: entry (m, n) is compared to entry (n, m)
+    // TODO: this must probably be scaled!
+    for(size_t m = 0; m < S; m++) {
+        for(size_t n = m + 1; n < S; n++) {
+            if(MICROPY_FLOAT_C_FUN(fabs)(array[m * S + n] - array[n * S + m]) > LINALG_EPSILON) {
+                mp_raise_ValueError(MP_ERROR_TEXT("input matrix is asymmetric"));
+            }
+        }
+    }
+
+    // if we got this far, then the matrix will be symmetric
+    ndarray_obj_t *eigenvectors = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, S, S), NDARRAY_FLOAT);
+    size_t iterations = linalg_jacobi_rotations(array, (mp_float_t *)eigenvectors->array, S);
+    if(iterations == 0) {
+        // the computation did not converge; numpy raises LinAlgError
+        m_del(mp_float_t, array, in->len);
+        mp_raise_ValueError(MP_ERROR_TEXT("iterations did not converge"));
+    }
+
+    // the eigenvalues are read off the diagonal of the work space
+    ndarray_obj_t *eigenvalues = ndarray_new_linear_array(S, NDARRAY_FLOAT);
+    mp_float_t *eigvalues = (mp_float_t *)eigenvalues->array;
+    for(size_t i = 0; i < S; i++) {
+        eigvalues[i] = array[i * (S + 1)];
+    }
+    m_del(mp_float_t, array, in->len);
+
+    mp_obj_t items[2] = { MP_OBJ_FROM_PTR(eigenvalues), MP_OBJ_FROM_PTR(eigenvectors) };
+    return mp_obj_new_tuple(2, items);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(linalg_eig_obj, linalg_eig);
+
+//| def inv(m: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|     """
+//|     :param ~ulab.numpy.ndarray m: a square matrix
+//|     :return: The inverse of the matrix, if it exists
+//|     :raises ValueError: if the matrix is not invertible
+//|
+//|     Computes the inverse of a square matrix"""
+//|     ...
+//|
+static mp_obj_t linalg_inv(mp_obj_t o_in) {
+    // Returns the inverse of a square matrix as a dense float array: the entries are copied,
+    // and the copy is handed over to linalg_invert_matrix. Raises ValueError, if that fails.
+    ndarray_obj_t *source = tools_object_is_square(o_in);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(source->dtype)
+    const size_t N = source->shape[ULAB_MAX_DIMS - 1];
+    ndarray_obj_t *inverted = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, N, N), NDARRAY_FLOAT);
+    mp_float_t *iarray = (mp_float_t *)inverted->array;
+    mp_float_t (*to_float)(void *) = ndarray_get_float_function(source->dtype);
+
+    uint8_t *row = (uint8_t *)source->array;
+    for(size_t i = 0; i < N; i++) {
+        uint8_t *item = row;
+        for(size_t j = 0; j < N; j++) {
+            iarray[i * N + j] = to_float(item);
+            item += source->strides[ULAB_MAX_DIMS - 1];
+        }
+        row += source->strides[ULAB_MAX_DIMS - 2];
+    }
+
+    // the inversion happens in place, on the float copy held by the result
+    if(!linalg_invert_matrix(iarray, N)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("input matrix is singular"));
+    }
+    return MP_OBJ_FROM_PTR(inverted);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(linalg_inv_obj, linalg_inv);
+#endif
+
+//| def norm(x: ulab.numpy.ndarray) -> float:
+//|    """
+//|    :param ~ulab.numpy.ndarray x: a vector or a matrix
+//|
+//|    Computes the 2-norm of a vector or a matrix, i.e., ``sqrt(sum(x*x))``, however, without the RAM overhead."""
+//|    ...
+//|
+
+// Python-facing norm(x, axis=None): the square root of the sum of squares of x.
+//
+// x may be a tuple, list or range (walked through the iterator protocol and
+// always reduced to a single float), or an ndarray (reduced with the shape and
+// strides returned by tools_reduce_axes). Any other type falls through to the
+// final `return mp_const_none`.
+static mp_obj_t linalg_norm(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE} } ,
+        { MP_QSTR_axis, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t x = args[0].u_obj;
+    mp_obj_t axis = args[1].u_obj;
+
+    // dot holds the running mean of value * value, count is one more than the
+    // number of elements folded into dot so far
+    mp_float_t dot = 0.0, value;
+    size_t count = 1;
+
+    if(mp_obj_is_type(x, &mp_type_tuple) || mp_obj_is_type(x, &mp_type_list) || mp_obj_is_type(x, &mp_type_range)) {
+        // the axis argument is not consulted in this branch
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t item, iterable = mp_getiter(x, &iter_buf);
+        while((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+            value = mp_obj_get_float(item);
+            // we could simply take the sum of value ** 2,
+            // but this method is numerically stable
+            dot = dot + (value * value - dot) / count++;
+        }
+        // mean of squares times the number of elements is the sum of squares
+        return mp_obj_new_float(MICROPY_FLOAT_C_FUN(sqrt)(dot * (count - 1)));
+    } else if(mp_obj_is_type(x, &ulab_ndarray_type)) {
+        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(x);
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype)
+        uint8_t *array = (uint8_t *)ndarray->array;
+        // always get a float, so that we don't have to resolve the dtype later
+        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+        // _shape_strides.shape[0] / strides[0] describe the innermost loop below
+        shape_strides _shape_strides = tools_reduce_axes(ndarray, axis);
+        ndarray_obj_t *results = ndarray_new_dense_ndarray(_shape_strides.ndim, _shape_strides.shape, NDARRAY_FLOAT);
+        mp_float_t *rarray = (mp_float_t *)results->array;
+
+        // the loop nest is assembled by the preprocessor: one do/while level
+        // per dimension that ULAB_MAX_DIMS allows
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    // with an explicit axis every pass of the innermost loop is an
+                    // independent reduction; with axis=None the accumulator keeps
+                    // running across all passes
+                    if(axis != mp_const_none) {
+                        count = 1;
+                        dot = 0.0;
+                    }
+                    // NOTE(review): a do/while body runs at least once, so an ndarray
+                    // with no elements would still be read here -- confirm that empty
+                    // arrays cannot reach this point
+                    do {
+                        value = func(array);
+                        dot = dot + (value * value - dot) / count++;
+                        array += _shape_strides.strides[0];
+                        l++;
+                    } while(l < _shape_strides.shape[0]);
+                    *rarray = MICROPY_FLOAT_C_FUN(sqrt)(dot * (count - 1));
+                #if ULAB_MAX_DIMS > 1
+                    rarray += _shape_strides.increment;
+                    // rewind the innermost axis, then step along the next one
+                    array -= _shape_strides.strides[0] * _shape_strides.shape[0];
+                    array += _shape_strides.strides[ULAB_MAX_DIMS - 1];
+                    k++;
+                } while(k < _shape_strides.shape[ULAB_MAX_DIMS - 1]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                array -= _shape_strides.strides[ULAB_MAX_DIMS - 1] * _shape_strides.shape[ULAB_MAX_DIMS - 1];
+                array += _shape_strides.strides[ULAB_MAX_DIMS - 2];
+                j++;
+            } while(j < _shape_strides.shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            array -= _shape_strides.strides[ULAB_MAX_DIMS - 2] * _shape_strides.shape[ULAB_MAX_DIMS - 2];
+            array += _shape_strides.strides[ULAB_MAX_DIMS - 3];
+            i++;
+        } while(i < _shape_strides.shape[ULAB_MAX_DIMS - 3]);
+        #endif
+        // a zero-dimensional result is handed back as a plain Python float
+        // (presumably _shape_strides.increment is 0 in that case, so rarray still
+        // points at the only element -- TODO confirm against tools_reduce_axes)
+        if(results->ndim == 0) {
+            return mp_obj_new_float(*rarray);
+        }
+        return MP_OBJ_FROM_PTR(results);
+    }
+    return mp_const_none; // we should never reach this point
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(linalg_norm_obj, 1, linalg_norm);
+
+#if ULAB_MAX_DIMS > 1
+//| def qr(m: ulab.numpy.ndarray, mode: str = "reduced") -> Tuple[ulab.numpy.ndarray, ulab.numpy.ndarray]:
+//|     """
+//|     :param m: a matrix
+//|     :param mode: either ``"reduced"`` (the default) or ``"complete"``
+//|     :return tuple (Q, R):
+//|
+//|     Factor the matrix m as QR, where Q is orthonormal and R is upper-triangular.
+//|     With ``mode="complete"`` Q is (rows x rows) and R has the shape of m; with
+//|     ``mode="reduced"`` Q is (rows x min(rows, columns)) and R is
+//|     (min(rows, columns) x columns).
+//|     """
+//|     ...
+//|
+
+// QR decomposition by Givens rotations.
+//
+// Raises TypeError if the first argument is not an ndarray, and ValueError if
+// it is not two-dimensional or if mode is anything other than the exact strings
+// "complete" or "reduced".
+static mp_obj_t linalg_qr(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_mode, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_QSTR(MP_QSTR_reduced) } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("operation is defined for ndarrays only"));
+    }
+    ndarray_obj_t *source = MP_OBJ_TO_PTR(args[0].u_obj);
+    if(source->ndim != 2) {
+        mp_raise_ValueError(MP_ERROR_TEXT("operation is defined for 2D arrays only"));
+    }
+
+    // validate the mode before doing any work; the length is compared as well,
+    // so that memcmp never reads past the end of a short string, and strings
+    // that merely start with a valid mode name are rejected
+    bool complete = false, reduced = false;
+    if(mp_obj_is_str(args[1].u_obj)) {
+        GET_STR_DATA_LEN(args[1].u_obj, mode, len);
+        complete = (len == 8) && (memcmp(mode, "complete", 8) == 0);
+        reduced = (len == 7) && (memcmp(mode, "reduced", 7) == 0);
+    }
+    if(!complete && !reduced) {
+        mp_raise_ValueError(MP_ERROR_TEXT("mode must be complete, or reduced"));
+    }
+
+    size_t m = source->shape[ULAB_MAX_DIMS - 2]; // rows
+    size_t n = source->shape[ULAB_MAX_DIMS - 1]; // columns
+
+    ndarray_obj_t *Q = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, m, m), NDARRAY_FLOAT);
+    ndarray_obj_t *R = ndarray_new_dense_ndarray(2, source->shape, NDARRAY_FLOAT);
+
+    mp_float_t *qarray = (mp_float_t *)Q->array;
+    mp_float_t *rarray = (mp_float_t *)R->array;
+
+    // simply copy the entries of source to a float array
+    mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype);
+    uint8_t *sarray = (uint8_t *)source->array;
+
+    for(size_t i = 0; i < m; i++) {
+        for(size_t j = 0; j < n; j++) {
+            *rarray++ = func(sarray);
+            sarray += source->strides[ULAB_MAX_DIMS - 1];
+        }
+        sarray -= n * source->strides[ULAB_MAX_DIMS - 1];
+        sarray += source->strides[ULAB_MAX_DIMS - 2];
+    }
+    rarray -= m * n;
+
+    // start with the unit matrix
+    for(size_t i = 0; i < m; i++) {
+        qarray[i * (m + 1)] = 1.0;
+    }
+
+    // columns; the (j + 1 < m) test skips the columns that have no entry below
+    // the diagonal, and keeps `m - 1` from wrapping around when m is 0
+    for(size_t j = 0; (j < n) && (j + 1 < m); j++) {
+        for(size_t i = m - 1; i > j; i--) { // rows
+            mp_float_t c, s;
+            // Givens matrix: note that numpy uses a strange form of the rotation
+            // [[c  s],
+            //  [s -c]]
+            if(MICROPY_FLOAT_C_FUN(fabs)(rarray[i * n + j]) < LINALG_EPSILON) { // r[i, j]
+                c = (rarray[(i - 1) * n + j] >= MICROPY_FLOAT_CONST(0.0)) ? MICROPY_FLOAT_CONST(1.0) : MICROPY_FLOAT_CONST(-1.0); // r[i-1, j]
+                s = 0.0;
+            } else if(MICROPY_FLOAT_C_FUN(fabs)(rarray[(i - 1) * n + j]) < LINALG_EPSILON) { // r[i-1, j]
+                c = 0.0;
+                s = (rarray[i * n + j] >= MICROPY_FLOAT_CONST(0.0)) ? MICROPY_FLOAT_CONST(-1.0) : MICROPY_FLOAT_CONST(1.0); // r[i, j]
+            } else {
+                // divide the smaller entry by the larger one, so that |t| <= 1
+                mp_float_t t, u;
+                if(MICROPY_FLOAT_C_FUN(fabs)(rarray[(i - 1) * n + j]) > MICROPY_FLOAT_C_FUN(fabs)(rarray[i * n + j])) { // r[i-1, j], r[i, j]
+                    t = rarray[i * n + j] / rarray[(i - 1) * n + j]; // r[i, j]/r[i-1, j]
+                    u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t);
+                    c = MICROPY_FLOAT_CONST(-1.0) / u;
+                    s = c * t;
+                } else {
+                    t = rarray[(i - 1) * n + j] / rarray[i * n + j]; // r[i-1, j]/r[i, j]
+                    u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t);
+                    s = MICROPY_FLOAT_CONST(-1.0) / u;
+                    c = s * t;
+                }
+            }
+
+            mp_float_t r1, r2;
+            // update R: multiply with the rotation matrix from the left
+            for(size_t k = 0; k < n; k++) {
+                r1 = rarray[(i - 1) * n + k]; // r[i-1, k]
+                r2 = rarray[i * n + k]; // r[i, k]
+                rarray[(i - 1) * n + k] = c * r1 + s * r2; // r[i-1, k]
+                rarray[i * n + k] = s * r1 - c * r2; // r[i, k]
+            }
+
+            // update Q: multiply with the transpose of the rotation matrix from the right
+            for(size_t k = 0; k < m; k++) {
+                r1 = qarray[k * m + (i - 1)];
+                r2 = qarray[k * m + i];
+                qarray[k * m + (i - 1)] = c * r1 + s * r2;
+                qarray[k * m + i] = s * r1 - c * r2;
+            }
+        }
+    }
+
+    mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(mp_obj_new_tuple(2, NULL));
+    if(complete || (m <= n)) {
+        // when there are no more rows than columns, min(m, n) == m, so the
+        // reduced factors have the same shapes as the complete ones
+        tuple->items[0] = MP_OBJ_FROM_PTR(Q);
+        tuple->items[1] = MP_OBJ_FROM_PTR(R);
+    } else {
+        // reduced mode with m > n: keep the first n columns of Q, and the first n rows of R
+        ndarray_obj_t *q = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, m, n), NDARRAY_FLOAT);
+        ndarray_obj_t *r = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, n, n), NDARRAY_FLOAT);
+        mp_float_t *qa = (mp_float_t *)q->array;
+        mp_float_t *ra = (mp_float_t *)r->array;
+        for(size_t i = 0; i < m; i++) {
+            memcpy(qa, qarray, n * q->itemsize);
+            qa += n;
+            qarray += m;
+        }
+        for(size_t i = 0; i < n; i++) {
+            memcpy(ra, rarray, n * r->itemsize);
+            ra += n;
+            rarray += n;
+        }
+        tuple->items[0] = MP_OBJ_FROM_PTR(q);
+        tuple->items[1] = MP_OBJ_FROM_PTR(r);
+    }
+    return MP_OBJ_FROM_PTR(tuple);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(linalg_qr_obj, 1, linalg_qr);
+#endif
+
+// Name -> object table of the linalg module. Every function entry is compiled
+// in only if its ULAB_LINALG_HAS_* switch is set; all entries except norm
+// additionally require ULAB_MAX_DIMS > 1.
+static const mp_rom_map_elem_t ulab_linalg_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_linalg) },
+    #if ULAB_MAX_DIMS > 1
+        #if ULAB_LINALG_HAS_CHOLESKY
+        { MP_ROM_QSTR(MP_QSTR_cholesky), MP_ROM_PTR(&linalg_cholesky_obj) },
+        #endif
+        #if ULAB_LINALG_HAS_DET
+        { MP_ROM_QSTR(MP_QSTR_det), MP_ROM_PTR(&linalg_det_obj) },
+        #endif
+        #if ULAB_LINALG_HAS_EIG
+        { MP_ROM_QSTR(MP_QSTR_eig), MP_ROM_PTR(&linalg_eig_obj) },
+        #endif
+        #if ULAB_LINALG_HAS_INV
+        { MP_ROM_QSTR(MP_QSTR_inv), MP_ROM_PTR(&linalg_inv_obj) },
+        #endif
+        #if ULAB_LINALG_HAS_QR
+        { MP_ROM_QSTR(MP_QSTR_qr), MP_ROM_PTR(&linalg_qr_obj) },
+        #endif
+    #endif
+    #if ULAB_LINALG_HAS_NORM
+    { MP_ROM_QSTR(MP_QSTR_norm), MP_ROM_PTR(&linalg_norm_obj) },
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_linalg_globals, ulab_linalg_globals_table);
+
+// The module object itself; its globals dictionary is the table above.
+const mp_obj_module_t ulab_linalg_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_linalg_globals,
+};
+// The module is registered under its own qstr only in CIRCUITPY_ULAB builds.
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_numpy_dot_linalg, ulab_linalg_module);
+#endif
+#endif
diff --git a/tulip/shared/ulab/code/numpy/linalg/linalg.h b/tulip/shared/ulab/code/numpy/linalg/linalg.h
new file mode 100644
index 000000000..35fc4035f
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/linalg/linalg.h
@@ -0,0 +1,27 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+*/
+
+#ifndef _LINALG_
+#define _LINALG_
+
+#include "../../ulab.h"
+#include "../../ndarray.h"
+#include "linalg_tools.h"
+
+// the linalg module object
+extern const mp_obj_module_t ulab_linalg_module;
+
+// function objects taking exactly one positional argument
+MP_DECLARE_CONST_FUN_OBJ_1(linalg_cholesky_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(linalg_det_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(linalg_eig_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(linalg_inv_obj);
+// function objects that also accept keyword arguments (axis for norm, mode for qr)
+MP_DECLARE_CONST_FUN_OBJ_KW(linalg_norm_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(linalg_qr_obj);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/linalg/linalg_tools.c b/tulip/shared/ulab/code/numpy/linalg/linalg_tools.c
new file mode 100644
index 000000000..7ae97d211
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/linalg/linalg_tools.c
@@ -0,0 +1,170 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2010 Zoltán Vörös
+*/
+
+#include <math.h>
+#include <string.h>
+#include "py/runtime.h"
+
+#include "linalg_tools.h"
+
+/*
+ * The following function inverts a matrix, whose entries are given in the input array
+ * The function has no dependencies beyond micropython itself (for the definition of mp_float_t),
+ * and can be used independent of ulab.
+ */
+
+bool linalg_invert_matrix(mp_float_t *data, size_t N) {
+    // Inverts the N x N matrix stored row by row in `data`, in place.
+    // Returns true if the inversion was successful, and false if no usable
+    // pivot could be found, i.e., if the matrix is singular.
+
+    // `unit` starts out as the unit matrix and receives every row operation
+    // that is applied to `data`; in the end, it holds the inverse
+    const size_t total = N * N;
+    mp_float_t *unit = m_new0(mp_float_t, total);
+    for(size_t d = 0; d < N; d++) {
+        unit[d * N + d] = 1.0;
+    }
+
+    for(size_t p = 0; p < N; p++) {
+        mp_float_t *data_p = data + p * N;
+        mp_float_t *unit_p = unit + p * N;
+
+        if(MICROPY_FLOAT_C_FUN(fabs)(data_p[p]) < LINALG_EPSILON) {
+            // the diagonal entry is too small to divide by: find the first row
+            // below it that has a usable entry in column p
+            size_t below = p + 1;
+            while((below < N) && (MICROPY_FLOAT_C_FUN(fabs)(data[below * N + p]) < LINALG_EPSILON)) {
+                below++;
+            }
+            if(below >= N) {
+                // nothing to swap with
+                m_del(mp_float_t, unit, total);
+                return false;
+            }
+            mp_float_t *data_b = data + below * N;
+            mp_float_t *unit_b = unit + below * N;
+            for(size_t c = 0; c < N; c++) {
+                mp_float_t tmp = data_p[c];
+                data_p[c] = data_b[c];
+                data_b[c] = tmp;
+                tmp = unit_p[c];
+                unit_p[c] = unit_b[c];
+                unit_b[c] = tmp;
+            }
+        }
+
+        // clear column p in every row other than the pivot row
+        for(size_t r = 0; r < N; r++) {
+            if(r == p) {
+                continue;
+            }
+            mp_float_t *data_r = data + r * N;
+            mp_float_t *unit_r = unit + r * N;
+            const mp_float_t factor = data_r[p] / data_p[p];
+            for(size_t c = 0; c < N; c++) {
+                data_r[c] -= factor * data_p[c];
+                unit_r[c] -= factor * unit_p[c];
+            }
+        }
+    }
+
+    // `data` is diagonal by now: scale each row by its diagonal entry
+    for(size_t r = 0; r < N; r++) {
+        const mp_float_t diagonal = data[r * N + r];
+        for(size_t c = 0; c < N; c++) {
+            data[r * N + c] /= diagonal;
+            unit[r * N + c] /= diagonal;
+        }
+    }
+
+    memcpy(data, unit, sizeof(mp_float_t) * total);
+    m_del(mp_float_t, unit, total);
+    return true;
+}
+
+/*
+ * The following function calculates the eigenvalues and eigenvectors of a symmetric
+ * real matrix, whose entries are given in the input array.
+ * The function has no dependencies beyond micropython itself (for the definition of mp_float_t),
+ * and can be used independent of ulab.
+ */
+
+// array:      S x S matrix stored row by row; it is modified in place
+// eigvectors: S x S output stored row by row; only its diagonal is written
+//             before the rotations start, so the caller must pass it zeroed
+// S:          the number of rows (and columns)
+//
+// Returns what is left of the iteration budget (JACOBI_MAX * S * S). The loop
+// also ends when the budget reaches 0, so a return value of 0 means that the
+// budget was used up (presumably callers treat that as "did not converge" --
+// confirm against the caller).
+//
+// NOTE(review): with S == 0 both `S-1` and `iterations--` wrap around; confirm
+// that callers never pass an empty matrix.
+size_t linalg_jacobi_rotations(mp_float_t *array, mp_float_t *eigvectors, size_t S) {
+    // eigvectors should be a 0-array; start out with the unit matrix
+    for(size_t m=0; m < S; m++) {
+        eigvectors[m * (S+1)] = 1.0;
+    }
+    mp_float_t largest, w, t, c, s, tau, aMk, aNk, vm, vn;
+    size_t M, N;
+    size_t iterations = JACOBI_MAX * S * S;
+    do {
+        iterations--;
+        // find the pivot here
+        // i.e., the largest (in absolute value) entry above the diagonal that
+        // exceeds LINALG_EPSILON
+        M = 0;
+        N = 0;
+        largest = 0.0;
+        for(size_t m=0; m < S-1; m++) { // -1: no need to inspect last row
+            for(size_t n=m+1; n < S; n++) {
+                w = MICROPY_FLOAT_C_FUN(fabs)(array[m * S + n]);
+                if((largest < w) && (LINALG_EPSILON < w)) {
+                    M = m;
+                    N = n;
+                    largest = w;
+                }
+            }
+        }
+        // any pivot found above has N > M >= 0, so M + N == 0 can only mean that
+        // none was found
+        if(M + N == 0) { // all entries are smaller than epsilon, there is not much we can do...
+            break;
+        }
+        // at this point, we have the pivot, and it is the entry (M, N)
+        // now we have to find the rotation angle
+        w = (array[N * S + N] - array[M * S + M]) / (MICROPY_FLOAT_CONST(2.0)*array[M * S + N]);
+        // The following if/else chooses the smaller absolute value for the tangent
+        // of the rotation angle. Going with the smaller should be numerically stabler.
+        if(w > 0) {
+            t = MICROPY_FLOAT_C_FUN(sqrt)(w*w + MICROPY_FLOAT_CONST(1.0)) - w;
+        } else {
+            t = MICROPY_FLOAT_CONST(-1.0)*(MICROPY_FLOAT_C_FUN(sqrt)(w*w + MICROPY_FLOAT_CONST(1.0)) + w);
+        }
+        s = t / MICROPY_FLOAT_C_FUN(sqrt)(t*t + MICROPY_FLOAT_CONST(1.0)); // the sine of the rotation angle
+        c = MICROPY_FLOAT_CONST(1.0) / MICROPY_FLOAT_C_FUN(sqrt)(t*t + MICROPY_FLOAT_CONST(1.0)); // the cosine of the rotation angle
+        tau = (MICROPY_FLOAT_CONST(1.0)-c)/s; // this is equal to the tangent of the half of the rotation angle
+
+        // at this point, we have the rotation angles, so we can transform the matrix
+        // first the two diagonal elements
+        // a(M, M) = a(M, M) - t*a(M, N)
+        array[M * S + M] = array[M * S + M] - t * array[M * S + N];
+        // a(N, N) = a(N, N) + t*a(M, N)
+        array[N * S + N] = array[N * S + N] + t * array[M * S + N];
+        // after the rotation, the a(M, N), and a(N, M) entries should become zero
+        array[M * S + N] = array[N * S + M] = MICROPY_FLOAT_CONST(0.0);
+        // then all other elements in the column
+        for(size_t k=0; k < S; k++) {
+            if((k == M) || (k == N)) {
+                continue;
+            }
+            // both updates below must use the values from before the rotation
+            aMk = array[M * S + k];
+            aNk = array[N * S + k];
+            // a(M, k) = a(M, k) - s*(a(N, k) + tau*a(M, k))
+            array[M * S + k] -= s * (aNk + tau * aMk);
+            // a(N, k) = a(N, k) + s*(a(M, k) - tau*a(N, k))
+            array[N * S + k] += s * (aMk - tau * aNk);
+            // a(k, M) = a(M, k)
+            array[k * S + M] = array[M * S + k];
+            // a(k, N) = a(N, k)
+            array[k * S + N] = array[N * S + k];
+        }
+        // now we have to update the eigenvectors
+        // the rotation matrix, R, multiplies from the right
+        // R is the unit matrix, except for the
+        // R(M,M) = R(N, N) = c
+        // R(N, M) = s
+        // (M, N) = -s
+        // entries. This means that only the Mth, and Nth columns will change
+        // NOTE(review): the loop below computes column M as c*vm - s*vn, and
+        // column N as s*vm + c*vn, which looks like the opposite sign convention
+        // for the two off-diagonal entries listed above -- confirm which is intended
+        for(size_t m=0; m < S; m++) {
+            vm = eigvectors[m * S + M];
+            vn = eigvectors[m * S + N];
+            // the new value of eigvectors(m, M)
+            eigvectors[m * S + M] = c * vm - s * vn;
+            // the new value of eigvectors(m, N)
+            eigvectors[m * S + N] = s * vm + c * vn;
+        }
+    } while(iterations > 0);
+
+    return iterations;
+}
diff --git a/tulip/shared/ulab/code/numpy/linalg/linalg_tools.h b/tulip/shared/ulab/code/numpy/linalg/linalg_tools.h
new file mode 100644
index 000000000..942da001c
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/linalg/linalg_tools.h
@@ -0,0 +1,28 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+*/
+
+#ifndef _TOOLS_TOOLS_
+#define _TOOLS_TOOLS_
+
+// Threshold below which the linalg routines treat an absolute value as zero.
+// It can be overridden by defining LINALG_EPSILON before this header is
+// included; the defaults depend on the float implementation of the build.
+// NOTE(review): no default is defined if MICROPY_FLOAT_IMPL is neither FLOAT
+// nor DOUBLE.
+#ifndef LINALG_EPSILON
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#define LINALG_EPSILON      MICROPY_FLOAT_CONST(1.2e-7)
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#define LINALG_EPSILON      MICROPY_FLOAT_CONST(2.3e-16)
+#endif
+#endif /* LINALG_EPSILON */
+
+// linalg_jacobi_rotations runs at most JACOBI_MAX * S * S iterations on an S x S matrix
+#define JACOBI_MAX     20
+
+// in-place inversion of an N x N matrix; returns false if the matrix is singular
+bool linalg_invert_matrix(mp_float_t *, size_t );
+// Jacobi rotations on an S x S matrix (matrix, eigenvectors, S); returns the unused iteration budget
+size_t linalg_jacobi_rotations(mp_float_t *, mp_float_t *, size_t );
+
+#endif /* _TOOLS_TOOLS_ */
+
diff --git a/tulip/shared/ulab/code/numpy/ndarray/ndarray_iter.c b/tulip/shared/ulab/code/numpy/ndarray/ndarray_iter.c
new file mode 100644
index 000000000..423e4a059
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/ndarray/ndarray_iter.c
@@ -0,0 +1,66 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021 Zoltán Vörös
+ *
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+
+#include "ndarray_iter.h"
+
+#ifdef NDARRAY_HAS_FLATITER
+// Allocates a flat iterator object that refers to self_in, and is positioned
+// at the first element.
+mp_obj_t ndarray_flatiter_make_new(mp_obj_t self_in) {
+    ndarray_flatiter_t *it = m_new_obj(ndarray_flatiter_t);
+    it->cur = 0;
+    it->ndarray = self_in;
+    it->iternext = ndarray_flatiter_next;
+    it->base.type = &ndarray_flatiter_type;
+    return MP_OBJ_FROM_PTR(it);
+}
+
+// Returns the next element of the array in row-major (C) order, or
+// MP_OBJ_STOP_ITERATION, once all ndarray->len elements have been produced.
+mp_obj_t ndarray_flatiter_next(mp_obj_t self_in) {
+    ndarray_flatiter_t *self = MP_OBJ_TO_PTR(self_in);
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(self->ndarray);
+    uint8_t *array = (uint8_t *)ndarray->array;
+
+    if(self->cur < ndarray->len) {
+        // Unravel the flat position into one index per axis, starting with the
+        // last (fastest running) axis, and add up the byte offsets. The index
+        // along an axis is the remainder modulo the length of that axis, and
+        // every one of the ndim axes has to contribute, otherwise views with
+        // non-dense strides, and arrays of more than two dimensions are
+        // addressed incorrectly.
+        size_t remainder = self->cur;
+        for(uint8_t d = 0; d < ndarray->ndim; d++) {
+            uint8_t axis = ULAB_MAX_DIMS - 1 - d;
+            // cur < len implies that no axis has zero length here
+            size_t index = remainder % ndarray->shape[axis];
+            remainder /= ndarray->shape[axis];
+            // strides are signed: do the multiplication in a signed type
+            array += (mp_int_t)index * ndarray->strides[axis];
+        }
+        self->cur++;
+        return ndarray_get_item(ndarray, array);
+    }
+    return MP_OBJ_STOP_ITERATION;
+}
+
+// Sets up a polymorphic iterator in the caller-supplied iter_buf. The new
+// iterator walks the same ndarray as flatiter_in, starting from the first
+// element.
+mp_obj_t ndarray_new_flatiterator(mp_obj_t flatiter_in, mp_obj_iter_buf_t *iter_buf) {
+    // the iterator state has to fit into the buffer that we were handed
+    assert(sizeof(ndarray_flatiter_t) <= sizeof(mp_obj_iter_buf_t));
+    ndarray_flatiter_t *source = MP_OBJ_TO_PTR(flatiter_in);
+    ndarray_flatiter_t *target = (ndarray_flatiter_t *)iter_buf;
+    target->base.type = &mp_type_polymorph_iter;
+    target->iternext = ndarray_flatiter_next;
+    target->ndarray = source->ndarray;
+    target->cur = 0;
+    return MP_OBJ_FROM_PTR(target);
+}
+
+// Iterator getter for flat iterator objects: forwards to ndarray_new_flatiterator.
+mp_obj_t ndarray_get_flatiterator(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) {
+    mp_obj_t iterator = ndarray_new_flatiterator(o_in, iter_buf);
+    return iterator;
+}
+#endif /* NDARRAY_HAS_FLATITER */
diff --git a/tulip/shared/ulab/code/numpy/ndarray/ndarray_iter.h b/tulip/shared/ulab/code/numpy/ndarray/ndarray_iter.h
new file mode 100644
index 000000000..f740f4165
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/ndarray/ndarray_iter.h
@@ -0,0 +1,36 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020-2021 Zoltán Vörös
+*/
+
+#ifndef _NDARRAY_ITER_
+#define _NDARRAY_ITER_
+
+#include "py/runtime.h"
+#include "py/binary.h"
+#include "py/obj.h"
+#include "py/objarray.h"
+
+#include "../../ulab.h"
+#include "../../ndarray.h"
+
+// TODO: take simply mp_obj_ndarray_it_t from ndarray.c
+// State of an iterator that walks over the elements of an ndarray one by one.
+typedef struct _mp_obj_ndarray_flatiter_t {
+    mp_obj_base_t base;     // object header
+    mp_fun_1_t iternext;    // function that produces the next element
+    mp_obj_t ndarray;       // the array that is being iterated over
+    size_t cur;             // position of the next element to return, counted from 0
+} ndarray_flatiter_t;
+
+mp_obj_t ndarray_get_flatiterator(mp_obj_t , mp_obj_iter_buf_t *);
+mp_obj_t ndarray_flatiter_make_new(mp_obj_t );
+mp_obj_t ndarray_flatiter_next(mp_obj_t );
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/numerical.c b/tulip/shared/ulab/code/numpy/numerical.c
new file mode 100644
index 000000000..0961e3c0f
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/numerical.c
@@ -0,0 +1,1425 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/objint.h"
+#include "py/runtime.h"
+#include "py/builtin.h"
+#include "py/misc.h"
+
+#include "../ulab.h"
+#include "../ulab_tools.h"
+#include "./carray/carray_tools.h"
+#include "numerical.h"
+
+// Selects the operation in the helper functions that are shared by several of
+// the Python-facing functions of this file (e.g., numerical_all_any serves
+// both NUMERICAL_ALL and NUMERICAL_ANY).
+enum NUMERICAL_FUNCTION_TYPE {
+    NUMERICAL_ALL,
+    NUMERICAL_ANY,
+    NUMERICAL_ARGMAX,
+    NUMERICAL_ARGMIN,
+    NUMERICAL_MAX,
+    NUMERICAL_MEAN,
+    NUMERICAL_MIN,
+    NUMERICAL_STD,
+    NUMERICAL_SUM,
+};
+
+//| """Numerical and Statistical functions
+//|
+//| Most of these functions take an "axis" argument, which indicates whether to
+//| operate over the flattened array (None), or a particular axis (integer)."""
+//|
+//| from typing import Dict
+//|
+//| _ArrayLike = Union[ndarray, List[_float], Tuple[_float], range]
+//| _ScalarOrArrayLike = Union[int, _float, _ArrayLike]
+//| _ScalarOrNdArray = Union[int, _float, ndarray]
+//|
+//| _DType = int
+//| """`ulab.numpy.int8`, `ulab.numpy.uint8`, `ulab.numpy.int16`, `ulab.numpy.uint16`, `ulab.numpy.float` or `ulab.numpy.bool`"""
+//|
+//| from builtins import float as _float
+//| from builtins import bool as _bool
+//|
+//| int8: _DType
+//| """Type code for signed integers in the range -128 .. 127 inclusive, like the 'b' typecode of `array.array`"""
+//|
+//| int16: _DType
+//| """Type code for signed integers in the range -32768 .. 32767 inclusive, like the 'h' typecode of `array.array`"""
+//|
+//| float: _DType
+//| """Type code for floating point values, like the 'f' typecode of `array.array`"""
+//|
+//| uint8: _DType
+//| """Type code for unsigned integers in the range 0 .. 255 inclusive, like the 'B' typecode of `array.array`"""
+//|
+//| uint16: _DType
+//| """Type code for unsigned integers in the range 0 .. 65535 inclusive, like the 'H' typecode of `array.array`"""
+//|
+//| bool: _DType
+//| """Type code for boolean values"""
+//|
+
+// Fills shape and strides with the entries of ndarray, leaving out those that
+// belong to the given axis: the entries in front of the removed axis move one
+// slot towards the end, the ones behind it are copied unchanged. Slot 0 is
+// never written. For a one-dimensional array, only the last entry of shape is
+// set (to 1), and strides is left untouched.
+static void numerical_reduce_axes(ndarray_obj_t *ndarray, int8_t axis, size_t *shape, int32_t *strides) {
+    if((ndarray->ndim == 1) && (axis == 0)) {
+        shape[ULAB_MAX_DIMS - 1] = 1;
+        return;
+    }
+    // position of the removed axis in the ULAB_MAX_DIMS-long shape/strides arrays
+    const uint8_t removed = ULAB_MAX_DIMS - ndarray->ndim + axis;
+    uint8_t slot = ULAB_MAX_DIMS - 1;
+    while(slot > 0) {
+        const uint8_t from = (slot > removed) ? slot : (uint8_t)(slot - 1);
+        shape[slot] = ndarray->shape[from];
+        strides[slot] = ndarray->strides[from];
+        slot--;
+    }
+}
+
+#if ULAB_NUMPY_HAS_ALL | ULAB_NUMPY_HAS_ANY
+// Shared implementation of all() and any(); optype is NUMERICAL_ALL or NUMERICAL_ANY.
+//
+// oin:  an ndarray, an int or float scalar, or any other iterable
+// axis: mp_const_none reduces a whole ndarray to a single bool; otherwise the
+//       reduction is carried out along that axis, and a bool ndarray is
+//       returned. It is consulted for ndarrays only.
+//
+// An element counts as true, if it differs from 0 (for complex numbers: if the
+// real or the imaginary part differs from 0). An empty input yields True for
+// all(), and False for any().
+static mp_obj_t numerical_all_any(mp_obj_t oin, mp_obj_t axis, uint8_t optype) {
+    // despite its name, anytype is true for all(), and false for any()
+    bool anytype = optype == NUMERICAL_ALL ? 1 : 0;
+    if(mp_obj_is_type(oin, &ulab_ndarray_type)) {
+        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(oin);
+        uint8_t *array = (uint8_t *)ndarray->array;
+        if(ndarray->len == 0) { // return immediately with empty arrays
+            if(optype == NUMERICAL_ALL) {
+                return mp_const_true;
+            } else {
+                return mp_const_false;
+            }
+        }
+        // always get a float, so that we don't have to resolve the dtype later
+        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+        ndarray_obj_t *results = NULL;
+        uint8_t *rarray = NULL;
+        shape_strides _shape_strides = tools_reduce_axes(ndarray, axis);
+        if(axis != mp_const_none) {
+            results = ndarray_new_dense_ndarray(_shape_strides.ndim, _shape_strides.shape, NDARRAY_BOOL);
+            rarray = results->array;
+            // all() starts out from True, and is cleared by the first zero element;
+            // any() starts out from the zeroed array, and is set by the first non-zero element
+            if(optype == NUMERICAL_ALL) {
+                memset(rarray, 1, results->len);
+            }
+        }
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    if(axis == mp_const_none) {
+                        // the first decisive element settles the result for the whole array
+                        do {
+                            #if ULAB_SUPPORTS_COMPLEX
+                            if(ndarray->dtype == NDARRAY_COMPLEX) {
+                                mp_float_t real = *((mp_float_t *)array);
+                                mp_float_t imag = *((mp_float_t *)(array + sizeof(mp_float_t)));
+                                if(((real != MICROPY_FLOAT_CONST(0.0)) | (imag != MICROPY_FLOAT_CONST(0.0))) & !anytype) {
+                                    // optype = NUMERICAL_ANY
+                                    return mp_const_true;
+                                } else if(((real == MICROPY_FLOAT_CONST(0.0)) & (imag == MICROPY_FLOAT_CONST(0.0))) & anytype) {
+                                    // optype == NUMERICAL_ALL
+                                    return mp_const_false;
+                                }
+                            } else {
+                            #endif
+                                mp_float_t value = func(array);
+                                if((value != MICROPY_FLOAT_CONST(0.0)) & !anytype) {
+                                    // optype = NUMERICAL_ANY
+                                    return mp_const_true;
+                                } else if((value == MICROPY_FLOAT_CONST(0.0)) & anytype) {
+                                    // optype == NUMERICAL_ALL
+                                    return mp_const_false;
+                                }
+                            #if ULAB_SUPPORTS_COMPLEX
+                            }
+                            #endif
+                            array += _shape_strides.strides[0];
+                            l++;
+                        } while(l < _shape_strides.shape[0]);
+                    } else { // a scalar axis keyword was supplied
+                        // the first decisive element settles the result for this pass only
+                        do {
+                            #if ULAB_SUPPORTS_COMPLEX
+                            if(ndarray->dtype == NDARRAY_COMPLEX) {
+                                mp_float_t real = *((mp_float_t *)array);
+                                mp_float_t imag = *((mp_float_t *)(array + sizeof(mp_float_t)));
+                                if(((real != MICROPY_FLOAT_CONST(0.0)) | (imag != MICROPY_FLOAT_CONST(0.0))) & !anytype) {
+                                    // optype = NUMERICAL_ANY
+                                    *rarray = 1;
+                                    // since we are breaking out of the loop, move the pointer forward
+                                    array += _shape_strides.strides[0] * (_shape_strides.shape[0] - l);
+                                    break;
+                                } else if(((real == MICROPY_FLOAT_CONST(0.0)) & (imag == MICROPY_FLOAT_CONST(0.0))) & anytype) {
+                                    // optype == NUMERICAL_ALL
+                                    *rarray = 0;
+                                    // since we are breaking out of the loop, move the pointer forward
+                                    array += _shape_strides.strides[0] * (_shape_strides.shape[0] - l);
+                                    break;
+                                }
+                            } else {
+                            #endif
+                                mp_float_t value = func(array);
+                                if((value != MICROPY_FLOAT_CONST(0.0)) & !anytype) {
+                                    // optype == NUMERICAL_ANY
+                                    *rarray = 1;
+                                    // since we are breaking out of the loop, move the pointer forward
+                                    array += _shape_strides.strides[0] * (_shape_strides.shape[0] - l);
+                                    break;
+                                } else if((value == MICROPY_FLOAT_CONST(0.0)) & anytype) {
+                                    // optype == NUMERICAL_ALL
+                                    *rarray = 0;
+                                    // since we are breaking out of the loop, move the pointer forward
+                                    array += _shape_strides.strides[0] * (_shape_strides.shape[0] - l);
+                                    break;
+                                }
+                            #if ULAB_SUPPORTS_COMPLEX
+                            }
+                            #endif
+                            array += _shape_strides.strides[0];
+                            l++;
+                        } while(l < _shape_strides.shape[0]);
+                    }
+                #if ULAB_MAX_DIMS > 1
+                    rarray += _shape_strides.increment;
+                    array -= _shape_strides.strides[0] * _shape_strides.shape[0];
+                    array += _shape_strides.strides[ULAB_MAX_DIMS - 1];
+                    k++;
+                } while(k < _shape_strides.shape[ULAB_MAX_DIMS - 1]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                array -= _shape_strides.strides[ULAB_MAX_DIMS - 1] * _shape_strides.shape[ULAB_MAX_DIMS - 1];
+                array += _shape_strides.strides[ULAB_MAX_DIMS - 2];
+                j++;
+            } while(j < _shape_strides.shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            array -= _shape_strides.strides[ULAB_MAX_DIMS - 2] * _shape_strides.shape[ULAB_MAX_DIMS - 2];
+            array += _shape_strides.strides[ULAB_MAX_DIMS - 3];
+            i++;
+        } while(i < _shape_strides.shape[ULAB_MAX_DIMS - 3]);
+        #endif
+        if(axis == mp_const_none) {
+            // the innermost loop fell through, so return the result here
+            if(!anytype) {
+                return mp_const_false;
+            } else {
+                return mp_const_true;
+            }
+        }
+        return MP_OBJ_FROM_PTR(results);
+    } else if(mp_obj_is_int(oin) || mp_obj_is_float(oin)) {
+        return mp_obj_is_true(oin) ? mp_const_true : mp_const_false;
+    } else {
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t item, iterable = mp_getiter(oin, &iter_buf);
+        while((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+            bool truthy = mp_obj_is_true(item);
+            if(truthy && !anytype) {
+                // optype == NUMERICAL_ANY: the first true item settles it
+                return mp_const_true;
+            } else if(!truthy && anytype) {
+                // optype == NUMERICAL_ALL: the first false item settles it
+                return mp_const_false;
+            }
+        }
+    }
+    // no decisive item was found: all() is True, any() is False
+    return anytype ? mp_const_true : mp_const_false;
+}
+#endif
+
+#if ULAB_NUMPY_HAS_SUM | ULAB_NUMPY_HAS_MEAN | ULAB_NUMPY_HAS_STD
+// Single-pass sum / mean / standard deviation of a generic Python iterable.
+// Every item is converted with mp_obj_get_float(); the running mean and the
+// running sum of squared deviations are updated item by item.
+//   oin:    the iterable to consume
+//   optype: NUMERICAL_SUM or NUMERICAL_MEAN; any other value selects the standard deviation
+//   ddof:   delta degrees of freedom, used by the standard deviation only
+// Returns a float object. The standard deviation is 0.0 when the number of
+// items does not exceed ddof, and the mean of an empty iterable is 0.0.
+static mp_obj_t numerical_sum_mean_std_iterable(mp_obj_t oin, uint8_t optype, size_t ddof) {
+    mp_float_t running_mean = MICROPY_FLOAT_CONST(0.0);
+    mp_float_t sq_dev_sum = MICROPY_FLOAT_CONST(0.0);
+    size_t n_items = 0;
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t iterable = mp_getiter(oin, &iter_buf);
+    mp_obj_t item = mp_iternext(iterable);
+    for(; item != MP_OBJ_STOP_ITERATION; item = mp_iternext(iterable)) {
+        const mp_float_t sample = mp_obj_get_float(item);
+        // incremental update: the new mean is needed for the squared deviation term
+        const mp_float_t next_mean = running_mean + (sample - running_mean) / (n_items + 1);
+        sq_dev_sum = sq_dev_sum + (sample - running_mean) * (sample - next_mean);
+        running_mean = next_mean;
+        n_items++;
+    }
+    switch(optype) {
+        case NUMERICAL_SUM:
+            // the sum is recovered from the mean
+            return mp_obj_new_float(running_mean * n_items);
+        case NUMERICAL_MEAN:
+            if(n_items > 0) {
+                return mp_obj_new_float(running_mean);
+            }
+            return mp_obj_new_float(MICROPY_FLOAT_CONST(0.0));
+        default: // this should be the case of the standard deviation
+            if(n_items > ddof) {
+                return mp_obj_new_float(MICROPY_FLOAT_C_FUN(sqrt)(sq_dev_sum / (n_items - ddof)));
+            }
+            return mp_obj_new_float(MICROPY_FLOAT_CONST(0.0));
+    }
+}
+
+// Sum, mean or standard deviation of an ndarray, either over the flattened
+// array (axis == mp_const_none) or along a single axis.
+//   ndarray: input array; complex dtypes are handled by COMPLEX_DTYPE_NOT_IMPLEMENTED
+//   axis:    mp_const_none, or the axis object handed on to tools_reduce_axes
+//   optype:  NUMERICAL_SUM, NUMERICAL_MEAN or NUMERICAL_STD
+//   ddof:    delta degrees of freedom; only used for NUMERICAL_STD
+// Returns a scalar object for the flattened case (and when the reduced result
+// has ndim == 0), otherwise a new ndarray holding the reduced values.
+// The standard deviation is 0.0 whenever ddof is not smaller than the number
+// of samples.
+static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t axis, uint8_t optype, size_t ddof) {
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype)
+    uint8_t *array = (uint8_t *)ndarray->array;
+    shape_strides _shape_strides = tools_reduce_axes(ndarray, axis);
+
+    if(axis == mp_const_none) {
+        // work with the flattened array
+        if((optype == NUMERICAL_STD) && (ddof >= ndarray->len)) {
+            // with ddof >= len the divisor (len - ddof) below would be zero, or wrap
+            // around, since it is unsigned; return 0.0 as the axis branch and the
+            // iterable implementation do
+            return mp_obj_new_float(MICROPY_FLOAT_CONST(0.0));
+        }
+        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+        // M/S hold the accepted running mean / sum of squared deviations,
+        // m/s the freshly computed candidates of the current step
+        mp_float_t M = MICROPY_FLOAT_CONST(0.0);
+        mp_float_t m = MICROPY_FLOAT_CONST(0.0);
+        mp_float_t S = MICROPY_FLOAT_CONST(0.0);
+        mp_float_t s = MICROPY_FLOAT_CONST(0.0);
+        size_t count = 0;
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        count++;
+                        mp_float_t value = func(array);
+                        m = M + (value - M) / (mp_float_t)count;
+                        if(optype == NUMERICAL_STD) {
+                            // the squared deviations are only needed for the standard deviation
+                            s = S + (value - M) * (value - m);
+                            S = s;
+                        }
+                        M = m;
+                        array += _shape_strides.strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < _shape_strides.shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    // rewind the innermost axis, and step along the next one
+                    array -= _shape_strides.strides[ULAB_MAX_DIMS - 1] * _shape_strides.shape[ULAB_MAX_DIMS - 1];
+                    array += _shape_strides.strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < _shape_strides.shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                array -= _shape_strides.strides[ULAB_MAX_DIMS - 2] * _shape_strides.shape[ULAB_MAX_DIMS - 2];
+                array += _shape_strides.strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < _shape_strides.shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            array -= _shape_strides.strides[ULAB_MAX_DIMS - 3] * _shape_strides.shape[ULAB_MAX_DIMS - 3];
+            array += _shape_strides.strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < _shape_strides.shape[ULAB_MAX_DIMS - 4]);
+        #endif
+        if(optype == NUMERICAL_SUM) {
+            // numpy returns an integer for integer input types
+            if(ndarray->dtype == NDARRAY_FLOAT) {
+                return mp_obj_new_float(M * ndarray->len);
+            } else {
+                return mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(M * ndarray->len));
+            }
+        } else if(optype == NUMERICAL_MEAN) {
+            return mp_obj_new_float(M);
+        } else { // this must be the case of the standard deviation
+            // the guard at the top of this branch guarantees ddof < ndarray->len
+            return mp_obj_new_float(MICROPY_FLOAT_C_FUN(sqrt)(S / (ndarray->len - ddof)));
+        }
+    } else {
+        ndarray_obj_t *results = NULL;
+        uint8_t *rarray = NULL;
+        mp_float_t *farray = NULL;
+        if(optype == NUMERICAL_SUM) {
+            // the sum keeps the dtype of the input
+            results = ndarray_new_dense_ndarray(_shape_strides.ndim, _shape_strides.shape, ndarray->dtype);
+            rarray = (uint8_t *)results->array;
+            // TODO: numpy promotes the output to the highest integer type
+            if(ndarray->dtype == NDARRAY_UINT8) {
+                RUN_SUM(uint8_t, array, results, rarray, _shape_strides);
+            } else if(ndarray->dtype == NDARRAY_INT8) {
+                RUN_SUM(int8_t, array, results, rarray, _shape_strides);
+            } else if(ndarray->dtype == NDARRAY_UINT16) {
+                RUN_SUM(uint16_t, array, results, rarray, _shape_strides);
+            } else if(ndarray->dtype == NDARRAY_INT16) {
+                RUN_SUM(int16_t, array, results, rarray, _shape_strides);
+            } else {
+                // for floats, the sum might be inaccurate with the naive summation
+                // call mean, and multiply with the number of samples
+                farray = (mp_float_t *)results->array;
+                RUN_MEAN_STD(mp_float_t, array, farray, _shape_strides, MICROPY_FLOAT_CONST(0.0), 0);
+                mp_float_t norm = (mp_float_t)_shape_strides.shape[0];
+                // re-wind the array here
+                farray = (mp_float_t *)results->array;
+                for(size_t i=0; i < results->len; i++) {
+                    *farray++ *= norm;
+                }
+            }
+        } else {
+            // mean and standard deviation always produce a float array
+            bool isStd = optype == NUMERICAL_STD ? 1 : 0;
+            results = ndarray_new_dense_ndarray(_shape_strides.ndim, _shape_strides.shape, NDARRAY_FLOAT);
+            farray = (mp_float_t *)results->array;
+            // we can return the 0 array here, if the degrees of freedom is larger than the length of the axis
+            if((optype == NUMERICAL_STD) && (_shape_strides.shape[0] <= ddof)) {
+                return MP_OBJ_FROM_PTR(results);
+            }
+            mp_float_t div = optype == NUMERICAL_STD ? (mp_float_t)(_shape_strides.shape[0] - ddof) : MICROPY_FLOAT_CONST(0.0);
+            if(ndarray->dtype == NDARRAY_UINT8) {
+                RUN_MEAN_STD(uint8_t, array, farray, _shape_strides, div, isStd);
+            } else if(ndarray->dtype == NDARRAY_INT8) {
+                RUN_MEAN_STD(int8_t, array, farray, _shape_strides, div, isStd);
+            } else if(ndarray->dtype == NDARRAY_UINT16) {
+                RUN_MEAN_STD(uint16_t, array, farray, _shape_strides, div, isStd);
+            } else if(ndarray->dtype == NDARRAY_INT16) {
+                RUN_MEAN_STD(int16_t, array, farray, _shape_strides, div, isStd);
+            } else {
+                RUN_MEAN_STD(mp_float_t, array, farray, _shape_strides, div, isStd);
+            }
+        }
+        if(results->ndim == 0) { // return a scalar here
+            return mp_binary_get_val_array(results->dtype, results->array, 0);
+        }
+        return MP_OBJ_FROM_PTR(results);
+    }
+    return mp_const_none;
+}
+#endif
+
+#if ULAB_NUMPY_HAS_ARGMINMAX
+// min / max / argmin / argmax of a generic Python iterable.
+//   oin:    the iterable; an empty one raises ValueError
+//   optype: NUMERICAL_MIN, NUMERICAL_MAX, NUMERICAL_ARGMIN or NUMERICAL_ARGMAX
+// Items are compared through mp_obj_get_float(). For the arg* variants the
+// position of the first best item is returned as a small int, otherwise the
+// best item itself is returned unchanged.
+static mp_obj_t numerical_argmin_argmax_iterable(mp_obj_t oin, uint8_t optype) {
+    if(MP_OBJ_SMALL_INT_VALUE(mp_obj_len_maybe(oin)) == 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("attempt to get argmin/argmax of an empty sequence"));
+    }
+    const bool want_max = (optype == NUMERICAL_ARGMAX) || (optype == NUMERICAL_MAX);
+    const bool want_position = (optype == NUMERICAL_ARGMIN) || (optype == NUMERICAL_ARGMAX);
+
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t iterable = mp_getiter(oin, &iter_buf);
+
+    // the first item is the initial champion
+    mp_obj_t candidate = mp_iternext(iterable);
+    mp_obj_t winner = candidate;
+    mp_float_t winner_value = mp_obj_get_float(candidate);
+    size_t winner_pos = 0;
+
+    for(size_t pos = 1; (candidate = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION; pos++) {
+        const mp_float_t challenger = mp_obj_get_float(candidate);
+        // strict comparison: on ties, the earliest item stays the winner
+        const bool improves = want_max ? (challenger > winner_value) : (challenger < winner_value);
+        if(improves) {
+            winner = candidate;
+            winner_pos = pos;
+            winner_value = challenger;
+        }
+    }
+
+    if(want_position) {
+        return MP_OBJ_NEW_SMALL_INT(winner_pos);
+    }
+    return winner;
+}
+
+// min / max / argmin / argmax of an ndarray.
+//   ndarray: input array; an empty array raises ValueError
+//   axis:    mp_const_none for the flattened array, otherwise the axis object
+//            that is resolved by tools_get_axis
+//   optype:  NUMERICAL_MIN, NUMERICAL_MAX, NUMERICAL_ARGMIN or NUMERICAL_ARGMAX
+// Flattened case: returns the index (arg*) or the value (float for float
+// arrays, small int otherwise) of the first best element.
+// Axis case: returns a new array (NDARRAY_INT16 for arg*, input dtype
+// otherwise), or a scalar, if that array has a single element.
+static mp_obj_t numerical_argmin_argmax_ndarray(ndarray_obj_t *ndarray, mp_obj_t axis, uint8_t optype) {
+    // TODO: treat the flattened array
+    if(ndarray->len == 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("attempt to get (arg)min/(arg)max of empty sequence"));
+    }
+
+    if(axis == mp_const_none) {
+        // work with the flattened array
+        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+        uint8_t *array = (uint8_t *)ndarray->array;
+        // the very first element is the initial best value
+        mp_float_t best_value = func(array);
+        mp_float_t value;
+        size_t index = 0, best_index = 0;
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        value = func(array);
+                        // strict comparisons keep the first occurrence on ties
+                        if((optype == NUMERICAL_ARGMAX) || (optype == NUMERICAL_MAX)) {
+                            if(best_value < value) {
+                                best_value = value;
+                                best_index = index;
+                            }
+                        } else {
+                            if(best_value > value) {
+                                best_value = value;
+                                best_index = index;
+                            }
+                        }
+                        array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                        index++;
+                    } while(l < ndarray->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    // rewind the innermost axis, and step along the next one
+                    array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+                    array += ndarray->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < ndarray->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                array -= ndarray->strides[ULAB_MAX_DIMS - 2] * ndarray->shape[ULAB_MAX_DIMS-2];
+                array += ndarray->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < ndarray->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            array -= ndarray->strides[ULAB_MAX_DIMS - 3] * ndarray->shape[ULAB_MAX_DIMS-3];
+            array += ndarray->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < ndarray->shape[ULAB_MAX_DIMS - 4]);
+        #endif
+
+        if((optype == NUMERICAL_ARGMIN) || (optype == NUMERICAL_ARGMAX)) {
+            return mp_obj_new_int(best_index);
+        } else {
+            if(ndarray->dtype == NDARRAY_FLOAT) {
+                return mp_obj_new_float(best_value);
+            } else {
+                return MP_OBJ_NEW_SMALL_INT((int32_t)best_value);
+            }
+        }
+    } else {
+        int8_t ax = tools_get_axis(axis, ndarray->ndim);
+
+        uint8_t *array = (uint8_t *)ndarray->array;
+        // scratch buffers for the shape/strides of the array without the reduced axis
+        size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+        int32_t *strides = m_new0(int32_t, ULAB_MAX_DIMS);
+
+        numerical_reduce_axes(ndarray, ax, shape, strides);
+        // position of the reduced axis in the right-aligned shape/strides arrays
+        uint8_t index = ULAB_MAX_DIMS - ndarray->ndim + ax;
+
+        ndarray_obj_t *results = NULL;
+
+        if((optype == NUMERICAL_ARGMIN) || (optype == NUMERICAL_ARGMAX)) {
+            results = ndarray_new_dense_ndarray(MAX(1, ndarray->ndim-1), shape, NDARRAY_INT16);
+        } else {
+            results = ndarray_new_dense_ndarray(MAX(1, ndarray->ndim-1), shape, ndarray->dtype);
+        }
+
+        uint8_t *rarray = (uint8_t *)results->array;
+
+        if(ndarray->dtype == NDARRAY_UINT8) {
+            RUN_ARGMIN(ndarray, uint8_t, array, results, rarray, shape, strides, index, optype);
+        } else if(ndarray->dtype == NDARRAY_INT8) {
+            RUN_ARGMIN(ndarray, int8_t, array, results, rarray, shape, strides, index, optype);
+        } else if(ndarray->dtype == NDARRAY_UINT16) {
+            RUN_ARGMIN(ndarray, uint16_t, array, results, rarray, shape, strides, index, optype);
+        } else if(ndarray->dtype == NDARRAY_INT16) {
+            RUN_ARGMIN(ndarray, int16_t, array, results, rarray, shape, strides, index, optype);
+        } else {
+            RUN_ARGMIN(ndarray, mp_float_t, array, results, rarray, shape, strides, index, optype);
+        }
+
+        // release both scratch buffers, as argsort and diff do
+        m_del(size_t, shape, ULAB_MAX_DIMS);
+        m_del(int32_t, strides, ULAB_MAX_DIMS);
+
+        if(results->len == 1) {
+            return mp_binary_get_val_array(results->dtype, results->array, 0);
+        }
+        return MP_OBJ_FROM_PTR(results);
+    }
+    return mp_const_none;
+}
+#endif
+
+// Common front end of the reduction functions (all, any, min, max, argmin,
+// argmax, sum, mean): parses the `(input, axis=None)` arguments, and hands the
+// work over to the implementation that matches the input type.
+//   optype: one of the NUMERICAL_* operation selectors
+// Raises TypeError, if axis is neither None nor an integer, or if the input is
+// not a tuple, list, range, or ndarray; raises NotImplementedError for
+// operations that have no ndarray implementation here.
+static mp_obj_t numerical_function(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args, uint8_t optype) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE} } ,
+        { MP_QSTR_axis, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    mp_obj_t oin = parsed[0].u_obj;
+    mp_obj_t axis = parsed[1].u_obj;
+    if(!((axis == mp_const_none) || mp_obj_is_int(axis))) {
+        mp_raise_TypeError(MP_ERROR_TEXT("axis must be None, or an integer"));
+    }
+
+#if ULAB_NUMPY_HAS_ALL | ULAB_NUMPY_HAS_ANY
+    // all/any do their own input type handling
+    if((optype == NUMERICAL_ALL) || (optype == NUMERICAL_ANY)) {
+        return numerical_all_any(oin, axis, optype);
+    }
+#endif
+
+    const bool wants_extremum = (optype == NUMERICAL_MIN) || (optype == NUMERICAL_MAX) ||
+        (optype == NUMERICAL_ARGMIN) || (optype == NUMERICAL_ARGMAX);
+    const bool wants_sum_or_mean = (optype == NUMERICAL_SUM) || (optype == NUMERICAL_MEAN);
+    const bool is_sequence = mp_obj_is_type(oin, &mp_type_tuple) || mp_obj_is_type(oin, &mp_type_list) ||
+        mp_obj_is_type(oin, &mp_type_range);
+
+    if(is_sequence) {
+        // generic iterables are always reduced as a whole; axis is not passed on
+        if(wants_extremum) {
+            return numerical_argmin_argmax_iterable(oin, optype);
+        }
+        if(wants_sum_or_mean) {
+            return numerical_sum_mean_std_iterable(oin, optype, 0);
+        }
+        // we should never reach this point, but whatever
+        return mp_const_none;
+    }
+
+    if(mp_obj_is_type(oin, &ulab_ndarray_type)) {
+        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(oin);
+        if(wants_extremum) {
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype)
+            return numerical_argmin_argmax_ndarray(ndarray, axis, optype);
+        }
+        if(wants_sum_or_mean) {
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype)
+            return numerical_sum_mean_std_ndarray(ndarray, axis, optype, 0);
+        }
+        mp_raise_NotImplementedError(MP_ERROR_TEXT("operation is not implemented on ndarrays"));
+    } else {
+        mp_raise_TypeError(MP_ERROR_TEXT("input must be tuple, list, range, or ndarray"));
+    }
+    return mp_const_none;
+}
+
+#if ULAB_NUMPY_HAS_SORT | NDARRAY_HAS_SORT
+// Shared implementation of the sort function and the in-place sort method.
+//   oin:     the array to sort; anything but an ndarray raises TypeError
+//   axis:    mp_const_none to sort the flattened array, otherwise the axis
+//            object that is resolved by tools_get_axis
+//   inplace: 1 sorts the input itself and returns None; any other value sorts
+//            and returns the array produced by ndarray_copy_view
+static mp_obj_t numerical_sort_helper(mp_obj_t oin, mp_obj_t axis, uint8_t inplace) {
+    if(!mp_obj_is_type(oin, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("sort argument must be an ndarray"));
+    }
+
+    ndarray_obj_t *ndarray;
+    if(inplace == 1) {
+        ndarray = MP_OBJ_TO_PTR(oin);
+    } else {
+        ndarray = ndarray_copy_view(MP_OBJ_TO_PTR(oin));
+    }
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype)
+
+    int8_t ax = 0;
+    if(axis == mp_const_none) {
+        // flatten the array
+        // NOTE(review): this rewrites shape/strides of the array that is sorted,
+        // i.e., of the caller's array, when inplace == 1
+        #if ULAB_MAX_DIMS > 1
+        for(uint8_t i=0; i < ULAB_MAX_DIMS - 1; i++) {
+            ndarray->shape[i] = 0;
+            ndarray->strides[i] = 0;
+        }
+        ndarray->shape[ULAB_MAX_DIMS - 1] = ndarray->len;
+        ndarray->strides[ULAB_MAX_DIMS - 1] = ndarray->itemsize;
+        ndarray->ndim = 1;
+        #endif
+    } else {
+        ax = tools_get_axis(axis, ndarray->ndim);
+    }
+
+    // scratch buffers for the shape/strides of the array without the sort axis
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    int32_t *strides = m_new0(int32_t, ULAB_MAX_DIMS);
+
+    numerical_reduce_axes(ndarray, ax, shape, strides);
+    // from here on, ax is the position in the right-aligned shape/strides arrays
+    ax = ULAB_MAX_DIMS - ndarray->ndim + ax;
+    // we work with the typed array, so re-scale the stride
+    int32_t increment = ndarray->strides[ax] / ndarray->itemsize;
+
+    uint8_t *array = (uint8_t *)ndarray->array;
+    if(ndarray->shape[ax]) {
+        // sort with the real element type, so that negative values of the
+        // signed dtypes are not compared as large unsigned numbers
+        if(ndarray->dtype == NDARRAY_UINT8) {
+            HEAPSORT(ndarray, uint8_t, array, shape, strides, ax, increment, ndarray->shape[ax]);
+        } else if(ndarray->dtype == NDARRAY_INT8) {
+            HEAPSORT(ndarray, int8_t, array, shape, strides, ax, increment, ndarray->shape[ax]);
+        } else if(ndarray->dtype == NDARRAY_UINT16) {
+            HEAPSORT(ndarray, uint16_t, array, shape, strides, ax, increment, ndarray->shape[ax]);
+        } else if(ndarray->dtype == NDARRAY_INT16) {
+            HEAPSORT(ndarray, int16_t, array, shape, strides, ax, increment, ndarray->shape[ax]);
+        } else {
+            HEAPSORT(ndarray, mp_float_t, array, shape, strides, ax, increment, ndarray->shape[ax]);
+        }
+    }
+
+    // release both scratch buffers, as argsort and diff do
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(int32_t, strides, ULAB_MAX_DIMS);
+
+    if(inplace == 1) {
+        return mp_const_none;
+    } else {
+        return MP_OBJ_FROM_PTR(ndarray);
+    }
+}
+#endif /* ULAB_NUMERICAL_HAS_SORT | NDARRAY_HAS_SORT */
+
+#if ULAB_NUMPY_HAS_ALL
+// Python-facing entry point of `all`: forwards the call arguments to
+// numerical_function with the NUMERICAL_ALL operation selector.
+mp_obj_t numerical_all(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    const uint8_t optype = NUMERICAL_ALL;
+    return numerical_function(n_args, pos_args, kw_args, optype);
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_all_obj, 1, numerical_all);
+#endif
+
+#if ULAB_NUMPY_HAS_ANY
+// Python-facing entry point of `any`: forwards the call arguments to
+// numerical_function with the NUMERICAL_ANY operation selector.
+mp_obj_t numerical_any(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    const uint8_t optype = NUMERICAL_ANY;
+    return numerical_function(n_args, pos_args, kw_args, optype);
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_any_obj, 1, numerical_any);
+#endif
+
+#if ULAB_NUMPY_HAS_ARGMINMAX
+//| def argmax(array: _ArrayLike, *, axis: Optional[int] = None) -> int:
+//|     """Return the index of the maximum element of the flattened input, or,
+//|        if an axis is given for an ndarray, the indices of the maxima along that axis"""
+//|     ...
+//|
+
+// Forwards the call arguments to numerical_function with the NUMERICAL_ARGMAX selector.
+mp_obj_t numerical_argmax(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    const uint8_t optype = NUMERICAL_ARGMAX;
+    return numerical_function(n_args, pos_args, kw_args, optype);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_argmax_obj, 1, numerical_argmax);
+
+//| def argmin(array: _ArrayLike, *, axis: Optional[int] = None) -> int:
+//|     """Return the index of the minimum element of the flattened input, or,
+//|        if an axis is given for an ndarray, the indices of the minima along that axis"""
+//|     ...
+//|
+
+// Forwards the call arguments to numerical_function with the NUMERICAL_ARGMIN selector.
+static mp_obj_t numerical_argmin(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    const uint8_t optype = NUMERICAL_ARGMIN;
+    return numerical_function(n_args, pos_args, kw_args, optype);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_argmin_obj, 1, numerical_argmin);
+#endif
+
+#if ULAB_NUMPY_HAS_ARGSORT
+//| def argsort(array: ulab.numpy.ndarray, *, axis: int = -1) -> ulab.numpy.ndarray:
+//|     """Returns an array which gives indices into the input array from least to greatest."""
+//|     ...
+//|
+
+// Returns a new NDARRAY_UINT16 array with the shape of the input, holding the
+// indices that sort the input along the given axis.
+// Raises TypeError for a non-ndarray argument, NotImplementedError when axis is
+// None (the default), and ValueError when any dimension is longer than 65535,
+// which is the largest index the uint16 result can hold.
+mp_obj_t numerical_argsort(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("argsort argument must be an ndarray"));
+    }
+
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0].u_obj);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype)
+    if(args[1].u_obj == mp_const_none) {
+        // bail out, though dense arrays could still be sorted
+        mp_raise_NotImplementedError(MP_ERROR_TEXT("argsort is not implemented for flattened arrays"));
+    }
+    // Since we are returning an NDARRAY_UINT16 array, bail out,
+    // if the axis is longer than what we can hold
+    for(uint8_t i=0; i < ULAB_MAX_DIMS; i++) {
+        if(ndarray->shape[i] > 65535) {
+            mp_raise_ValueError(MP_ERROR_TEXT("axis too long"));
+        }
+    }
+    int8_t ax = tools_get_axis(args[1].u_obj, ndarray->ndim);
+
+    // shape/strides of the input without the sort axis
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    int32_t *strides = m_new0(int32_t, ULAB_MAX_DIMS);
+    numerical_reduce_axes(ndarray, ax, shape, strides);
+
+    // We could return an NDARRAY_UINT8 array, if all lengths are shorter than 256
+    ndarray_obj_t *indices = ndarray_new_ndarray(ndarray->ndim, ndarray->shape, NULL, NDARRAY_UINT16, NULL);
+    int32_t *istrides = m_new0(int32_t, ULAB_MAX_DIMS);
+    numerical_reduce_axes(indices, ax, shape, istrides);
+
+    // iarray is a typed pointer, so express the index strides in elements
+    for(uint8_t i=0; i < ULAB_MAX_DIMS; i++) {
+        istrides[i] /= sizeof(uint16_t);
+    }
+
+    // from here on, ax is the position in the right-aligned shape/strides arrays
+    ax = ULAB_MAX_DIMS - ndarray->ndim + ax;
+    // we work with the typed array, so re-scale the stride
+    int32_t increment = ndarray->strides[ax] / ndarray->itemsize;
+    uint16_t iincrement = indices->strides[ax] / sizeof(uint16_t);
+
+    uint8_t *array = (uint8_t *)ndarray->array;
+    uint16_t *iarray = (uint16_t *)indices->array;
+
+    // fill in the index values
+    #if ULAB_MAX_DIMS > 3
+    size_t j = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t k = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t l = 0;
+            do {
+            #endif
+            uint16_t m = 0;
+                do {
+                    // 0, 1, 2, ... along the sort axis
+                    *iarray = m++;
+                    iarray += iincrement;
+                } while(m < indices->shape[ax]);
+            #if ULAB_MAX_DIMS > 1
+                iarray -= iincrement * indices->shape[ax];
+                iarray += istrides[ULAB_MAX_DIMS - 1];
+                l++;
+            } while(l < shape[ULAB_MAX_DIMS - 1]);
+            iarray -= istrides[ULAB_MAX_DIMS - 1] * shape[ULAB_MAX_DIMS - 1];
+            iarray += istrides[ULAB_MAX_DIMS - 2];
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            k++;
+        } while(k < shape[ULAB_MAX_DIMS - 2]);
+        iarray -= istrides[ULAB_MAX_DIMS - 2] * shape[ULAB_MAX_DIMS - 2];
+        iarray += istrides[ULAB_MAX_DIMS - 3];
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        j++;
+    } while(j < shape[ULAB_MAX_DIMS - 3]);
+    #endif
+    // reset the array
+    iarray = indices->array;
+
+    if(ndarray->shape[ax]) {
+        // compare with the real element type, so that negative values of the
+        // signed dtypes are not ordered as large unsigned numbers
+        if(ndarray->dtype == NDARRAY_UINT8) {
+            HEAP_ARGSORT(ndarray, uint8_t, array, shape, strides, ax, increment, ndarray->shape[ax], iarray, istrides, iincrement);
+        } else if(ndarray->dtype == NDARRAY_INT8) {
+            HEAP_ARGSORT(ndarray, int8_t, array, shape, strides, ax, increment, ndarray->shape[ax], iarray, istrides, iincrement);
+        } else if(ndarray->dtype == NDARRAY_UINT16) {
+            HEAP_ARGSORT(ndarray, uint16_t, array, shape, strides, ax, increment, ndarray->shape[ax], iarray, istrides, iincrement);
+        } else if(ndarray->dtype == NDARRAY_INT16) {
+            HEAP_ARGSORT(ndarray, int16_t, array, shape, strides, ax, increment, ndarray->shape[ax], iarray, istrides, iincrement);
+        } else {
+            HEAP_ARGSORT(ndarray, mp_float_t, array, shape, strides, ax, increment, ndarray->shape[ax], iarray, istrides, iincrement);
+        }
+    }
+
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(int32_t, strides, ULAB_MAX_DIMS);
+    m_del(int32_t, istrides, ULAB_MAX_DIMS);
+
+    return MP_OBJ_FROM_PTR(indices);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_argsort_obj, 1, numerical_argsort);
+#endif
+
+#if ULAB_NUMPY_HAS_CROSS
+//| def cross(a: ulab.numpy.ndarray, b: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|     """Return the cross product of two vectors of length 3"""
+//|     ...
+//|
+
+// Cross product of two 1D ndarrays of length 3.
+// Raises TypeError, if either argument is not an ndarray, and ValueError, if
+// the arguments are not 1D arrays of length 3. Complex dtypes are handled by
+// COMPLEX_DTYPE_NOT_IMPLEMENTED.
+// The product is computed in floating point, and is then stored with the
+// dtype given by the upcasting rules listed in the body.
+static mp_obj_t numerical_cross(mp_obj_t _a, mp_obj_t _b) {
+    if (!mp_obj_is_type(_a, &ulab_ndarray_type) || !mp_obj_is_type(_b, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("arguments must be ndarrays"));
+    }
+    ndarray_obj_t *a = MP_OBJ_TO_PTR(_a);
+    ndarray_obj_t *b = MP_OBJ_TO_PTR(_b);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(a->dtype)
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(b->dtype)
+    if((a->ndim != 1) || (b->ndim != 1) || (a->len != b->len) || (a->len != 3)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("cross is defined for 1D arrays of length 3"));
+    }
+
+    // three scalars only, so there is no need for a heap allocation
+    mp_float_t results[3];
+    results[0] = ndarray_get_float_index(a->array, a->dtype, 1) * ndarray_get_float_index(b->array, b->dtype, 2);
+    results[0] -= ndarray_get_float_index(a->array, a->dtype, 2) * ndarray_get_float_index(b->array, b->dtype, 1);
+    results[1] = -ndarray_get_float_index(a->array, a->dtype, 0) * ndarray_get_float_index(b->array, b->dtype, 2);
+    results[1] += ndarray_get_float_index(a->array, a->dtype, 2) * ndarray_get_float_index(b->array, b->dtype, 0);
+    results[2] = ndarray_get_float_index(a->array, a->dtype, 0) * ndarray_get_float_index(b->array, b->dtype, 1);
+    results[2] -= ndarray_get_float_index(a->array, a->dtype, 1) * ndarray_get_float_index(b->array, b->dtype, 0);
+
+    /* The upcasting happens here with the rules
+
+        - if one of the operands is a float, the result is always float
+        - operation on identical types preserves type
+
+        uint8 + int8 => int16
+        uint8 + int16 => int16
+        uint8 + uint16 => uint16
+        int8 + int16 => int16
+        int8 + uint16 => uint16
+        uint16 + int16 => float
+
+    */
+
+    uint8_t dtype = NDARRAY_FLOAT;
+    if(a->dtype == b->dtype) {
+        dtype = a->dtype;
+    } else if(((a->dtype == NDARRAY_UINT8) && (b->dtype == NDARRAY_INT8)) || ((a->dtype == NDARRAY_INT8) && (b->dtype == NDARRAY_UINT8))) {
+        dtype = NDARRAY_INT16;
+    } else if(((a->dtype == NDARRAY_UINT8) && (b->dtype == NDARRAY_INT16)) || ((a->dtype == NDARRAY_INT16) && (b->dtype == NDARRAY_UINT8))) {
+        dtype = NDARRAY_INT16;
+    } else if(((a->dtype == NDARRAY_UINT8) && (b->dtype == NDARRAY_UINT16)) || ((a->dtype == NDARRAY_UINT16) && (b->dtype == NDARRAY_UINT8))) {
+        dtype = NDARRAY_UINT16;
+    } else if(((a->dtype == NDARRAY_INT8) && (b->dtype == NDARRAY_INT16)) || ((a->dtype == NDARRAY_INT16) && (b->dtype == NDARRAY_INT8))) {
+        dtype = NDARRAY_INT16;
+    } else if(((a->dtype == NDARRAY_INT8) && (b->dtype == NDARRAY_UINT16)) || ((a->dtype == NDARRAY_UINT16) && (b->dtype == NDARRAY_INT8))) {
+        dtype = NDARRAY_UINT16;
+    }
+    // Everything that is not one of the four integer types is stored as float.
+    // Without this, two arrays of an identical other dtype would get a result
+    // buffer of that dtype, which is then filled with mp_float_t values below.
+    if((dtype != NDARRAY_UINT8) && (dtype != NDARRAY_INT8) && (dtype != NDARRAY_UINT16) && (dtype != NDARRAY_INT16)) {
+        dtype = NDARRAY_FLOAT;
+    }
+
+    // Integer results go through int64_t first: converting a negative or
+    // out-of-range float directly to a narrow/unsigned type is undefined in C,
+    // while narrowing from int64_t is not.
+    ndarray_obj_t *ndarray = ndarray_new_linear_array(3, dtype);
+    if(dtype == NDARRAY_UINT8) {
+        uint8_t *array = (uint8_t *)ndarray->array;
+        for(uint8_t i=0; i < 3; i++) array[i] = (uint8_t)(int64_t)results[i];
+    } else if(dtype == NDARRAY_INT8) {
+        int8_t *array = (int8_t *)ndarray->array;
+        for(uint8_t i=0; i < 3; i++) array[i] = (int8_t)(int64_t)results[i];
+    } else if(dtype == NDARRAY_UINT16) {
+        uint16_t *array = (uint16_t *)ndarray->array;
+        for(uint8_t i=0; i < 3; i++) array[i] = (uint16_t)(int64_t)results[i];
+    } else if(dtype == NDARRAY_INT16) {
+        int16_t *array = (int16_t *)ndarray->array;
+        for(uint8_t i=0; i < 3; i++) array[i] = (int16_t)(int64_t)results[i];
+    } else {
+        mp_float_t *array = (mp_float_t *)ndarray->array;
+        for(uint8_t i=0; i < 3; i++) array[i] = results[i];
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(numerical_cross_obj, numerical_cross);
+
+#endif /* ULAB_NUMERICAL_HAS_CROSS */
+
+#if ULAB_NUMPY_HAS_DIFF
+//| def diff(array: ulab.numpy.ndarray, *, n: int = 1, axis: int = -1) -> ulab.numpy.ndarray:
+//|     """Return the numerical derivative of successive elements of the array, as
+//|        an array.  axis=None is not supported."""
+//|     ...
+//|
+
+// n-th order discrete difference of an ndarray along one axis.
+//   positional: the input ndarray (anything else raises TypeError)
+//   n:          differentiation order, 0..9, and not larger than the length of the axis
+//   axis:       the axis to differentiate along; negative values count from the last axis
+// Returns a new array of the input's dtype, whose length along `axis` is shorter by n.
+// Raises ValueError, if the axis or the order is out of range.
+mp_obj_t numerical_diff(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_n, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 1 } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = -1 } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("diff argument must be an ndarray"));
+    }
+
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0].u_obj);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype)
+    // validate the axis at full width, and narrow it only afterwards, so that
+    // values such as 256 are rejected instead of wrapping around to a valid axis
+    mp_int_t axis = args[2].u_int;
+    if(axis < 0) axis += ndarray->ndim;
+
+    if((axis < 0) || (axis > ndarray->ndim - 1)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("index out of range"));
+    }
+    int8_t ax = (int8_t)axis;
+
+    if((args[1].u_int < 0) || (args[1].u_int > 9)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("differentiation order out of range"));
+    }
+    uint8_t N = (uint8_t)args[1].u_int;
+    // position of the axis in the right-aligned shape/strides arrays
+    uint8_t index = ULAB_MAX_DIMS - ndarray->ndim + ax;
+    if(N > ndarray->shape[index]) {
+        mp_raise_ValueError(MP_ERROR_TEXT("differentiation order out of range"));
+    }
+
+    // binomial coefficients with alternating signs: 1, -N, N(N-1)/2, ...
+    int8_t *stencil = m_new(int8_t, N+1);
+    stencil[0] = 1;
+    for(uint8_t i = 1; i < N+1; i++) {
+        stencil[i] = -stencil[i-1]*(N-i+1)/i;
+    }
+
+    // the result has the shape of the input, except for the N samples lost along the axis
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    for(uint8_t i = 0; i < ULAB_MAX_DIMS; i++) {
+        shape[i] = ndarray->shape[i];
+        if(i == index) {
+            shape[i] -= N;
+        }
+    }
+    uint8_t *array = (uint8_t *)ndarray->array;
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, shape, ndarray->dtype);
+    uint8_t *rarray = (uint8_t *)results->array;
+
+    // re-use the shape buffer for the shape of the input without the axis
+    memset(shape, 0, sizeof(size_t)*ULAB_MAX_DIMS);
+    int32_t *strides = m_new0(int32_t, ULAB_MAX_DIMS);
+    numerical_reduce_axes(ndarray, ax, shape, strides);
+
+    if(ndarray->dtype == NDARRAY_UINT8) {
+        RUN_DIFF(ndarray, uint8_t, array, results, rarray, shape, strides, index, stencil, N);
+    } else if(ndarray->dtype == NDARRAY_INT8) {
+        RUN_DIFF(ndarray, int8_t, array, results, rarray, shape, strides, index, stencil, N);
+    }  else if(ndarray->dtype == NDARRAY_UINT16) {
+        RUN_DIFF(ndarray, uint16_t, array, results, rarray, shape, strides, index, stencil, N);
+    } else if(ndarray->dtype == NDARRAY_INT16) {
+        RUN_DIFF(ndarray, int16_t, array, results, rarray, shape, strides, index, stencil, N);
+    } else {
+        RUN_DIFF(ndarray, mp_float_t, array, results, rarray, shape, strides, index, stencil, N);
+    }
+    m_del(int8_t, stencil, N+1);
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(int32_t, strides, ULAB_MAX_DIMS);
+    return MP_OBJ_FROM_PTR(results);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_diff_obj, 1, numerical_diff);
+#endif
+
+#if ULAB_NUMPY_HAS_FLIP
+//| def flip(array: ulab.numpy.ndarray, *, axis: Optional[int] = None) -> ulab.numpy.ndarray:
+//|     """Returns a new array that reverses the order of the elements along the
+//|        given axis, or along all axes if axis is None."""
+//|     ...
+//|
+
+mp_obj_t numerical_flip(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Python-level signature handled here: flip(array, *, axis=None).
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    mp_obj_t array_in = parsed[0].u_obj;
+    mp_obj_t axis_in = parsed[1].u_obj;
+
+    if(!mp_obj_is_type(array_in, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("flip argument must be an ndarray"));
+    }
+    ndarray_obj_t *source = MP_OBJ_TO_PTR(array_in);
+
+    if(axis_in == mp_const_none) {
+        // axis=None: copy the input into a fresh linear array, then make that
+        // array read back to front by pointing its data pointer at the last
+        // item and negating the stride of the last dimension.
+        ndarray_obj_t *flat = ndarray_new_linear_array(source->len, source->dtype);
+        ndarray_copy_array(source, flat, 0);
+        uint8_t *last_item = (uint8_t *)flat->array;
+        last_item += (flat->len - 1) * flat->itemsize;
+        flat->array = last_item;
+        flat->strides[ULAB_MAX_DIMS - 1] = -flat->strides[ULAB_MAX_DIMS - 1];
+        return MP_OBJ_FROM_PTR(flat);
+    }
+
+    if(!mp_obj_is_int(axis_in)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong axis index"));
+    }
+
+    // Integer axis: no data is copied here. The object returned by
+    // ndarray_new_view starts at the last item along the chosen axis and
+    // walks that axis with a negated stride.
+    int8_t axis = tools_get_axis(axis_in, source->ndim);
+    // position of the axis inside the ULAB_MAX_DIMS-long shape/strides arrays
+    int8_t slot = ULAB_MAX_DIMS - source->ndim + axis;
+    int32_t offset = (source->shape[slot] - 1) * source->strides[slot];
+    ndarray_obj_t *view = ndarray_new_view(source, source->ndim, source->shape, source->strides, offset);
+    view->strides[slot] = -view->strides[slot];
+    return MP_OBJ_FROM_PTR(view);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_flip_obj, 1, numerical_flip);
+#endif
+
+#if ULAB_NUMPY_HAS_MINMAX
+//| def max(array: _ArrayLike, *, axis: Optional[int] = None) -> _float:
+//|     """Return the maximum element of the 1D array"""
+//|     ...
+//|
+
+mp_obj_t numerical_max(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Thin entry point: all argument handling is done by numerical_function,
+    // which is told to perform the NUMERICAL_MAX operation.
+    mp_obj_t maximum = numerical_function(n_args, pos_args, kw_args, NUMERICAL_MAX);
+    return maximum;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_max_obj, 1, numerical_max);
+#endif
+
+#if ULAB_NUMPY_HAS_MEAN
+//| def mean(array: _ArrayLike, *, axis: Optional[int] = None) -> _float:
+//|     """Return the mean element of the 1D array, as a number if axis is None, otherwise as an array."""
+//|     ...
+//|
+
+mp_obj_t numerical_mean(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Thin entry point: all argument handling is done by numerical_function,
+    // which is told to perform the NUMERICAL_MEAN operation.
+    mp_obj_t mean = numerical_function(n_args, pos_args, kw_args, NUMERICAL_MEAN);
+    return mean;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_mean_obj, 1, numerical_mean);
+#endif
+
+#if ULAB_NUMPY_HAS_MEDIAN
+//| def median(array: ulab.numpy.ndarray, *, axis: Optional[int] = None) -> Union[_float, ulab.numpy.ndarray]:
+//|     """Find the median value in an array along the given axis, or along all axes if axis is None."""
+//|     ...
+//|
+
+mp_obj_t numerical_median(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Python-level signature handled here: median(array, *, axis=None).
+    //
+    // Returns a float when axis is None or the sorted array is one-dimensional,
+    // otherwise a new float ndarray with the given axis removed.
+    // Raises TypeError if the first argument is not an ndarray.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("median argument must be an ndarray"));
+    }
+
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0].u_obj);
+    if(ndarray->len == 0) {
+        // the median of an empty array is reported as NaN
+        return mp_obj_new_float(MICROPY_FLOAT_C_FUN(nan)(""));
+    }
+
+    // from here on, all reads go through the array returned by the sort helper
+    ndarray = MP_OBJ_TO_PTR(numerical_sort_helper(args[0].u_obj, args[1].u_obj, 0));
+
+    if((args[1].u_obj == mp_const_none) || (ndarray->ndim == 1)) {
+        // at this point, the array holding the sorted values should be flat
+        uint8_t *array = (uint8_t *)ndarray->array;
+        size_t len = ndarray->len;
+        // middle element (upper middle for an even length)
+        array += (len >> 1) * ndarray->itemsize;
+        mp_float_t median = ndarray_get_float_value(array, ndarray->dtype);
+        if(!(len & 0x01)) { // len is an even number
+            // average the two middle elements
+            array -= ndarray->itemsize;
+            median += ndarray_get_float_value(array, ndarray->dtype);
+            median *= MICROPY_FLOAT_CONST(0.5);
+        }
+        return mp_obj_new_float(median);
+    } else {
+        int8_t ax = tools_get_axis(args[1].u_obj, ndarray->ndim);
+
+        // shape/strides of the array with the reduction axis taken out; these
+        // drive the loops below and therefore must stay alive until the loops
+        // have finished
+        size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+        int32_t *strides = m_new0(int32_t, ULAB_MAX_DIMS);
+        numerical_reduce_axes(ndarray, ax, shape, strides);
+
+        // position of the axis inside the ULAB_MAX_DIMS-long shape/strides arrays
+        ax = ULAB_MAX_DIMS - ndarray->ndim + ax;
+        ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim-1, shape, NDARRAY_FLOAT);
+
+        mp_float_t *rarray = (mp_float_t *)results->array;
+
+        uint8_t *array = (uint8_t *)ndarray->array;
+
+        size_t len = ndarray->shape[ax];
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                size_t k = 0;
+                do {
+                    // jump to the middle of the current lane, read the
+                    // median, then jump back to the start of the lane
+                    array += ndarray->strides[ax] * (len >> 1);
+                    mp_float_t median = ndarray_get_float_value(array, ndarray->dtype);
+                    if(!(len & 0x01)) { // len is an even number
+                        array -= ndarray->strides[ax];
+                        median += ndarray_get_float_value(array, ndarray->dtype);
+                        median *= MICROPY_FLOAT_CONST(0.5);
+                        array += ndarray->strides[ax];
+                    }
+                    array -= ndarray->strides[ax] * (len >> 1);
+                    array += strides[ULAB_MAX_DIMS - 1];
+                    *rarray = median;
+                    rarray++;
+                    k++;
+                } while(k < shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 2
+                array -= strides[ULAB_MAX_DIMS - 1] * shape[ULAB_MAX_DIMS - 1];
+                array += strides[ULAB_MAX_DIMS - 2];
+                j++;
+            } while(j < shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            array -= strides[ULAB_MAX_DIMS - 2] * shape[ULAB_MAX_DIMS-2];
+            array += strides[ULAB_MAX_DIMS - 3];
+            i++;
+        } while(i < shape[ULAB_MAX_DIMS - 3]);
+        #endif
+
+        // release the helper arrays only after their last use: freeing shape
+        // before the loops left the loop bounds reading freed memory, and
+        // strides was never released at all
+        m_del(size_t, shape, ULAB_MAX_DIMS);
+        m_del(int32_t, strides, ULAB_MAX_DIMS);
+
+        return MP_OBJ_FROM_PTR(results);
+    }
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_median_obj, 1, numerical_median);
+#endif
+
+#if ULAB_NUMPY_HAS_MINMAX
+//| def min(array: _ArrayLike, *, axis: Optional[int] = None) -> _float:
+//|     """Return the minimum element of the 1D array"""
+//|     ...
+//|
+
+mp_obj_t numerical_min(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Thin entry point: all argument handling is done by numerical_function,
+    // which is told to perform the NUMERICAL_MIN operation.
+    mp_obj_t minimum = numerical_function(n_args, pos_args, kw_args, NUMERICAL_MIN);
+    return minimum;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_min_obj, 1, numerical_min);
+#endif
+
+#if ULAB_NUMPY_HAS_ROLL
+//| def roll(array: ulab.numpy.ndarray, distance: int, *, axis: Optional[int] = None) -> ulab.numpy.ndarray:
+//|     """Shift the content of a vector by the positions given as the second
+//|        argument. If the ``axis`` keyword is supplied, the shift is applied to
+//|        the given axis.  A new array is returned; the input is left unchanged."""
+//|     ...
+//|
+
+mp_obj_t numerical_roll(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Python-level signature handled here: roll(array, distance, *, axis=None).
+    //
+    // Returns a newly allocated dense array holding the rolled data; the
+    // input array is only read. Raises TypeError if the first argument is
+    // not an ndarray, or if axis is neither None nor an integer.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE  } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("roll argument must be an ndarray"));
+    }
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0].u_obj);
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, ndarray->shape, ndarray->dtype);
+
+    int32_t shift = mp_obj_get_int(args[1].u_obj);
+
+    if(shift == 0) {
+        ndarray_copy_array(ndarray, results, 0);
+        return MP_OBJ_FROM_PTR(results);
+    }
+
+    // Magnitude of the shift, computed in unsigned arithmetic so that the
+    // most negative int32_t value cannot overflow on negation.
+    uint32_t magnitude = shift < 0 ? (uint32_t)0 - (uint32_t)shift : (uint32_t)shift;
+
+    size_t counter;
+    uint8_t *array = ndarray->array;
+    uint8_t *rarray = (uint8_t *)results->array;
+
+    if(args[2].u_obj == mp_const_none) { // roll the flattened array
+        if(results->len == 0) {
+            // nothing to move; also avoids a modulo by zero below
+            return MP_OBJ_FROM_PTR(results);
+        }
+        // Express every roll as a shift to the right by `offset` items, with
+        // 0 <= offset < len. A left roll by s equals a right roll by len - s.
+        // This keeps the first write inside the buffer even when the
+        // requested shift is a negative multiple of the length.
+        size_t offset = magnitude % results->len;
+        if((shift < 0) && (offset != 0)) {
+            offset = results->len - offset;
+        }
+        rarray += offset * results->itemsize;
+        // number of items that fit before the write position has to wrap around
+        counter = results->len - offset;
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        memcpy(rarray, array, ndarray->itemsize);
+                        rarray += results->itemsize;
+                        array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                        if(--counter == 0) {
+                            // wrap around to the beginning of the output
+                            rarray = results->array;
+                        }
+                    } while(l <  ndarray->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+                    array += ndarray->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k <  ndarray->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                array -= ndarray->strides[ULAB_MAX_DIMS - 2] * ndarray->shape[ULAB_MAX_DIMS-2];
+                array += ndarray->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j <  ndarray->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            array -= ndarray->strides[ULAB_MAX_DIMS - 3] * ndarray->shape[ULAB_MAX_DIMS-3];
+            array += ndarray->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i <  ndarray->shape[ULAB_MAX_DIMS - 4]);
+        #endif
+    } else if(mp_obj_is_int(args[2].u_obj)){
+        int8_t ax = tools_get_axis(args[2].u_obj, ndarray->ndim);
+
+        if((results->len == 0) || (results->shape[ULAB_MAX_DIMS - ndarray->ndim + ax] == 0)) {
+            // nothing to move; also avoids a modulo by zero below
+            return MP_OBJ_FROM_PTR(results);
+        }
+
+        // shape/strides of input and output with the rolled axis taken out;
+        // these drive the outer loops
+        size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+        int32_t *strides = m_new0(int32_t, ULAB_MAX_DIMS);
+        numerical_reduce_axes(ndarray, ax, shape, strides);
+
+        size_t *rshape = m_new0(size_t, ULAB_MAX_DIMS);
+        int32_t *rstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+        numerical_reduce_axes(results, ax, rshape, rstrides);
+
+        // position of the axis inside the ULAB_MAX_DIMS-long shape/strides arrays
+        ax = ULAB_MAX_DIMS - ndarray->ndim + ax;
+        uint8_t *_rarray;
+        // Same normalisation as for the flattened case: a right shift by
+        // `offset` items along the axis, with 0 <= offset < axis_len.
+        size_t axis_len = results->shape[ax];
+        size_t offset = magnitude % axis_len;
+        if((shift < 0) && (offset != 0)) {
+            offset = axis_len - offset;
+        }
+
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    // remember the start of the output lane for the wrap-around
+                    _rarray = rarray;
+                    rarray += (int32_t)offset * results->strides[ax];
+                    counter = axis_len - offset;
+                    do {
+                        memcpy(rarray, array, ndarray->itemsize);
+                        array += ndarray->strides[ax];
+                        rarray += results->strides[ax];
+                        if(--counter == 0) {
+                            rarray = _rarray;
+                        }
+                        l++;
+                    } while(l < ndarray->shape[ax]);
+                #if ULAB_MAX_DIMS > 1
+                    rarray = _rarray;
+                    rarray += rstrides[ULAB_MAX_DIMS - 1];
+                    array -= ndarray->strides[ax] * ndarray->shape[ax];
+                    array += strides[ULAB_MAX_DIMS - 1];
+                    k++;
+                } while(k < shape[ULAB_MAX_DIMS - 1]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                rarray -= rstrides[ULAB_MAX_DIMS - 1] * rshape[ULAB_MAX_DIMS-1];
+                rarray += rstrides[ULAB_MAX_DIMS - 2];
+                array -= strides[ULAB_MAX_DIMS - 1] * shape[ULAB_MAX_DIMS-1];
+                array += strides[ULAB_MAX_DIMS - 2];
+                j++;
+            } while(j < shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            rarray -= rstrides[ULAB_MAX_DIMS - 2] * rshape[ULAB_MAX_DIMS-2];
+            rarray += rstrides[ULAB_MAX_DIMS - 3];
+            array -= strides[ULAB_MAX_DIMS - 2] * shape[ULAB_MAX_DIMS-2];
+            array += strides[ULAB_MAX_DIMS - 3];
+            i++;
+        } while(i < shape[ULAB_MAX_DIMS - 3]);
+        #endif
+
+        m_del(size_t, shape, ULAB_MAX_DIMS);
+        m_del(int32_t, strides, ULAB_MAX_DIMS);
+        m_del(size_t, rshape, ULAB_MAX_DIMS);
+        m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+
+    } else {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong axis index"));
+    }
+
+    return MP_OBJ_FROM_PTR(results);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_roll_obj, 2, numerical_roll);
+#endif
+
+#if ULAB_NUMPY_HAS_SORT
+//| def sort(array: ulab.numpy.ndarray, *, axis: int = -1) -> ulab.numpy.ndarray:
+//|     """Sort the array along the given axis, or along all axes if axis is None.
+//|        The array is modified in place."""
+//|     ...
+//|
+
+mp_obj_t numerical_sort(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Python-level signature handled here: sort(array, *, axis=None).
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    mp_obj_t array_in = parsed[0].u_obj;
+    mp_obj_t axis_in = parsed[1].u_obj;
+
+    // The final argument is 0 here and 1 in numerical_sort_inplace;
+    // NOTE(review): presumably an "in place" flag - confirm against
+    // numerical_sort_helper.
+    return numerical_sort_helper(array_in, axis_in, 0);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_sort_obj, 1, numerical_sort);
+#endif
+
+#if NDARRAY_HAS_SORT
+// method of an ndarray
+// Method of an ndarray: sort(*, axis=-1). Parses the arguments and hands them
+// to numerical_sort_helper with the final argument set to 1 (numerical_sort
+// passes 0). NOTE(review): presumably the "in place" flag - confirm against
+// numerical_sort_helper.
+static mp_obj_t numerical_sort_inplace(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        // The argument is declared MP_ARG_OBJ and read back through .u_obj, so
+        // the default has to be a real object. Writing it through .u_int only
+        // worked because, in the default object representation, the bit
+        // pattern of the integer -1 happens to decode as the small int -1.
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_INT(-1) } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    return numerical_sort_helper(args[0].u_obj, args[1].u_obj, 1);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_sort_inplace_obj, 1, numerical_sort_inplace);
+#endif /* NDARRAY_HAS_SORT */
+
+#if ULAB_NUMPY_HAS_STD
+//| def std(array: _ArrayLike, *, axis: Optional[int] = None, ddof: int = 0) -> _float:
+//|     """Return the standard deviation of the array, as a number if axis is None, otherwise as an array."""
+//|     ...
+//|
+
+mp_obj_t numerical_std(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Accepted arguments: the input, an optional axis, and the keyword-only ddof.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } } ,
+        { MP_QSTR_axis, MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_ddof, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 0} },
+    };
+
+    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);
+
+    mp_obj_t input = parsed[0].u_obj;
+    mp_obj_t axis = parsed[1].u_obj;
+    size_t ddof = parsed[2].u_int;
+
+    if(axis != mp_const_none) {
+        // only the axis values 0 and 1 are let through
+        // this seems to pass with False, and True...
+        mp_int_t axis_value = mp_obj_get_int(axis);
+        if((axis_value != 0) && (axis_value != 1)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("axis must be None, or an integer"));
+        }
+    }
+
+    // generic Python sequences take the iterable code path
+    bool is_sequence = mp_obj_is_type(input, &mp_type_tuple)
+                       || mp_obj_is_type(input, &mp_type_list)
+                       || mp_obj_is_type(input, &mp_type_range);
+    if(is_sequence) {
+        return numerical_sum_mean_std_iterable(input, NUMERICAL_STD, ddof);
+    }
+
+    if(mp_obj_is_type(input, &ulab_ndarray_type)) {
+        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(input);
+        return numerical_sum_mean_std_ndarray(ndarray, axis, NUMERICAL_STD, ddof);
+    }
+
+    mp_raise_TypeError(MP_ERROR_TEXT("input must be tuple, list, range, or ndarray"));
+    // the statement below is only reached if the raise above returns
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_std_obj, 1, numerical_std);
+#endif
+
+#if ULAB_NUMPY_HAS_SUM
+//| def sum(array: _ArrayLike, *, axis: Optional[int] = None) -> Union[_float, int, ulab.numpy.ndarray]:
+//|     """Return the sum of the array, as a number if axis is None, otherwise as an array."""
+//|     ...
+//|
+
+mp_obj_t numerical_sum(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Thin entry point: all argument handling is done by numerical_function,
+    // which is told to perform the NUMERICAL_SUM operation.
+    mp_obj_t total = numerical_function(n_args, pos_args, kw_args, NUMERICAL_SUM);
+    return total;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(numerical_sum_obj, 1, numerical_sum);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/numerical.h b/tulip/shared/ulab/code/numpy/numerical.h
new file mode 100644
index 000000000..186c817b0
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/numerical.h
@@ -0,0 +1,653 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+*/
+
+#ifndef _NUMERICAL_
+#define _NUMERICAL_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+// TODO: implement cumsum
+
+// RUN_ARGMIN1: scan one lane of `ndarray` along dimension `index`, starting at
+// `array`, and find its extreme element: the largest one if `op` is
+// NUMERICAL_MAX or NUMERICAL_ARGMAX, the smallest one otherwise. On ties the
+// first occurrence wins, because the comparisons are strict.
+// For NUMERICAL_ARGMAX/NUMERICAL_ARGMIN the position of the element is stored
+// at `rarray`, otherwise its value; in both cases (results)->itemsize bytes
+// are copied and `rarray` is advanced by that amount.
+// Side effect: `array` is left one full lane past its starting position; the
+// caller is responsible for rewinding it.
+// The loop counter is a size_t so that lanes longer than 65535 elements
+// terminate; with a 16-bit counter the loop condition could never become
+// false for such lanes. The stored position remains 16 bits wide.
+#define RUN_ARGMIN1(ndarray, type, array, results, rarray, index, op)\
+({\
+    uint16_t best_index = 0;\
+    type best_value = *((type *)(array));\
+    if(((op) == NUMERICAL_MAX) || ((op) == NUMERICAL_ARGMAX)) {\
+        for(size_t i=0; i < (ndarray)->shape[(index)]; i++) {\
+            if(*((type *)(array)) > best_value) {\
+                best_index = (uint16_t)i;\
+                best_value = *((type *)(array));\
+            }\
+            (array) += (ndarray)->strides[(index)];\
+        }\
+    } else {\
+        for(size_t i=0; i < (ndarray)->shape[(index)]; i++) {\
+            if(*((type *)(array)) < best_value) {\
+                best_index = (uint16_t)i;\
+                best_value = *((type *)(array));\
+            }\
+            (array) += (ndarray)->strides[(index)];\
+        }\
+    }\
+    if(((op) == NUMERICAL_ARGMAX) || ((op) == NUMERICAL_ARGMIN)) {\
+        memcpy((rarray), &best_index, (results)->itemsize);\
+    } else {\
+        memcpy((rarray), &best_value, (results)->itemsize);\
+    }\
+    (rarray) += (results)->itemsize;\
+})
+
+// RUN_SUM1: add up (ss).shape[0] elements of type `type`, starting at `array`
+// and stepping by (ss).strides[0] bytes. The accumulator has type `type`.
+// (results)->itemsize bytes of the sum are copied to `rarray`, which is then
+// advanced by the same amount.
+// Side effect: `array` is left (ss).shape[0] steps past its starting position.
+#define RUN_SUM1(type, array, results, rarray, ss)\
+({\
+    type sum = 0;\
+    for(size_t i=0; i < (ss).shape[0]; i++) {\
+        sum += *((type *)(array));\
+        (array) += (ss).strides[0];\
+    }\
+    memcpy((rarray), &sum, (results)->itemsize);\
+    (rarray) += (results)->itemsize;\
+})
+
+// The mean could be calculated by simply dividing the sum by
+// the number of elements, but that method is numerically unstable
+// RUN_MEAN1: running mean of (ss).shape[0] elements of type `type`, starting
+// at `array` and stepping by (ss).strides[0] bytes. Each element updates the
+// mean incrementally: M += (value - M) / (i + 1).
+// The result is written through `rarray`, which is dereferenced as an
+// mp_float_t target and post-incremented.
+// Side effect: `array` is left (ss).shape[0] steps past its starting position.
+#define RUN_MEAN1(type, array, rarray, ss)\
+({\
+    mp_float_t M = 0.0;\
+    for(size_t i=0; i < (ss).shape[0]; i++) {\
+        mp_float_t value = (mp_float_t)(*(type *)(array));\
+        M = M + (value - M) / (mp_float_t)(i+1);\
+        (array) += (ss).strides[0];\
+    }\
+    *(rarray)++ = M;\
+})
+
+// Instead of the straightforward implementation of the definition,
+// we take the numerically stable Welford algorithm here
+// https://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
+// RUN_STD1: standard deviation of (ss).shape[0] elements of type `type`,
+// starting at `array` and stepping by (ss).strides[0] bytes.
+// M is the running mean before the current element, m the running mean
+// including it, and S accumulates (value - M) * (value - m).
+// The value written through `rarray` (post-incremented) is sqrt(S / div);
+// `div` is supplied by the caller and is not checked for zero here.
+// Side effect: `array` is left (ss).shape[0] steps past its starting position.
+#define RUN_STD1(type, array, rarray, ss, div)\
+({\
+    mp_float_t M = 0.0, m = 0.0, S = 0.0;\
+    for(size_t i=0; i < (ss).shape[0]; i++) {\
+        mp_float_t value = (mp_float_t)(*(type *)(array));\
+        m = M + (value - M) / (mp_float_t)(i+1);\
+        S = S + (value - M) * (value - m);\
+        M = m;\
+        (array) += (ss).strides[0];\
+    }\
+    *(rarray)++ = MICROPY_FLOAT_C_FUN(sqrt)(S / (div));\
+})
+
+// RUN_MEAN_STD1: combined form of RUN_MEAN1 and RUN_STD1. The running mean is
+// always updated; the sum of squared deviations S is only accumulated when
+// `isStd` is true. The value written through `rarray` (post-incremented) is
+// sqrt(S / div) if `isStd` is true, and the mean M otherwise.
+// Note that `isStd` is used without parentheses in the expansion, so callers
+// should pass a simple expression.
+// Side effect: `array` is left (ss).shape[0] steps past its starting position.
+#define RUN_MEAN_STD1(type, array, rarray, ss, div, isStd)\
+({\
+    mp_float_t M = 0.0, m = 0.0, S = 0.0;\
+    for(size_t i=0; i < (ss).shape[0]; i++) {\
+        mp_float_t value = (mp_float_t)(*(type *)(array));\
+        m = M + (value - M) / (mp_float_t)(i+1);\
+        if(isStd) {\
+            S += (value - M) * (value - m);\
+        }\
+        M = m;\
+        (array) += (ss).strides[0];\
+    }\
+    *(rarray)++ = isStd ? MICROPY_FLOAT_C_FUN(sqrt)(S / (div)) : M;\
+})
+
+// RUN_DIFF1: produce one row of finite differences. The outer loop runs over
+// the last dimension of `results`; for every output element the inner loop
+// combines (N)+1 input elements, spaced (ndarray)->strides[index] bytes
+// apart, weighting them with the `stencil` coefficients. Each weighted term
+// is subtracted from the accumulator, i.e. the stored value is the negated
+// stencil sum.
+// Side effects: `array` advances by (ndarray)->strides[ULAB_MAX_DIMS - 1] and
+// `rarray` by (results)->itemsize per output element; the caller rewinds them.
+#define RUN_DIFF1(ndarray, type, array, results, rarray, index, stencil, N)\
+({\
+    for(size_t i=0; i < (results)->shape[ULAB_MAX_DIMS - 1]; i++) {\
+        type sum = 0;\
+        uint8_t *source = (array);\
+        for(uint8_t d=0; d < (N)+1; d++) {\
+            sum -= (stencil)[d] * *((type *)source);\
+            source += (ndarray)->strides[(index)];\
+        }\
+        (array) += (ndarray)->strides[ULAB_MAX_DIMS - 1];\
+        *(type *)(rarray) = sum;\
+        (rarray) += (results)->itemsize;\
+    }\
+})
+
+// HEAPSORT1: in-place ascending heap sort of N elements of type `type`.
+// `array` is reinterpreted as a `type` pointer and consecutive elements are
+// `increment` elements (not bytes) apart. `array` itself is not modified.
+// Phase one (r > 0) builds a max-heap by sifting down the nodes N/2-1 .. 0;
+// phase two (r == 0) repeatedly moves the root to the end of the shrinking
+// heap [0, q) and sifts the displaced element down from the root.
+// The loop condition makes an empty input (N == 0) a no-op: q is never zero
+// at the top of the loop for N > 0, so for non-empty input the condition is
+// always true and termination still happens through the `break`; for N == 0
+// it prevents `q--` from wrapping around and indexing far out of bounds.
+#define HEAPSORT1(type, array, increment, N)\
+({\
+    type *_array = (type *)array;\
+    type tmp;\
+    size_t c, q = (N), p, r = (N) >> 1;\
+    while (q > 0) {\
+        if (r > 0) {\
+            tmp = _array[(--r)*(increment)];\
+        } else {\
+            q--;\
+            if(q == 0) {\
+                break;\
+            }\
+            tmp = _array[q*(increment)];\
+            _array[q*(increment)] = _array[0];\
+        }\
+        p = r;\
+        c = r + r + 1;\
+        while (c < q) {\
+            if((c + 1 < q)  &&  (_array[(c+1)*(increment)] > _array[c*(increment)])) {\
+                c++;\
+            }\
+            if(_array[c*(increment)] > tmp) {\
+                _array[p*(increment)] = _array[c*(increment)];\
+                p = c;\
+                c = p + p + 1;\
+            } else {\
+                break;\
+            }\
+        }\
+        _array[p*(increment)] = tmp;\
+    }\
+})
+
+// HEAP_ARGSORT1: heap sort that leaves the data untouched and permutes the
+// index array instead. `array` is reinterpreted as a `type` pointer whose
+// elements are `increment` elements apart; `iarray` holds the indices, spaced
+// `iincrement` elements apart. All comparisons look the data up through the
+// indices, so on exit `iarray` lists the positions in ascending data order.
+// The heap bookkeeping variables are uint16_t, so N is truncated to 16 bits
+// on entry. NOTE(review): presumably callers guarantee N <= 65535 and that
+// `iarray` is pre-filled with 0..N-1 - confirm at the call sites.
+// N == 0 is rejected with assert().
+#define HEAP_ARGSORT1(type, array, increment, N, iarray, iincrement)\
+({\
+    type *_array = (type *)array;\
+    type tmp;\
+    uint16_t itmp, c, q = (N), p, r = (N) >> 1;\
+    assert(N);\
+    for (;;) {\
+        if (r > 0) {\
+            r--;\
+            itmp = (iarray)[r*(iincrement)];\
+            tmp = _array[itmp*(increment)];\
+        } else {\
+            q--;\
+            if(q == 0) {\
+                break;\
+            }\
+            itmp = (iarray)[q*(iincrement)];\
+            tmp = _array[itmp*(increment)];\
+            (iarray)[q*(iincrement)] = (iarray)[0];\
+        }\
+        p = r;\
+        c = r + r + 1;\
+        while (c < q) {\
+            if((c + 1 < q)  &&  (_array[(iarray)[(c+1)*(iincrement)]*(increment)] > _array[(iarray)[c*(iincrement)]*(increment)])) {\
+                c++;\
+            }\
+            if(_array[(iarray)[c*(iincrement)]*(increment)] > tmp) {\
+                (iarray)[p*(iincrement)] = (iarray)[c*(iincrement)];\
+                p = c;\
+                c = p + p + 1;\
+            } else {\
+                break;\
+            }\
+        }\
+        (iarray)[p*(iincrement)] = itmp;\
+    }\
+})
+
+#if ULAB_MAX_DIMS == 1
+// Variants for ULAB_MAX_DIMS == 1: there is only a single lane to process, so
+// each wrapper forwards its arguments straight to the single-lane helper.
+// The do { ... } while(0) wrapper makes the macro usable as one statement.
+#define RUN_SUM(type, array, results, rarray, ss) do {\
+    RUN_SUM1(type, (array), (results), (rarray), (ss));\
+} while(0)
+
+// Single-lane mean; see RUN_MEAN1.
+#define RUN_MEAN(type, array, rarray, ss) do {\
+    RUN_MEAN1(type, (array), (rarray), (ss));\
+} while(0)
+
+// Single-lane standard deviation for ULAB_MAX_DIMS == 1; see RUN_STD1.
+// RUN_STD1 takes exactly (type, array, rarray, ss, div). The extra `results`
+// argument that used to be passed here did not match that parameter list and
+// named something this macro does not receive, so any use of RUN_STD in a
+// one-dimensional build failed to compile.
+#define RUN_STD(type, array, rarray, ss, div) do {\
+    RUN_STD1(type, (array), (rarray), (ss), (div));\
+} while(0)
+
+// Remaining ULAB_MAX_DIMS == 1 wrappers. Each one forwards to its single-lane
+// helper; parameters that only matter for multi-dimensional iteration
+// (shape, strides, and for the sort macros ndarray, index and istrides) are
+// accepted so that call sites look the same for every ULAB_MAX_DIMS, but they
+// are not used here.
+#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
+    RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\
+} while(0)
+
+// Single-lane min/max/argmin/argmax; see RUN_ARGMIN1.
+#define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
+    RUN_ARGMIN1((ndarray), type, (array), (results), (rarray), (index), (op));\
+} while(0)
+
+// Single-row finite differences; see RUN_DIFF1.
+#define RUN_DIFF(ndarray, type, array, results, rarray, shape, strides, index, stencil, N) do {\
+    RUN_DIFF1((ndarray), type, (array), (results), (rarray), (index), (stencil), (N));\
+} while(0)
+
+// Single-lane in-place heap sort; see HEAPSORT1.
+#define HEAPSORT(ndarray, type, array, shape, strides, index, increment, N) do {\
+    HEAPSORT1(type, (array), (increment), (N));\
+} while(0)
+
+// Single-lane index sort; see HEAP_ARGSORT1.
+#define HEAP_ARGSORT(ndarray, type, array, shape, strides, index, increment, N, iarray, istrides, iincrement) do {\
+    HEAP_ARGSORT1(type, (array), (increment), (N), (iarray), (iincrement));\
+} while(0)
+
+#endif
+
+#if ULAB_MAX_DIMS == 2
+// Variants for ULAB_MAX_DIMS == 2. The four reduction wrappers below share one
+// pattern: run the single-lane helper, rewind `array` by
+// (ss).strides[0] * (ss).shape[0] (the distance the helper advanced it), then
+// step to the next lane with (ss).strides[ULAB_MAX_DIMS - 1]. The loop count
+// is (ss).shape[ULAB_MAX_DIMS - 1]; because these are do/while loops the body
+// always runs at least once, even if that count is 0.
+// `rarray` is advanced by the helpers themselves.
+#define RUN_SUM(type, array, results, rarray, ss) do {\
+    size_t l = 0;\
+    do {\
+        RUN_SUM1(type, (array), (results), (rarray), (ss));\
+        (array) -= (ss).strides[0] * (ss).shape[0];\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+// Mean of every lane; same iteration pattern as RUN_SUM.
+#define RUN_MEAN(type, array, rarray, ss) do {\
+    size_t l = 0;\
+    do {\
+        RUN_MEAN1(type, (array), (rarray), (ss));\
+        (array) -= (ss).strides[0] * (ss).shape[0];\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+// Standard deviation of every lane; same iteration pattern as RUN_SUM.
+#define RUN_STD(type, array, rarray, ss, div) do {\
+    size_t l = 0;\
+    do {\
+        RUN_STD1(type, (array), (rarray), (ss), (div));\
+        (array) -= (ss).strides[0] * (ss).shape[0];\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+// Mean or standard deviation (selected by isStd) of every lane; same
+// iteration pattern as RUN_SUM.
+#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
+    size_t l = 0;\
+    do {\
+        RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\
+        (array) -= (ss).strides[0] * (ss).shape[0];\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+
+// ULAB_MAX_DIMS == 2: run RUN_ARGMIN1 on every lane. After each lane `array`
+// is rewound by the length of the scanned axis of `ndarray` and moved to the
+// next lane using the caller-supplied `strides`; the number of lanes comes
+// from the caller-supplied `shape`. The body runs at least once (do/while).
+#define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
+    size_t l = 0;\
+    do {\
+        RUN_ARGMIN1((ndarray), type, (array), (results), (rarray), (index), (op));\
+        (array) -= (ndarray)->strides[(index)] * (ndarray)->shape[(index)];\
+        (array) += (strides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+// ULAB_MAX_DIMS == 2: run RUN_DIFF1 on every row of `results`. After each row
+// both pointers are rewound by the distance RUN_DIFF1 advanced them along the
+// last dimension and then moved one step along dimension ULAB_MAX_DIMS - 2.
+// The row count is taken from (results)->shape; the `shape` and `strides`
+// parameters are not used by this variant.
+#define RUN_DIFF(ndarray, type, array, results, rarray, shape, strides, index, stencil, N) do {\
+    size_t l = 0;\
+    do {\
+        RUN_DIFF1((ndarray), type, (array), (results), (rarray), (index), (stencil), (N));\
+        (array) -= (ndarray)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (array) += (ndarray)->strides[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (results)->strides[ULAB_MAX_DIMS - 2];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+// ULAB_MAX_DIMS == 2: heap-sort every lane in place. HEAPSORT1 does not move
+// `array`, so no rewind is needed; the pointer is simply stepped to the next
+// lane with the caller-supplied `strides`, (shape)[ULAB_MAX_DIMS - 1] times
+// (at least once, because of the do/while).
+#define HEAPSORT(ndarray, type, array, shape, strides, index, increment, N) do {\
+    size_t l = 0;\
+    do {\
+        HEAPSORT1(type, (array), (increment), (N));\
+        (array) += (strides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+// ULAB_MAX_DIMS == 2: index-sort every lane. The data pointer `array` and the
+// index pointer `iarray` are stepped to the next lane in lockstep, using
+// `strides` and `istrides` respectively; HEAP_ARGSORT1 moves neither of them.
+#define HEAP_ARGSORT(ndarray, type, array, shape, strides, index, increment, N, iarray, istrides, iincrement) do {\
+    size_t l = 0;\
+    do {\
+        HEAP_ARGSORT1(type, (array), (increment), (N), (iarray), (iincrement));\
+        (array) += (strides)[ULAB_MAX_DIMS - 1];\
+        (iarray) += (istrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+#endif
+
+#if ULAB_MAX_DIMS == 3
+// Variants for ULAB_MAX_DIMS == 3. The four reduction wrappers below share one
+// pattern with two nested do/while loops:
+//   inner loop (l): run the single-lane helper, rewind `array` by
+//       (ss).strides[0] * (ss).shape[0], step by (ss).strides[ULAB_MAX_DIMS - 1];
+//   outer loop (k): rewind the inner loop's total advance, then step by
+//       (ss).strides[ULAB_MAX_DIMS - 2].
+// Each loop body runs at least once, even if the corresponding shape entry
+// is 0. `rarray` is advanced by the helpers themselves.
+#define RUN_SUM(type, array, results, rarray, ss) do {\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            RUN_SUM1(type, (array), (results), (rarray), (ss));\
+            (array) -= (ss).strides[0] * (ss).shape[0];\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+// Mean of every lane; same iteration pattern as RUN_SUM.
+#define RUN_MEAN(type, array, rarray, ss) do {\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            RUN_MEAN1(type, (array), (rarray), (ss));\
+            (array) -= (ss).strides[0] * (ss).shape[0];\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+// Standard deviation of every lane; same iteration pattern as RUN_SUM.
+#define RUN_STD(type, array, rarray, ss, div) do {\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            RUN_STD1(type, (array), (rarray), (ss), (div));\
+            (array) -= (ss).strides[0] * (ss).shape[0];\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+// Mean or standard deviation (selected by isStd) of every lane; same
+// iteration pattern as RUN_SUM.
+#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\
+            (array) -= (ss).strides[0] * (ss).shape[0];\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+// ULAB_MAX_DIMS == 3: run RUN_ARGMIN1 on every lane, using two nested
+// do/while loops driven by the caller-supplied `shape` and `strides`.
+// Inner loop: rewind `array` by the length of the scanned axis of `ndarray`,
+// then step by (strides)[ULAB_MAX_DIMS - 1]. Outer loop: rewind the inner
+// loop's total advance, then step by (strides)[ULAB_MAX_DIMS - 2].
+// Each loop body runs at least once.
+#define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            RUN_ARGMIN1((ndarray), type, (array), (results), (rarray), (index), (op));\
+            (array) -= (ndarray)->strides[(index)] * (ndarray)->shape[(index)];\
+            (array) += (strides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+        (array) -= (strides)[ULAB_MAX_DIMS - 1] * (shape)[ULAB_MAX_DIMS-1];\
+        (array) += (strides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (shape)[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#define RUN_DIFF(ndarray, type, array, results, rarray, shape, strides, index, stencil, N) do { /* runs RUN_DIFF1 per (k, l); note the loops cover axes MAX-2 and MAX-3, not the last axis */\
+    size_t k = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t l = 0;\
+        do {\
+            RUN_DIFF1((ndarray), type, (array), (results), (rarray), (index), (stencil), (N));\
+            (array) -= (ndarray)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1]; /* rewind the last axis by the results' extent; assumes RUN_DIFF1 advanced that far - confirm */\
+			(array) += (ndarray)->strides[ULAB_MAX_DIMS - 2]; /* next input position along axis MAX-2 (this line is tab-indented in the original) */\
+            (rarray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1]; /* same rewind for the output pointer */\
+            (rarray) += (results)->strides[ULAB_MAX_DIMS - 2]; /* next output position along axis MAX-2 */\
+            l++;\
+        } while(l < (shape)[ULAB_MAX_DIMS - 2]); /* bound comes from (shape), while the rewinds below use (results)->shape */\
+        (array) -= (ndarray)->strides[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS-2]; /* take back the axis MAX-2 steps of the l loop */\
+        (array) += (ndarray)->strides[ULAB_MAX_DIMS - 3]; /* next input position along axis MAX-3 */\
+        (rarray) -= (results)->strides[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (results)->strides[ULAB_MAX_DIMS - 3];\
+        k++;\
+    } while(k < (shape)[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define HEAPSORT(ndarray, type, array, shape, strides, index, increment, N) do { /* runs HEAPSORT1 once per (k, l) position; (ndarray) and (index) are accepted but not referenced in this body */\
+    size_t k = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t l = 0;\
+        do {\
+            HEAPSORT1(type, (array), (increment), (N)); /* no rewind follows, unlike the RUN_* drivers; presumably HEAPSORT1 leaves (array) in place - confirm */\
+            (array) += (strides)[ULAB_MAX_DIMS - 1]; /* next position along the last axis of (strides) */\
+            l++;\
+        } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+        (array) -= (strides)[ULAB_MAX_DIMS - 1] * (shape)[ULAB_MAX_DIMS-1]; /* take back the last-axis steps of the l loop */\
+        (array) += (strides)[ULAB_MAX_DIMS - 2]; /* next position along the second-to-last axis */\
+        k++;\
+    } while(k < (shape)[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#define HEAP_ARGSORT(ndarray, type, array, shape, strides, index, increment, N, iarray, istrides, iincrement) do { /* runs HEAP_ARGSORT1 per (k, l); (ndarray) and (index) are not referenced in this body */\
+    size_t k = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t l = 0;\
+        do {\
+            HEAP_ARGSORT1(type, (array), (increment), (N), (iarray), (iincrement));\
+            (array) += (strides)[ULAB_MAX_DIMS - 1]; /* data pointer: next position along the last axis */\
+            (iarray) += (istrides)[ULAB_MAX_DIMS - 1]; /* (iarray) moves in lock-step, using its own strides */\
+            l++;\
+        } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+        (iarray) -= (istrides)[ULAB_MAX_DIMS - 1] * (shape)[ULAB_MAX_DIMS-1]; /* both pointers are rewound with the same (shape) extents */\
+        (iarray) += (istrides)[ULAB_MAX_DIMS - 2];\
+        (array) -= (strides)[ULAB_MAX_DIMS - 1] * (shape)[ULAB_MAX_DIMS-1]; /* take back the last-axis steps of the l loop */\
+        (array) += (strides)[ULAB_MAX_DIMS - 2]; /* next position along the second-to-last axis */\
+        k++;\
+    } while(k < (shape)[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#endif
+
+#if ULAB_MAX_DIMS == 4
+#define RUN_SUM(type, array, results, rarray, ss) do { /* ULAB_MAX_DIMS == 4: runs RUN_SUM1 once per (j, k, l) position of the three trailing axes described by (ss) */\
+    size_t j = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                RUN_SUM1(type, (array), (results), (rarray), (ss)); /* (rarray) is never adjusted in this macro; any advance must happen inside RUN_SUM1 */\
+                (array) -= (ss).strides[0] * (ss).shape[0]; /* rewind axis 0; presumably the span RUN_SUM1 moved (array) by - confirm */\
+                (array) += (ss).strides[ULAB_MAX_DIMS - 1]; /* next position along the last axis */\
+                l++;\
+            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1]; /* take back the last-axis steps of the l loop */\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 2]; /* next position along axis MAX-2 */\
+            k++;\
+        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2]; /* take back the axis MAX-2 steps of the k loop */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 3]; /* next position along axis MAX-3 */\
+        j++;\
+    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define RUN_MEAN(type, array, rarray, ss) do { /* ULAB_MAX_DIMS == 4: runs RUN_MEAN1 once per (j, k, l) position of the three trailing axes described by (ss) */\
+    size_t j = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                RUN_MEAN1(type, (array), (rarray), (ss)); /* (rarray) is never adjusted in this macro; any advance must happen inside RUN_MEAN1 */\
+                (array) -= (ss).strides[0] * (ss).shape[0]; /* rewind axis 0; presumably the span RUN_MEAN1 moved (array) by - confirm */\
+                (array) += (ss).strides[ULAB_MAX_DIMS - 1]; /* next position along the last axis */\
+                l++;\
+            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1]; /* take back the last-axis steps of the l loop */\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 2]; /* next position along axis MAX-2 */\
+            k++;\
+        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2]; /* take back the axis MAX-2 steps of the k loop */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 3]; /* next position along axis MAX-3 */\
+        j++;\
+    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define RUN_STD(type, array, rarray, ss, div) do { /* ULAB_MAX_DIMS == 4: runs RUN_STD1 once per (j, k, l) position of the three trailing axes described by (ss) */\
+    size_t j = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                RUN_STD1(type, (array), (rarray), (ss), (div)); /* (div) is forwarded untouched */\
+                (array) -= (ss).strides[0] * (ss).shape[0]; /* rewind axis 0; presumably the span RUN_STD1 moved (array) by - confirm */\
+                (array) += (ss).strides[ULAB_MAX_DIMS - 1]; /* next position along the last axis */\
+                l++;\
+            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1]; /* take back the last-axis steps of the l loop */\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 2]; /* next position along axis MAX-2 */\
+            k++;\
+        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2]; /* take back the axis MAX-2 steps of the k loop */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 3]; /* next position along axis MAX-3 */\
+        j++;\
+    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do { /* ULAB_MAX_DIMS == 4: runs RUN_MEAN_STD1 once per (j, k, l) position of the three trailing axes described by (ss) */\
+    size_t j = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd)); /* (div) and (isStd) are forwarded untouched */\
+                (array) -= (ss).strides[0] * (ss).shape[0]; /* rewind axis 0; presumably the span RUN_MEAN_STD1 moved (array) by - confirm */\
+                (array) += (ss).strides[ULAB_MAX_DIMS - 1]; /* next position along the last axis */\
+                l++;\
+            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1]; /* take back the last-axis steps of the l loop */\
+            (array) += (ss).strides[ULAB_MAX_DIMS - 2]; /* next position along axis MAX-2 */\
+            k++;\
+        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2]; /* take back the axis MAX-2 steps of the k loop */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 3]; /* next position along axis MAX-3 */\
+        j++;\
+    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do { /* ULAB_MAX_DIMS == 4: runs RUN_ARGMIN1 once per (j, k, l) position given by the caller's (shape)/(strides) arrays */\
+    size_t j = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                RUN_ARGMIN1((ndarray), type, (array), (results), (rarray), (index), (op)); /* (rarray) is never adjusted in this macro; any advance must happen inside RUN_ARGMIN1 */\
+                (array) -= (ndarray)->strides[(index)] * (ndarray)->shape[(index)]; /* rewind the (index) axis of (ndarray); presumably the axis RUN_ARGMIN1 walked - confirm */\
+                (array) += (strides)[ULAB_MAX_DIMS - 1]; /* next position along the last axis of (strides) */\
+                l++;\
+            } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+            (array) -= (strides)[ULAB_MAX_DIMS - 1] * (shape)[ULAB_MAX_DIMS-1]; /* take back the last-axis steps of the l loop */\
+            (array) += (strides)[ULAB_MAX_DIMS - 2]; /* next position along axis MAX-2 */\
+            k++;\
+        } while(k < (shape)[ULAB_MAX_DIMS - 2]);\
+        (array) -= (strides)[ULAB_MAX_DIMS - 2] * (shape)[ULAB_MAX_DIMS-2]; /* take back the axis MAX-2 steps of the k loop */\
+        (array) += (strides)[ULAB_MAX_DIMS - 3]; /* next position along axis MAX-3 */\
+        j++;\
+    } while(j < (shape)[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define RUN_DIFF(ndarray, type, array, results, rarray, shape, strides, index, stencil, N) do { /* ULAB_MAX_DIMS == 4: runs RUN_DIFF1 per (j, k, l); the loops cover axes MAX-2, MAX-3 and MAX-4, not the last axis */\
+    size_t j = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                RUN_DIFF1((ndarray), type, (array), (results), (rarray), (index), (stencil), (N));\
+                (array) -= (ndarray)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1]; /* rewind the last axis by the results' extent; assumes RUN_DIFF1 advanced that far - confirm */\
+                (array) += (ndarray)->strides[ULAB_MAX_DIMS - 2]; /* next input position along axis MAX-2 */\
+                (rarray) -= (results)->strides[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1]; /* same rewind for the output pointer */\
+                (rarray) += (results)->strides[ULAB_MAX_DIMS - 2]; /* next output position along axis MAX-2 */\
+                l++;\
+            } while(l < (shape)[ULAB_MAX_DIMS - 2]);\
+            (array) -= (strides)[ULAB_MAX_DIMS - 2] * (shape)[ULAB_MAX_DIMS-2]; /* NOTE(review): rewinds with (strides)/(shape) although the l loop stepped by (ndarray)->strides - confirm these agree for every caller */\
+            (array) += (strides)[ULAB_MAX_DIMS - 3]; /* next input position along axis MAX-3, taken from (strides) */\
+            (rarray) -= (results)->strides[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2]; /* output rewind keeps using (results) */\
+            (rarray) += (results)->strides[ULAB_MAX_DIMS - 3];\
+            k++;\
+        } while(k < (shape)[ULAB_MAX_DIMS - 3]);\
+        (array) -= (strides)[ULAB_MAX_DIMS - 3] * (shape)[ULAB_MAX_DIMS-3]; /* take back the axis MAX-3 steps of the k loop */\
+        (array) += (strides)[ULAB_MAX_DIMS - 4]; /* next input position along axis MAX-4 */\
+        (rarray) -= (results)->strides[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (rarray) += (results)->strides[ULAB_MAX_DIMS - 4];\
+        j++;\
+    } while(j < (shape)[ULAB_MAX_DIMS - 4]);\
+} while(0)
+
+#define HEAPSORT(ndarray, type, array, shape, strides, index, increment, N) do { /* ULAB_MAX_DIMS == 4: runs HEAPSORT1 once per (j, k, l); (ndarray) and (index) are not referenced in this body */\
+    size_t j = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                HEAPSORT1(type, (array), (increment), (N)); /* no rewind follows, unlike the RUN_* drivers; presumably HEAPSORT1 leaves (array) in place - confirm */\
+                (array) += (strides)[ULAB_MAX_DIMS - 1]; /* next position along the last axis of (strides) */\
+                l++;\
+            } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+            (array) -= (strides)[ULAB_MAX_DIMS - 1] * (shape)[ULAB_MAX_DIMS-1]; /* take back the last-axis steps of the l loop */\
+            (array) += (strides)[ULAB_MAX_DIMS - 2]; /* next position along axis MAX-2 */\
+            k++;\
+        } while(k < (shape)[ULAB_MAX_DIMS - 2]);\
+        (array) -= (strides)[ULAB_MAX_DIMS - 2] * (shape)[ULAB_MAX_DIMS-2]; /* take back the axis MAX-2 steps of the k loop */\
+        (array) += (strides)[ULAB_MAX_DIMS - 3]; /* next position along axis MAX-3 */\
+        j++;\
+    } while(j < (shape)[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define HEAP_ARGSORT(ndarray, type, array, shape, strides, index, increment, N, iarray, istrides, iincrement) do { /* ULAB_MAX_DIMS == 4: runs HEAP_ARGSORT1 per (j, k, l); (ndarray) and (index) are not referenced in this body */\
+    size_t j = 0;\
+    do { /* do-while: the body executes at least once, even when the extent is 0 */\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                HEAP_ARGSORT1(type, (array), (increment), (N), (iarray), (iincrement));\
+                (array) += (strides)[ULAB_MAX_DIMS - 1]; /* data pointer: next position along the last axis */\
+                (iarray) += (istrides)[ULAB_MAX_DIMS - 1]; /* (iarray) moves in lock-step, using its own strides */\
+                l++;\
+            } while(l < (shape)[ULAB_MAX_DIMS - 1]);\
+            (iarray) -= (istrides)[ULAB_MAX_DIMS - 1] * (shape)[ULAB_MAX_DIMS-1]; /* both pointers are rewound with the same (shape) extents */\
+            (iarray) += (istrides)[ULAB_MAX_DIMS - 2];\
+            (array) -= (strides)[ULAB_MAX_DIMS - 1] * (shape)[ULAB_MAX_DIMS-1]; /* take back the last-axis steps of the l loop */\
+            (array) += (strides)[ULAB_MAX_DIMS - 2]; /* next position along axis MAX-2 */\
+            k++;\
+        } while(k < (shape)[ULAB_MAX_DIMS - 2]);\
+        (iarray) -= (istrides)[ULAB_MAX_DIMS - 2] * (shape)[ULAB_MAX_DIMS-2];\
+        (iarray) += (istrides)[ULAB_MAX_DIMS - 3];\
+        (array) -= (strides)[ULAB_MAX_DIMS - 2] * (shape)[ULAB_MAX_DIMS-2]; /* take back the axis MAX-2 steps of the k loop */\
+        (array) += (strides)[ULAB_MAX_DIMS - 3]; /* next position along axis MAX-3 */\
+        j++;\
+    } while(j < (shape)[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#endif
+
+MP_DECLARE_CONST_FUN_OBJ_2(numerical_cross_obj); // the only one declared with the fixed two-argument macro
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_all_obj); // keyword-argument function objects, in alphabetical order
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_any_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_argmax_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_argmin_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_argsort_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_diff_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_flip_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_max_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_mean_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_median_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_min_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_roll_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_sort_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_sort_inplace_obj); // not referenced by the numpy globals table in numpy.c
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_std_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(numerical_sum_obj);
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/numpy.c b/tulip/shared/ulab/code/numpy/numpy.c
new file mode 100644
index 000000000..eafd7728a
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/numpy.c
@@ -0,0 +1,408 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020-2022 Zoltán Vörös
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include <string.h>
+#include "py/runtime.h"
+
+#include "numpy.h"
+#include "approx.h"
+#include "bitwise.h"
+#include "carray/carray.h"
+#include "compare.h"
+#include "create.h"
+#include "fft/fft.h"
+#include "filter.h"
+#include "io/io.h"
+#include "linalg/linalg.h"
+#include "numerical.h"
+#include "random/random.h"
+#include "stats.h"
+#include "transform.h"
+#include "poly.h"
+#include "vector.h"
+
+//| """Compatibility layer for numpy"""
+//|
+
+//| class ndarray: ...
+
+//| def get_printoptions() -> Dict[str, int]:
+//|     """Get printing options"""
+//|     ...
+//|
+//| def set_printoptions(threshold: Optional[int] = None, edgeitems: Optional[int] = None) -> None:
+//|     """Set printing options"""
+//|     ...
+//|
+//| def ndinfo(array: ulab.numpy.ndarray) -> None:
+//|     ...
+//|
+//| def array(
+//|     values: Union[ndarray, Iterable[Union[_float, _bool, Iterable[Any]]]],
+//|     *,
+//|     dtype: _DType = ulab.numpy.float
+//| ) -> ulab.numpy.ndarray:
+//|     """alternate constructor function for `ulab.numpy.ndarray`. Mirrors numpy.array"""
+//|     ...
+
+// math constants
+#if ULAB_NUMPY_HAS_E // Euler's number; referenced by the "e" entry of the globals table
+ULAB_DEFINE_FLOAT_CONST(ulab_const_float_e, MP_E, 0x402df854UL, 0x4005bf0a8b145769ULL); // hex arguments: IEEE-754 binary32 / binary64 bit patterns of e
+#endif
+
+#if ULAB_NUMPY_HAS_INF // positive infinity; referenced by the "inf" entry (note the numpy_ prefix, unlike e and pi)
+ULAB_DEFINE_FLOAT_CONST(numpy_const_float_inf, (mp_float_t)INFINITY, 0x7f800000UL, 0x7ff0000000000000ULL); // hex arguments: binary32 / binary64 bit patterns of +inf
+#endif
+
+#if ULAB_NUMPY_HAS_NAN // quiet NaN; referenced by the "nan" entry
+ULAB_DEFINE_FLOAT_CONST(numpy_const_float_nan, (mp_float_t)NAN, 0x7fc00000UL, 0x7ff8000000000000ULL); // hex arguments: binary32 / binary64 bit patterns of a quiet NaN
+#endif
+
+#if ULAB_NUMPY_HAS_PI // pi; referenced by the "pi" entry
+ULAB_DEFINE_FLOAT_CONST(ulab_const_float_pi, MP_PI, 0x40490fdbUL, 0x400921fb54442d18ULL); // hex arguments: binary32 / binary64 bit patterns of pi
+#endif
+
+static const mp_rom_map_elem_t ulab_numpy_globals_table[] = { // name -> object table of the numpy module; each #if-guarded entry exists only when its switch is non-zero
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_numpy) }, // the module's __name__
+    { MP_ROM_QSTR(MP_QSTR_ndarray), MP_ROM_PTR(&ulab_ndarray_type) },
+    { MP_ROM_QSTR(MP_QSTR_array), MP_ROM_PTR(&ndarray_array_constructor_obj) },
+    #if ULAB_NUMPY_HAS_FROMBUFFER
+        { MP_ROM_QSTR(MP_QSTR_frombuffer), MP_ROM_PTR(&create_frombuffer_obj) },
+    #endif
+    // math constants (defined with ULAB_DEFINE_FLOAT_CONST earlier in this file)
+    #if ULAB_NUMPY_HAS_E
+        { MP_ROM_QSTR(MP_QSTR_e), ULAB_REFERENCE_FLOAT_CONST(ulab_const_float_e) },
+    #endif
+    #if ULAB_NUMPY_HAS_INF
+        { MP_ROM_QSTR(MP_QSTR_inf), ULAB_REFERENCE_FLOAT_CONST(numpy_const_float_inf) },
+    #endif
+    #if ULAB_NUMPY_HAS_NAN
+        { MP_ROM_QSTR(MP_QSTR_nan), ULAB_REFERENCE_FLOAT_CONST(numpy_const_float_nan) },
+    #endif
+    #if ULAB_NUMPY_HAS_PI
+        { MP_ROM_QSTR(MP_QSTR_pi), ULAB_REFERENCE_FLOAT_CONST(ulab_const_float_pi) },
+    #endif
+    // class constants (dtype values stored as small integers); all but complex are always included
+    { MP_ROM_QSTR(MP_QSTR_bool), MP_ROM_INT(NDARRAY_BOOL) },
+    { MP_ROM_QSTR(MP_QSTR_uint8), MP_ROM_INT(NDARRAY_UINT8) },
+    { MP_ROM_QSTR(MP_QSTR_int8), MP_ROM_INT(NDARRAY_INT8) },
+    { MP_ROM_QSTR(MP_QSTR_uint16), MP_ROM_INT(NDARRAY_UINT16) },
+    { MP_ROM_QSTR(MP_QSTR_int16), MP_ROM_INT(NDARRAY_INT16) },
+    { MP_ROM_QSTR(MP_QSTR_float), MP_ROM_INT(NDARRAY_FLOAT) },
+    #if ULAB_SUPPORTS_COMPLEX
+        { MP_ROM_QSTR(MP_QSTR_complex), MP_ROM_INT(NDARRAY_COMPLEX) },
+    #endif
+    // sub-modules of numpy
+    #if ULAB_NUMPY_HAS_FFT_MODULE
+        { MP_ROM_QSTR(MP_QSTR_fft), MP_ROM_PTR(&ulab_fft_module) },
+    #endif
+    #if ULAB_NUMPY_HAS_LINALG_MODULE
+        { MP_ROM_QSTR(MP_QSTR_linalg), MP_ROM_PTR(&ulab_linalg_module) },
+    #endif
+    #if ULAB_NUMPY_HAS_RANDOM_MODULE
+        { MP_ROM_QSTR(MP_QSTR_random), MP_ROM_PTR(&ulab_numpy_random_module) },
+    #endif
+    #if ULAB_HAS_PRINTOPTIONS // one switch controls both the setter and the getter
+        { MP_ROM_QSTR(MP_QSTR_set_printoptions), MP_ROM_PTR(&ndarray_set_printoptions_obj) },
+        { MP_ROM_QSTR(MP_QSTR_get_printoptions), MP_ROM_PTR(&ndarray_get_printoptions_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_NDINFO
+        { MP_ROM_QSTR(MP_QSTR_ndinfo), MP_ROM_PTR(&ndarray_info_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ARANGE
+        { MP_ROM_QSTR(MP_QSTR_arange), MP_ROM_PTR(&create_arange_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_COMPRESS
+        { MP_ROM_QSTR(MP_QSTR_compress), MP_ROM_PTR(&transform_compress_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_CONCATENATE
+        { MP_ROM_QSTR(MP_QSTR_concatenate), MP_ROM_PTR(&create_concatenate_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_DELETE
+        { MP_ROM_QSTR(MP_QSTR_delete), MP_ROM_PTR(&transform_delete_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_DIAG
+        #if ULAB_MAX_DIMS > 1
+            { MP_ROM_QSTR(MP_QSTR_diag), MP_ROM_PTR(&create_diag_obj) },
+        #endif
+    #endif
+    #if ULAB_NUMPY_HAS_EMPTY
+        { MP_ROM_QSTR(MP_QSTR_empty), MP_ROM_PTR(&create_zeros_obj) }, // empty is served by the same object as zeros
+    #endif
+    #if ULAB_MAX_DIMS > 1
+        #if ULAB_NUMPY_HAS_EYE
+            { MP_ROM_QSTR(MP_QSTR_eye), MP_ROM_PTR(&create_eye_obj) },
+        #endif
+    #endif /* ULAB_MAX_DIMS */
+    // functions of the approx sub-module
+    #if ULAB_NUMPY_HAS_INTERP
+        { MP_ROM_QSTR(MP_QSTR_interp), MP_ROM_PTR(&approx_interp_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_TRAPZ
+        { MP_ROM_QSTR(MP_QSTR_trapz), MP_ROM_PTR(&approx_trapz_obj) },
+    #endif
+    // further functions of the create sub-module (several create_* entries already appear above)
+    #if ULAB_NUMPY_HAS_FULL
+        { MP_ROM_QSTR(MP_QSTR_full), MP_ROM_PTR(&create_full_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_LINSPACE
+        { MP_ROM_QSTR(MP_QSTR_linspace), MP_ROM_PTR(&create_linspace_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_LOGSPACE
+        { MP_ROM_QSTR(MP_QSTR_logspace), MP_ROM_PTR(&create_logspace_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ONES
+        { MP_ROM_QSTR(MP_QSTR_ones), MP_ROM_PTR(&create_ones_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ZEROS
+        { MP_ROM_QSTR(MP_QSTR_zeros), MP_ROM_PTR(&create_zeros_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_CLIP // the compare_* entries start here
+        { MP_ROM_QSTR(MP_QSTR_clip), MP_ROM_PTR(&compare_clip_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_EQUAL
+        { MP_ROM_QSTR(MP_QSTR_equal), MP_ROM_PTR(&compare_equal_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_NOTEQUAL
+        { MP_ROM_QSTR(MP_QSTR_not_equal), MP_ROM_PTR(&compare_not_equal_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ISFINITE
+        { MP_ROM_QSTR(MP_QSTR_isfinite), MP_ROM_PTR(&compare_isfinite_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ISINF
+        { MP_ROM_QSTR(MP_QSTR_isinf), MP_ROM_PTR(&compare_isinf_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_MAXIMUM
+        { MP_ROM_QSTR(MP_QSTR_maximum), MP_ROM_PTR(&compare_maximum_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_MINIMUM
+        { MP_ROM_QSTR(MP_QSTR_minimum), MP_ROM_PTR(&compare_minimum_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_NONZERO
+        { MP_ROM_QSTR(MP_QSTR_nonzero), MP_ROM_PTR(&compare_nonzero_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_WHERE
+        { MP_ROM_QSTR(MP_QSTR_where), MP_ROM_PTR(&compare_where_obj) },
+    #endif
+    // bitwise operators
+    #if ULAB_NUMPY_HAS_BITWISE_AND
+        { MP_ROM_QSTR(MP_QSTR_bitwise_and), MP_ROM_PTR(&bitwise_bitwise_and_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_BITWISE_OR
+        { MP_ROM_QSTR(MP_QSTR_bitwise_or), MP_ROM_PTR(&bitwise_bitwise_or_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_BITWISE_XOR
+        { MP_ROM_QSTR(MP_QSTR_bitwise_xor), MP_ROM_PTR(&bitwise_bitwise_xor_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_LEFT_SHIFT
+        { MP_ROM_QSTR(MP_QSTR_left_shift), MP_ROM_PTR(&left_shift_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_RIGHT_SHIFT
+        { MP_ROM_QSTR(MP_QSTR_right_shift), MP_ROM_PTR(&right_shift_obj) },
+    #endif
+    // functions of the filter sub-module
+    #if ULAB_NUMPY_HAS_CONVOLVE
+        { MP_ROM_QSTR(MP_QSTR_convolve), MP_ROM_PTR(&filter_convolve_obj) },
+    #endif
+    // functions of the numerical sub-module (interleaved with a few create_*, transform_*, stats_* and io_* entries)
+    #if ULAB_NUMPY_HAS_ALL
+        { MP_ROM_QSTR(MP_QSTR_all), MP_ROM_PTR(&numerical_all_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ANY
+        { MP_ROM_QSTR(MP_QSTR_any), MP_ROM_PTR(&numerical_any_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ARGMINMAX // one switch controls both argmax and argmin
+        { MP_ROM_QSTR(MP_QSTR_argmax), MP_ROM_PTR(&numerical_argmax_obj) },
+        { MP_ROM_QSTR(MP_QSTR_argmin), MP_ROM_PTR(&numerical_argmin_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ARGSORT
+        { MP_ROM_QSTR(MP_QSTR_argsort), MP_ROM_PTR(&numerical_argsort_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ASARRAY
+        { MP_ROM_QSTR(MP_QSTR_asarray), MP_ROM_PTR(&create_asarray_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_CROSS
+        { MP_ROM_QSTR(MP_QSTR_cross), MP_ROM_PTR(&numerical_cross_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_DIFF
+        { MP_ROM_QSTR(MP_QSTR_diff), MP_ROM_PTR(&numerical_diff_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_DOT
+        #if ULAB_MAX_DIMS > 1
+            { MP_ROM_QSTR(MP_QSTR_dot), MP_ROM_PTR(&transform_dot_obj) },
+        #endif
+    #endif
+    #if ULAB_NUMPY_HAS_TRACE
+        #if ULAB_MAX_DIMS > 1
+            { MP_ROM_QSTR(MP_QSTR_trace), MP_ROM_PTR(&stats_trace_obj) },
+        #endif
+    #endif
+    #if ULAB_NUMPY_HAS_FLIP
+        { MP_ROM_QSTR(MP_QSTR_flip), MP_ROM_PTR(&numerical_flip_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_LOAD
+        { MP_ROM_QSTR(MP_QSTR_load), MP_ROM_PTR(&io_load_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_LOADTXT
+        { MP_ROM_QSTR(MP_QSTR_loadtxt), MP_ROM_PTR(&io_loadtxt_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_MINMAX // max here and min further down share this switch
+        { MP_ROM_QSTR(MP_QSTR_max), MP_ROM_PTR(&numerical_max_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_MEAN
+        { MP_ROM_QSTR(MP_QSTR_mean), MP_ROM_PTR(&numerical_mean_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_MEDIAN
+        { MP_ROM_QSTR(MP_QSTR_median), MP_ROM_PTR(&numerical_median_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_MINMAX
+        { MP_ROM_QSTR(MP_QSTR_min), MP_ROM_PTR(&numerical_min_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ROLL
+        { MP_ROM_QSTR(MP_QSTR_roll), MP_ROM_PTR(&numerical_roll_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SAVE
+        { MP_ROM_QSTR(MP_QSTR_save), MP_ROM_PTR(&io_save_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SAVETXT
+        { MP_ROM_QSTR(MP_QSTR_savetxt), MP_ROM_PTR(&io_savetxt_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SIZE
+        { MP_ROM_QSTR(MP_QSTR_size), MP_ROM_PTR(&transform_size_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SORT
+        { MP_ROM_QSTR(MP_QSTR_sort), MP_ROM_PTR(&numerical_sort_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_STD
+        { MP_ROM_QSTR(MP_QSTR_std), MP_ROM_PTR(&numerical_std_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SUM
+        { MP_ROM_QSTR(MP_QSTR_sum), MP_ROM_PTR(&numerical_sum_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_TAKE
+        { MP_ROM_QSTR(MP_QSTR_take), MP_ROM_PTR(&create_take_obj) },
+    #endif
+    // functions of the poly sub-module
+    #if ULAB_NUMPY_HAS_POLYFIT
+        { MP_ROM_QSTR(MP_QSTR_polyfit), MP_ROM_PTR(&poly_polyfit_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_POLYVAL
+        { MP_ROM_QSTR(MP_QSTR_polyval), MP_ROM_PTR(&poly_polyval_obj) },
+    #endif
+    // functions of the vector sub-module (entries below are not indented under their #if, unlike the ones above)
+    #if ULAB_NUMPY_HAS_ACOS
+    { MP_ROM_QSTR(MP_QSTR_acos), MP_ROM_PTR(&vector_acos_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ACOSH
+    { MP_ROM_QSTR(MP_QSTR_acosh), MP_ROM_PTR(&vector_acosh_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ARCTAN2
+    { MP_ROM_QSTR(MP_QSTR_arctan2), MP_ROM_PTR(&vector_arctan2_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_AROUND
+    { MP_ROM_QSTR(MP_QSTR_around), MP_ROM_PTR(&vector_around_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ASIN
+    { MP_ROM_QSTR(MP_QSTR_asin), MP_ROM_PTR(&vector_asin_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ASINH
+    { MP_ROM_QSTR(MP_QSTR_asinh), MP_ROM_PTR(&vector_asinh_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ATAN
+    { MP_ROM_QSTR(MP_QSTR_atan), MP_ROM_PTR(&vector_atan_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_ATANH
+    { MP_ROM_QSTR(MP_QSTR_atanh), MP_ROM_PTR(&vector_atanh_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_CEIL
+    { MP_ROM_QSTR(MP_QSTR_ceil), MP_ROM_PTR(&vector_ceil_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_COS
+    { MP_ROM_QSTR(MP_QSTR_cos), MP_ROM_PTR(&vector_cos_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_COSH
+    { MP_ROM_QSTR(MP_QSTR_cosh), MP_ROM_PTR(&vector_cosh_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_DEGREES
+    { MP_ROM_QSTR(MP_QSTR_degrees), MP_ROM_PTR(&vector_degrees_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_EXP
+    { MP_ROM_QSTR(MP_QSTR_exp), MP_ROM_PTR(&vector_exp_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_EXPM1
+    { MP_ROM_QSTR(MP_QSTR_expm1), MP_ROM_PTR(&vector_expm1_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_FLOOR
+    { MP_ROM_QSTR(MP_QSTR_floor), MP_ROM_PTR(&vector_floor_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_LOG
+    { MP_ROM_QSTR(MP_QSTR_log), MP_ROM_PTR(&vector_log_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_LOG10
+    { MP_ROM_QSTR(MP_QSTR_log10), MP_ROM_PTR(&vector_log10_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_LOG2
+    { MP_ROM_QSTR(MP_QSTR_log2), MP_ROM_PTR(&vector_log2_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_RADIANS
+    { MP_ROM_QSTR(MP_QSTR_radians), MP_ROM_PTR(&vector_radians_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SIN
+    { MP_ROM_QSTR(MP_QSTR_sin), MP_ROM_PTR(&vector_sin_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SINC
+    { MP_ROM_QSTR(MP_QSTR_sinc), MP_ROM_PTR(&vector_sinc_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SINH
+    { MP_ROM_QSTR(MP_QSTR_sinh), MP_ROM_PTR(&vector_sinh_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_SQRT
+    { MP_ROM_QSTR(MP_QSTR_sqrt), MP_ROM_PTR(&vector_sqrt_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_TAN
+    { MP_ROM_QSTR(MP_QSTR_tan), MP_ROM_PTR(&vector_tan_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_TANH
+    { MP_ROM_QSTR(MP_QSTR_tanh), MP_ROM_PTR(&vector_tanh_obj) },
+    #endif
+    #if ULAB_NUMPY_HAS_VECTORIZE
+    { MP_ROM_QSTR(MP_QSTR_vectorize), MP_ROM_PTR(&vector_vectorize_obj) },
+    #endif
+    #if ULAB_SUPPORTS_COMPLEX // carray_* helpers need complex support in addition to their own switches
+        #if ULAB_NUMPY_HAS_REAL
+        { MP_ROM_QSTR(MP_QSTR_real), MP_ROM_PTR(&carray_real_obj) },
+        #endif
+        #if ULAB_NUMPY_HAS_IMAG
+        { MP_ROM_QSTR(MP_QSTR_imag), MP_ROM_PTR(&carray_imag_obj) },
+        #endif
+        #if ULAB_NUMPY_HAS_CONJUGATE
+            { MP_ROM_QSTR(MP_QSTR_conjugate), MP_ROM_PTR(&carray_conjugate_obj) },
+        #endif
+        #if ULAB_NUMPY_HAS_SORT_COMPLEX
+            { MP_ROM_QSTR(MP_QSTR_sort_complex), MP_ROM_PTR(&carray_sort_complex_obj) },
+        #endif
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_numpy_globals, ulab_numpy_globals_table); // dict object built over ulab_numpy_globals_table
+
+const mp_obj_module_t ulab_numpy_module = { // the numpy module object; declared extern in numpy.h
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_numpy_globals, // explicit cast to the non-const pointer type (presumably the dict is const - confirm)
+};
+
+#if CIRCUITPY_ULAB // registration is compiled only when CIRCUITPY_ULAB is non-zero; presumably the CircuitPython build - confirm
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_numpy, ulab_numpy_module);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/numpy.h b/tulip/shared/ulab/code/numpy/numpy.h
new file mode 100644
index 000000000..f1348f381
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/numpy.h
@@ -0,0 +1,21 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+ *               
+*/
+
+#ifndef _NUMPY_ // include guard for numpy.h
+#define _NUMPY_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+extern const mp_obj_module_t ulab_numpy_module; // the numpy module object, defined in numpy.c
+
+#endif /* _NUMPY_ */
diff --git a/tulip/shared/ulab/code/numpy/poly.c b/tulip/shared/ulab/code/numpy/poly.c
new file mode 100644
index 000000000..8b8e14358
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/poly.c
@@ -0,0 +1,251 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+ *               2020 Jeff Epler for Adafruit Industries
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020 Taku Fukada
+*/
+
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/objarray.h"
+
+#include "../ulab.h"
+#include "linalg/linalg_tools.h"
+#include "../ulab_tools.h"
+#include "carray/carray_tools.h"
+#include "poly.h"
+
+#if ULAB_NUMPY_HAS_POLYFIT
+
+// Validates the requested polynomial degree and narrows it to uint8_t.
+// The loops in poly_polyfit use uint8_t counters that are compared against
+// deg+1: with deg == 255 they could never terminate, and values outside of
+// the uint8_t range would be silently truncated by a plain cast.
+static uint8_t poly_polyfit_degree(mp_obj_t o_deg) {
+    mp_int_t deg = mp_obj_get_int(o_deg);
+    if((deg < 0) || (deg > 254)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("degree must be between 0 and 254"));
+    }
+    return (uint8_t)deg;
+}
+
+// polyfit(y, deg) or polyfit(x, y, deg)
+//
+// Least-squares polynomial fit. With two arguments, the abscissae are taken to
+// be 0, 1, 2, ...; with three arguments, x and y must have equal length.
+// The function solves the normal equations beta = (X^T X)^(-1) X^T y, where
+// row j of X^T holds x^j, and returns a float ndarray of deg+1 coefficients
+// with the leading (highest power) coefficient first.
+//
+// Raises ValueError, if an input is not array-like, if the input lengths differ,
+// if the degree is out of range, if there are fewer than deg+1 data points,
+// or if X^T X cannot be inverted.
+mp_obj_t poly_polyfit(size_t n_args, const mp_obj_t *args) {
+    if(!ndarray_object_is_array_like(args[0])) {
+        mp_raise_ValueError(MP_ERROR_TEXT("input data must be an iterable"));
+    }
+    #if ULAB_SUPPORTS_COMPLEX
+    if(mp_obj_is_type(args[0], &ulab_ndarray_type)) {
+        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0]);
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype)
+    }
+    #endif
+    size_t lenx = 0, leny = 0;
+    uint8_t deg = 0;
+    mp_float_t *x, *XT, *y, *prod;
+
+    if(n_args == 2) { // only the y values are supplied
+        // TODO: this is actually not enough: the first argument can very well be a matrix,
+        // in which case we are between the rock and a hard place
+        leny = (size_t)mp_obj_get_int(mp_obj_len_maybe(args[0]));
+        deg = poly_polyfit_degree(args[1]);
+        // deg+1 coefficients are determined, and x is later re-used for storing
+        // deg+1 intermediate values, so at least deg+1 data points are required
+        if(leny < (size_t)deg + 1) {
+            mp_raise_ValueError(MP_ERROR_TEXT("more degrees of freedom than data points"));
+        }
+        lenx = leny;
+        x = m_new(mp_float_t, lenx); // assume uniformly spaced data points
+        for(size_t i=0; i < lenx; i++) {
+            x[i] = i;
+        }
+        y = m_new(mp_float_t, leny);
+        fill_array_iterable(y, args[0]);
+    } else /* n_args == 3 */ {
+        if(!ndarray_object_is_array_like(args[1])) {
+            mp_raise_ValueError(MP_ERROR_TEXT("input data must be an iterable"));
+        }
+        lenx = (size_t)mp_obj_get_int(mp_obj_len_maybe(args[0]));
+        leny = (size_t)mp_obj_get_int(mp_obj_len_maybe(args[1]));
+        if(lenx != leny) {
+            mp_raise_ValueError(MP_ERROR_TEXT("input vectors must be of equal length"));
+        }
+        deg = poly_polyfit_degree(args[2]);
+        // see the note in the two-argument branch: deg+1 data points are required
+        if(leny < (size_t)deg + 1) {
+            mp_raise_ValueError(MP_ERROR_TEXT("more degrees of freedom than data points"));
+        }
+        x = m_new(mp_float_t, lenx);
+        fill_array_iterable(x, args[0]);
+        y = m_new(mp_float_t, leny);
+        fill_array_iterable(y, args[1]);
+    }
+
+    // one could probably express X as a function of XT,
+    // and thereby save RAM, because X is used only in the product
+    XT = m_new(mp_float_t, (deg+1)*leny); // XT is a matrix of shape (deg+1, len) (rows, columns)
+    for(size_t i=0; i < leny; i++) { // column index
+        XT[i+0*lenx] = 1.0; // top row
+        for(uint8_t j=1; j < deg+1; j++) { // row index
+            XT[i+j*leny] = XT[i+(j-1)*leny]*x[i];
+        }
+    }
+
+    prod = m_new(mp_float_t, (deg+1)*(deg+1)); // the product matrix is of shape (deg+1, deg+1)
+    mp_float_t sum;
+    for(uint8_t i=0; i < deg+1; i++) { // column index
+        for(uint8_t j=0; j < deg+1; j++) { // row index
+            sum = 0.0;
+            for(size_t k=0; k < lenx; k++) {
+                // (j, k) * (k, i)
+                // Note that the second matrix is simply the transpose of the first:
+                // X(k, i) = XT(i, k) = XT[k*lenx+i]
+                sum += XT[j*lenx+k]*XT[i*lenx+k]; // X[k*(deg+1)+i];
+            }
+            prod[j*(deg+1)+i] = sum;
+        }
+    }
+    if(!linalg_invert_matrix(prod, deg+1)) {
+        // Although X was a Vandermonde matrix, whose inverse is guaranteed to exist,
+        // we bail out here, if prod couldn't be inverted: if the values in x are not all
+        // distinct, prod is singular
+        m_del(mp_float_t, XT, (deg+1)*lenx);
+        m_del(mp_float_t, x, lenx);
+        m_del(mp_float_t, y, lenx);
+        m_del(mp_float_t, prod, (deg+1)*(deg+1));
+        mp_raise_ValueError(MP_ERROR_TEXT("could not invert Vandermonde matrix"));
+    }
+    // at this point, we have the inverse of X^T * X
+    // y is a column vector; x is free now, we can use it for storing intermediate values
+    // (x holds lenx >= deg+1 elements, which is guaranteed by the length check above)
+    for(uint8_t i=0; i < deg+1; i++) { // row index
+        sum = 0.0;
+        for(size_t j=0; j < lenx; j++) { // column index
+            sum += XT[i*lenx+j]*y[j];
+        }
+        x[i] = sum;
+    }
+    // XT is no longer needed
+    m_del(mp_float_t, XT, (deg+1)*leny);
+
+    ndarray_obj_t *beta = ndarray_new_linear_array(deg+1, NDARRAY_FLOAT);
+    mp_float_t *betav = (mp_float_t *)beta->array;
+    // x[0..(deg+1)] contains now the product X^T * y; we can get rid of y
+    m_del(mp_float_t, y, leny);
+
+    // now, we calculate beta, i.e., we apply prod = (X^T * X)^(-1) on x = X^T * y; x is a column vector now
+    for(uint8_t i=0; i < deg+1; i++) {
+        sum = 0.0;
+        for(uint8_t j=0; j < deg+1; j++) {
+            sum += prod[i*(deg+1)+j]*x[j];
+        }
+        betav[i] = sum;
+    }
+    m_del(mp_float_t, x, lenx);
+    m_del(mp_float_t, prod, (deg+1)*(deg+1));
+    for(uint8_t i=0; i < (deg+1)/2; i++) {
+        // We have to reverse the array, for the leading coefficient comes first.
+        SWAP(mp_float_t, betav[i], betav[deg-i]);
+    }
+    return MP_OBJ_FROM_PTR(beta);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(poly_polyfit_obj, 2, 3, poly_polyfit);
+#endif
+
+#if ULAB_NUMPY_HAS_POLYVAL
+
+// Evaluates the polynomial with the coefficients p[0], ..., p[plen-1] at x by
+// means of Horner's scheme; p[0] is the coefficient of the highest power.
+// plen is a size_t, so that coefficient arrays with more than 255 entries are
+// not truncated. An empty coefficient array evaluates to 0.
+static mp_float_t poly_eval(mp_float_t x, mp_float_t *p, size_t plen) {
+    if(plen == 0) {
+        // nothing to read from p
+        return 0.0;
+    }
+    mp_float_t y = p[0];
+    for(size_t j=1; j < plen; j++) {
+        y *= x;
+        y += p[j];
+    }
+    return y;
+}
+
+// polyval(p, x)
+//
+// Evaluates the polynomial with the coefficients p (highest power first) at x.
+// If x is not array-like, a single float is returned. If x is an ndarray, the
+// result is a dense float ndarray with the shape of x; for any other iterable,
+// the result is a linear float ndarray of the same length.
+// Raises TypeError, if p is not array-like.
+mp_obj_t poly_polyval(mp_obj_t o_p, mp_obj_t o_x) {
+    if(!ndarray_object_is_array_like(o_p)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input is not iterable"));
+    }
+    #if ULAB_SUPPORTS_COMPLEX
+    ndarray_obj_t *input;
+    if(mp_obj_is_type(o_p, &ulab_ndarray_type)) {
+        input = MP_OBJ_TO_PTR(o_p);
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(input->dtype)
+    }
+    if(mp_obj_is_type(o_x, &ulab_ndarray_type)) {
+        input = MP_OBJ_TO_PTR(o_x);
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(input->dtype)
+    }
+    #endif
+    // p had better be a one-dimensional standard iterable
+    size_t plen = (size_t)mp_obj_get_int(mp_obj_len_maybe(o_p));
+    mp_float_t *p = m_new(mp_float_t, plen);
+    mp_obj_iter_buf_t p_buf;
+    mp_obj_t p_item, p_iterable = mp_getiter(o_p, &p_buf);
+    // the index must be as wide as plen: a uint8_t would wrap around after 255
+    // coefficients, and overwrite the beginning of p
+    size_t count = 0;
+    while((p_item = mp_iternext(p_iterable)) != MP_OBJ_STOP_ITERATION) {
+        p[count] = mp_obj_get_float(p_item);
+        count++;
+    }
+
+    if(!ndarray_object_is_array_like(o_x)) {
+        // scalar argument: evaluate, release the coefficient buffer, and return a float
+        mp_float_t y = poly_eval(mp_obj_get_float(o_x), p, plen);
+        m_del(mp_float_t, p, plen);
+        return mp_obj_new_float(y);
+    }
+
+    // polynomials are going to be of type float, except, when both
+    // the coefficients and the independent variable are integers
+    ndarray_obj_t *ndarray;
+    if(mp_obj_is_type(o_x, &ulab_ndarray_type)) {
+        ndarray_obj_t *source = MP_OBJ_TO_PTR(o_x);
+        uint8_t *sarray = (uint8_t *)source->array;
+        ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_FLOAT);
+        mp_float_t *array = (mp_float_t *)ndarray->array;
+
+        mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype);
+
+        // TODO: these loops are really nothing, but the re-implementation of
+        // ITERATE_VECTOR from vectorise.c. We could pass a function pointer here
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        *array++ = poly_eval(func(sarray), p, plen);
+                        sarray += source->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    // rewind the innermost axis, and step along the next one
+                    sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                    sarray += source->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+                #endif
+            #if ULAB_MAX_DIMS > 2
+                sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                sarray += source->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+            #endif
+        #if ULAB_MAX_DIMS > 3
+            sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+            sarray += source->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+        #endif
+    } else {
+        // o_x had better be a one-dimensional standard iterable
+        ndarray = ndarray_new_linear_array(mp_obj_get_int(mp_obj_len_maybe(o_x)), NDARRAY_FLOAT);
+        mp_float_t *array = (mp_float_t *)ndarray->array;
+        mp_obj_iter_buf_t x_buf;
+        mp_obj_t x_item, x_iterable = mp_getiter(o_x, &x_buf);
+        while ((x_item = mp_iternext(x_iterable)) != MP_OBJ_STOP_ITERATION) {
+            *array++ = poly_eval(mp_obj_get_float(x_item), p, plen);
+        }
+    }
+    m_del(mp_float_t, p, plen);
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(poly_polyval_obj, poly_polyval);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/poly.h b/tulip/shared/ulab/code/numpy/poly.h
new file mode 100644
index 000000000..59cb9f514
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/poly.h
@@ -0,0 +1,21 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+*/
+
+// Include guard for the polynomial routines (polyfit, polyval).
+#ifndef _POLY_
+#define _POLY_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+// Function objects defined in poly.c: polyfit takes a variable number of
+// positional arguments, polyval takes exactly two.
+MP_DECLARE_CONST_FUN_OBJ_VAR_BETWEEN(poly_polyfit_obj);
+MP_DECLARE_CONST_FUN_OBJ_2(poly_polyval_obj);
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/random/random.c b/tulip/shared/ulab/code/numpy/random/random.c
new file mode 100644
index 000000000..165f11b5e
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/random/random.c
@@ -0,0 +1,378 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2024 Zoltán Vörös
+*/
+
+#include <math.h>
+
+#include "py/builtin.h"
+#include "py/obj.h"
+#include "py/runtime.h"
+
+#include "random.h"
+
+// Float constants 0.0 and 1.0, used below as the default values of the
+// loc/low and scale/high keyword arguments, respectively.
+// The two trailing literals are the IEEE-754 single and double precision bit
+// patterns of the same value (presumably consumed by ULAB_DEFINE_FLOAT_CONST
+// for the different object representations -- TODO confirm in ulab.h).
+ULAB_DEFINE_FLOAT_CONST(random_zero, MICROPY_FLOAT_CONST(0.0), 0UL, 0ULL);
+ULAB_DEFINE_FLOAT_CONST(random_one, MICROPY_FLOAT_CONST(1.0), 0x3f800000UL, 0x3ff0000000000000ULL);
+
+// methods of the Generator object
+// Each entry is compiled in only if the corresponding ULAB_NUMPY_RANDOM_HAS_*
+// switch is enabled.
+static const mp_rom_map_elem_t random_generator_locals_dict_table[] = {
+    #if ULAB_NUMPY_RANDOM_HAS_NORMAL
+        { MP_ROM_QSTR(MP_QSTR_normal), MP_ROM_PTR(&random_normal_obj) },
+    #endif
+    #if ULAB_NUMPY_RANDOM_HAS_RANDOM
+        { MP_ROM_QSTR(MP_QSTR_random), MP_ROM_PTR(&random_random_obj) },
+    #endif
+    #if ULAB_NUMPY_RANDOM_HAS_UNIFORM
+        { MP_ROM_QSTR(MP_QSTR_uniform), MP_ROM_PTR(&random_uniform_obj) },
+    #endif
+};
+
+// Constant dictionary wrapping the method table above.
+static MP_DEFINE_CONST_DICT(random_generator_locals_dict, random_generator_locals_dict_table);
+
+// random's Generator object is defined here
+// Two equivalent definitions are provided: the first one is used, when the
+// MP_DEFINE_CONST_OBJ_TYPE macro is available, the second one fills in the
+// mp_obj_type_t structure directly. Both hook up the same print function,
+// constructor, and method dictionary.
+#if defined(MP_DEFINE_CONST_OBJ_TYPE)
+MP_DEFINE_CONST_OBJ_TYPE(
+    random_generator_type,
+    MP_QSTR_generator,
+    MP_TYPE_FLAG_NONE,
+    print, random_generator_print,
+    make_new, random_generator_make_new,
+    locals_dict, &random_generator_locals_dict
+);
+#else
+const mp_obj_type_t random_generator_type = {
+    { &mp_type_type },
+    .name = MP_QSTR_generator,
+    .print = random_generator_print,
+    .make_new = random_generator_make_new,
+    .locals_dict = (mp_obj_dict_t*)&random_generator_locals_dict
+};
+#endif
+
+// Print handler of the Generator type: writes the type name and the address
+// of the instance.
+// The text is sent to the printer that was passed in, and not to the global
+// MP_PYTHON_PRINTER; otherwise, output requested through another printer
+// (e.g., when the representation is rendered into a string) would end up on
+// the console.
+void random_generator_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
+    (void)kind;
+    random_generator_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    mp_printf(print, "Generator() at 0x%p", self);
+}
+
+// Allocates a new Generator instance, whose internal state is set to `state`.
+static mp_obj_t random_generator_from_state(uint64_t state) {
+    random_generator_obj_t *generator = m_new_obj(random_generator_obj_t);
+    generator->base.type = &random_generator_type;
+    generator->state = state;
+    return MP_OBJ_FROM_PTR(generator);
+}
+
+// Constructor of the Generator type; accepts at most one positional argument.
+//   - None (or no argument): the state is taken from MICROPY_PY_RANDOM_SEED_INIT_FUNC,
+//     if that is defined; otherwise a ValueError is raised
+//   - integer: a single generator seeded with that value is returned
+//   - tuple of integers: a tuple of generators, one per seed, is returned
+// Any other argument type raises a TypeError.
+mp_obj_t random_generator_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) {
+    (void) type;
+    mp_arg_check_num(n_args, n_kw, 0, 1, true);
+    mp_map_t kw_args;
+    mp_map_init_fixed_table(&kw_args, n_kw, args + n_args);
+
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+    mp_arg_val_t _args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, args, &kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, _args);
+
+    // Work with the parsed value, which defaults to None: args[0] does not
+    // exist, when the constructor is called without arguments
+    mp_obj_t seed = _args[0].u_obj;
+
+    if(seed == mp_const_none) {
+        #ifndef MICROPY_PY_RANDOM_SEED_INIT_FUNC
+        mp_raise_ValueError(MP_ERROR_TEXT("no default seed"));
+        #else
+        return random_generator_from_state(MICROPY_PY_RANDOM_SEED_INIT_FUNC);
+        #endif
+    } else if(mp_obj_is_int(seed)) {
+        return random_generator_from_state((size_t)mp_obj_get_int(seed));
+    } else if(mp_obj_is_type(seed, &mp_type_tuple)){
+        mp_obj_tuple_t *seeds = MP_OBJ_TO_PTR(seed);
+        mp_obj_t *items = m_new(mp_obj_t, seeds->len);
+
+        // size_t counter: a uint8_t could never reach a length above 255
+        for(size_t i = 0; i < seeds->len; i++) {
+            items[i] = random_generator_from_state((size_t)mp_obj_get_int(seeds->items[i]));
+        }
+        // mp_obj_new_tuple copies the items, so the scratch array can be released
+        mp_obj_t generators = mp_obj_new_tuple(seeds->len, items);
+        m_del(mp_obj_t, items, seeds->len);
+        return generators;
+    } else {
+        mp_raise_TypeError(MP_ERROR_TEXT("argument must be None, an integer or a tuple of integers"));
+    }
+    // we should never end up here
+    return mp_const_none;
+}
+// END OF GENERATOR COMPONENTS
+
+
+// Advances the 64-bit generator state by one step of the linear congruential
+// recurrence (PCG_MULTIPLIER_64, PCG_INCREMENT_64), and returns 32 output bits
+// derived from the state before the update: an xor-shift followed by a right
+// rotation, whose amount is given by the top five bits of the old state.
+static inline uint32_t pcg32_next(uint64_t *state) {
+    const uint64_t previous = *state;
+    *state = previous * PCG_MULTIPLIER_64 + PCG_INCREMENT_64;
+
+    const uint32_t xorshifted = (uint32_t)((previous ^ (previous >> 18)) >> 27);
+    const int rotation = previous >> 59;
+    if(rotation == 0) {
+        // a shift by 32 bits must be avoided, so no rotation is applied here
+        return xorshifted;
+    }
+    return (xorshifted >> rotation) | (xorshifted << (32 - rotation));
+}
+
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+// Produces 64 random bits from two consecutive 32-bit draws: the first draw
+// supplies the upper, the second the lower half. Compiled only for builds
+// with double precision floats.
+static inline uint64_t pcg32_next64(uint64_t *state) {
+    const uint64_t upper = pcg32_next(state);
+    const uint64_t lower = pcg32_next(state);
+    return (upper << 32) | lower;
+}
+#endif
+
+#if ULAB_NUMPY_RANDOM_HAS_NORMAL
+// Generator.normal(loc=0.0, scale=1.0, size=None)
+//
+// Draws normally distributed samples by means of the Box-Muller transform:
+// two uniform variates u, v are turned into sqrt(-2 ln u) * cos(2 pi v) and
+// sqrt(-2 ln u) * sin(2 pi v); each sample is then scaled as loc + scale * value.
+//   - size == None: a single float is returned
+//   - size is an integer: a linear float ndarray of that length is returned
+//   - size is a tuple: a float ndarray of that shape is returned
+// Raises ValueError, if the tuple has more than ULAB_MAX_DIMS entries, and
+// TypeError for any other type of size.
+// NOTE(review): u is taken from [0, 1), so u == 0 results in log(0) = -inf,
+// and thus in an infinite sample; this is left unchanged here.
+static mp_obj_t random_normal(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_loc, MP_ARG_OBJ, { .u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(random_zero) } },
+        { MP_QSTR_scale, MP_ARG_OBJ, { .u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(random_one) } },
+        { MP_QSTR_size, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    random_generator_obj_t *self = MP_OBJ_TO_PTR(args[0].u_obj);
+    mp_float_t loc = mp_obj_get_float(args[1].u_obj);
+    mp_float_t scale = mp_obj_get_float(args[2].u_obj);
+    mp_obj_t size = args[3].u_obj;
+
+    ndarray_obj_t *ndarray = NULL;
+    mp_float_t u, v, value;
+
+    if(size != mp_const_none) {
+        if(mp_obj_is_int(size)) {
+            ndarray = ndarray_new_linear_array((size_t)mp_obj_get_int(size), NDARRAY_FLOAT);
+        } else if(mp_obj_is_type(size, &mp_type_tuple)) {
+            mp_obj_tuple_t *_shape = MP_OBJ_TO_PTR(size);
+            if(_shape->len > ULAB_MAX_DIMS) {
+                mp_raise_ValueError(MP_ERROR_TEXT("maximum number of dimensions is " MP_STRINGIFY(ULAB_MAX_DIMS)));
+            }
+            ndarray = ndarray_new_ndarray_from_tuple(_shape, NDARRAY_FLOAT);
+        } else { // input type not supported
+            mp_raise_TypeError(MP_ERROR_TEXT("shape must be None, and integer or a tuple of integers"));
+        }
+    } else {
+        // return single value
+        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+        uint32_t x = pcg32_next(&self->state);
+        u = (float)(int32_t)(x >> 8) * 0x1.0p-24f;
+        x = pcg32_next(&self->state);
+        v = (float)(int32_t)(x >> 8) * 0x1.0p-24f;
+        #else
+        uint64_t x = pcg32_next64(&self->state);
+        u = (double)(int64_t)(x >> 11) * 0x1.0p-53;
+        x = pcg32_next64(&self->state);
+        v = (double)(int64_t)(x >> 11) * 0x1.0p-53;
+        #endif
+        mp_float_t sqrt_log = MICROPY_FLOAT_C_FUN(sqrt)(-MICROPY_FLOAT_CONST(2.0) * MICROPY_FLOAT_C_FUN(log)(u));
+        value = sqrt_log * MICROPY_FLOAT_C_FUN(cos)(MICROPY_FLOAT_CONST(2.0) * MP_PI * v);
+        return mp_obj_new_float(loc + scale * value);
+    }
+
+    mp_float_t *array = (mp_float_t *)ndarray->array;
+
+    // numpy's random supports only dense output arrays, so we can simply
+    // loop through the elements in a linear fashion
+    // Each pass yields two samples (the cosine and the sine branch), hence the step of 2.
+    for(size_t i = 0; i < ndarray->len; i = i + 2) {
+        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+        uint32_t x = pcg32_next(&self->state);
+        u = (float)(int32_t)(x >> 8) * 0x1.0p-24f;
+        x = pcg32_next(&self->state);
+        v = (float)(int32_t)(x >> 8) * 0x1.0p-24f;
+        #else
+        uint64_t x = pcg32_next64(&self->state);
+        u = (double)(int64_t)(x >> 11) * 0x1.0p-53;
+        x = pcg32_next64(&self->state);
+        v = (double)(int64_t)(x >> 11) * 0x1.0p-53;
+        #endif
+        mp_float_t sqrt_log = MICROPY_FLOAT_C_FUN(sqrt)(-MICROPY_FLOAT_CONST(2.0) * MICROPY_FLOAT_C_FUN(log)(u));
+        value = sqrt_log * MICROPY_FLOAT_C_FUN(cos)(MICROPY_FLOAT_CONST(2.0) * MP_PI * v);
+        *array++ = loc + scale * value;
+        // The second sample of the pair is stored only if there is room for it:
+        // for an odd number of elements, the last pass must write a single value,
+        // otherwise we would write beyond the end of the array
+        if(i + 1 < ndarray->len) {
+            value = sqrt_log * MICROPY_FLOAT_C_FUN(sin)(MICROPY_FLOAT_CONST(2.0) * MP_PI * v);
+            *array++ = loc + scale * value;
+        }
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(random_normal_obj, 1, random_normal);
+#endif /* ULAB_NUMPY_RANDOM_HAS_NORMAL */
+
+#if ULAB_NUMPY_RANDOM_HAS_RANDOM
+// Generator.random(size=None, *, out=None)
+//
+// Draws uniformly distributed floats from the interval [0, 1).
+//   - size == None and out == None: a single float is returned
+//   - size is an integer or a tuple: a new dense float ndarray of that shape is returned
+//   - out is given: out is filled in place, and returned; it must be a dense
+//     float ndarray, and, if size is also given, the two shapes must agree
+// Raises TypeError for an unsupported size or out type, and ValueError, if
+// size has more than ULAB_MAX_DIMS entries, does not match out.shape, or out is not dense.
+static mp_obj_t random_random(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_size, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_out, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    random_generator_obj_t *self = MP_OBJ_TO_PTR(args[0].u_obj);
+
+    mp_obj_t size = args[1].u_obj;
+    mp_obj_t out = args[2].u_obj;
+
+    ndarray_obj_t *ndarray = NULL;
+    // The shape is stored right-aligned; unused leading entries must be 0,
+    // because all ULAB_MAX_DIMS entries are compared to out.shape below, hence
+    // the zero-initialised allocation.
+    // NOTE(review): this assumes that ndarrays keep 0 in their unused leading
+    // shape entries -- confirm against ndarray.c
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    uint8_t ndim = 1;
+
+    if(size != mp_const_none) {
+        if(mp_obj_is_int(size)) {
+            shape[ULAB_MAX_DIMS - 1] = (size_t)mp_obj_get_int(size);
+        } else if(mp_obj_is_type(size, &mp_type_tuple)) {
+            mp_obj_tuple_t *_shape = MP_OBJ_TO_PTR(size);
+            if(_shape->len > ULAB_MAX_DIMS) {
+                mp_raise_ValueError(MP_ERROR_TEXT("maximum number of dimensions is " MP_STRINGIFY(ULAB_MAX_DIMS)));
+            }
+            ndim = _shape->len;
+            // The last entry of the tuple is the innermost axis, so it belongs to
+            // the last slot of shape: copy the tuple from its end, otherwise
+            // size=(2, 3) would produce an array of shape (3, 2)
+            for(size_t i = 0; i < ndim; i++) {
+                shape[ULAB_MAX_DIMS - 1 - i] = mp_obj_get_int(_shape->items[ndim - 1 - i]);
+            }
+        } else { // input type not supported
+            mp_raise_TypeError(MP_ERROR_TEXT("shape must be None, and integer or a tuple of integers"));
+        }
+    }
+
+    if(out != mp_const_none) {
+        if(!mp_obj_is_type(out, &ulab_ndarray_type)) {
+            mp_raise_TypeError(MP_ERROR_TEXT("out has wrong type"));
+        }
+
+        ndarray = MP_OBJ_TO_PTR(out);
+
+        if(ndarray->dtype != NDARRAY_FLOAT) {
+            mp_raise_TypeError(MP_ERROR_TEXT("output array has wrong type"));
+        }
+        if(size != mp_const_none) {
+            for(uint8_t i = 0; i < ULAB_MAX_DIMS; i++) {
+                if(ndarray->shape[i] != shape[i]) {
+                    mp_raise_ValueError(MP_ERROR_TEXT("size must match out.shape when used together"));
+                }
+            }
+        }
+        if(!ndarray_is_dense(ndarray)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("output array must be contiguous"));
+        }
+    } else { // out == None
+        if(size != mp_const_none) {
+            ndarray = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+        } else {
+            // return single value
+            mp_float_t value;
+            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+            uint32_t x = pcg32_next(&self->state);
+            value = (float)(int32_t)(x >> 8) * 0x1.0p-24f;
+            #else
+            uint64_t x = pcg32_next64(&self->state);
+            value = (double)(int64_t)(x >> 11) * 0x1.0p-53;
+            #endif
+            return mp_obj_new_float(value);
+        }
+    }
+
+    mp_float_t *array = (mp_float_t *)ndarray->array;
+
+    // numpy's random supports only dense output arrays, so we can simply
+    // loop through the elements in a linear fashion
+    for(size_t i = 0; i < ndarray->len; i++) {
+
+        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+        uint32_t x = pcg32_next(&self->state);
+        *array = (float)(int32_t)(x >> 8) * 0x1.0p-24f;
+        #else
+        uint64_t x = pcg32_next64(&self->state);
+        *array = (double)(int64_t)(x >> 11) * 0x1.0p-53;
+        #endif
+
+        array++;
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(random_random_obj, 1, random_random);
+#endif /* ULAB_NUMPY_RANDOM_HAS_RANDOM */
+
+#if ULAB_NUMPY_RANDOM_HAS_UNIFORM
+// Generator.uniform(low=0.0, high=1.0, size=None)
+//
+// Draws uniformly distributed floats; each sample is low + (high - low) * r,
+// where r is taken from [0, 1).
+//   - size == None: a single float is returned
+//   - size is an integer: a linear float ndarray of that length is returned
+//   - size is a tuple: a float ndarray of that shape is returned
+// Raises ValueError, if the tuple has more than ULAB_MAX_DIMS entries, and
+// TypeError for any other type of size.
+static mp_obj_t random_uniform(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_low, MP_ARG_OBJ, { .u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(random_zero) } },
+        { MP_QSTR_high, MP_ARG_OBJ, { .u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(random_one) } },
+        { MP_QSTR_size, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    random_generator_obj_t *self = MP_OBJ_TO_PTR(args[0].u_obj);
+    mp_float_t low = mp_obj_get_float(args[1].u_obj);
+    mp_float_t high = mp_obj_get_float(args[2].u_obj);
+    mp_obj_t size = args[3].u_obj;
+
+    ndarray_obj_t *ndarray = NULL;
+    mp_float_t diff = high - low;
+
+    if(size == mp_const_none) {
+        // return single value
+        mp_float_t value;
+        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+        uint32_t x = pcg32_next(&self->state);
+        value = (float)(int32_t)(x >> 8) * 0x1.0p-24f;
+        #else
+        uint64_t x = pcg32_next64(&self->state);
+        value = (double)(int64_t)(x >> 11) * 0x1.0p-53;
+        #endif
+        // the single value has to be mapped onto [low, high), exactly as the array elements below
+        return mp_obj_new_float(low + diff * value);
+    } else if(mp_obj_is_int(size)) {
+        // an integer size is accepted in the same way as in Generator.normal
+        ndarray = ndarray_new_linear_array((size_t)mp_obj_get_int(size), NDARRAY_FLOAT);
+    } else if(mp_obj_is_type(size, &mp_type_tuple)) {
+        mp_obj_tuple_t *_shape = MP_OBJ_TO_PTR(size);
+        // TODO: this could be reduced, if the inspection was in the ndarray_new_ndarray_from_tuple function
+        if(_shape->len > ULAB_MAX_DIMS) {
+            mp_raise_ValueError(MP_ERROR_TEXT("maximum number of dimensions is " MP_STRINGIFY(ULAB_MAX_DIMS)));
+        }
+        ndarray = ndarray_new_ndarray_from_tuple(_shape, NDARRAY_FLOAT);
+    } else { // input type not supported
+        mp_raise_TypeError(MP_ERROR_TEXT("shape must be None, and integer or a tuple of integers"));
+    }
+
+    mp_float_t *array = (mp_float_t *)ndarray->array;
+    for(size_t i = 0; i < ndarray->len; i++) {
+        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+        uint32_t x = pcg32_next(&self->state);
+        *array = (float)(int32_t)(x >> 8) * 0x1.0p-24f;
+        #else
+        uint64_t x = pcg32_next64(&self->state);
+        *array = (double)(int64_t)(x >> 11) * 0x1.0p-53;
+        #endif
+        *array = low + diff * *array;
+        array++;
+    }
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(random_uniform_obj, 1, random_uniform);
+#endif /* ULAB_NUMPY_RANDOM_HAS_UNIFORM */
+
+
+// Globals of the random sub-module: its name, and the Generator type.
+// The sampling functions are not listed here; they are methods of Generator
+// (see random_generator_locals_dict_table).
+static const mp_rom_map_elem_t ulab_numpy_random_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_random) },
+    { MP_ROM_QSTR(MP_QSTR_Generator), MP_ROM_PTR(&random_generator_type) },
+};
+
+// Constant dictionary wrapping the table above.
+static MP_DEFINE_CONST_DICT(mp_module_ulab_numpy_random_globals, ulab_numpy_random_globals_table);
+
+// The random module object; its globals point at the constant dictionary.
+const mp_obj_module_t ulab_numpy_random_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_numpy_random_globals,
+};
diff --git a/tulip/shared/ulab/code/numpy/random/random.h b/tulip/shared/ulab/code/numpy/random/random.h
new file mode 100644
index 000000000..314fefa00
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/random/random.h
@@ -0,0 +1,37 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2024 Zoltán Vörös
+*/
+
+#include "../../ndarray.h"
+
+#ifndef _NUMPY_RANDOM_
+#define _NUMPY_RANDOM_
+
+
+// Multiplier and increment of the 64-bit linear congruential step that
+// advances the generator state in pcg32_next (random.c).
+#define PCG_MULTIPLIER_64       6364136223846793005ULL
+#define PCG_INCREMENT_64        1442695040888963407ULL
+
+// The random sub-module object, defined in random.c.
+extern const mp_obj_module_t ulab_numpy_random_module;
+
+// The type object of the Generator class.
+extern const mp_obj_type_t random_generator_type;
+
+// Instance layout of a Generator: the object header, followed by the 64-bit
+// state that is updated on each draw.
+typedef struct _random_generator_obj_t {
+    mp_obj_base_t base;
+    uint64_t state;
+} random_generator_obj_t;
+
+// Constructor and print handler of the Generator type.
+mp_obj_t random_generator_make_new(const mp_obj_type_t *, size_t , size_t , const mp_obj_t *);
+void random_generator_print(const mp_print_t *, mp_obj_t , mp_print_kind_t );
+
+
+// Methods of the Generator type; all of them accept keyword arguments.
+MP_DECLARE_CONST_FUN_OBJ_KW(random_normal_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(random_random_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(random_uniform_obj);
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/stats.c b/tulip/shared/ulab/code/numpy/stats.c
new file mode 100644
index 000000000..2d3488937
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/stats.c
@@ -0,0 +1,54 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020 Roberto Colistete Jr.
+ *               2020 Taku Fukada
+ *
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../ulab.h"
+#include "../ulab_tools.h"
+#include "carray/carray_tools.h"
+#include "stats.h"
+
+#if ULAB_MAX_DIMS > 1
+#if ULAB_NUMPY_HAS_TRACE
+
+//| def trace(m: ulab.numpy.ndarray) -> _float:
+//|     """
+//|     :param m: a square matrix
+//|
+//|     Compute the trace of the matrix, the sum of its diagonal elements."""
+//|     ...
+//|
+
+// Sums the diagonal elements of the square matrix oin. The result is returned
+// as a float for float arrays, and is converted to an integer for all other
+// dtypes. Complex arrays are rejected by COMPLEX_DTYPE_NOT_IMPLEMENTED.
+static mp_obj_t stats_trace(mp_obj_t oin) {
+    ndarray_obj_t *matrix = tools_object_is_square(oin);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(matrix->dtype)
+
+    const size_t side = matrix->shape[ULAB_MAX_DIMS - 1];
+    mp_float_t diagonal_sum = 0.0;
+    size_t n = 0;
+    while(n < side) {
+        // one step along the diagonal advances by one row and one column;
+        // the byte offset is converted to an element index by the division
+        int32_t offset = n * (matrix->strides[ULAB_MAX_DIMS - 1] + matrix->strides[ULAB_MAX_DIMS - 2]);
+        diagonal_sum += ndarray_get_float_index(matrix->array, matrix->dtype, offset/matrix->itemsize);
+        n++;
+    }
+
+    if(matrix->dtype != NDARRAY_FLOAT) {
+        return mp_obj_new_int_from_float(diagonal_sum);
+    }
+    return mp_obj_new_float(diagonal_sum);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(stats_trace_obj, stats_trace);
+#endif
+#endif
diff --git a/tulip/shared/ulab/code/numpy/stats.h b/tulip/shared/ulab/code/numpy/stats.h
new file mode 100644
index 000000000..62bba9ff4
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/stats.h
@@ -0,0 +1,20 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+*/
+
+#ifndef _STATS_
+#define _STATS_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+MP_DECLARE_CONST_FUN_OBJ_1(stats_trace_obj);
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/transform.c b/tulip/shared/ulab/code/numpy/transform.c
new file mode 100644
index 000000000..b77364ba0
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/transform.c
@@ -0,0 +1,456 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+ *
+*/
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../ulab.h"
+#include "../ulab_tools.h"
+#include "carray/carray_tools.h"
+#include "numerical.h"
+#include "transform.h"
+
+#if ULAB_NUMPY_HAS_COMPRESS
+// compress(condition, a, *, axis=None): copies the items of `a` whose entry in `condition` is truthy.
+//   condition - iterable; its length must match a's length (axis=None) or the length of `axis`
+//   a         - ndarray, any other type raises TypeError
+//   axis      - keyword-only; None gives a flat result, otherwise only that axis is shortened
+// Returns a newly allocated ndarray with the dtype of `a`.
+static mp_obj_t transform_compress(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t condition = args[0].u_obj;
+    if(!mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("wrong input type"));
+    }
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[1].u_obj);
+    uint8_t *array = (uint8_t *)ndarray->array;
+    mp_obj_t axis = args[2].u_obj;
+
+    size_t len = MP_OBJ_SMALL_INT_VALUE(mp_obj_len_maybe(condition));
+    int8_t ax, shift_ax = 0;
+    if(axis != mp_const_none) {
+        ax = tools_get_axis(axis, ndarray->ndim);
+        shift_ax = ULAB_MAX_DIMS - ndarray->ndim + ax;
+    }
+
+    if(((axis == mp_const_none) && (len != ndarray->len)) ||
+        ((axis != mp_const_none) && (len != ndarray->shape[shift_ax]))) {
+        mp_raise_ValueError(MP_ERROR_TEXT("wrong length of condition array"));
+    }
+
+    // first pass over the condition: the number of truthy entries fixes the size of the result
+    size_t true_count = 0;
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t item, iterable = mp_getiter(condition, &iter_buf);
+    while((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+        if(mp_obj_is_true(item)) {
+            true_count++;
+        }
+    }
+    // second pass; if an axis is given, it is restarted for every run along that axis
+    iterable = mp_getiter(condition, &iter_buf);
+
+    ndarray_obj_t *result = NULL;
+    // private copies of shape and strides, so that entries can be swapped without touching `a`
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+    memcpy(shape, ndarray->shape, ULAB_MAX_DIMS * sizeof(size_t));
+    size_t *rshape = m_new(size_t, ULAB_MAX_DIMS);
+    memcpy(rshape, ndarray->shape, ULAB_MAX_DIMS * sizeof(size_t));
+    int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);
+    memcpy(strides, ndarray->strides, ULAB_MAX_DIMS * sizeof(int32_t));
+    int32_t *rstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+
+    if(axis == mp_const_none) {
+        result = ndarray_new_linear_array(true_count, ndarray->dtype);
+        // rshape[last] = 0 and the zeroed outer strides turn every rewind of rarray into a no-op
+        rstrides[ULAB_MAX_DIMS - 1] = ndarray->itemsize;
+        rshape[ULAB_MAX_DIMS - 1] = 0;
+    } else {
+        rshape[shift_ax] = true_count;
+        result = ndarray_new_dense_ndarray(ndarray->ndim, rshape, ndarray->dtype);
+        // exchange `axis` with the last dimension, so that the innermost loop runs along it
+        SWAP(size_t, shape[shift_ax], shape[ULAB_MAX_DIMS - 1]);
+        SWAP(size_t, rshape[shift_ax], rshape[ULAB_MAX_DIMS - 1]);
+        SWAP(int32_t, strides[shift_ax], strides[ULAB_MAX_DIMS - 1]);
+        memcpy(rstrides, result->strides, ULAB_MAX_DIMS * sizeof(int32_t));
+        SWAP(int32_t, rstrides[shift_ax], rstrides[ULAB_MAX_DIMS - 1]);
+    }
+
+    uint8_t *rarray = (uint8_t *)result->array;
+
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                if(axis != mp_const_none) {
+                    iterable = mp_getiter(condition, &iter_buf);
+                }
+                do {
+                    item = mp_iternext(iterable);
+                    // a zero-length run still gets here once, with the iterator already exhausted
+                    if((item != MP_OBJ_STOP_ITERATION) && mp_obj_is_true(item)) {
+                        memcpy(rarray, array, ndarray->itemsize);
+                        rarray += rstrides[ULAB_MAX_DIMS - 1];
+                    }
+                    array += strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l < shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                array -= strides[ULAB_MAX_DIMS - 1] * shape[ULAB_MAX_DIMS - 1];
+                array += strides[ULAB_MAX_DIMS - 2];
+                rarray -= rstrides[ULAB_MAX_DIMS - 1] * rshape[ULAB_MAX_DIMS - 1];
+                rarray += rstrides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k < shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            array -= strides[ULAB_MAX_DIMS - 2] * shape[ULAB_MAX_DIMS - 2];
+            array += strides[ULAB_MAX_DIMS - 3];
+            rarray -= rstrides[ULAB_MAX_DIMS - 2] * rshape[ULAB_MAX_DIMS - 2];
+            rarray += rstrides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        array -= strides[ULAB_MAX_DIMS - 3] * shape[ULAB_MAX_DIMS - 3];
+        array += strides[ULAB_MAX_DIMS - 4];
+        // the result is rewound along dimension -3 and stepped along -4, exactly like the source
+        rarray -= rstrides[ULAB_MAX_DIMS - 3] * rshape[ULAB_MAX_DIMS - 3];
+        rarray += rstrides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < shape[ULAB_MAX_DIMS - 4]);
+    #endif
+
+    m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(size_t, rshape, ULAB_MAX_DIMS);
+    m_del(int32_t, strides, ULAB_MAX_DIMS);
+    m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+
+    return MP_OBJ_FROM_PTR(result);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(transform_compress_obj, 2, transform_compress);
+#endif /* ULAB_NUMPY_HAS_COMPRESS */
+
+#if ULAB_NUMPY_HAS_DELETE
+// delete(a, indices, *, axis=None): returns a copy of `a` without the items named by `indices`.
+//   a       - ndarray, any other type raises TypeError
+//   indices - an integer or a sized iterable of integers; negative values count from the end
+//   axis    - keyword-only; None addresses the flattened array, otherwise positions on that axis
+// Raises ValueError for out-of-range indices; an empty `indices` returns `a` itself, not a copy.
+static mp_obj_t transform_delete(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be an ndarray"));
+    }
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0].u_obj);
+    uint8_t *array = (uint8_t *)ndarray->array;
+    mp_obj_t indices = args[1].u_obj;
+    mp_obj_t axis = args[2].u_obj;
+
+    int8_t shift_ax = 0;
+    size_t axis_len;
+    if(axis != mp_const_none) {
+        int8_t ax = tools_get_axis(axis, ndarray->ndim);
+        shift_ax = ULAB_MAX_DIMS - ndarray->ndim + ax;
+        axis_len = ndarray->shape[shift_ax];
+    } else {
+        axis_len = ndarray->len;
+    }
+
+    size_t index_len;
+    if(mp_obj_is_int(indices)) {
+        index_len = 1;
+    } else {
+        if(mp_obj_len_maybe(indices) == MP_OBJ_NULL) {
+            mp_raise_TypeError(MP_ERROR_TEXT("wrong index type"));
+        }
+        index_len = MP_OBJ_SMALL_INT_VALUE(mp_obj_len_maybe(indices));
+        if (index_len == 0){
+            // if the second positional argument is empty
+            // return the original array
+            return MP_OBJ_FROM_PTR(ndarray);
+        }
+    }
+    if(index_len > axis_len) {
+        mp_raise_ValueError(MP_ERROR_TEXT("wrong length of index array"));
+    }
+
+    // index_array always points at the start of the buffer; `filled` is the write position
+    size_t *index_array = m_new(size_t, index_len);
+    size_t filled = 0;
+    if(mp_obj_is_int(indices)) {
+        ssize_t value = (ssize_t)mp_obj_get_int(indices);
+        if(value < 0) {
+            value += axis_len;
+        }
+        // the valid positions are 0 ... axis_len - 1
+        if((value < 0) || (value >= (ssize_t)axis_len)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("index is out of bounds"));
+        } else {
+            index_array[filled++] = (size_t)value;
+        }
+    } else {
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t item, iterable = mp_getiter(indices, &iter_buf);
+        while((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+            ssize_t value = (ssize_t)mp_obj_get_int(item);
+            if(value < 0) {
+                value += axis_len;
+            }
+            if((value < 0) || (value >= (ssize_t)axis_len)) {
+                mp_raise_ValueError(MP_ERROR_TEXT("index is out of bounds"));
+            } else {
+                index_array[filled++] = (size_t)value;
+            }
+        }
+    }
+
+    // sort the array, since it is not guaranteed that the input is sorted
+    HEAPSORT1(size_t, index_array, 1, index_len);
+    // an item can be removed only once: collapse repeated indices, so that the length of
+    // the result agrees with the number of items that are actually skipped
+    size_t unique = 1;
+    for(size_t n = 1; n < index_len; n++) {
+        if(index_array[n] != index_array[unique - 1]) {
+            index_array[unique++] = index_array[n];
+        }
+    }
+    index_len = unique;
+
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+    memcpy(shape, ndarray->shape, ULAB_MAX_DIMS * sizeof(size_t));
+    size_t *rshape = m_new(size_t, ULAB_MAX_DIMS);
+    memcpy(rshape, ndarray->shape, ULAB_MAX_DIMS * sizeof(size_t));
+    int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);
+    memcpy(strides, ndarray->strides, ULAB_MAX_DIMS * sizeof(int32_t));
+    int32_t *rstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+    ndarray_obj_t *result = NULL;
+    if(axis == mp_const_none) {
+        result = ndarray_new_linear_array(ndarray->len - index_len, ndarray->dtype);
+        rstrides[ULAB_MAX_DIMS - 1] = ndarray->itemsize;
+        memset(rshape, 0, sizeof(size_t) * ULAB_MAX_DIMS);
+    } else {
+        rshape[shift_ax] = shape[shift_ax] - index_len;
+        result = ndarray_new_dense_ndarray(ndarray->ndim, rshape, ndarray->dtype);
+        SWAP(size_t, shape[shift_ax], shape[ULAB_MAX_DIMS - 1]);
+        SWAP(size_t, rshape[shift_ax], rshape[ULAB_MAX_DIMS - 1]);
+        SWAP(int32_t, strides[shift_ax], strides[ULAB_MAX_DIMS - 1]);
+        memcpy(rstrides, result->strides, ULAB_MAX_DIMS * sizeof(int32_t));
+        SWAP(int32_t, rstrides[shift_ax], rstrides[ULAB_MAX_DIMS - 1]);
+    }
+
+    uint8_t *rarray = (uint8_t *)result->array;
+    // position, in the sorted list, of the next index that is still to be matched
+    size_t next = 0;
+    size_t count = 0;
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    if((next < index_len) && (count == index_array[next])) {
+                        next++;
+                    } else {
+                        memcpy(rarray, array, ndarray->itemsize);
+                        rarray += rstrides[ULAB_MAX_DIMS - 1];
+                    }
+                    array += strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                    count++;
+                } while(l < shape[ULAB_MAX_DIMS - 1]);
+                if(axis != mp_const_none) {
+                    next = 0;
+                    count = 0;
+                }
+            #if ULAB_MAX_DIMS > 1
+                array -= strides[ULAB_MAX_DIMS - 1] * shape[ULAB_MAX_DIMS - 1];
+                array += strides[ULAB_MAX_DIMS - 2];
+                rarray -= rstrides[ULAB_MAX_DIMS - 1] * rshape[ULAB_MAX_DIMS - 1];
+                rarray += rstrides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k < shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            array -= strides[ULAB_MAX_DIMS - 2] * shape[ULAB_MAX_DIMS - 2];
+            array += strides[ULAB_MAX_DIMS - 3];
+            rarray -= rstrides[ULAB_MAX_DIMS - 2] * rshape[ULAB_MAX_DIMS - 2];
+            rarray += rstrides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        array -= strides[ULAB_MAX_DIMS - 3] * shape[ULAB_MAX_DIMS - 3];
+        array += strides[ULAB_MAX_DIMS - 4];
+        rarray -= rstrides[ULAB_MAX_DIMS - 3] * rshape[ULAB_MAX_DIMS - 3];
+        rarray += rstrides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < shape[ULAB_MAX_DIMS - 4]);
+    #endif
+    // TODO: deleting shape generates a seg fault
+    // m_del(size_t, shape, ULAB_MAX_DIMS);
+    m_del(size_t, rshape, ULAB_MAX_DIMS);
+    m_del(int32_t, strides, ULAB_MAX_DIMS);
+    m_del(int32_t, rstrides, ULAB_MAX_DIMS);
+    return MP_OBJ_FROM_PTR(result);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(transform_delete_obj, 2, transform_delete);
+#endif /* ULAB_NUMPY_HAS_DELETE */
+
+
+#if ULAB_MAX_DIMS > 1
+#if ULAB_NUMPY_HAS_DOT
+//| def dot(m1: ulab.numpy.ndarray, m2: ulab.numpy.ndarray) -> Union[ulab.numpy.ndarray, _float]:
+//|    """
+//|    :param ~ulab.numpy.ndarray m1: a matrix, or a vector
+//|    :param ~ulab.numpy.ndarray m2: a matrix, or a vector
+//|
+//|    Computes the product of two matrices, or two vectors. In the latter case, the inner product is returned."""
+//|    ...
+//|
+
+mp_obj_t transform_dot(mp_obj_t _m1, mp_obj_t _m2) {
+    // TODO: should the results be upcast?
+    // Operands with more than two dimensions are not handled here.
+    if(!(mp_obj_is_type(_m1, &ulab_ndarray_type) && mp_obj_is_type(_m2, &ulab_ndarray_type))) {
+        mp_raise_TypeError(MP_ERROR_TEXT("arguments must be ndarrays"));
+    }
+    ndarray_obj_t *m1 = MP_OBJ_TO_PTR(_m1);
+    ndarray_obj_t *m2 = MP_OBJ_TO_PTR(_m2);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(m1->dtype)
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(m2->dtype)
+
+    mp_float_t (*get1)(void *) = ndarray_get_float_function(m1->dtype);
+    mp_float_t (*get2)(void *) = ndarray_get_float_function(m2->dtype);
+
+    // the summation runs over the last axis of m1 and the first axis of m2
+    const size_t inner = m1->shape[ULAB_MAX_DIMS - 1];
+    if(inner != m2->shape[ULAB_MAX_DIMS - m2->ndim]) {
+        mp_raise_ValueError(MP_ERROR_TEXT("dimensions do not match"));
+    }
+    uint8_t ndim = MIN(m1->ndim, m2->ndim);
+    size_t rows = m1->ndim == 2 ? m1->shape[ULAB_MAX_DIMS - m1->ndim] : 1;
+    size_t columns = m2->ndim == 2 ? m2->shape[ULAB_MAX_DIMS - 1] : 1;
+
+    // matrix times matrix -> matrix; anything else -> vector (of length 1 for vector times vector)
+    size_t *shape = ndim == 2
+        ? ndarray_shape_vector(0, 0, rows, columns)
+        : ndarray_shape_vector(0, 0, 0, rows * columns);
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+    mp_float_t *rarray = (mp_float_t *)results->array;
+
+    const int32_t step1 = m1->strides[ULAB_MAX_DIMS - 1];
+    const int32_t step2 = m2->strides[ULAB_MAX_DIMS - m2->ndim];
+    uint8_t *row = (uint8_t *)m1->array;
+    for(size_t i = 0; i < rows; i++) {
+        uint8_t *column = (uint8_t *)m2->array;
+        for(size_t j = 0; j < columns; j++) {
+            // entry (i, j): walk row i of m1 and column j of m2 in lockstep
+            uint8_t *left = row, *right = column;
+            mp_float_t sum = 0.0;
+            for(size_t k = 0; k < inner; k++) {
+                sum += get1(left) * get2(right);
+                left += step1;
+                right += step2;
+            }
+            *rarray++ = sum;
+            column += m2->strides[ULAB_MAX_DIMS - 1];
+        }
+        row += m1->strides[ULAB_MAX_DIMS - m1->ndim];
+    }
+
+    if((m1->ndim == 1) && (m2->ndim == 1)) {
+        // the product of two vectors is handed back as a plain float
+        return mp_obj_new_float(*(--rarray));
+    }
+    return MP_OBJ_FROM_PTR(results);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(transform_dot_obj, transform_dot);
+#endif /* ULAB_NUMPY_HAS_DOT */
+#endif /* ULAB_MAX_DIMS > 1 */
+
+#if ULAB_NUMPY_HAS_SIZE
+static mp_obj_t transform_size(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_axis, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    mp_obj_t operand = args[0].u_obj;
+
+    // a scalar counts as a single item
+    if(ulab_tools_mp_obj_is_scalar(operand)) {
+        return mp_obj_new_int(1);
+    }
+    if(!ndarray_object_is_array_like(operand)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be an ndarray"));
+    }
+    // array-like objects other than ndarrays report their length; `axis` is not consulted
+    if(!mp_obj_is_type(operand, &ulab_ndarray_type)) {
+        return mp_obj_len_maybe(operand);
+    }
+
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(operand);
+    mp_obj_t axis = args[1].u_obj;
+    if(axis == mp_const_none) {
+        // without an axis, the answer is the total number of items
+        return mp_obj_new_int(ndarray->len);
+    }
+
+    // the lengths of an ndim-dimensional array are read from the last ndim entries of `shape`
+    int8_t ax = tools_get_axis(axis, ndarray->ndim);
+    return mp_obj_new_int(ndarray->shape[ULAB_MAX_DIMS - ndarray->ndim + ax]);
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(transform_size_obj, 1, transform_size);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/transform.h b/tulip/shared/ulab/code/numpy/transform.h
new file mode 100644
index 000000000..bfb4482cc
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/transform.h
@@ -0,0 +1,30 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+ *
+*/
+
+#ifndef _TRANSFORM_
+#define _TRANSFORM_
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../ulab.h"
+#include "../ulab_tools.h"
+
+MP_DECLARE_CONST_FUN_OBJ_KW(transform_compress_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(transform_delete_obj);
+MP_DECLARE_CONST_FUN_OBJ_2(transform_dot_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(transform_size_obj);
+
+#endif
diff --git a/tulip/shared/ulab/code/numpy/vector.c b/tulip/shared/ulab/code/numpy/vector.c
new file mode 100644
index 000000000..95e2e2075
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/vector.c
@@ -0,0 +1,1206 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2023 Zoltán Vörös
+ *               2020-2023 Jeff Epler for Adafruit Industries
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "py/runtime.h"
+#include "py/binary.h"
+#include "py/obj.h"
+#include "py/objarray.h"
+
+#include "../ulab.h"
+#include "../ulab_tools.h"
+#include "carray/carray_tools.h"
+#include "vector.h"
+
+//| """Element-by-element functions
+//|
+//| These functions can operate on numbers, 1-D iterables, and arrays of 1 to 4 dimensions by
+//| applying the function to every element in the array.  This is typically
+//| much more efficient than expressing the same operation as a Python loop."""
+//|
+
+
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+// Applies f to every item of o_in: a scalar yields a float, anything else a float ndarray.
+// For ndarray input, a float `out` array of equal shape is filled and returned instead.
+static mp_obj_t vector_generic_vector(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args, mp_float_t (*f)(mp_float_t)) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE} } ,
+        { MP_QSTR_out, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        // this keyword argument is not used; it's only here, so that functions that
+        // support the complex dtype can call vector_generic_vector directly
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    mp_obj_t o_in = args[0].u_obj;
+    // Return a single value, if o_in is not iterable
+    if(mp_obj_is_float(o_in) || mp_obj_is_int(o_in)) {
+        return mp_obj_new_float(f(mp_obj_get_float(o_in)));
+    }
+    mp_obj_t out = args[1].u_obj;
+    ndarray_obj_t *target = NULL;
+
+    if(mp_obj_is_type(o_in, &ulab_ndarray_type)) {
+        ndarray_obj_t *source = MP_OBJ_TO_PTR(o_in);
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(source->dtype)
+        if(out == mp_const_none) {
+            target = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_FLOAT);
+        } else {
+            if(!mp_obj_is_type(out, &ulab_ndarray_type)) {
+                mp_raise_ValueError(MP_ERROR_TEXT("out must be an ndarray"));
+            }
+            target = MP_OBJ_TO_PTR(out);
+            if(target->dtype != NDARRAY_FLOAT) {
+                mp_raise_ValueError(MP_ERROR_TEXT("out must be of float dtype"));
+            }
+            if(target->ndim != source->ndim) {
+                mp_raise_ValueError(MP_ERROR_TEXT("input and output dimensions differ"));
+            }
+            for(uint8_t d = 0; d < target->ndim; d++) {
+                if(target->shape[ULAB_MAX_DIMS - 1 - d] != source->shape[ULAB_MAX_DIMS - 1 - d]) {
+                    mp_raise_ValueError(MP_ERROR_TEXT("input and output shapes differ"));
+                }
+            }
+        }
+        mp_float_t *tarray = (mp_float_t *)target->array;
+        // target strides, in items: `out` may be a non-contiguous view (unused dimensions stay 0)
+        int32_t *tstrides = m_new0(int32_t, ULAB_MAX_DIMS);
+        for(uint8_t d = 0; d < target->ndim; d++) {
+            tstrides[ULAB_MAX_DIMS - 1 - d] = target->strides[ULAB_MAX_DIMS - 1 - d] / target->itemsize;
+        }
+        uint8_t *sarray = (uint8_t *)source->array;
+        #if ULAB_VECTORISE_USES_FUN_POINTER
+            mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype);
+            #if ULAB_MAX_DIMS > 3
+            size_t i = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 2
+                size_t j = 0;
+                do {
+                #endif
+                    #if ULAB_MAX_DIMS > 1
+                    size_t k = 0;
+                    do {
+                    #endif
+                        size_t l = 0;
+                        do {
+                            mp_float_t value = func(sarray);
+                            *tarray = f(value);
+                            tarray += tstrides[ULAB_MAX_DIMS - 1];
+                            sarray += source->strides[ULAB_MAX_DIMS - 1];
+                            l++;
+                        } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+                    #if ULAB_MAX_DIMS > 1
+                        tarray -= tstrides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                        tarray += tstrides[ULAB_MAX_DIMS - 2];
+                        sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                        sarray += source->strides[ULAB_MAX_DIMS - 2];
+                        k++;
+                    } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+                    #endif /* ULAB_MAX_DIMS > 1 */
+                #if ULAB_MAX_DIMS > 2
+                    tarray -= tstrides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                    tarray += tstrides[ULAB_MAX_DIMS - 3];
+                    sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                    sarray += source->strides[ULAB_MAX_DIMS - 3];
+                    j++;
+                } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+                #endif /* ULAB_MAX_DIMS > 2 */
+            #if ULAB_MAX_DIMS > 3
+                tarray -= tstrides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+                tarray += tstrides[ULAB_MAX_DIMS - 4];
+                sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+                sarray += source->strides[ULAB_MAX_DIMS - 4];
+                i++;
+            } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+            #endif /* ULAB_MAX_DIMS > 3 */
+        #else
+        if(source->dtype == NDARRAY_UINT8) {
+            ITERATE_VECTOR(uint8_t, target, tarray, tstrides, source, sarray);
+        } else if(source->dtype == NDARRAY_INT8) {
+           ITERATE_VECTOR(int8_t, target, tarray, tstrides, source, sarray);
+        } else if(source->dtype == NDARRAY_UINT16) {
+            ITERATE_VECTOR(uint16_t, target, tarray, tstrides, source, sarray);
+        } else if(source->dtype == NDARRAY_INT16) {
+            ITERATE_VECTOR(int16_t, target, tarray, tstrides, source, sarray);
+        } else {
+            ITERATE_VECTOR(mp_float_t, target, tarray, tstrides, source, sarray);
+        }
+        #endif /* ULAB_VECTORISE_USES_FUN_POINTER */
+    } else {
+        target = ndarray_from_mp_obj(o_in, 0);
+        mp_float_t *tarray = (mp_float_t *)target->array;
+        for(size_t i = 0; i < target->len; i++) {
+            tarray[i] = f(tarray[i]);
+        }
+    }
+    return MP_OBJ_FROM_PTR(target);
+}
+
+#else
+
+static mp_obj_t vector_generic_vector(mp_obj_t o_in, mp_float_t (*f)(mp_float_t)) {
+    // a plain number is mapped onto a single float
+    if(mp_obj_is_float(o_in) || mp_obj_is_int(o_in)) {
+        return mp_obj_new_float(f(mp_obj_get_float(o_in)));
+    }
+
+    if(!mp_obj_is_type(o_in, &ulab_ndarray_type)) {
+        // everything else is converted by ndarray_from_mp_obj, and overwritten item by item
+        ndarray_obj_t *converted = ndarray_from_mp_obj(o_in, 0);
+        mp_float_t *items = (mp_float_t *)converted->array;
+        for(size_t n = 0; n < converted->len; n++) {
+            items[n] = f(items[n]);
+        }
+        return MP_OBJ_FROM_PTR(converted);
+    }
+
+    ndarray_obj_t *source = MP_OBJ_TO_PTR(o_in);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(source->dtype)
+    uint8_t *sarray = (uint8_t *)source->array;
+    // the values are collected in a new float array with the shape of the input
+    ndarray_obj_t *result = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_FLOAT);
+    mp_float_t *array = (mp_float_t *)result->array;
+
+    #if ULAB_VECTORISE_USES_FUN_POINTER
+        mp_float_t (*to_float)(void *) = ndarray_get_float_function(source->dtype);
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        mp_float_t item = to_float(sarray);
+                        *array++ = f(item);
+                        sarray += source->strides[ULAB_MAX_DIMS - 1];
+                        l++;
+                    } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                    sarray += source->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+                #endif /* ULAB_MAX_DIMS > 1 */
+            #if ULAB_MAX_DIMS > 2
+                sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                sarray += source->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+            #endif /* ULAB_MAX_DIMS > 2 */
+        #if ULAB_MAX_DIMS > 3
+            sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+            sarray += source->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+        #endif /* ULAB_MAX_DIMS > 3 */
+    #else
+    if(source->dtype == NDARRAY_UINT8) {
+        ITERATE_VECTOR(uint8_t, array, source, sarray);
+    } else if(source->dtype == NDARRAY_INT8) {
+        ITERATE_VECTOR(int8_t, array, source, sarray);
+    } else if(source->dtype == NDARRAY_UINT16) {
+        ITERATE_VECTOR(uint16_t, array, source, sarray);
+    } else if(source->dtype == NDARRAY_INT16) {
+        ITERATE_VECTOR(int16_t, array, source, sarray);
+    } else {
+        ITERATE_VECTOR(mp_float_t, array, source, sarray);
+    }
+    #endif /* ULAB_VECTORISE_USES_FUN_POINTER */
+    return MP_OBJ_FROM_PTR(result);
+}
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+
+
+#if ULAB_NUMPY_HAS_ACOS
+//| def acos(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the inverse cosine function"""
+//|    ...
+//|
+
+MATH_FUN_1(acos, acos);
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_acos_obj, 1, vector_acos);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_acos_obj, vector_acos);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_ACOS */
+
+#if ULAB_NUMPY_HAS_ACOSH
+//| def acosh(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the inverse hyperbolic cosine function"""
+//|    ...
+//|
+
+MATH_FUN_1(acosh, acosh);
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_acosh_obj, 1, vector_acosh);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_acosh_obj, vector_acosh);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_ACOSH */
+
+#if ULAB_NUMPY_HAS_ASIN
+//| def asin(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the inverse sine function"""
+//|    ...
+//|
+
+MATH_FUN_1(asin, asin);
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_asin_obj, 1, vector_asin);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_asin_obj, vector_asin);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_ASIN */
+
+#if ULAB_NUMPY_HAS_ASINH
+//| def asinh(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the inverse hyperbolic sine function"""
+//|    ...
+//|
+
+MATH_FUN_1(asinh, asinh);
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_asinh_obj, 1, vector_asinh);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_asinh_obj, vector_asinh);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_ASINH */
+
+#if ULAB_NUMPY_HAS_AROUND
+//| def around(a: ulab.numpy.ndarray, *, decimals: int = 0) -> ulab.numpy.ndarray:
+//|    """Returns a new float array in which each element is rounded to
+//|       ``decimals`` places."""
+//|    ...
+//|
+
+mp_obj_t vector_around(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
+        { MP_QSTR_decimals, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 0 } },
+        #if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+        { MP_QSTR_out, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } }
+        #endif
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be an ndarray"));
+    }
+    // rounding to `decimals` places is carried out as round(x * 10**decimals) / 10**decimals
+    int8_t decimals = args[1].u_int;
+    const mp_float_t scale = MICROPY_FLOAT_C_FUN(pow)(10.0, decimals);
+    ndarray_obj_t *source = MP_OBJ_TO_PTR(args[0].u_obj);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(source->dtype)
+    #if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+    // the keyword is accepted by the parser, but nothing except its default passes
+    if(args[2].u_obj != mp_const_none) {
+        mp_raise_ValueError(MP_ERROR_TEXT("out keyword is not supported for function"));
+    }
+    #endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+    ndarray_obj_t *rounded = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_FLOAT);
+    mp_float_t *rarray = (mp_float_t *)rounded->array;
+    uint8_t *sarray = (uint8_t *)source->array;
+    mp_float_t (*to_float)(void *) = ndarray_get_float_function(source->dtype);
+
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    mp_float_t value = to_float(sarray);
+                    *rarray++ = MICROPY_FLOAT_C_FUN(round)(value * scale) / scale;
+                    sarray += source->strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                sarray += source->strides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+            sarray += source->strides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+        sarray += source->strides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+    return MP_OBJ_FROM_PTR(rounded);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_around_obj, 1, vector_around);
+#endif /* ULAB_NUMPY_HAS_AROUND */
+
+#if ULAB_NUMPY_HAS_ATAN
+//| def atan(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the inverse tangent function; the return values are in the
+//|       range [-pi/2,pi/2]."""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_atan as an element-wise wrapper around the C atan -- confirm at the macro.
+MATH_FUN_1(atan, atan);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_atan_obj, 1, vector_atan);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_atan_obj, vector_atan);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_ATAN */
+
+
+#if ULAB_NUMPY_HAS_ATANH
+//| def atanh(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the inverse hyperbolic tangent function"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_atanh as an element-wise wrapper around the C atanh -- confirm at the macro.
+MATH_FUN_1(atanh, atanh);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_atanh_obj, 1, vector_atanh);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_atanh_obj, vector_atanh);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_ATANH */
+
+#if ULAB_NUMPY_HAS_ARCTAN2
+//| def arctan2(ya: _ScalarOrArrayLike, xa: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the inverse tangent function of y/x; the return values are in
+//|       the range [-pi, pi]."""
+//|    ...
+//|
+
+// Element-wise atan2(y, x).
+//
+// When both arguments are Python ints/floats, a single Python float is
+// returned. Otherwise both arguments are converted with ndarray_from_mp_obj,
+// broadcast against each other, and a new dense float ndarray with the
+// broadcast shape is returned.
+//
+// Raises ValueError when the operands cannot be broadcast together. Both
+// operands are passed through COMPLEX_DTYPE_NOT_IMPLEMENTED (defined elsewhere;
+// by its name it rejects complex dtypes -- confirm at the macro).
+mp_obj_t vector_arctan2(mp_obj_t y, mp_obj_t x) {
+    // scalar fast path: no array is allocated
+    if((mp_obj_is_float(y) || mp_obj_is_int(y)) &&
+        (mp_obj_is_float(x) || mp_obj_is_int(x))) {
+        mp_float_t _y = mp_obj_get_float(y);
+        mp_float_t _x = mp_obj_get_float(x);
+        return mp_obj_new_float(MICROPY_FLOAT_C_FUN(atan2)(_y, _x));
+    }
+
+    ndarray_obj_t *ndarray_x = ndarray_from_mp_obj(x, 0);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray_x->dtype)
+
+    ndarray_obj_t *ndarray_y = ndarray_from_mp_obj(y, 0);
+    COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray_y->dtype)
+
+    // scratch buffers filled in by ndarray_can_broadcast
+    uint8_t ndim = 0;
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+    int32_t *xstrides = m_new(int32_t, ULAB_MAX_DIMS);
+    int32_t *ystrides = m_new(int32_t, ULAB_MAX_DIMS);
+    if(!ndarray_can_broadcast(ndarray_x, ndarray_y, &ndim, shape, xstrides, ystrides)) {
+        // mp_raise_ValueError does not return, so the scratch buffers must be
+        // released before raising; cleanup placed after the raise never runs.
+        m_del(size_t, shape, ULAB_MAX_DIMS);
+        m_del(int32_t, xstrides, ULAB_MAX_DIMS);
+        m_del(int32_t, ystrides, ULAB_MAX_DIMS);
+        mp_raise_ValueError(MP_ERROR_TEXT("operands could not be broadcast together"));
+    }
+
+    uint8_t *xarray = (uint8_t *)ndarray_x->array;
+    uint8_t *yarray = (uint8_t *)ndarray_y->array;
+
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+    mp_float_t *rarray = (mp_float_t *)results->array;
+
+    // per-dtype readers that return the element at a byte pointer as a float
+    mp_float_t (*funcx)(void *) = ndarray_get_float_function(ndarray_x->dtype);
+    mp_float_t (*funcy)(void *) = ndarray_get_float_function(ndarray_y->dtype);
+
+    // Up to four nested loops, innermost axis last. The inputs are walked with
+    // their broadcast byte strides; the output is written contiguously. After
+    // each inner loop the input pointers are rewound and moved one step along
+    // the next outer axis.
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    mp_float_t _x = funcx(xarray);
+                    mp_float_t _y = funcy(yarray);
+                    *rarray++ = MICROPY_FLOAT_C_FUN(atan2)(_y, _x);
+                    xarray += xstrides[ULAB_MAX_DIMS - 1];
+                    yarray += ystrides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l < results->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                xarray -= xstrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1];
+                xarray += xstrides[ULAB_MAX_DIMS - 2];
+                yarray -= ystrides[ULAB_MAX_DIMS - 1] * results->shape[ULAB_MAX_DIMS-1];
+                yarray += ystrides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k < results->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            xarray -= xstrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+            xarray += xstrides[ULAB_MAX_DIMS - 3];
+            yarray -= ystrides[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+            yarray += ystrides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j < results->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        xarray -= xstrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+        xarray += xstrides[ULAB_MAX_DIMS - 4];
+        yarray -= ystrides[ULAB_MAX_DIMS - 3] * results->shape[ULAB_MAX_DIMS-3];
+        yarray += ystrides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i < results->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+
+    return MP_OBJ_FROM_PTR(results);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(vector_arctan2_obj, vector_arctan2);
+#endif /* ULAB_NUMPY_HAS_ARCTAN2 */
+
+#if ULAB_NUMPY_HAS_CEIL
+//| def ceil(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Rounds numbers up to the next whole number"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_ceil as an element-wise wrapper around the C ceil -- confirm at the macro.
+MATH_FUN_1(ceil, ceil);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_ceil_obj, 1, vector_ceil);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_ceil_obj, vector_ceil);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_CEIL */
+
+#if ULAB_NUMPY_HAS_COS
+//| def cos(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the cosine function"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_cos as an element-wise wrapper around the C cos -- confirm at the macro.
+MATH_FUN_1(cos, cos);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_cos_obj, 1, vector_cos);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_cos_obj, vector_cos);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_COS */
+
+#if ULAB_NUMPY_HAS_COSH
+//| def cosh(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the hyperbolic cosine function"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_cosh as an element-wise wrapper around the C cosh -- confirm at the macro.
+MATH_FUN_1(cosh, cosh);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_cosh_obj, 1, vector_cosh);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_cosh_obj, vector_cosh);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_COSH */
+
+#if ULAB_NUMPY_HAS_DEGREES
+//| def degrees(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Converts angles from radians to degrees"""
+//|    ...
+//|
+
+// Scalar kernel: converts a single angle from radians to degrees.
+static mp_float_t vector_degrees_(mp_float_t angle) {
+    return (angle * MICROPY_FLOAT_CONST(180.0)) / MP_PI;
+}
+
+// Python-facing entry point. The per-element work is handed to
+// vector_generic_vector together with the scalar kernel above; the signature
+// follows whichever form of vector_generic_vector this build provides.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+static mp_obj_t vector_degrees(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    mp_obj_t in_degrees = vector_generic_vector(n_args, pos_args, kw_args, vector_degrees_);
+    return in_degrees;
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_degrees_obj, 1, vector_degrees);
+#else
+static mp_obj_t vector_degrees(mp_obj_t x_obj) {
+    mp_obj_t in_degrees = vector_generic_vector(x_obj, vector_degrees_);
+    return in_degrees;
+}
+MP_DEFINE_CONST_FUN_OBJ_1(vector_degrees_obj, vector_degrees);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_DEGREES */
+
+#if ULAB_SCIPY_SPECIAL_HAS_ERF
+//| def erf(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the error function, which has applications in statistics"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_erf as an element-wise wrapper around the C erf -- confirm at the macro.
+MATH_FUN_1(erf, erf);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_erf_obj, 1, vector_erf);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_erf_obj, vector_erf);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_SCIPY_SPECIAL_HAS_ERF */
+
+#if ULAB_SCIPY_SPECIAL_HAS_ERFC
+//| def erfc(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the complementary error function, which has applications in statistics"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_erfc as an element-wise wrapper around the C erfc -- confirm at the macro.
+MATH_FUN_1(erfc, erfc);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_erfc_obj, 1, vector_erfc);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_erfc_obj, vector_erfc);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_SCIPY_SPECIAL_HAS_ERFC */
+
+#if ULAB_NUMPY_HAS_EXP
+//| def exp(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the exponent function."""
+//|    ...
+//|
+// Element-wise e**x.
+//
+// With ULAB_SUPPORTS_COMPLEX, a complex scalar or an ndarray of complex dtype
+// is handled here as exp(re) * (cos(im) + i*sin(im)); every other input is
+// forwarded to vector_generic_vector with the C exp function. The signature
+// (keyword form vs. single-argument form) follows ULAB_MATH_FUNCTIONS_OUT_KEYWORD.
+// Raises ValueError when `out` is given together with a complex ndarray.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+static mp_obj_t vector_exp(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+#else
+static mp_obj_t vector_exp(mp_obj_t o_in) {
+#endif
+    #if ULAB_SUPPORTS_COMPLEX
+    
+    #if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+    // since the complex case is dissimilar to the rest, we've got to do the parsing of the keywords here
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE} } ,
+        { MP_QSTR_out, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    mp_obj_t o_in = args[0].u_obj;
+    mp_obj_t out = args[1].u_obj;
+    #endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+    
+    if(mp_obj_is_type(o_in, &mp_type_complex)) {
+        // complex scalar: `out` is not consulted on this path
+        mp_float_t real, imag;
+        mp_obj_get_complex(o_in, &real, &imag);
+        mp_float_t exp_real = MICROPY_FLOAT_C_FUN(exp)(real);
+        return mp_obj_new_complex(exp_real * MICROPY_FLOAT_C_FUN(cos)(imag), exp_real * MICROPY_FLOAT_C_FUN(sin)(imag));
+    } else if(mp_obj_is_type(o_in, &ulab_ndarray_type)) {
+        ndarray_obj_t *source = MP_OBJ_TO_PTR(o_in);
+        
+        #if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+        if((out != mp_const_none) && (source->dtype == NDARRAY_COMPLEX)){
+            mp_raise_ValueError(MP_ERROR_TEXT("out keyword is not supported for complex dtype"));
+        }
+        #endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+
+        if(source->dtype == NDARRAY_COMPLEX) {
+            uint8_t *sarray = (uint8_t *)source->array;
+            ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_COMPLEX);
+            mp_float_t *array = (mp_float_t *)ndarray->array;
+            // byte offset from the real part of an element to its imaginary part
+            uint8_t itemsize = sizeof(mp_float_t);
+
+            // Up to four nested loops, innermost axis last. The source is
+            // walked with its byte strides and rewound after each inner loop;
+            // the result is written contiguously as (real, imag) pairs.
+            #if ULAB_MAX_DIMS > 3
+            size_t i = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 2
+                size_t j = 0;
+                do {
+                #endif
+                    #if ULAB_MAX_DIMS > 1
+                    size_t k = 0;
+                    do {
+                    #endif
+                        size_t l = 0;
+                        do {
+                            mp_float_t real = *(mp_float_t *)sarray;
+                            mp_float_t imag = *(mp_float_t *)(sarray + itemsize);
+                            mp_float_t exp_real = MICROPY_FLOAT_C_FUN(exp)(real);
+                            *array++ = exp_real * MICROPY_FLOAT_C_FUN(cos)(imag);
+                            *array++ = exp_real * MICROPY_FLOAT_C_FUN(sin)(imag);
+                            sarray += source->strides[ULAB_MAX_DIMS - 1];
+                            l++;
+                        } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+                    #if ULAB_MAX_DIMS > 1
+                        sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                        sarray += source->strides[ULAB_MAX_DIMS - 2];
+                        k++;
+                    } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+                    #endif /* ULAB_MAX_DIMS > 1 */
+                #if ULAB_MAX_DIMS > 2
+                    sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                    sarray += source->strides[ULAB_MAX_DIMS - 3];
+                    j++;
+                } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+                #endif /* ULAB_MAX_DIMS > 2 */
+            #if ULAB_MAX_DIMS > 3
+                sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+                sarray += source->strides[ULAB_MAX_DIMS - 4];
+                i++;
+            } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+            #endif /* ULAB_MAX_DIMS > 3 */
+            return MP_OBJ_FROM_PTR(ndarray);
+        }
+    }
+    #endif /* ULAB_SUPPORTS_COMPLEX */
+    // real-valued input (or a build without complex support): generic path
+    #if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+    return vector_generic_vector(n_args, pos_args, kw_args, MICROPY_FLOAT_C_FUN(exp));   
+    #else
+    return vector_generic_vector(o_in, MICROPY_FLOAT_C_FUN(exp));
+    #endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+}
+
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_exp_obj, 1, vector_exp);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_exp_obj, vector_exp);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_EXP */
+
+#if ULAB_NUMPY_HAS_EXPM1
+//| def expm1(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes $e^x-1$.  For arguments close to zero, using this function preserves numeric accuracy better than computing `exp(x) - 1`."""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_expm1 as an element-wise wrapper around the C expm1 -- confirm at the macro.
+MATH_FUN_1(expm1, expm1);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_expm1_obj, 1, vector_expm1);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_expm1_obj, vector_expm1);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_EXPM1 */
+
+#if ULAB_NUMPY_HAS_FLOOR
+//| def floor(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Rounds numbers down to the nearest whole number that is not greater than the input"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_floor as an element-wise wrapper around the C floor -- confirm at the macro.
+MATH_FUN_1(floor, floor);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_floor_obj, 1, vector_floor);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_floor_obj, vector_floor);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_FLOOR */
+
+#if ULAB_SCIPY_SPECIAL_HAS_GAMMA
+//| def gamma(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the gamma function"""
+//|    ...
+//|
+
+// Note the differing names: the wrapper is called vector_gamma, while the
+// second macro argument is the C library's tgamma. MATH_FUN_1 itself is
+// defined outside this part of the file -- confirm its expansion there.
+MATH_FUN_1(gamma, tgamma);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_gamma_obj, 1, vector_gamma);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_gamma_obj, vector_gamma);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_SCIPY_SPECIAL_HAS_GAMMA */
+
+#if ULAB_SCIPY_SPECIAL_HAS_GAMMALN
+//| def lgamma(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the natural log of the gamma function"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_lgamma as an element-wise wrapper around the C lgamma -- confirm at the macro.
+MATH_FUN_1(lgamma, lgamma);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_lgamma_obj, 1, vector_lgamma);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_lgamma_obj, vector_lgamma);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_SCIPY_SPECIAL_HAS_GAMMALN */
+
+#if ULAB_NUMPY_HAS_LOG
+//| def log(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the natural log"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_log as an element-wise wrapper around the C log -- confirm at the macro.
+MATH_FUN_1(log, log);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_log_obj, 1, vector_log);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_log_obj, vector_log);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_LOG */
+
+#if ULAB_NUMPY_HAS_LOG10
+//| def log10(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the log base 10"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_log10 as an element-wise wrapper around the C log10 -- confirm at the macro.
+MATH_FUN_1(log10, log10);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_log10_obj, 1, vector_log10);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_log10_obj, vector_log10);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_LOG10 */
+
+#if ULAB_NUMPY_HAS_LOG2
+//| def log2(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the log base 2"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_log2 as an element-wise wrapper around the C log2 -- confirm at the macro.
+MATH_FUN_1(log2, log2);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_log2_obj, 1, vector_log2);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_log2_obj, vector_log2);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_LOG2 */
+
+#if ULAB_NUMPY_HAS_RADIANS
+//| def radians(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Converts angles from degrees to radians"""
+//|    ...
+//|
+
+// Scalar kernel: converts a single angle from degrees to radians.
+static mp_float_t vector_radians_(mp_float_t angle) {
+    return (angle * MP_PI) / MICROPY_FLOAT_CONST(180.0);
+}
+
+// Python-facing entry point. The per-element work is handed to
+// vector_generic_vector together with the scalar kernel above; the signature
+// follows whichever form of vector_generic_vector this build provides.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+static mp_obj_t vector_radians(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    mp_obj_t in_radians = vector_generic_vector(n_args, pos_args, kw_args, vector_radians_);
+    return in_radians;
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_radians_obj, 1, vector_radians);
+#else
+static mp_obj_t vector_radians(mp_obj_t x_obj) {
+    mp_obj_t in_radians = vector_generic_vector(x_obj, vector_radians_);
+    return in_radians;
+}
+MP_DEFINE_CONST_FUN_OBJ_1(vector_radians_obj, vector_radians);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_RADIANS */
+
+#if ULAB_NUMPY_HAS_SIN
+//| def sin(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the sine function"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_sin as an element-wise wrapper around the C sin -- confirm at the macro.
+MATH_FUN_1(sin, sin);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_sin_obj, 1, vector_sin);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_sin_obj, vector_sin);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_SIN */
+
+#if ULAB_NUMPY_HAS_SINC
+//| def sinc(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the normalized sinc function"""
+//|    ...
+//|
+
+// Scalar kernel: sin(pi * x) / (pi * x), with the value at x == 0 defined as 1
+// so that the division by zero is never evaluated.
+static mp_float_t vector_sinc1(mp_float_t x) {
+    if (x != MICROPY_FLOAT_CONST(0.)) {
+        const mp_float_t phase = x * MP_PI;
+        return MICROPY_FLOAT_C_FUN(sin)(phase) / phase;
+    }
+    return MICROPY_FLOAT_CONST(1.);
+}
+
+// Python-facing entry point. The per-element work is handed to
+// vector_generic_vector together with the scalar kernel above; the signature
+// follows whichever form of vector_generic_vector this build provides.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+static mp_obj_t vector_sinc(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    mp_obj_t sinc_values = vector_generic_vector(n_args, pos_args, kw_args, vector_sinc1);
+    return sinc_values;
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_sinc_obj, 1, vector_sinc);
+#else
+static mp_obj_t vector_sinc(mp_obj_t x_obj) {
+    mp_obj_t sinc_values = vector_generic_vector(x_obj, vector_sinc1);
+    return sinc_values;
+}
+MP_DEFINE_CONST_FUN_OBJ_1(vector_sinc_obj, vector_sinc);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_SINC */
+
+#if ULAB_NUMPY_HAS_SINH
+//| def sinh(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the hyperbolic sine"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_sinh as an element-wise wrapper around the C sinh -- confirm at the macro.
+MATH_FUN_1(sinh, sinh);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_sinh_obj, 1, vector_sinh);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_sinh_obj, vector_sinh);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_SINH */
+
+
+#if ULAB_NUMPY_HAS_SQRT
+//| def sqrt(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the square root"""
+//|    ...
+//|
+
+#if ULAB_SUPPORTS_COMPLEX || ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+// Element-wise square root (keyword-argument entry point).
+//
+// Arguments: one required positional argument, plus the keyword `out` when
+// ULAB_MATH_FUNCTIONS_OUT_KEYWORD is set and the keyword `dtype` (float by
+// default, or complex) when ULAB_SUPPORTS_COMPLEX is set.
+//
+// In complex builds:
+//   - a complex scalar yields a complex scalar, computed in polar form
+//     (square root of the modulus at half the argument);
+//   - an ndarray with dtype=complex yields a new complex ndarray; the source
+//     dtype must be float or complex (TypeError otherwise), negative floats
+//     map to purely imaginary roots, and `out` is rejected with ValueError;
+//   - a complex ndarray with dtype=float raises TypeError.
+// Every other input is forwarded to vector_generic_vector with the C sqrt.
+mp_obj_t vector_sqrt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        #if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+        { MP_QSTR_out, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        #endif
+        #if ULAB_SUPPORTS_COMPLEX
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_INT(NDARRAY_FLOAT) } },
+        #endif
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    #if ULAB_SUPPORTS_COMPLEX
+    mp_obj_t o_in = args[0].u_obj;
+    // dtype is the last entry of allowed_args in every complex build; its index
+    // is 2 with the out keyword and 1 without it, so a fixed index of 2 would
+    // read past the end of args when the out keyword is compiled out.
+    uint8_t dtype = mp_obj_get_int(args[MP_ARRAY_SIZE(allowed_args) - 1].u_obj);
+    if((dtype != NDARRAY_FLOAT) && (dtype != NDARRAY_COMPLEX)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("dtype must be float, or complex"));
+    }
+
+    if(mp_obj_is_type(o_in, &mp_type_complex)) {
+        // polar form: sqrt(z) = sqrt(|z|) * (cos(arg(z)/2) + i*sin(arg(z)/2))
+        mp_float_t real, imag;
+        mp_obj_get_complex(o_in, &real, &imag);
+        mp_float_t sqrt_abs = MICROPY_FLOAT_C_FUN(sqrt)(real * real + imag * imag);
+        sqrt_abs = MICROPY_FLOAT_C_FUN(sqrt)(sqrt_abs);
+        mp_float_t theta = MICROPY_FLOAT_CONST(0.5) * MICROPY_FLOAT_C_FUN(atan2)(imag, real);
+        return mp_obj_new_complex(sqrt_abs * MICROPY_FLOAT_C_FUN(cos)(theta), sqrt_abs * MICROPY_FLOAT_C_FUN(sin)(theta));
+    } else if(mp_obj_is_type(o_in, &ulab_ndarray_type)) {
+        ndarray_obj_t *source = MP_OBJ_TO_PTR(o_in);
+        if((source->dtype == NDARRAY_COMPLEX) && (dtype == NDARRAY_FLOAT)) {
+            mp_raise_TypeError(MP_ERROR_TEXT("can't convert complex to float"));
+        }
+
+        if(dtype == NDARRAY_COMPLEX) {
+            #if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+            mp_obj_t out = args[1].u_obj;
+            if(out != mp_const_none) {
+                mp_raise_ValueError(MP_ERROR_TEXT("out keyword is not supported for complex dtype"));
+            }
+            #endif
+            if(source->dtype == NDARRAY_COMPLEX) {
+                uint8_t *sarray = (uint8_t *)source->array;
+                ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_COMPLEX);
+                mp_float_t *array = (mp_float_t *)ndarray->array;
+                // byte offset from the real part of an element to its imaginary part
+                uint8_t itemsize = sizeof(mp_float_t);
+
+                // Up to four nested loops, innermost axis last; the source is
+                // walked with its byte strides and rewound after each inner
+                // loop, the result is written contiguously as (real, imag).
+                #if ULAB_MAX_DIMS > 3
+                size_t i = 0;
+                do {
+                #endif
+                    #if ULAB_MAX_DIMS > 2
+                    size_t j = 0;
+                    do {
+                    #endif
+                        #if ULAB_MAX_DIMS > 1
+                        size_t k = 0;
+                        do {
+                        #endif
+                            size_t l = 0;
+                            do {
+                                mp_float_t real = *(mp_float_t *)sarray;
+                                mp_float_t imag = *(mp_float_t *)(sarray + itemsize);
+                                mp_float_t sqrt_abs = MICROPY_FLOAT_C_FUN(sqrt)(real * real + imag * imag);
+                                sqrt_abs = MICROPY_FLOAT_C_FUN(sqrt)(sqrt_abs);
+                                mp_float_t theta = MICROPY_FLOAT_CONST(0.5) * MICROPY_FLOAT_C_FUN(atan2)(imag, real);
+                                *array++ = sqrt_abs * MICROPY_FLOAT_C_FUN(cos)(theta);
+                                *array++ = sqrt_abs * MICROPY_FLOAT_C_FUN(sin)(theta);
+                                sarray += source->strides[ULAB_MAX_DIMS - 1];
+                                l++;
+                            } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+                        #if ULAB_MAX_DIMS > 1
+                            sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                            sarray += source->strides[ULAB_MAX_DIMS - 2];
+                            k++;
+                        } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+                        #endif /* ULAB_MAX_DIMS > 1 */
+                    #if ULAB_MAX_DIMS > 2
+                        sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                        sarray += source->strides[ULAB_MAX_DIMS - 3];
+                        j++;
+                    } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+                    #endif /* ULAB_MAX_DIMS > 2 */
+                #if ULAB_MAX_DIMS > 3
+                    sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+                    sarray += source->strides[ULAB_MAX_DIMS - 4];
+                    i++;
+                } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+                #endif /* ULAB_MAX_DIMS > 3 */
+                return MP_OBJ_FROM_PTR(ndarray);
+            } else if(source->dtype == NDARRAY_FLOAT) {
+                uint8_t *sarray = (uint8_t *)source->array;
+                ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_COMPLEX);
+                mp_float_t *array = (mp_float_t *)ndarray->array;
+
+                #if ULAB_MAX_DIMS > 3
+                size_t i = 0;
+                do {
+                #endif
+                    #if ULAB_MAX_DIMS > 2
+                    size_t j = 0;
+                    do {
+                    #endif
+                        #if ULAB_MAX_DIMS > 1
+                        size_t k = 0;
+                        do {
+                        #endif
+                            size_t l = 0;
+                            do {
+                                mp_float_t value = *(mp_float_t *)sarray;
+                                // Both components are written explicitly, so the
+                                // result does not depend on the freshly created
+                                // array having been zero-filled.
+                                if(value >= MICROPY_FLOAT_CONST(0.0)) {
+                                    // non-negative input: purely real root
+                                    *array++ = MICROPY_FLOAT_C_FUN(sqrt)(value);
+                                    *array++ = MICROPY_FLOAT_CONST(0.0);
+                                } else {
+                                    // negative input: purely imaginary root
+                                    *array++ = MICROPY_FLOAT_CONST(0.0);
+                                    *array++ = MICROPY_FLOAT_C_FUN(sqrt)(-value);
+                                }
+                                sarray += source->strides[ULAB_MAX_DIMS - 1];
+                                l++;
+                            } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+                        #if ULAB_MAX_DIMS > 1
+                            sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS-1];
+                            sarray += source->strides[ULAB_MAX_DIMS - 2];
+                            k++;
+                        } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+                        #endif /* ULAB_MAX_DIMS > 1 */
+                    #if ULAB_MAX_DIMS > 2
+                        sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS-2];
+                        sarray += source->strides[ULAB_MAX_DIMS - 3];
+                        j++;
+                    } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+                    #endif /* ULAB_MAX_DIMS > 2 */
+                #if ULAB_MAX_DIMS > 3
+                    sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS-3];
+                    sarray += source->strides[ULAB_MAX_DIMS - 4];
+                    i++;
+                } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+                #endif /* ULAB_MAX_DIMS > 3 */
+                return MP_OBJ_FROM_PTR(ndarray);
+            } else {
+                mp_raise_TypeError(MP_ERROR_TEXT("input dtype must be float or complex"));
+            }
+        }
+    }
+    #endif /* ULAB_SUPPORTS_COMPLEX */
+    // Real-valued path. vector_generic_vector has two signatures in this file:
+    // the keyword form in out-keyword builds and the (object, function) form
+    // otherwise, so the call has to follow the build configuration.
+    #if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+    return vector_generic_vector(n_args, pos_args, kw_args, MICROPY_FLOAT_C_FUN(sqrt));
+    #else
+    return vector_generic_vector(args[0].u_obj, MICROPY_FLOAT_C_FUN(sqrt));
+    #endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_sqrt_obj, 1, vector_sqrt);
+#else
+MATH_FUN_1(sqrt, sqrt);
+MP_DEFINE_CONST_FUN_OBJ_1(vector_sqrt_obj, vector_sqrt);
+#endif /* ULAB_SUPPORTS_COMPLEX || ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_SQRT */
+
+#if ULAB_NUMPY_HAS_TAN
+//| def tan(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the tangent"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_tan as an element-wise wrapper around the C tan -- confirm at the macro.
+MATH_FUN_1(tan, tan);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_tan_obj, 1, vector_tan);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_tan_obj, vector_tan);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_TAN */
+
+#if ULAB_NUMPY_HAS_TANH
+//| def tanh(a: _ScalarOrArrayLike) -> _ScalarOrNdArray:
+//|    """Computes the hyperbolic tangent"""
+//|    ...
+//|
+
+// MATH_FUN_1 is defined outside this part of the file; presumably it emits
+// vector_tanh as an element-wise wrapper around the C tanh -- confirm at the macro.
+MATH_FUN_1(tanh, tanh);
+// Keyword-argument function object when the out keyword is compiled in,
+// plain one-argument function object otherwise.
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_tanh_obj, 1, vector_tanh);
+#else
+MP_DEFINE_CONST_FUN_OBJ_1(vector_tanh_obj, vector_tanh);
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* ULAB_NUMPY_HAS_TANH */
+
+#if ULAB_NUMPY_HAS_VECTORIZE
+// Call handler of the objects created by vectorize(): applies the wrapped
+// Python callable to every element of the first positional argument and
+// stores the results in a new ndarray of dtype self->otypes.
+//
+// args[0] may be
+//   - an ndarray: the result has the same ndim and shape (complex dtype goes
+//     through COMPLEX_DTYPE_NOT_IMPLEMENTED, defined elsewhere);
+//   - a tuple, list or range: the result is a linear array of the same length;
+//   - an int or float: the result is a linear array with one element.
+// Additional positional or keyword arguments are ignored.
+//
+// Raises TypeError when called without a positional argument and ValueError
+// for any other input type.
+static mp_obj_t vector_vectorized_function_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
+    (void) n_kw;
+    // args[0] is read unconditionally below, so it has to exist
+    if(n_args == 0) {
+        mp_raise_TypeError(MP_ERROR_TEXT("missing required positional argument"));
+    }
+    vectorized_function_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    mp_obj_t avalue[1];
+    mp_obj_t fvalue;
+    if(mp_obj_is_type(args[0], &ulab_ndarray_type)) {
+        ndarray_obj_t *source = MP_OBJ_TO_PTR(args[0]);
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(source->dtype)
+
+        ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, self->otypes);
+        uint8_t *sarray = (uint8_t *)source->array;
+        uint8_t *narray = (uint8_t *)ndarray->array;
+
+        // Up to four nested loops, innermost axis last. The source is walked
+        // with its byte strides and rewound after each inner loop; the result
+        // pointer simply advances by one item per element.
+        #if ULAB_MAX_DIMS > 3
+        size_t i = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 2
+            size_t j = 0;
+            do {
+            #endif
+                #if ULAB_MAX_DIMS > 1
+                size_t k = 0;
+                do {
+                #endif
+                    size_t l = 0;
+                    do {
+                        // box the element, call the Python function, unbox the result
+                        avalue[0] = mp_binary_get_val_array(source->dtype, sarray, 0);
+                        fvalue = MP_OBJ_TYPE_GET_SLOT(self->type, call)(self->fun, 1, 0, avalue);
+                        ndarray_set_value(self->otypes, narray, 0, fvalue);
+                        sarray += source->strides[ULAB_MAX_DIMS - 1];
+                        narray += ndarray->itemsize;
+                        l++;
+                    } while(l < source->shape[ULAB_MAX_DIMS - 1]);
+                #if ULAB_MAX_DIMS > 1
+                    sarray -= source->strides[ULAB_MAX_DIMS - 1] * source->shape[ULAB_MAX_DIMS - 1];
+                    sarray += source->strides[ULAB_MAX_DIMS - 2];
+                    k++;
+                } while(k < source->shape[ULAB_MAX_DIMS - 2]);
+                #endif /* ULAB_MAX_DIMS > 1 */
+            #if ULAB_MAX_DIMS > 2
+                sarray -= source->strides[ULAB_MAX_DIMS - 2] * source->shape[ULAB_MAX_DIMS - 2];
+                sarray += source->strides[ULAB_MAX_DIMS - 3];
+                j++;
+            } while(j < source->shape[ULAB_MAX_DIMS - 3]);
+            #endif /* ULAB_MAX_DIMS > 2 */
+        #if ULAB_MAX_DIMS > 3
+            sarray -= source->strides[ULAB_MAX_DIMS - 3] * source->shape[ULAB_MAX_DIMS - 3];
+            sarray += source->strides[ULAB_MAX_DIMS - 4];
+            i++;
+        } while(i < source->shape[ULAB_MAX_DIMS - 4]);
+        #endif /* ULAB_MAX_DIMS > 3 */
+
+        return MP_OBJ_FROM_PTR(ndarray);
+    } else if(mp_obj_is_type(args[0], &mp_type_tuple) || mp_obj_is_type(args[0], &mp_type_list) ||
+        mp_obj_is_type(args[0], &mp_type_range)) { // i.e., the input is a generic iterable
+        size_t len = (size_t)mp_obj_get_int(mp_obj_len_maybe(args[0]));
+        ndarray_obj_t *ndarray = ndarray_new_linear_array(len, self->otypes);
+        mp_obj_iter_buf_t iter_buf;
+        mp_obj_t iterable = mp_getiter(args[0], &iter_buf);
+        size_t i=0;
+        // The result holds exactly len items. The wrapped function is arbitrary
+        // Python code and may grow a list while it is being iterated, so the
+        // loop is bounded by len to keep the writes inside the result array.
+        while ((i < len) && ((avalue[0] = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION)) {
+            fvalue = MP_OBJ_TYPE_GET_SLOT(self->type, call)(self->fun, 1, 0, avalue);
+            ndarray_set_value(self->otypes, ndarray->array, i, fvalue);
+            i++;
+        }
+        return MP_OBJ_FROM_PTR(ndarray);
+    } else if(mp_obj_is_int(args[0]) || mp_obj_is_float(args[0])) {
+        // scalar input: one-element result
+        ndarray_obj_t *ndarray = ndarray_new_linear_array(1, self->otypes);
+        fvalue = MP_OBJ_TYPE_GET_SLOT(self->type, call)(self->fun, 1, 0, args);
+        ndarray_set_value(self->otypes, ndarray->array, 0, fvalue);
+        return MP_OBJ_FROM_PTR(ndarray);
+    } else {
+        mp_raise_ValueError(MP_ERROR_TEXT("wrong input type"));
+    }
+    return mp_const_none;
+}
+
+// Type object of the callables returned by vectorize(). Its only slot is the
+// call handler vector_vectorized_function_call. Two spellings are provided:
+// the MP_DEFINE_CONST_OBJ_TYPE macro when the MicroPython headers define it,
+// and an explicit mp_obj_type_t initialiser otherwise.
+#if defined(MP_DEFINE_CONST_OBJ_TYPE)
+MP_DEFINE_CONST_OBJ_TYPE(
+    vector_function_type,
+    MP_QSTR_,
+    MP_TYPE_FLAG_NONE,
+    call, vector_vectorized_function_call
+);
+#else
+const mp_obj_type_t vector_function_type = {
+    { &mp_type_type },
+    .flags = MP_TYPE_FLAG_EXTENDED,
+    .name = MP_QSTR_,
+    MP_TYPE_EXTENDED_FIELDS(
+    .call = vector_vectorized_function_call,
+    )
+};
+#endif
+
+//| def vectorize(
+//|     f: Union[Callable[[int], _float], Callable[[_float], _float]],
+//|     *,
+//|     otypes: Optional[_DType] = None
+//| ) -> Callable[[_ScalarOrArrayLike], ulab.numpy.ndarray]:
+//|    """
+//|    :param callable f: The function to wrap
+//|    :param otypes: dtype of the array returned by the wrapped function (float, uint8, int8, uint16 or int16).  None is interpreted to mean the return value is float.
+//|
+//|    Wrap a Python function ``f`` so that it can be applied to arrays or scalars. A scalar passed to the wrapped function is treated as a single-element 1-D array.
+//|    The callable must return only values of the types specified by ``otypes``, or the result is undefined."""
+//|    ...
+//|
+
+// Creates a vectorized_function_obj_t that wraps the Python callable given as
+// the first positional argument.
+//
+// Keyword `otypes` selects the dtype of the arrays produced by the wrapper:
+// None (the default) means float, otherwise it must be the integer code of
+// float, uint8, int8, uint16 or int16.
+//
+// Raises TypeError when the first argument has no call slot and ValueError
+// when otypes is neither None nor one of the accepted codes.
+static mp_obj_t vector_vectorize(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
+        { MP_QSTR_otypes, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} }
+    };
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    const mp_obj_type_t *type = mp_obj_get_type(args[0].u_obj);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a callable"));
+    }
+    mp_obj_t _otypes = args[1].u_obj;
+    // TODO: is float what numpy uses when otypes is None?
+    uint8_t otypes = NDARRAY_FLOAT;
+    if(_otypes != mp_const_none) {
+        if(!mp_obj_is_int(_otypes)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("wrong output type"));
+        }
+        // Validate the full-width integer before narrowing it to uint8_t;
+        // otherwise a value whose low byte happens to equal a dtype code would
+        // be accepted after truncation.
+        mp_int_t requested = mp_obj_get_int(_otypes);
+        if(requested != NDARRAY_FLOAT && requested != NDARRAY_UINT8 && requested != NDARRAY_INT8 &&
+            requested != NDARRAY_UINT16 && requested != NDARRAY_INT16) {
+            mp_raise_ValueError(MP_ERROR_TEXT("wrong output type"));
+        }
+        otypes = (uint8_t)requested;
+    }
+    vectorized_function_obj_t *function = m_new_obj(vectorized_function_obj_t);
+    function->base.type = &vector_function_type;
+    function->otypes = otypes;
+    function->fun = args[0].u_obj;
+    // cache the callable's type so that the call slot can be looked up per element
+    function->type = type;
+    return MP_OBJ_FROM_PTR(function);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(vector_vectorize_obj, 1, vector_vectorize);
+#endif
diff --git a/tulip/shared/ulab/code/numpy/vector.h b/tulip/shared/ulab/code/numpy/vector.h
new file mode 100644
index 000000000..2d22af8f3
--- /dev/null
+++ b/tulip/shared/ulab/code/numpy/vector.h
@@ -0,0 +1,319 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+*/
+
+#ifndef _VECTOR_
+#define _VECTOR_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+
+/*
+ * Function objects of the element-wise math functions.
+ * When ULAB_MATH_FUNCTIONS_OUT_KEYWORD is set, they are declared as keyword-capable
+ * (KW) function objects; otherwise they are declared as one-argument function objects.
+ * The two lists must be kept in sync.
+ */
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_acos_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_acosh_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_asin_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_asinh_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_atan_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_atanh_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_ceil_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_cos_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_cosh_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_degrees_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_erf_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_erfc_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_exp_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_expm1_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_floor_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_gamma_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_lgamma_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_log_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_log10_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_log2_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_radians_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_sin_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_sinc_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_sinh_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_tan_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_tanh_obj);
+#else
+MP_DECLARE_CONST_FUN_OBJ_1(vector_acos_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_acosh_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_asin_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_asinh_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_atan_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_atanh_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_ceil_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_cos_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_cosh_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_degrees_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_erf_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_erfc_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_exp_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_expm1_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_floor_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_gamma_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_lgamma_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_log_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_log10_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_log2_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_radians_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_sin_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_sinc_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_sinh_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_tan_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(vector_tanh_obj);
+#endif
+
+/* arctan2 is always a two-argument function object, around always a KW one. */
+MP_DECLARE_CONST_FUN_OBJ_2(vector_arctan2_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_around_obj);
+
+/*
+ * sqrt becomes a KW function object if either complex support or the out keyword
+ * is enabled. The bitwise `|` is evaluated by the preprocessor on the two flag values.
+ * NOTE(review): the matching definition is not in this header; the condition there
+ * has to be the same -- confirm.
+ */
+#if ULAB_SUPPORTS_COMPLEX | ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_sqrt_obj);
+#else
+MP_DECLARE_CONST_FUN_OBJ_1(vector_sqrt_obj);
+#endif
+
+MP_DECLARE_CONST_FUN_OBJ_KW(vector_vectorize_obj);
+
+/* Object created by vectorize(): a Python callable together with its output dtype. */
+typedef struct _vectorized_function_obj_t {
+    mp_obj_base_t base;             /* MicroPython object header */
+    uint8_t otypes;                 /* dtype code of the output, as validated by vectorize() */
+    mp_obj_t fun;                   /* the wrapped Python callable */
+    const mp_obj_type_t *type;      /* type of `fun`, stored when the object is created */
+} vectorized_function_obj_t;
+
+
+#if ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+
+#if ULAB_HAS_FUNCTION_ITERATOR
+/*
+ * Generic-dimension variant (out keyword enabled, ULAB_HAS_FUNCTION_ITERATOR set).
+ * Applies `f` -- a name taken from the expansion site, not a macro parameter -- to
+ * every element of `source`, read through `sarray` as `type`, and stores the result
+ * through `tarray`.
+ * The outer loop runs len / shape[last] times, the inner loop walks the last axis;
+ * after each inner pass ndarray_rewind_array() repositions `sarray`.
+ * `tarray` is only ever advanced by the last-axis target stride, and `target` is not
+ * referenced in this variant.
+ * The body is a GNU statement expression `({ ... })`.
+ * NOTE(review): the outer bound divides by shape[ULAB_MAX_DIMS - 1]; presumably callers
+ * guarantee that it is non-zero -- confirm.
+ */
+#define ITERATE_VECTOR(type, target, tarray, tstrides, source, sarray)\
+({\
+    size_t *scoords = ndarray_new_coords((source)->ndim);\
+    for(size_t i = 0; i < (source)->len / (source)->shape[ULAB_MAX_DIMS - 1]; i++) {\
+        for(size_t l = 0; l < (source)->shape[ULAB_MAX_DIMS - 1]; l++) {\
+            *(tarray) = f(*((type *)(sarray)));\
+            (tarray) += (tstrides)[ULAB_MAX_DIMS - 1];\
+            (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+        }\
+        ndarray_rewind_array((source)->ndim, sarray, (source)->shape, (source)->strides, scoords);\
+    }\
+})
+
+#else 
+
+#if ULAB_MAX_DIMS == 1
+/*
+ * 1-D variant (out keyword enabled): applies `f` (taken from the expansion site) to each
+ * element of `source`, read through `sarray` as `type`, and writes the result through
+ * `tarray`. Both pointers are advanced by their last-axis stride and are left past the
+ * final element. `target` is not referenced here.
+ * The do/while form executes the body at least once.
+ */
+#define ITERATE_VECTOR(type, target, tarray, tstrides, source, sarray) do {\
+    size_t l = 0;\
+    do {\
+        *(tarray) = f(*((type *)(sarray)));\
+        (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+        (tarray) += (tstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (source)->shape[ULAB_MAX_DIMS-1]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 1 */
+
+#if ULAB_MAX_DIMS == 2
+/*
+ * 2-D variant (out keyword enabled): same element-wise application of `f` as the 1-D
+ * form, wrapped in a loop over the second-to-last axis.
+ * After each inner pass both pointers are rewound by stride * shape of the last axis and
+ * then advanced by the stride of the second-to-last axis. The target rewind uses
+ * (target)->shape, the source rewind (source)->shape.
+ * Each do/while executes its body at least once.
+ */
+#define ITERATE_VECTOR(type, target, tarray, tstrides, source, sarray) do {\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            *(tarray) = f(*((type *)(sarray)));\
+            (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+            (tarray) += (tstrides)[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (source)->shape[ULAB_MAX_DIMS-1]);\
+        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1];\
+        (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\
+        (tarray) -= (tstrides)[ULAB_MAX_DIMS - 1] * (target)->shape[ULAB_MAX_DIMS - 1];\
+        (tarray) += (tstrides)[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (source)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+/*
+ * 3-D variant (out keyword enabled): three nested do/while loops (j, k, l from the
+ * outermost to the innermost axis) apply `f` (taken from the expansion site) to each
+ * element of `source` and store the result through `tarray`.
+ * On leaving a loop level, both pointers are rewound by stride * shape of that axis and
+ * advanced by the stride of the next outer axis; loop bounds come from (source)->shape,
+ * target rewinds from (target)->shape.
+ * Each do/while executes its body at least once.
+ */
+#define ITERATE_VECTOR(type, target, tarray, tstrides, source, sarray) do {\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                *(tarray) = f(*((type *)(sarray)));\
+                (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+                (tarray) += (tstrides)[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (source)->shape[ULAB_MAX_DIMS-1]);\
+            (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1];\
+            (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\
+            (tarray) -= (tstrides)[ULAB_MAX_DIMS - 1] * (target)->shape[ULAB_MAX_DIMS - 1];\
+            (tarray) += (tstrides)[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (source)->shape[ULAB_MAX_DIMS-2]);\
+        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 2] * (source)->shape[ULAB_MAX_DIMS - 2];\
+        (sarray) += (source)->strides[ULAB_MAX_DIMS - 3];\
+        (tarray) -= (tstrides)[ULAB_MAX_DIMS - 2] * (target)->shape[ULAB_MAX_DIMS - 2];\
+        (tarray) += (tstrides)[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (source)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+/*
+ * 4-D variant (out keyword enabled): four nested do/while loops (i, j, k, l from the
+ * outermost to the innermost axis) apply `f` (taken from the expansion site) to each
+ * element of `source`, read through `sarray` as `type`, and store the result through
+ * `tarray`.
+ * On leaving a loop level, both pointers are rewound by stride * shape of that axis and
+ * advanced by the stride of the next outer axis.
+ * The third parameter is named `tarray`, like in the 1-D..3-D variants, so that the
+ * body uses the macro argument and not whatever variable is called `tarray` at the
+ * expansion site.
+ * Each do/while executes its body at least once.
+ */
+#define ITERATE_VECTOR(type, target, tarray, tstrides, source, sarray) do {\
+    size_t i = 0;\
+    do {\
+        size_t j = 0;\
+        do {\
+            size_t k = 0;\
+            do {\
+                size_t l = 0;\
+                do {\
+                    *(tarray) = f(*((type *)(sarray)));\
+                    (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+                    (tarray) += (tstrides)[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l < (source)->shape[ULAB_MAX_DIMS-1]);\
+                (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1];\
+                (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\
+                (tarray) -= (tstrides)[ULAB_MAX_DIMS - 1] * (target)->shape[ULAB_MAX_DIMS - 1];\
+                (tarray) += (tstrides)[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k < (source)->shape[ULAB_MAX_DIMS-2]);\
+            (sarray) -= (source)->strides[ULAB_MAX_DIMS - 2] * (source)->shape[ULAB_MAX_DIMS - 2];\
+            (sarray) += (source)->strides[ULAB_MAX_DIMS - 3];\
+            (tarray) -= (tstrides)[ULAB_MAX_DIMS - 2] * (target)->shape[ULAB_MAX_DIMS - 2];\
+            (tarray) += (tstrides)[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j < (source)->shape[ULAB_MAX_DIMS - 3]);\
+        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 3] * (source)->shape[ULAB_MAX_DIMS - 3];\
+        (sarray) += (source)->strides[ULAB_MAX_DIMS - 4];\
+        (tarray) -= (tstrides)[ULAB_MAX_DIMS - 3] * (target)->shape[ULAB_MAX_DIMS - 3];\
+        (tarray) += (tstrides)[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (source)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 4 */
+#endif /* ULAB_HAS_FUNCTION_ITERATOR */
+
+/*
+ * Defines the static function vector_<py_name> with the positional/keyword calling
+ * convention; it forwards all of its arguments, together with the C math function
+ * MICROPY_FLOAT_C_FUN(c_name), to vector_generic_vector().
+ */
+#define MATH_FUN_1(py_name, c_name) \
+    static mp_obj_t vector_ ## py_name(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { \
+        return vector_generic_vector(n_args, pos_args, kw_args, MICROPY_FLOAT_C_FUN(c_name)); \
+}
+
+#else /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+
+#if ULAB_HAS_FUNCTION_ITERATOR
+/*
+ * Generic-dimension variant (no out keyword, ULAB_HAS_FUNCTION_ITERATOR set).
+ * Applies `f` -- a name taken from the expansion site -- to every element of `source`,
+ * read through `sarray` as `type`, and writes the results consecutively through `array`.
+ * After each pass over the last axis, ndarray_rewind_array() repositions `sarray`.
+ * The body is a GNU statement expression `({ ... })`.
+ * NOTE(review): `shift` is never used in the body, and the fixed-dimension variants
+ * below take only four arguments -- confirm that the call sites match this arity.
+ */
+#define ITERATE_VECTOR(type, array, source, sarray, shift)\
+({\
+    size_t *scoords = ndarray_new_coords((source)->ndim);\
+    for(size_t i=0; i < (source)->len / (source)->shape[ULAB_MAX_DIMS - 1]; i++) {\
+        for(size_t l = 0; l < (source)->shape[ULAB_MAX_DIMS - 1]; l++) {\
+            *(array) = f(*((type *)(sarray)));\
+            (array)++;\
+            (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+        }\
+        ndarray_rewind_array((source)->ndim, sarray, (source)->shape, (source)->strides, scoords);\
+    }\
+})
+
+#else 
+
+#if ULAB_MAX_DIMS == 1
+/*
+ * 1-D variant (no out keyword): applies `f` (taken from the expansion site) to each
+ * element of `source`, read through `sarray` as `type`, and stores the results
+ * consecutively through `array` (post-incremented on every write).
+ * The do/while form executes the body at least once.
+ */
+#define ITERATE_VECTOR(type, array, source, sarray) do {\
+    size_t l = 0;\
+    do {\
+        *(array)++ = f(*((type *)(sarray)));\
+        (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (source)->shape[ULAB_MAX_DIMS - 1]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 1 */
+
+#if ULAB_MAX_DIMS == 2
+/*
+ * 2-D variant (no out keyword): as the 1-D form, with an outer loop over the
+ * second-to-last axis. After each inner pass `sarray` is rewound by stride * shape of
+ * the last axis and advanced by the stride of the second-to-last axis; `array` is only
+ * ever post-incremented, so the output is written consecutively.
+ * Each do/while executes its body at least once.
+ */
+#define ITERATE_VECTOR(type, array, source, sarray) do {\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            *(array)++ = f(*((type *)(sarray)));\
+            (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+            l++;\
+        } while(l < (source)->shape[ULAB_MAX_DIMS - 1]);\
+        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1];\
+        (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\
+        k++;\
+    } while(k < (source)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+/*
+ * 3-D variant (no out keyword): three nested do/while loops (j, k, l from the outermost
+ * to the innermost axis) apply `f` (taken from the expansion site) to each element of
+ * `source`. On leaving a loop level `sarray` is rewound by stride * shape of that axis
+ * and advanced by the stride of the next outer axis; `array` is only ever
+ * post-incremented, so the output is written consecutively.
+ * Each do/while executes its body at least once.
+ */
+#define ITERATE_VECTOR(type, array, source, sarray) do {\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                *(array)++ = f(*((type *)(sarray)));\
+                (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+                l++;\
+            } while(l < (source)->shape[ULAB_MAX_DIMS - 1]);\
+            (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1];\
+            (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\
+            k++;\
+        } while(k < (source)->shape[ULAB_MAX_DIMS - 2]);\
+        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 2] * (source)->shape[ULAB_MAX_DIMS - 2];\
+        (sarray) += (source)->strides[ULAB_MAX_DIMS - 3];\
+        j++;\
+    } while(j < (source)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+/*
+ * 4-D variant (no out keyword): four nested do/while loops (i, j, k, l from the
+ * outermost to the innermost axis) apply `f` (taken from the expansion site) to each
+ * element of `source`. On leaving a loop level `sarray` is rewound by stride * shape of
+ * that axis and advanced by the stride of the next outer axis; `array` is only ever
+ * post-incremented, so the output is written consecutively.
+ * Each do/while executes its body at least once.
+ */
+#define ITERATE_VECTOR(type, array, source, sarray) do {\
+    size_t i = 0;\
+    do {\
+        size_t j = 0;\
+        do {\
+            size_t k = 0;\
+            do {\
+                size_t l = 0;\
+                do {\
+                    *(array)++ = f(*((type *)(sarray)));\
+                    (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\
+                    l++;\
+                } while(l < (source)->shape[ULAB_MAX_DIMS - 1]);\
+                (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1];\
+                (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\
+                k++;\
+            } while(k < (source)->shape[ULAB_MAX_DIMS - 2]);\
+            (sarray) -= (source)->strides[ULAB_MAX_DIMS - 2] * (source)->shape[ULAB_MAX_DIMS - 2];\
+            (sarray) += (source)->strides[ULAB_MAX_DIMS - 3];\
+            j++;\
+        } while(j < (source)->shape[ULAB_MAX_DIMS - 3]);\
+        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 3] * (source)->shape[ULAB_MAX_DIMS - 3];\
+        (sarray) += (source)->strides[ULAB_MAX_DIMS - 4];\
+        i++;\
+    } while(i < (source)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 4 */
+
+#endif /* ULAB_HAS_FUNCTION_ITERATOR */
+
+/*
+ * Defines the static one-argument function vector_<py_name>; it forwards its argument,
+ * together with the C math function MICROPY_FLOAT_C_FUN(c_name), to
+ * vector_generic_vector().
+ */
+#define MATH_FUN_1(py_name, c_name) \
+    static mp_obj_t vector_ ## py_name(mp_obj_t x_obj) { \
+        return vector_generic_vector(x_obj, MICROPY_FLOAT_C_FUN(c_name)); \
+}
+#endif /* ULAB_MATH_FUNCTIONS_OUT_KEYWORD */
+#endif /* _VECTOR_ */
diff --git a/tulip/shared/ulab/code/scipy/integrate/integrate.c b/tulip/shared/ulab/code/scipy/integrate/integrate.c
new file mode 100644
index 000000000..80def7113
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/integrate/integrate.c
@@ -0,0 +1,701 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2024 Harald Milz <hm@seneca.muc.de>
+ *
+ * References:
+ * - Dr. Robert van Engelen, Improving the Double Exponential Quadrature Tanh-Sinh, Sinh-Sinh and Exp-Sinh Formulas,
+ *   2021, https://www.genivia.com/qthsh.html 
+ * - Borwein, Bailey & Girgensohn, "Experimentation in Mathematics - Computational Paths to Discovery", A K Peters,
+ *   2003, pages 312-313
+ * - Joren Vanherck, Bart Sorée, Wim Magnus, Tanh-sinh quadrature for single and multiple integration using 
+ *   floating-point arithmetic, 2020, https://arxiv.org/abs/2007.15057
+ * - Tanh-Sinh quadrature, Wikipedia, https://en.wikipedia.org/wiki/Tanh-sinh_quadrature
+ * - Romberg's method, Wikipedia, https://en.wikipedia.org/wiki/Romberg%27s_method
+ * - Adaptive Simpson's method, Wikipedia, https://en.wikipedia.org/wiki/Adaptive_Simpson%27s_method
+ * - Gauss–Kronrod quadrature formula, Wikipedia, https://en.wikipedia.org/wiki/Gauss%E2%80%93Kronrod_quadrature_formula
+ *  
+ * This module provides four integration methods, and thus deviates from scipy.integrate a bit. 
+ * As for the pros and cons of the different methods please consult the literature above. 
+ * The code was ported to MicroPython from Dr. van Engelen's paper, and is used with his kind written permission.
+ * - quad    - Tanh-Sinh, Sinh-Sinh and Exp-Sinh quadrature
+ * - romberg - Romberg quadrature
+ * - simpson - Adaptive Simpson quadrature
+ * - quadgk  - Adaptive Gauss-Kronrod (G10,K21) quadrature
+ */
+
+#include <math.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+#include "py/objtuple.h"
+
+#include "../../ndarray.h"
+#include "../../ulab.h"
+#include "../../ulab_tools.h"
+#include "integrate.h"
+
+// Default tolerance (`etolerance`, the default of the `eps` keyword arguments below) and
+// MACHEPS for the active float implementation.
+// ULAB_DEFINE_FLOAT_CONST receives the value as a literal followed by two hex constants;
+// presumably these are the IEEE-754 binary32 and binary64 bit patterns used by the object
+// representations that pack floats into the object word (see ulab.h), so they have to
+// encode the same number as the literal:
+//   1e-14 -> 0x283424dc (binary32), 0x3d06849b86a12b9b (binary64)
+//   1e-8  -> 0x322bcc77 (binary32), 0x3e45798ee2308c3a (binary64)
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+ULAB_DEFINE_FLOAT_CONST(etolerance, MICROPY_FLOAT_CONST(1e-14), 0x283424dcUL, 0x3d06849b86a12b9bULL);
+#define MACHEPS        MICROPY_FLOAT_CONST(1e-17)
+#else
+ULAB_DEFINE_FLOAT_CONST(etolerance, MICROPY_FLOAT_CONST(1e-8), 0x322bcc77UL, 0x3e45798ee2308c3aULL);
+#define MACHEPS        MICROPY_FLOAT_CONST(1e-8)
+#endif
+
+// Float literals in the width of mp_float_t
+#define ZERO           MICROPY_FLOAT_CONST(0.0)
+#define POINT_TWO_FIVE MICROPY_FLOAT_CONST(0.25)
+#define ONE            MICROPY_FLOAT_CONST(1.0)
+#define TWO            MICROPY_FLOAT_CONST(2.0)
+#define FOUR           MICROPY_FLOAT_CONST(4.0)
+#define SIX            MICROPY_FLOAT_CONST(6.0)
+#define TEN            MICROPY_FLOAT_CONST(10.0)
+#define FIFTEEN        MICROPY_FLOAT_CONST(15.0)
+#define EPS_5          MICROPY_FLOAT_CONST(1e-5)
+
+
+static mp_float_t integrate_python_call(const mp_obj_type_t *type, mp_obj_t fun, mp_float_t x, mp_obj_t *fargs, uint8_t nparams) {
+    // Evaluates the Python callable as fun(x, a, b, c, ...) and converts the result to
+    // a C float.
+    //   type    - type object of `fun`; its `call` slot performs the invocation
+    //   fargs   - argument array supplied by the caller; slot 0 is overwritten with x,
+    //             the extra parameters (a, b, c, ...) are expected in the following slots
+    //   nparams - number of extra parameters, i.e. excluding x
+    fargs[0] = mp_obj_new_float(x);
+    mp_obj_t result = MP_OBJ_TYPE_GET_SLOT(type, call)(fun, nparams+1, 0, fargs);
+    return mp_obj_get_float(result);
+}
+
+// Sign of x as an int: 1 if x >= 0, -1 in every other case.
+int sign(mp_float_t x) {
+    return (x >= ZERO) ? 1 : -1;
+}
+
+
+#if ULAB_INTEGRATE_HAS_TANHSINH
+// Tanh-Sinh, Sinh-Sinh and Exp-Sinh quadrature
+// https://www.genivia.com/qthsh.html
+
+// return optimized Exp-Sinh integral split point d
+//
+//   fun - the Python callable to integrate (handed on to integrate_python_call)
+//   a   - the finite endpoint of the semi-infinite interval
+//   eps - threshold on the accumulated |h| below which d is left unchanged
+//   d   - initial split point; returned unchanged if no better value is found
+//
+// The search works on h(r) = f(a + d/r) - f(a + d*r)*r*r. Note that the factors 4 and
+// r*r scale the *function value*, not its argument.
+mp_float_t exp_sinh_opt_d(mp_float_t (*fun)(mp_float_t), mp_float_t a, mp_float_t eps, mp_float_t d) {
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    mp_obj_t fargs[1];
+    // h2 = f(a + d/2) - f(a + d*2)*4
+    mp_float_t h2 = integrate_python_call(type, fun, a + d/2, fargs, 0) - integrate_python_call(type, fun, a + d*2, fargs, 0) * FOUR;
+    int i = 1, j = 32;                   // j=32 is optimal to find r
+    if (isfinite(h2) && MICROPY_FLOAT_C_FUN(fabs)(h2) > EPS_5) {    // if |h2| > 2^-16
+        mp_float_t r, fl, fr, h, s = 0, lfl, lfr, lr = 2;
+        do {                                  // find max j such that fl and fr are finite
+            j /= 2;
+            r = 1 << (i + j);
+            fl = integrate_python_call(type, fun, a + d/r, fargs, 0);
+            fr = integrate_python_call(type, fun, a + d*r, fargs, 0) * r * r;
+            h = fl - fr;
+        } while (j > 1 && !isfinite(h));
+        if (j > 1 && isfinite(h) && sign(h) != sign(h2)) {
+            lfl = fl;                         // last fl=f(a+d/r)
+            lfr = fr;                         // last fr=f(a+d*r)*r*r
+            do {                              // bisect in 4 iterations
+                j /= 2;
+                r = 1 << (i + j);
+                fl = integrate_python_call(type, fun, a + d/r, fargs, 0);
+                fr = integrate_python_call(type, fun, a + d*r, fargs, 0) * r * r;
+                h = fl - fr;
+                if (isfinite(h)) {
+                    s += MICROPY_FLOAT_C_FUN(fabs)(h);  // sum |h| to remove noisy cases
+                    if (sign(h) == sign(h2)) {
+                        i += j;               // search right half
+                    }
+                    else {                    // search left half
+                        lfl = fl;             // record last fl=f(a+d/r)
+                        lfr = fr;             // record last fr=f(a+d*r)*r*r
+                        lr = r;               // record last r
+                    }
+                }
+            } while (j > 1);
+            if (s > eps) {                    // if sum of |h| > eps
+                h = lfl - lfr;                // use last fl and fr before the sign change
+                r = lr;                       // use last r before the sign change
+                if (h != ZERO)                   // if last diff != 0, back up r by one step
+                    r /= TWO;
+                if (MICROPY_FLOAT_C_FUN(fabs)(lfl) < MICROPY_FLOAT_C_FUN(fabs)(lfr))
+                    d /= r;                   // move d closer to the finite endpoint
+                else
+                    d *= r;                   // move d closer to the infinite endpoint
+            }
+        }
+    }
+    return d;
+}
+
+
+// integrate function f, range a..b, max levels n, error tolerance eps
+//
+// The quadrature rule is selected from the finiteness of the bounds:
+//   both finite   -> Tanh-Sinh (mode 0)
+//   one finite    -> Exp-Sinh  (mode 1), split point refined by exp_sinh_opt_d()
+//   none finite   -> Sinh-Sinh (mode 2)
+// If b < a, the bounds are swapped, and the sign of the result is flipped.
+// Refinement stops once |v| <= 10*eps*|s|, or when k exceeds n.
+// Returns sign*d*s*h; the error estimate |v|/(|s|+eps) is written to *e.
+// NOTE(review): `fun` is declared as a C function pointer, but it is handed to
+// mp_obj_get_type() and integrate_python_call() like an mp_obj_t -- confirm that the
+// declared type is intentional.
+mp_float_t tanhsinh(mp_float_t (*fun)(mp_float_t), mp_float_t a, mp_float_t b, uint16_t n, mp_float_t eps, mp_float_t *e) {
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    mp_obj_t fargs[1];
+    const mp_float_t tol = TEN*eps;
+    // the local `sign` shadows the sign() helper inside this function
+    mp_float_t c = ZERO, d = ONE, s, sign = ONE, v, h = TWO;
+    int k = 0, mode = 0;                   // Tanh-Sinh = 0, Exp-Sinh = 1, Sinh-Sinh = 2
+    if (b < a) {                                // swap bounds
+        v = b;
+        b = a;
+        a = v;
+        sign = -1;
+    }
+    if (isfinite(a) && isfinite(b)) {
+        c = (a+b)/TWO;                          // midpoint
+        d = (b-a)/TWO;                          // half-width
+        v = c;
+    }
+    else if (isfinite(a)) {
+        mode = 1;                               // Exp-Sinh
+        d = exp_sinh_opt_d(fun, a, eps, d);
+        c = a;
+        v = a+d;
+    }
+    else if (isfinite(b)) {
+        mode = 1;                               // Exp-Sinh
+        // d = -d;
+        d = exp_sinh_opt_d(fun, b, eps, -d);
+        sign = -sign;
+        c = b;
+        v = b+d;
+    }
+    else {
+        mode = 2;                               // Sinh-Sinh
+        v = ZERO;
+    }
+    s = integrate_python_call(type, fun, v, fargs, 0);  // first sample, taken at v
+    do {
+        mp_float_t p = ZERO, q, fp = ZERO, fm = ZERO, t, eh;
+        h /= TWO;                               // halve the step on each level
+        t = eh = MICROPY_FLOAT_C_FUN(exp)(h);
+        if (k > ZERO)
+           eh *= eh;
+        if (mode == 0) {                        // Tanh-Sinh
+            do {
+                mp_float_t u = MICROPY_FLOAT_C_FUN(exp)(ONE / t - t); // = exp(-2*sinh(j*h)) = 1/exp(sinh(j*h))^2
+                mp_float_t r = TWO * u / (ONE + u);                   // = 1 - tanh(sinh(j*h))
+                mp_float_t w = (t + ONE / t) * r / (ONE + u);         // = cosh(j*h)/cosh(sinh(j*h))^2
+                mp_float_t x = d*r;
+                if (a+x > a) {                  // if too close to a then reuse previous fp
+                    mp_float_t y = integrate_python_call(type, fun, a+x, fargs, 0);
+                    if (isfinite(y))
+                        fp = y;                 // if f(x) is finite, add to local sum
+                }
+                if (b-x < b) {                  // if too close to b then reuse previous fm
+                    mp_float_t y = integrate_python_call(type, fun, b-x, fargs, 0);
+                    if (isfinite(y))
+                        fm = y;                 // if f(x) is finite, add to local sum
+                }
+                q = w*(fp+fm);
+                p += q;
+                t *= eh;
+            } while (MICROPY_FLOAT_C_FUN(fabs)(q) > eps*MICROPY_FLOAT_C_FUN(fabs)(p));
+        }
+        else {
+            t /= TWO;
+            do {
+                mp_float_t r = MICROPY_FLOAT_C_FUN(exp)(t - POINT_TWO_FIVE / t);    // = exp(sinh(j*h))
+                mp_float_t x, y, w = r;
+                q = ZERO;
+                if (mode == 1) {                // Exp-Sinh
+                    x = c + d/r;
+                    if (x == c)                 // if x hit the finite endpoint then break
+                        break;
+                    y = integrate_python_call(type, fun, x, fargs, 0);
+                    if (isfinite(y))    // if f(x) is finite, add to local sum
+                        q += y/w;
+                }
+                else {                          // Sinh-Sinh
+                    r = (r - ONE / r) / TWO;              // = sinh(sinh(j*h))
+                    w = (w + ONE / w) / TWO;              // = cosh(sinh(j*h))
+                    x = c - d*r;
+                    y = integrate_python_call(type, fun, x, fargs, 0);
+                    if (isfinite(y))    // if f(x) is finite, add to local sum
+                        q += y*w;
+                }
+                x = c + d*r;                    // second sample, taken in both modes
+                y = integrate_python_call(type, fun, x, fargs, 0);
+                if (isfinite(y))        // if f(x) is finite, add to local sum
+                    q += y*w;
+                q *= t + POINT_TWO_FIVE / t;                   // q *= cosh(j*h)
+                p += q;
+                t *= eh;
+            } while (MICROPY_FLOAT_C_FUN(fabs)(q) > eps*MICROPY_FLOAT_C_FUN(fabs)(p));
+        }
+        v = s-p;                                // difference used for the convergence test
+        s += p;
+        ++k;
+    } while (MICROPY_FLOAT_C_FUN(fabs)(v) > tol*MICROPY_FLOAT_C_FUN(fabs)(s) && k <= n);
+    // return the error estimate by reference
+    *e = MICROPY_FLOAT_C_FUN(fabs)(v)/(MICROPY_FLOAT_C_FUN(fabs)(s)+eps);
+    return sign*d*s*h;                          // result with estimated relative error e
+}
+
+//| def tanhsinh(
+//|     fun: Callable[[float], float],
+//|     a: float,
+//|     b: float,
+//|     *,
+//|     levels: int = 6,
+//|     eps: float = etolerance
+//| ) -> Tuple[float, float]:
+//|     """
+//|     :param callable fun: The function to integrate
+//|     :param float a: The lower integration limit
+//|     :param float b: The upper integration limit
+//|     :param int levels: The number of levels to perform (6..7 is optimal)
+//|     :param float eps: The error tolerance value
+//|
+//|     Find a quadrature of the function ``fun(x)`` on the interval
+//|     (``a``..``b``) using an optimized double exponential.  The result is accurate to within
+//|     ``eps`` unless more than ``levels`` levels are required.
+//|
+//|     Returns the tuple ``(value, error)``, where ``error`` is the estimated relative error."""
+//|     ...
+//|
+
+
+static mp_obj_t integrate_tanhsinh(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Python entry point: parses and validates the arguments, runs tanhsinh(), and
+    // returns the 2-tuple (integral, error estimate).
+    // Raises TypeError if `fun` is not callable, or if a, b, or eps is neither float
+    // nor int; raises ValueError if levels < 1.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_levels, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 6} },
+        { MP_QSTR_eps, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(etolerance)} },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t fun = args[0].u_obj;
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a callable"));
+    }
+
+    // iterate over args 1, 2, and 4 (i doubles on each pass)
+    // arg 3 will be handled by MP_ARG_INT above.
+    for (int i=1; i<=4; i*=2) {
+        type = mp_obj_get_type(args[i].u_obj);
+        if (type != &mp_type_float && type != &mp_type_int) {
+            mp_raise_msg_varg(&mp_type_TypeError,
+                MP_ERROR_TEXT("can't convert arg %d from %s to float"), i, mp_obj_get_type_str(args[i].u_obj));
+        }
+    }
+    mp_float_t a = mp_obj_get_float(args[1].u_obj);
+    mp_float_t b = mp_obj_get_float(args[2].u_obj);
+    // Validate before narrowing: a negative value cast to uint16_t would wrap around
+    // to a large positive count, and slip through the check.
+    mp_int_t levels = args[3].u_int;
+    if (levels < 1) {
+        mp_raise_ValueError(MP_ERROR_TEXT("levels needs to be a positive integer"));
+    }
+    // values beyond the uint16_t range are clamped instead of being truncated
+    uint16_t n = levels > UINT16_MAX ? UINT16_MAX : (uint16_t)levels;
+    mp_float_t eps = mp_obj_get_float(args[4].u_obj);
+
+    mp_obj_t res[2];
+    mp_float_t e;
+    res[0] = mp_obj_new_float(tanhsinh(fun, a, b, n, eps, &e));
+    res[1] = mp_obj_new_float(e);
+    return mp_obj_new_tuple(2, res);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(integrate_tanhsinh_obj, 2, integrate_tanhsinh);
+#endif /* ULAB_INTEGRATE_HAS_TANHSINH */
+
+#if ULAB_INTEGRATE_HAS_ROMBERG
+// Romberg quadrature
+// This function is deprecated as of SciPy 1.12.0 and will be removed in SciPy 1.15.0. Please use scipy.integrate.quad instead. 
+// https://en.wikipedia.org/wiki/Romberg%27s_method, https://www.genivia.com/qthsh.html, 
+// https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.romberg.html (which is different 
+// insofar as the latter expects an array of function values). 
+
+// Romberg quadrature of the Python callable `fun` over a..b.
+//   n   - maximum number of levels; the step size is halved at each level
+//   eps - tolerance: the iteration stops early (after level 2) once
+//         |Ro[i-1] - Ru[i]| <= eps*|Ru[i]| + eps
+// Returns the most refined estimate that was reached.
+mp_float_t qromb(mp_float_t (*fun)(mp_float_t), mp_float_t a, mp_float_t b, uint16_t n, mp_float_t eps) {
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    mp_obj_t fargs[1];
+    // Level i evaluates the function at 2^(i-1) new points, so levels beyond the width
+    // of the sample counter can never be reached. Capping n keeps the shift below
+    // well-defined, and bounds the stack space taken by the two row buffers; n == 0
+    // would leave the buffers empty.
+    const uint16_t max_levels = (uint16_t)(8 * sizeof(unsigned long long) - 1);
+    if (n < 1)
+        n = 1;
+    if (n > max_levels)
+        n = max_levels;
+    mp_float_t R1[n], R2[n];
+    mp_float_t *Ro = &R1[0], *Ru = &R2[0];     // previous and current row of the tableau
+    mp_float_t h = b-a;
+    uint16_t i, j;
+    Ro[0] = (integrate_python_call(type, fun, a, fargs, 0) + integrate_python_call(type, fun, b, fargs, 0)) * h/2;
+    for (i = 1; i < n; ++i) {
+        unsigned long long k = 1ULL << i;
+        unsigned long long m;
+        // powers of 4 are kept in floating point: they are exactly representable, and
+        // cannot wrap around the way a 64-bit integer does at 4^32
+        mp_float_t s = ONE;
+        mp_float_t sum = ZERO;
+        mp_float_t *Rt;
+        h /= TWO;
+        // the counter has the width of k; a 16-bit counter would wrap around, and never
+        // reach k, as soon as k exceeds 65535
+        for (m = 1; m < k; m += 2)
+            sum += integrate_python_call(type, fun, a+m*h, fargs, 0);
+        Ru[0] = h*sum + Ro[0] / TWO;
+        for (j = 1; j <= i; ++j) {
+            s *= FOUR;
+            Ru[j] = (s*Ru[j-1] - Ro[j-1])/(s - ONE);
+        }
+        if (i > 2 && MICROPY_FLOAT_C_FUN(fabs)(Ro[i-1]-Ru[i]) <= eps*MICROPY_FLOAT_C_FUN(fabs)(Ru[i])+eps)
+            return Ru[i];
+        Rt = Ro;        // swap the rows for the next level
+        Ro = Ru;
+        Ru = Rt;
+    }
+    return Ro[n-1];
+}
+
+//| def romberg(
+//|     fun: Callable[[float], float],
+//|     a: float,
+//|     b: float,
+//|     *,
+//|     steps: int = 100,
+//|     eps: float = etolerance
+//| ) -> float:
+//|     """
+//|     :param callable fun: The function to integrate
+//|     :param float a: The lower integration limit
+//|     :param float b: The upper integration limit
+//|     :param int steps: The maximum number of refinement levels (the step size is halved at each level)
+//|     :param float eps: The tolerance value
+//|
+//|     Find a quadrature of the function ``fun(x)`` on the interval
+//|     (``a``..``b``) using the Romberg method.  The result is accurate to within
+//|     ``eps`` unless more than ``steps`` steps are required."""
+//|     ...
+//|
+
+static mp_obj_t integrate_romberg(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Python entry point: parses and validates the arguments, and returns the result of
+    // qromb() as a float.
+    // Raises TypeError if `fun` is not callable, or if a, b, or eps is neither float
+    // nor int; raises ValueError if steps < 1.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_steps, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 100} },
+        { MP_QSTR_eps, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(etolerance)} },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t fun = args[0].u_obj;
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a callable"));
+    }
+
+    // iterate over args 1, 2, and 4 (i doubles on each pass)
+    // arg 3 will be handled by MP_ARG_INT above.
+    for (int i=1; i<=4; i*=2) {
+        type = mp_obj_get_type(args[i].u_obj);
+        if (type != &mp_type_float && type != &mp_type_int) {
+            mp_raise_msg_varg(&mp_type_TypeError,
+                MP_ERROR_TEXT("can't convert arg %d from %s to float"), i, mp_obj_get_type_str(args[i].u_obj));
+        }
+    }
+    mp_float_t a = mp_obj_get_float(args[1].u_obj);
+    mp_float_t b = mp_obj_get_float(args[2].u_obj);
+    // Validate before narrowing: a negative value cast to uint16_t would wrap around
+    // to a large positive count, and slip through the check.
+    mp_int_t requested = args[3].u_int;
+    if (requested < 1) {
+        mp_raise_ValueError(MP_ERROR_TEXT("steps needs to be a positive integer"));
+    }
+    // values beyond the uint16_t range are clamped instead of being truncated
+    uint16_t steps = requested > UINT16_MAX ? UINT16_MAX : (uint16_t)requested;
+    mp_float_t eps = mp_obj_get_float(args[4].u_obj);
+
+    return mp_obj_new_float(qromb(fun, a, b, steps, eps));
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(integrate_romberg_obj, 2, integrate_romberg);
+#endif /* ULAB_INTEGRATE_HAS_ROMBERG */
+
+#if ULAB_INTEGRATE_HAS_SIMPSON
+// Adaptive Simpson quadrature
+// https://en.wikipedia.org/wiki/Adaptive_Simpson%27s_method, https://www.genivia.com/qthsh.html
+
+// One recursive step of the adaptive Simpson rule on a..b.
+//   fa, fm, fb - function values at a, at the midpoint, and at b
+//   v          - Simpson estimate over a..b from the level above
+//   eps        - tolerance for this interval; halved for each sub-interval
+//   n          - remaining recursion depth
+//   t          - sum accumulated so far; the contribution of a..b is added to it
+mp_float_t as(mp_float_t (*fun)(mp_float_t), mp_float_t a, mp_float_t b, mp_float_t fa, mp_float_t fm,
+              mp_float_t fb, mp_float_t v, mp_float_t eps, int n, mp_float_t t) {
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    mp_obj_t fargs[1];
+    const mp_float_t h = (b-a) / TWO;
+    const mp_float_t quarter = h / TWO;
+    // sample the midpoints of the left and of the right half, in this order
+    const mp_float_t f1 = integrate_python_call(type, fun, a + quarter, fargs, 0);
+    const mp_float_t f2 = integrate_python_call(type, fun, b - quarter, fargs, 0);
+    const mp_float_t m = a+h;
+    const mp_float_t sl = h*(fa + FOUR * f1 + fm) / SIX;    // Simpson estimate, left half
+    const mp_float_t sr = h*(fm + FOUR * f2 + fb) / SIX;    // Simpson estimate, right half
+    const mp_float_t s = sl+sr;
+    const mp_float_t d = (s-v) / FIFTEEN;
+    if (n > 0 && !(MICROPY_FLOAT_C_FUN(fabs)(d) < eps)) {
+        // not yet converged, and depth left: refine the left half, then the right half
+        const mp_float_t half_eps = eps / TWO;
+        const int depth = n - 1;
+        const mp_float_t left = as(fun, a, m, fa, f1, fm, sl, half_eps, depth, t);
+        return as(fun, m, b, fm, f2, fb, sr, half_eps, depth, left);
+    }
+    return t + s + d; // note: fabs(d) can be used as error estimate
+}
+
+// Adaptive Simpson quadrature of the Python callable `fun` over a..b: evaluates the
+// function at both ends and at the midpoint, forms the first Simpson estimate, and
+// hands the refinement over to as() with recursion depth n, tolerance eps, and a
+// running sum of 0.
+mp_float_t qasi(mp_float_t (*fun)(mp_float_t), mp_float_t a, mp_float_t b, int n, mp_float_t eps) {
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    mp_obj_t fargs[1];
+    const mp_float_t f_left = integrate_python_call(type, fun, a, fargs, 0);
+    const mp_float_t f_mid = integrate_python_call(type, fun, (a+b)/2, fargs, 0);
+    const mp_float_t f_right = integrate_python_call(type, fun, b, fargs, 0);
+    const mp_float_t whole = (f_left + FOUR * f_mid + f_right) * (b-a) / SIX;
+    return as(fun, a, b, f_left, f_mid, f_right, whole, eps, n, 0);
+}
+
+//| def simpson(
+//|     fun: Callable[[float], float],
+//|     a: float,
+//|     b: float,
+//|     *,
+//|     steps: int = 100,
+//|     eps: float = etolerance
+//| ) -> float:
+//|     """
+//|     :param callable f: The function to integrate
+//|     :param float a: The lower integration limit
+//|     :param float b: The upper integration limit
+//|     :param int steps: The maximum number of interval subdivision levels
+//|     :param float eps: The tolerance value
+//|
+//|     Find a quadrature of the function ``f(x)`` on the interval
+//|     (``a``..``b``) using the Adaptive Simpson's method.  The result is accurate to within
+//|     ``eps`` unless more than ``steps`` steps are required."""
+//|     ...
+//|
+
+// Python entry point for simpson(fun, a, b, *, steps=100, eps=etolerance).
+//
+// Positional arguments: the callable to integrate and the two integration limits.
+// Keyword-only arguments: steps (integer, passed to qasi() as the subdivision limit)
+// and eps (tolerance).
+//
+// Raises TypeError if fun has no call slot, or if a, b or eps is neither int nor float.
+// Raises ValueError if steps is smaller than 1.
+// Returns the value computed by qasi() as a Python float.
+static mp_obj_t integrate_simpson(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_steps, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 100} },
+        { MP_QSTR_eps, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(etolerance)} },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t fun = args[0].u_obj;
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a function"));
+    }
+
+    // iterate over args 1, 2, and 4 (the index doubles on every pass: 1 -> 2 -> 4)
+    // arg 3 will be handled by MP_ARG_INT above.
+    for (int i=1; i<=4; i*=2) {
+        type = mp_obj_get_type(args[i].u_obj);
+        if (type != &mp_type_float && type != &mp_type_int) {
+            mp_raise_msg_varg(&mp_type_TypeError,
+                MP_ERROR_TEXT("can't convert arg %d from %s to float"), i, mp_obj_get_type_str(args[i].u_obj));
+        }
+    }
+    mp_float_t a = mp_obj_get_float(args[1].u_obj);
+    mp_float_t b = mp_obj_get_float(args[2].u_obj);
+
+    // Range-check the signed value *before* narrowing it. Casting first would turn
+    // a negative steps into a large positive number (e.g. -1 -> 65535) that slips
+    // past the check, and would turn 65536 into 0.
+    mp_int_t steps_arg = args[3].u_int;
+    if (steps_arg < 1) {
+        mp_raise_ValueError(MP_ERROR_TEXT("steps needs to be a positive integer"));
+    }
+    // values beyond the 16-bit range are clamped instead of being wrapped around
+    uint16_t steps = steps_arg > UINT16_MAX ? UINT16_MAX : (uint16_t)steps_arg;
+    mp_float_t eps = mp_obj_get_float(args[4].u_obj);
+
+    return mp_obj_new_float(qasi(fun, a, b, steps, eps));
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(integrate_simpson_obj, 2, integrate_simpson);
+#endif /* ULAB_INTEGRATE_HAS_SIMPSON */
+
+#if ULAB_INTEGRATE_HAS_QUAD
+// Adaptive Gauss-Kronrod (G10,K21) quadrature
+// https://en.wikipedia.org/wiki/Gauss%E2%80%93Kronrod_quadrature_formula, https://www.genivia.com/qthsh.html
+
+// Evaluate one Gauss-Kronrod panel centred on c with half-width d.
+//
+// The integrand is sampled at c and at c +/- d * abscissas[i] for i = 1..20.
+// Every sample contributes to the Kronrod sum p; only the odd-indexed abscissas
+// also contribute to the embedded Gauss sum q.
+//
+// Returns p, which is NOT yet multiplied by the half-width d (qakro() does that).
+// *err receives |p - q|, but never less than |2 * p * MACHEPS|.
+//
+// NOTE(review): 21 tabulated abscissas mirrored about the centre give 41 samples,
+// 20 of which feed the Gauss sum, although the section heading says (G10,K21) -- confirm.
+mp_float_t gk(mp_float_t (*fun)(mp_float_t), mp_float_t c, mp_float_t d, mp_float_t *err) {
+// abscissas and weights pre-calculated with Legendre Stieltjes polynomials
+    // non-negative abscissas on the reference interval; index 0 is the centre
+    static const mp_float_t abscissas[21] = {
+        MICROPY_FLOAT_CONST(0.00000000000000000e+00),
+        MICROPY_FLOAT_CONST(7.65265211334973338e-02),
+        MICROPY_FLOAT_CONST(1.52605465240922676e-01),
+        MICROPY_FLOAT_CONST(2.27785851141645078e-01),
+        MICROPY_FLOAT_CONST(3.01627868114913004e-01),
+        MICROPY_FLOAT_CONST(3.73706088715419561e-01),
+        MICROPY_FLOAT_CONST(4.43593175238725103e-01),
+        MICROPY_FLOAT_CONST(5.10867001950827098e-01),
+        MICROPY_FLOAT_CONST(5.75140446819710315e-01),
+        MICROPY_FLOAT_CONST(6.36053680726515025e-01),
+        MICROPY_FLOAT_CONST(6.93237656334751385e-01),
+        MICROPY_FLOAT_CONST(7.46331906460150793e-01),
+        MICROPY_FLOAT_CONST(7.95041428837551198e-01),
+        MICROPY_FLOAT_CONST(8.39116971822218823e-01),
+        MICROPY_FLOAT_CONST(8.78276811252281976e-01),
+        MICROPY_FLOAT_CONST(9.12234428251325906e-01),
+        MICROPY_FLOAT_CONST(9.40822633831754754e-01),
+        MICROPY_FLOAT_CONST(9.63971927277913791e-01),
+        MICROPY_FLOAT_CONST(9.81507877450250259e-01),
+        MICROPY_FLOAT_CONST(9.93128599185094925e-01),
+        MICROPY_FLOAT_CONST(9.98859031588277664e-01),
+    };
+    // Kronrod weights, one per entry of abscissas[]
+    static const mp_float_t weights[21] = {
+        MICROPY_FLOAT_CONST(7.66007119179996564e-02),
+        MICROPY_FLOAT_CONST(7.63778676720807367e-02),
+        MICROPY_FLOAT_CONST(7.57044976845566747e-02),
+        MICROPY_FLOAT_CONST(7.45828754004991890e-02),
+        MICROPY_FLOAT_CONST(7.30306903327866675e-02),
+        MICROPY_FLOAT_CONST(7.10544235534440683e-02),
+        MICROPY_FLOAT_CONST(6.86486729285216193e-02),
+        MICROPY_FLOAT_CONST(6.58345971336184221e-02),
+        MICROPY_FLOAT_CONST(6.26532375547811680e-02),
+        MICROPY_FLOAT_CONST(5.91114008806395724e-02),
+        MICROPY_FLOAT_CONST(5.51951053482859947e-02),
+        MICROPY_FLOAT_CONST(5.09445739237286919e-02),
+        MICROPY_FLOAT_CONST(4.64348218674976747e-02),
+        MICROPY_FLOAT_CONST(4.16688733279736863e-02),
+        MICROPY_FLOAT_CONST(3.66001697582007980e-02),
+        MICROPY_FLOAT_CONST(3.12873067770327990e-02),
+        MICROPY_FLOAT_CONST(2.58821336049511588e-02),
+        MICROPY_FLOAT_CONST(2.03883734612665236e-02),
+        MICROPY_FLOAT_CONST(1.46261692569712530e-02),
+        MICROPY_FLOAT_CONST(8.60026985564294220e-03),
+        MICROPY_FLOAT_CONST(3.07358371852053150e-03),
+    };
+    // Gauss weights for the odd-indexed abscissas: gauss_weights[k] pairs with abscissas[2*k + 1]
+    static const mp_float_t gauss_weights[10] = {
+        MICROPY_FLOAT_CONST(1.52753387130725851e-01),
+        MICROPY_FLOAT_CONST(1.49172986472603747e-01),
+        MICROPY_FLOAT_CONST(1.42096109318382051e-01),
+        MICROPY_FLOAT_CONST(1.31688638449176627e-01),
+        MICROPY_FLOAT_CONST(1.18194531961518417e-01),
+        MICROPY_FLOAT_CONST(1.01930119817240435e-01),
+        MICROPY_FLOAT_CONST(8.32767415767047487e-02),
+        MICROPY_FLOAT_CONST(6.26720483341090636e-02),
+        MICROPY_FLOAT_CONST(4.06014298003869413e-02),
+        MICROPY_FLOAT_CONST(1.76140071391521183e-02),
+    };
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    mp_obj_t fargs[1];
+    mp_float_t p = ZERO; // kronrod quadrature sum
+    mp_float_t q = ZERO; // gauss quadrature sum
+    mp_float_t fp, fm;
+    mp_float_t e;
+    int i;
+    // the centre point is sampled once and enters the Kronrod sum only
+    fp = integrate_python_call(type, fun, c, fargs, 0);
+    p = fp * weights[0];
+    // odd indices: symmetric pairs shared by the Kronrod and the Gauss sums
+    for (i = 1; i < 21; i += 2) {
+        fp = integrate_python_call(type, fun, c + d * abscissas[i], fargs, 0);
+        fm = integrate_python_call(type, fun, c - d * abscissas[i], fargs, 0);
+        p += (fp + fm) * weights[i];
+        q += (fp + fm) * gauss_weights[i/2];
+    }
+    // even indices: symmetric pairs that enter the Kronrod sum only
+    for (i = 2; i < 21; i += 2) {
+        fp = integrate_python_call(type, fun, c + d * abscissas[i], fargs, 0);
+        fm = integrate_python_call(type, fun, c - d * abscissas[i], fargs, 0);
+        p += (fp + fm) * weights[i];
+    }
+    // error estimate: difference between the two rules, floored at the rounding level
+    *err = MICROPY_FLOAT_C_FUN(fabs)(p - q);
+    e = MICROPY_FLOAT_C_FUN(fabs)(2 * p * MACHEPS); // optional, to take 1e-17 MachEps prec. into account
+    if (*err < e)
+        *err = e;
+    return p;
+}
+
+// Adaptive Gauss-Kronrod driver for the interval [a, b].
+//   n   : remaining number of bisection levels
+//   tol : relative tolerance; when it is zero, the scaled tolerance of this interval is used instead
+//   eps : only forwarded to the recursive calls, not otherwise read here
+//   err : receives the error estimate (the sum over both halves when the interval was split)
+// Returns the integral estimate for [a, b].
+mp_float_t qakro(mp_float_t (*fun)(mp_float_t), mp_float_t a, mp_float_t b, int n, mp_float_t tol, mp_float_t eps, mp_float_t *err) {
+    const mp_float_t centre = (a+b) / TWO;
+    const mp_float_t half_width = (b-a) / TWO;
+    mp_float_t panel_err;
+
+    // gk() returns the unscaled panel sum; scale it by the half-width
+    const mp_float_t panel_sum = gk(fun, centre, half_width, &panel_err);
+    const mp_float_t estimate = half_width*panel_sum;
+    const mp_float_t scaled_tol = MICROPY_FLOAT_C_FUN(fabs)(estimate*tol);
+    if (tol == ZERO) {
+        tol = scaled_tol;
+    }
+
+    // accept the panel when no levels are left or either tolerance is not exceeded
+    if (n <= 0 || !(scaled_tol < panel_err) || !(tol < panel_err)) {
+        *err = panel_err;
+        return estimate;
+    }
+
+    // otherwise bisect: the left half writes *err, the right half reuses panel_err
+    mp_float_t total = qakro(fun, a, centre, n-1, scaled_tol / TWO, eps, err);
+    total += qakro(fun, centre, b, n-1, scaled_tol / TWO, eps, &panel_err);
+    *err += panel_err;
+    return total;
+}
+
+
+//| def quad(
+//|     fun: Callable[[float], float],
+//|     a: float,
+//|     b: float,
+//|     *,
+//|     order: int = 5,
+//|     eps: float = etolerance
+//| ) -> Tuple[float, float]:
+//|     """
+//|     :param callable f: The function to integrate
+//|     :param float a: The lower integration limit
+//|     :param float b: The upper integration limit
+//|     :param int order: The maximum number of interval bisection levels. Default is 5.
+//|     :param float eps: The tolerance value
+//|
+//|     Find a quadrature of the function ``f(x)`` on the interval
+//|     (``a``..``b``) using the Adaptive Gauss-Kronrod method.  The result is accurate to within
+//|     ``eps`` unless a higher order than ``order`` is required."""
+//|     ...
+//|
+
+// Python entry point for quad(fun, a, b, *, order=5, eps=etolerance).
+//
+// Positional arguments: the callable to integrate and the two integration limits.
+// Keyword-only arguments: order (integer, passed to qakro() as the bisection limit)
+// and eps (tolerance, forwarded to qakro()).
+//
+// Raises TypeError if fun has no call slot, or if a, b or eps is neither int nor float.
+// Raises ValueError if order is smaller than 1.
+// Returns a 2-tuple of Python floats: (integral estimate, error estimate).
+static mp_obj_t integrate_quad(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_order, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 5} },
+        { MP_QSTR_eps, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(etolerance)} },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t fun = args[0].u_obj;
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a callable"));
+    }
+
+    // iterate over args 1, 2, and 4 (the index doubles on every pass: 1 -> 2 -> 4)
+    // arg 3 will be handled by MP_ARG_INT above.
+    for (int i=1; i<=4; i*=2) {
+        type = mp_obj_get_type(args[i].u_obj);
+        if (type != &mp_type_float && type != &mp_type_int) {
+            mp_raise_msg_varg(&mp_type_TypeError,
+                MP_ERROR_TEXT("can't convert arg %d from %s to float"), i, mp_obj_get_type_str(args[i].u_obj));
+        }
+    }
+    mp_float_t a = mp_obj_get_float(args[1].u_obj);
+    mp_float_t b = mp_obj_get_float(args[2].u_obj);
+
+    // Range-check the signed value *before* narrowing it. Casting first would turn
+    // a negative order into a large positive number (e.g. -1 -> 65535) that slips
+    // past the check, and would turn 65536 into 0.
+    mp_int_t order_arg = args[3].u_int;
+    if (order_arg < 1) {
+        mp_raise_ValueError(MP_ERROR_TEXT("order needs to be a positive integer"));
+    }
+    // values beyond the 16-bit range are clamped instead of being wrapped around
+    uint16_t order = order_arg > UINT16_MAX ? UINT16_MAX : (uint16_t)order_arg;
+    mp_float_t eps = mp_obj_get_float(args[4].u_obj);
+
+    // res[0] is the integral, res[1] the error estimate written by qakro()
+    mp_obj_t res[2];
+    mp_float_t e;
+    res[0] = mp_obj_new_float(qakro(fun, a, b, order, 0, eps, &e));
+    res[1] = mp_obj_new_float(e);
+    return mp_obj_new_tuple(2, res);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(integrate_quad_obj, 2, integrate_quad);
+#endif /* ULAB_INTEGRATE_HAS_QUAD */
+
+// Attribute table of the integrate module: the module name plus one entry per
+// integration routine, each compiled in only when its ULAB_INTEGRATE_HAS_* switch is set.
+static const mp_rom_map_elem_t ulab_scipy_integrate_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_integrate) },
+#if ULAB_INTEGRATE_HAS_TANHSINH
+    { MP_ROM_QSTR(MP_QSTR_tanhsinh), MP_ROM_PTR(&integrate_tanhsinh_obj) },
+#endif
+#if ULAB_INTEGRATE_HAS_ROMBERG	
+    { MP_ROM_QSTR(MP_QSTR_romberg), MP_ROM_PTR(&integrate_romberg_obj) },
+#endif	
+#if ULAB_INTEGRATE_HAS_SIMPSON	
+    { MP_ROM_QSTR(MP_QSTR_simpson), MP_ROM_PTR(&integrate_simpson_obj) },
+#endif
+#if ULAB_INTEGRATE_HAS_QUAD	
+    { MP_ROM_QSTR(MP_QSTR_quad), MP_ROM_PTR(&integrate_quad_obj) },
+#endif	
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_scipy_integrate_globals, ulab_scipy_integrate_globals_table);
+
+// The module object itself; its globals dictionary is the table above.
+const mp_obj_module_t ulab_scipy_integrate_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_scipy_integrate_globals,
+};
+// Registration under the name ulab.scipy.integrate happens only when CIRCUITPY_ULAB is set.
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_scipy_dot_integrate, ulab_scipy_integrate_module);
+#endif
+
diff --git a/tulip/shared/ulab/code/scipy/integrate/integrate.h b/tulip/shared/ulab/code/scipy/integrate/integrate.h
new file mode 100644
index 000000000..ebfac2ea6
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/integrate/integrate.h
@@ -0,0 +1,34 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2024 Harald Milz <hm@seneca.muc.de>
+ *
+*/
+
+#ifndef _SCIPY_INTEGRATE_
+#define _SCIPY_INTEGRATE_
+
+#include "../../ulab_tools.h"
+
+// Module object of ulab.scipy.integrate; defined in integrate.c.
+extern const mp_obj_module_t ulab_scipy_integrate_module;
+
+// Declarations of the function objects that integrate.c defines. The names must
+// carry the integrate_ prefix used by the definitions; the optimize_ prefix that
+// was here before declared objects that do not exist anywhere.
+#if ULAB_INTEGRATE_HAS_TANHSINH
+MP_DECLARE_CONST_FUN_OBJ_KW(integrate_tanhsinh_obj);
+#endif
+#if ULAB_INTEGRATE_HAS_ROMBERG
+MP_DECLARE_CONST_FUN_OBJ_KW(integrate_romberg_obj);
+#endif
+#if ULAB_INTEGRATE_HAS_SIMPSON
+MP_DECLARE_CONST_FUN_OBJ_KW(integrate_simpson_obj);
+#endif
+#if ULAB_INTEGRATE_HAS_QUAD
+MP_DECLARE_CONST_FUN_OBJ_KW(integrate_quad_obj);
+#endif
+
+#endif /* _SCIPY_INTEGRATE_ */
+
diff --git a/tulip/shared/ulab/code/scipy/linalg/linalg.c b/tulip/shared/ulab/code/scipy/linalg/linalg.c
new file mode 100644
index 000000000..d7942d319
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/linalg/linalg.c
@@ -0,0 +1,281 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021 Vikas Udupa
+ *
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../../ulab.h"
+#include "../../ulab_tools.h"
+#include "../../numpy/linalg/linalg_tools.h"
+#include "linalg.h"
+
+#if ULAB_SCIPY_HAS_LINALG_MODULE
+//|
+//| import ulab.scipy
+//| import ulab.numpy
+//|
+//| """Linear algebra functions"""
+//|
+
+#if ULAB_MAX_DIMS > 1
+
+//| def solve_triangular(A: ulab.numpy.ndarray, b: ulab.numpy.ndarray, lower: bool = True) -> ulab.numpy.ndarray:
+//|    """
+//|    :param ~ulab.numpy.ndarray A: a matrix
+//|    :param ~ulab.numpy.ndarray b: a vector
+//|    :param ~bool lower: if true, use only data contained in lower triangle of A, else use upper triangle of A
+//|    :return: solution to the system A x = b. Shape of return matches b
+//|    :raises TypeError: if A or b is not of type ndarray, or is not dense
+//|    :raises ValueError: if A is a singular matrix
+//|
+//|    Solve the equation A x = b for x, assuming A is a triangular matrix"""
+//|    ...
+//|
+
+// Python entry point for solve_triangular(A, b, lower=True).
+//
+// Solves A x = b by forward substitution (lower is true) or by back substitution
+// (lower is false), reading only the corresponding triangle of A.
+//
+// Raises TypeError if A or b is not an ndarray, or is not dense.
+// Raises ValueError if the magnitude of any diagonal element of A is below LINALG_EPSILON.
+// Returns a new float ndarray with the shape of b.
+//
+// NOTE(review): nothing here checks that A is square or that the length of b
+// matches the dimensions of A -- confirm that callers guarantee this.
+static mp_obj_t solve_triangular(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+
+    size_t i, j;
+
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE} } ,
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE} } ,
+        { MP_QSTR_lower, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_TRUE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type) || !mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first two arguments must be ndarrays"));
+    }
+
+    ndarray_obj_t *A = MP_OBJ_TO_PTR(args[0].u_obj);
+    ndarray_obj_t *b = MP_OBJ_TO_PTR(args[1].u_obj);
+
+    if(!ndarray_is_dense(A) || !ndarray_is_dense(b)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input must be a dense ndarray"));
+    }
+
+    size_t A_rows = A->shape[ULAB_MAX_DIMS - 2];
+    size_t A_cols = A->shape[ULAB_MAX_DIMS - 1];
+
+    // byte pointers, so that they can be advanced by the stride values
+    uint8_t *A_arr = (uint8_t *)A->array;
+    uint8_t *b_arr = (uint8_t *)b->array;
+
+    // dtype-specific readers that return an element as mp_float_t
+    mp_float_t (*get_A_ele)(void *) = ndarray_get_float_function(A->dtype);
+    mp_float_t (*get_b_ele)(void *) = ndarray_get_float_function(b->dtype);
+
+    uint8_t *temp_A = A_arr;
+
+    // check if input matrix A is singular
+    // (one row stride plus one column stride per step walks down the diagonal)
+    for (i = 0; i < A_rows; i++) {
+        if (MICROPY_FLOAT_C_FUN(fabs)(get_A_ele(A_arr)) < LINALG_EPSILON)
+            mp_raise_ValueError(MP_ERROR_TEXT("input matrix is singular"));
+        A_arr += A->strides[ULAB_MAX_DIMS - 2];
+        A_arr += A->strides[ULAB_MAX_DIMS - 1];
+    }
+
+    // rewind to the first element of A
+    A_arr = temp_A;
+
+    ndarray_obj_t *x = ndarray_new_dense_ndarray(b->ndim, b->shape, NDARRAY_FLOAT);
+    mp_float_t *x_arr = (mp_float_t *)x->array;
+
+    if (mp_obj_is_true(args[2].u_obj)) {
+        // Solve the lower triangular matrix by iterating each row of A.
+        // Start by finding the first unknown using the first row.
+        // On finding this unknown, find the second unknown using the second row.
+        // Continue the same till the last unknown is found using the last row.
+
+        for (i = 0; i < A_rows; i++) {
+            mp_float_t sum = 0.0;
+            // accumulate A[i][j] * x[j] over the already solved unknowns j < i
+            for (j = 0; j < i; j++) {
+                sum += (get_A_ele(A_arr) * (*x_arr++));
+                A_arr += A->strides[ULAB_MAX_DIMS - 1];
+            }
+
+            // A_arr is now on the diagonal element and x_arr on x[i]
+            sum = (get_b_ele(b_arr) - sum) / (get_A_ele(A_arr));
+            *x_arr = sum;
+
+            // rewind x to its start and A to the start of the row, then move A and b on by one row
+            x_arr -= j;
+            A_arr -= A->strides[ULAB_MAX_DIMS - 1] * j;
+            A_arr += A->strides[ULAB_MAX_DIMS - 2];
+            b_arr += b->strides[ULAB_MAX_DIMS - 1];
+        }
+    } else {
+        // Solve the upper triangular matrix by iterating each row of A.
+        // Start by finding the last unknown using the last row.
+        // On finding this unknown, find the last-but-one unknown using the last-but-one row.
+        // Continue the same till the first unknown is found using the first row.
+
+        // all three pointers start one position past the end and are stepped back before use
+        A_arr += (A->strides[ULAB_MAX_DIMS - 2] * A_rows);
+        b_arr += (b->strides[ULAB_MAX_DIMS - 1] * A_cols);
+        x_arr += A_cols;
+
+        // i is unsigned: decrementing past zero wraps around, which makes i < A_rows false and ends the loop
+        for (i = A_rows - 1; i < A_rows; i--) {
+            mp_float_t sum = 0.0;
+            // accumulate A[i][j] * x[j] over the already solved unknowns j > i
+            for (j = i + 1; j < A_cols; j++) {
+                sum += (get_A_ele(A_arr) * (*x_arr++));
+                A_arr += A->strides[ULAB_MAX_DIMS - 1];
+            }
+
+            // step back onto x[i], the diagonal element of row i, and b[i]
+            x_arr -= (j - i);
+            A_arr -= (A->strides[ULAB_MAX_DIMS - 1] * (j - i));
+            b_arr -= b->strides[ULAB_MAX_DIMS - 1];
+
+            sum = (get_b_ele(b_arr) - sum) / get_A_ele(A_arr);
+            *x_arr = sum;
+
+            // move up by one row
+            A_arr -= A->strides[ULAB_MAX_DIMS - 2];
+        }
+    }
+
+    return MP_OBJ_FROM_PTR(x);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(linalg_solve_triangular_obj, 2, solve_triangular);
+
+//| def cho_solve(L: ulab.numpy.ndarray, b: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|    """
+//|    :param ~ulab.numpy.ndarray L: the lower triangular, Cholesky factorization of A
+//|    :param ~ulab.numpy.ndarray b: right-hand-side vector b
+//|    :return: solution to the system A x = b. Shape of return matches b
+//|    :raises TypeError: if L and b are not of type ndarray and are not dense
+//|
+//|    Solve the linear equations A x = b, given the Cholesky factorization of A as input"""
+//|    ...
+//|
+
+// Python entry point for cho_solve(L, b).
+//
+// Builds the transpose L_t of L, solves L y = b by forward substitution, and then
+// solves L_t x = y by back substitution.
+//
+// Raises TypeError if L or b is not an ndarray, or is not dense.
+// Returns x as a new float ndarray with the shape of b.
+//
+// NOTE(review): there is no check of the shapes of L and b, and no check for zero
+// diagonal elements before the divisions -- confirm that callers guarantee valid input.
+static mp_obj_t cho_solve(mp_obj_t _L, mp_obj_t _b) {
+
+    if(!mp_obj_is_type(_L, &ulab_ndarray_type) || !mp_obj_is_type(_b, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first two arguments must be ndarrays"));
+    }
+
+    ndarray_obj_t *L = MP_OBJ_TO_PTR(_L);
+    ndarray_obj_t *b = MP_OBJ_TO_PTR(_b);
+
+    if(!ndarray_is_dense(L) || !ndarray_is_dense(b)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input must be a dense ndarray"));
+    }
+
+    // dtype-specific element readers, and a writer for the dtype of L (L_t shares that dtype)
+    mp_float_t (*get_L_ele)(void *) = ndarray_get_float_function(L->dtype);
+    mp_float_t (*get_b_ele)(void *) = ndarray_get_float_function(b->dtype);
+    void (*set_L_ele)(void *, mp_float_t) = ndarray_set_float_function(L->dtype);
+
+    size_t L_rows = L->shape[ULAB_MAX_DIMS - 2];
+    size_t L_cols = L->shape[ULAB_MAX_DIMS - 1];
+
+    // Obtain transpose of the input matrix L in L_t
+    // NOTE(review): only the last two entries of L_t_shape are assigned; whether
+    // ndarray_new_dense_ndarray reads the others when ULAB_MAX_DIMS > 2 should be confirmed.
+    size_t L_t_shape[ULAB_MAX_DIMS];
+    size_t L_t_rows = L_t_shape[ULAB_MAX_DIMS - 2] = L_cols;
+    size_t L_t_cols = L_t_shape[ULAB_MAX_DIMS - 1] = L_rows;
+    ndarray_obj_t *L_t = ndarray_new_dense_ndarray(L->ndim, L_t_shape, L->dtype);
+
+    uint8_t *L_arr = (uint8_t *)L->array;
+    uint8_t *L_t_arr = (uint8_t *)L_t->array;
+    uint8_t *b_arr = (uint8_t *)b->array;
+
+    size_t i, j;
+
+    // copy row i of L into column i of L_t
+    uint8_t *L_ptr = L_arr;
+    uint8_t *L_t_ptr = L_t_arr;
+    for (i = 0; i < L_rows; i++) {
+        for (j = 0; j < L_cols; j++) {
+            set_L_ele(L_t_ptr, get_L_ele(L_ptr));
+            L_t_ptr += L_t->strides[ULAB_MAX_DIMS - 2];
+            L_ptr += L->strides[ULAB_MAX_DIMS - 1];
+        }
+
+        // rewind both pointers, then advance L_t by one column and L by one row
+        L_t_ptr -= j * L_t->strides[ULAB_MAX_DIMS - 2];
+        L_t_ptr += L_t->strides[ULAB_MAX_DIMS - 1];
+        L_ptr -= j * L->strides[ULAB_MAX_DIMS - 1];
+        L_ptr += L->strides[ULAB_MAX_DIMS - 2];
+    }
+
+    ndarray_obj_t *x = ndarray_new_dense_ndarray(b->ndim, b->shape, NDARRAY_FLOAT);
+    mp_float_t *x_arr = (mp_float_t *)x->array;
+
+    // y holds the intermediate solution of L y = b
+    ndarray_obj_t *y = ndarray_new_dense_ndarray(b->ndim, b->shape, NDARRAY_FLOAT);
+    mp_float_t *y_arr = (mp_float_t *)y->array;
+
+    // solve L y = b to obtain y, where L_t x = y
+    for (i = 0; i < L_rows; i++) {
+        mp_float_t sum = 0.0;
+        // accumulate L[i][j] * y[j] over the already solved unknowns j < i
+        for (j = 0; j < i; j++) {
+            sum += (get_L_ele(L_arr) * (*y_arr++));
+            L_arr += L->strides[ULAB_MAX_DIMS - 1];
+        }
+
+        // L_arr is now on the diagonal element and y_arr on y[i]
+        sum = (get_b_ele(b_arr) - sum) / (get_L_ele(L_arr));
+        *y_arr = sum;
+
+        // rewind y to its start and L to the start of the row, then move L and b on by one row
+        y_arr -= j;
+        L_arr -= L->strides[ULAB_MAX_DIMS - 1] * j;
+        L_arr += L->strides[ULAB_MAX_DIMS - 2];
+        b_arr += b->strides[ULAB_MAX_DIMS - 1];
+    }
+
+    // using y, solve L_t x = y to obtain x
+    // all three pointers start one position past the end and are stepped back before use
+    L_t_arr += (L_t->strides[ULAB_MAX_DIMS - 2] * L_t_rows);
+    y_arr += L_t_cols;
+    x_arr += L_t_cols;
+
+    // i is unsigned: decrementing past zero wraps around, which makes i < L_t_rows false and ends the loop
+    for (i = L_t_rows - 1; i < L_t_rows; i--) {
+        mp_float_t sum = 0.0;
+        // accumulate L_t[i][j] * x[j] over the already solved unknowns j > i
+        for (j = i + 1; j < L_t_cols; j++) {
+            sum += (get_L_ele(L_t_arr) * (*x_arr++));
+            L_t_arr += L_t->strides[ULAB_MAX_DIMS - 1];
+        }
+
+        // step back onto x[i], the diagonal element of row i, and y[i]
+        x_arr -= (j - i);
+        L_t_arr -= (L_t->strides[ULAB_MAX_DIMS - 1] * (j - i));
+        y_arr--;
+
+        sum = ((*y_arr) - sum) / get_L_ele(L_t_arr);
+        *x_arr = sum;
+
+        // move up by one row
+        L_t_arr -= L_t->strides[ULAB_MAX_DIMS - 2];
+    }
+
+    return MP_OBJ_FROM_PTR(x);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(linalg_cho_solve_obj, cho_solve);
+
+#endif
+
+// Attribute table of the linalg module: the module name plus the two solvers,
+// which need ULAB_MAX_DIMS > 1 and their individual ULAB_SCIPY_LINALG_HAS_* switch.
+static const mp_rom_map_elem_t ulab_scipy_linalg_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_linalg) },
+    #if ULAB_MAX_DIMS > 1
+        #if ULAB_SCIPY_LINALG_HAS_SOLVE_TRIANGULAR
+        { MP_ROM_QSTR(MP_QSTR_solve_triangular), MP_ROM_PTR(&linalg_solve_triangular_obj) },
+        #endif
+        #if ULAB_SCIPY_LINALG_HAS_CHO_SOLVE
+        { MP_ROM_QSTR(MP_QSTR_cho_solve), MP_ROM_PTR(&linalg_cho_solve_obj) },
+        #endif
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_scipy_linalg_globals, ulab_scipy_linalg_globals_table);
+
+// The module object itself; its globals dictionary is the table above.
+const mp_obj_module_t ulab_scipy_linalg_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_scipy_linalg_globals,
+};
+// Registration under the name ulab.scipy.linalg happens only when CIRCUITPY_ULAB is set.
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_scipy_dot_linalg, ulab_scipy_linalg_module);
+#endif
+#endif
diff --git a/tulip/shared/ulab/code/scipy/linalg/linalg.h b/tulip/shared/ulab/code/scipy/linalg/linalg.h
new file mode 100644
index 000000000..628051f4e
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/linalg/linalg.h
@@ -0,0 +1,21 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021 Vikas Udupa
+ * 
+*/
+
+#ifndef _SCIPY_LINALG_
+#define _SCIPY_LINALG_
+
+// Module object of ulab.scipy.linalg; defined in linalg.c.
+extern const mp_obj_module_t ulab_scipy_linalg_module;
+
+// Function objects defined in linalg.c: solve_triangular takes keyword arguments,
+// cho_solve takes exactly two positional arguments.
+MP_DECLARE_CONST_FUN_OBJ_KW(linalg_solve_triangular_obj);
+MP_DECLARE_CONST_FUN_OBJ_2(linalg_cho_solve_obj);
+
+#endif /* _SCIPY_LINALG_ */
diff --git a/tulip/shared/ulab/code/scipy/optimize/optimize.c b/tulip/shared/ulab/code/scipy/optimize/optimize.c
new file mode 100644
index 000000000..bd2e2c291
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/optimize/optimize.c
@@ -0,0 +1,417 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020-2021 Zoltán Vörös
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "../../ndarray.h"
+#include "../../ulab.h"
+#include "../../ulab_tools.h"
+#include "optimize.h"
+
+// xtolerance (2.4e-7) is the default of the xtol, xatol and fatol keyword arguments below.
+// NOTE(review): the two hexadecimal literals look like the same value pre-encoded as
+// 32-bit and 64-bit floating point bit patterns -- confirm against ULAB_DEFINE_FLOAT_CONST.
+ULAB_DEFINE_FLOAT_CONST(xtolerance, MICROPY_FLOAT_CONST(2.4e-7), 0x3480d959UL, 0x3e901b2b29a4692bULL);
+// rtolerance is 0.0
+ULAB_DEFINE_FLOAT_CONST(rtolerance, MICROPY_FLOAT_CONST(0.0), 0UL, 0ULL);
+
+// Evaluate the Python callable fun at x and return the result as a C float,
+// i.e. compute f(x, a, b, c, ...) for a function f defined in Python.
+//   type    : the type object of fun; its call slot is what gets invoked
+//   fargs   : caller-supplied argument array; slot 0 is overwritten with x, the
+//             following nparams slots must already hold the parameters (a, b, c, ...)
+//   nparams : number of parameters that follow x
+static mp_float_t optimize_python_call(const mp_obj_type_t *type, mp_obj_t fun, mp_float_t x, mp_obj_t *fargs, uint8_t nparams) {
+    // box x as a Python float in the first argument slot
+    fargs[0] = mp_obj_new_float(x);
+
+    // positional call with nparams+1 arguments and no keyword arguments
+    mp_obj_t result = MP_OBJ_TYPE_GET_SLOT(type, call)(fun, nparams+1, 0, fargs);
+
+    return mp_obj_get_float(result);
+}
+
+#if ULAB_SCIPY_OPTIMIZE_HAS_BISECT
+//| def bisect(
+//|     fun: Callable[[float], float],
+//|     a: float,
+//|     b: float,
+//|     *,
+//|     xtol: float = 2.4e-7,
+//|     maxiter: int = 100
+//| ) -> float:
+//|     """
+//|     :param callable f: The function to bisect
+//|     :param float a: The left side of the interval
+//|     :param float b: The right side of the interval
+//|     :param float xtol: The tolerance value
+//|     :param int maxiter: The maximum number of iterations to perform
+//|
+//|     Find a solution (zero) of the function ``f(x)`` on the interval
+//|     (``a``..``b``) using the bisection method.  The result is accurate to within
+//|     ``xtol`` unless more than ``maxiter`` steps are required."""
+//|     ...
+//|
+
+// Python entry point for bisect(fun, a, b, *, xtol=2.4e-7, maxiter=100).
+//
+// Finds a zero of fun on the interval a..b by repeatedly halving the interval.
+//
+// Raises TypeError if fun has no call slot.
+// Raises ValueError if fun(a) and fun(b) have the same sign, or if maxiter is not positive.
+// Returns the last bracket end point as a Python float; the iteration stops after
+// maxiter steps, or earlier once the step size drops below xtol.
+static mp_obj_t optimize_bisect(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Simple bisection routine
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_xtol, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(xtolerance)} },
+        { MP_QSTR_maxiter, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 100} },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t fun = args[0].u_obj;
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a function"));
+    }
+    mp_float_t xtol = mp_obj_get_float(args[3].u_obj);
+    mp_obj_t fargs[1];
+    mp_float_t left, right;
+    mp_float_t x_mid;
+    mp_float_t a = mp_obj_get_float(args[1].u_obj);
+    mp_float_t b = mp_obj_get_float(args[2].u_obj);
+    left = optimize_python_call(type, fun, a, fargs, 0);
+    right = optimize_python_call(type, fun, b, fargs, 0);
+    if(left * right > 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("function has the same sign at the ends of interval"));
+    }
+    // rtb is the end point where the function is negative (or b, if the function is
+    // not negative at a); dx is the signed distance from it to the other end point
+    mp_float_t rtb = left < MICROPY_FLOAT_CONST(0.0) ? a : b;
+    mp_float_t dx = left < MICROPY_FLOAT_CONST(0.0) ? b - a : a - b;
+
+    // maxiter must be strictly positive, as the message says and as fmin enforces
+    mp_int_t maxiter = args[4].u_int;
+    if(maxiter <= 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("maxiter should be > 0"));
+    }
+    // the counter has the type of maxiter: a 16-bit counter could never reach a
+    // limit above 65535 and would wrap around instead
+    for(mp_int_t i=0; i < maxiter; i++) {
+        dx *= MICROPY_FLOAT_CONST(0.5);
+        x_mid = rtb + dx;
+        // keep rtb on the side where the function is negative
+        if(optimize_python_call(type, fun, x_mid, fargs, 0) < MICROPY_FLOAT_CONST(0.0)) {
+            rtb = x_mid;
+        }
+        if(MICROPY_FLOAT_C_FUN(fabs)(dx) < xtol) break;
+    }
+    return mp_obj_new_float(rtb);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(optimize_bisect_obj, 3, optimize_bisect);
+#endif
+
+#if ULAB_SCIPY_OPTIMIZE_HAS_FMIN
+//| def fmin(
+//|     fun: Callable[[float], float],
+//|     x0: float,
+//|     *,
+//|     xatol: float = 2.4e-7,
+//|     fatol: float = 2.4e-7,
+//|     maxiter: int = 200
+//| ) -> float:
+//|     """
+//|     :param callable f: The function to minimize
+//|     :param float x0: The initial x value
+//|     :param float xatol: The absolute tolerance value of ``x``
+//|     :param float fatol: The absolute tolerance value of ``f(x)``
+//|     :param int maxiter: The maximum number of iterations to perform
+//|
+//|     Find a minimum of the function ``f(x)`` using the downhill simplex method.
+//|     The located ``x`` is within ``xatol`` of the actual minimum, and ``f(x)``
+//|     is within ``fatol`` of the actual minimum unless more than ``maxiter``
+//|     steps are required."""
+//|     ...
+//|
+
+// Python entry point for fmin(fun, x0, *, xatol=2.4e-7, fatol=2.4e-7, maxiter=200).
+//
+// One-dimensional downhill simplex search: the simplex consists of the two points
+// x0 and x1, which are updated by reflection, expansion, contraction and shrinking.
+//
+// Raises TypeError if fun has no call slot.
+// Raises ValueError if maxiter is not positive.
+// Returns x0 as a Python float, either after maxiter iterations or once the two
+// points, or their function values, differ by less than xatol or fatol.
+static mp_obj_t optimize_fmin(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // downhill simplex method in 1D
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_xatol, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(xtolerance)} },
+        { MP_QSTR_fatol, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(xtolerance)} },
+        { MP_QSTR_maxiter, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 200} },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t fun = args[0].u_obj;
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a function"));
+    }
+
+    // parameters controlling convergence conditions
+    mp_float_t xatol = mp_obj_get_float(args[2].u_obj);
+    mp_float_t fatol = mp_obj_get_float(args[3].u_obj);
+    if(args[4].u_int <= 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("maxiter must be > 0"));
+    }
+    // keep the full width of the argument: narrowing it to 16 bits would turn
+    // e.g. 65536 into 0 and skip the iteration altogether
+    mp_int_t maxiter = args[4].u_int;
+
+    // the second simplex point is x0 scaled by a small factor, or a fixed offset if x0 is close to zero
+    mp_float_t x0 = mp_obj_get_float(args[1].u_obj);
+    mp_float_t x1 = MICROPY_FLOAT_C_FUN(fabs)(x0) > OPTIMIZE_EPSILON ? (MICROPY_FLOAT_CONST(1.0) + OPTIMIZE_NONZDELTA) * x0 : OPTIMIZE_ZDELTA;
+    mp_obj_t fargs[1];
+    mp_float_t f0 = optimize_python_call(type, fun, x0, fargs, 0);
+    mp_float_t f1 = optimize_python_call(type, fun, x1, fargs, 0);
+    // order the points so that x0 is the better one
+    if(f1 < f0) {
+        SWAP(mp_float_t, x0, x1);
+        SWAP(mp_float_t, f0, f1);
+    }
+    for(mp_int_t i=0; i < maxiter; i++) {
+        uint8_t shrink = 0;
+        f0 = optimize_python_call(type, fun, x0, fargs, 0);
+        f1 = optimize_python_call(type, fun, x1, fargs, 0);
+
+        // reflection
+        mp_float_t xr = (MICROPY_FLOAT_CONST(1.0) + OPTIMIZE_ALPHA) * x0 - OPTIMIZE_ALPHA * x1;
+        mp_float_t fr = optimize_python_call(type, fun, xr, fargs, 0);
+        if(fr < f0) { // expansion
+            mp_float_t xe = (1 + OPTIMIZE_ALPHA * OPTIMIZE_BETA) * x0 - OPTIMIZE_ALPHA * OPTIMIZE_BETA * x1;
+            mp_float_t fe = optimize_python_call(type, fun, xe, fargs, 0);
+            if(fe < fr) {
+                x1 = xe;
+                f1 = fe;
+            } else {
+                x1 = xr;
+                f1 = fr;
+            }
+        } else {
+            if(fr < f1) { // contraction
+                mp_float_t xc = (1 + OPTIMIZE_GAMMA * OPTIMIZE_ALPHA) * x0 - OPTIMIZE_GAMMA * OPTIMIZE_ALPHA * x1;
+                mp_float_t fc = optimize_python_call(type, fun, xc, fargs, 0);
+                if(fc < fr) {
+                    x1 = xc;
+                    f1 = fc;
+                } else {
+                    shrink = 1;
+                }
+            } else { // inside contraction
+                mp_float_t xc = (MICROPY_FLOAT_CONST(1.0) - OPTIMIZE_GAMMA) * x0 + OPTIMIZE_GAMMA * x1;
+                mp_float_t fc = optimize_python_call(type, fun, xc, fargs, 0);
+                if(fc < f1) {
+                    x1 = xc;
+                    f1 = fc;
+                } else {
+                    shrink = 1;
+                }
+            }
+            // neither contraction helped: pull x1 towards x0
+            if(shrink == 1) {
+                x1 = x0 + OPTIMIZE_DELTA * (x1 - x0);
+                f1 = optimize_python_call(type, fun, x1, fargs, 0);
+            }
+            // NOTE(review): the convergence test and the re-ordering below are only
+            // reached on this branch, not after an expansion step -- confirm that this is intended
+            if((MICROPY_FLOAT_C_FUN(fabs)(f1 - f0) < fatol) ||
+                (MICROPY_FLOAT_C_FUN(fabs)(x1 - x0) < xatol)) {
+                break;
+            }
+            if(f1 < f0) {
+                SWAP(mp_float_t, x0, x1);
+                SWAP(mp_float_t, f0, f1);
+            }
+        }
+    }
+    return mp_obj_new_float(x0);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(optimize_fmin_obj, 2, optimize_fmin);
+#endif
+
+#if ULAB_SCIPY_OPTIMIZE_HAS_CURVE_FIT
+static void optimize_jacobi(const mp_obj_type_t *type, mp_obj_t fun, mp_float_t *x, mp_float_t *y, uint16_t len, mp_float_t *params, uint8_t nparams, mp_float_t *jacobi, mp_float_t *grad) {
+    /* Calculates the Jacobian and the gradient of the (unweighted) least-squares cost function
+     *
+     * jacobi is filled as a row-major len x nparams matrix, jacobi[i*nparams + p] = df(x_i; a)/da_p,
+     * with the derivative estimated by a forward difference in the parameter a_p, and
+     * grad[p] = sum_i (f(x_i; a) - y_i) * jacobi[i*nparams + p].
+     *
+     * x, y hold the len data points, params the nparams current parameter values; the caller has to
+     * supply room for len*nparams entries in jacobi, and nparams entries in grad.
+     * Slot 0 of the argument vectors is not set here: presumably optimize_python_call stores x_i there.
+     */
+    mp_obj_t *fargs0 = m_new(mp_obj_t, nparams+1);
+    mp_obj_t *fargs1 = m_new(mp_obj_t, nparams+1);
+    for(uint8_t p=0; p < nparams; p++) {
+        fargs0[p+1] = mp_obj_new_float(params[p]);
+        fargs1[p+1] = fargs0[p+1]; // unperturbed entries can share the same float object
+    }
+    for(uint8_t p=0; p < nparams; p++) {
+        // da is the step itself, not the perturbed value: relative for non-zero parameters, absolute otherwise
+        mp_float_t da = params[p] != MICROPY_FLOAT_CONST(0.0) ? OPTIMIZE_NONZDELTA * params[p] : OPTIMIZE_ZDELTA;
+        fargs1[p+1] = mp_obj_new_float(params[p] + da);
+        grad[p] = MICROPY_FLOAT_CONST(0.0);
+        for(uint16_t i=0; i < len; i++) {
+            mp_float_t f0 = optimize_python_call(type, fun, x[i], fargs0, nparams);
+            mp_float_t f1 = optimize_python_call(type, fun, x[i], fargs1, nparams);
+            jacobi[i*nparams+p] = (f1 - f0) / da;
+            grad[p] += (f0 - y[i]) * jacobi[i*nparams+p];
+        }
+        fargs1[p+1] = fargs0[p+1]; // set back to the original value
+    }
+    m_del(mp_obj_t, fargs0, nparams+1);
+    m_del(mp_obj_t, fargs1, nparams+1);
+}
+
+static void optimize_delta(mp_float_t *jacobi, mp_float_t *grad, uint16_t len, uint8_t nparams, mp_float_t lambda) {
+    // placeholder: the body is empty, so none of the arguments is read or written
+}
+
+mp_obj_t optimize_curve_fit(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Levenberg-Marquardt non-linear fit; so far, only the argument validation and the set-up of the
+    // work buffers are in place: nothing is fitted yet, and the function always returns None.
+    // The implementation follows the introductory discussion in Mark Transtrum's paper, https://arxiv.org/abs/1201.5885
+    // Positional arguments: the model function, the x data, the y data, and the initial parameters, p0.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_p0, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_xatol, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(xtolerance)} },
+        { MP_QSTR_fatol, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(xtolerance)} },
+        { MP_QSTR_maxiter, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t fun = args[0].u_obj;
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a function"));
+    }
+
+    mp_obj_t x_obj = args[1].u_obj;
+    mp_obj_t y_obj = args[2].u_obj;
+    mp_obj_t p0_obj = args[3].u_obj;
+    if(!ndarray_object_is_array_like(x_obj) || !ndarray_object_is_array_like(y_obj)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("data must be iterable"));
+    }
+    if(!ndarray_object_is_nditerable(p0_obj)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("initial values must be iterable"));
+    }
+    size_t len = (size_t)mp_obj_get_int(mp_obj_len_maybe(x_obj));
+    // compare the full lengths: narrowing to uint16_t would mis-compare data sets longer than 65535
+    if(len != (size_t)mp_obj_get_int(mp_obj_len_maybe(y_obj))) {
+        mp_raise_ValueError(MP_ERROR_TEXT("data must be of equal length"));
+    }
+    // the parameter count is carried in a uint8_t: reject what does not fit, before p0 is filled
+    mp_int_t nparams = mp_obj_get_int(mp_obj_len_maybe(p0_obj));
+    if(nparams > 255) {
+        mp_raise_ValueError(MP_ERROR_TEXT("too many initial values"));
+    }
+    uint8_t lenp = (uint8_t)nparams;
+
+    mp_float_t *x = m_new(mp_float_t, len);
+    fill_array_iterable(x, x_obj);
+    mp_float_t *y = m_new(mp_float_t, len);
+    fill_array_iterable(y, y_obj);
+    mp_float_t *p0 = m_new(mp_float_t, lenp);
+    fill_array_iterable(p0, p0_obj);
+    // one gradient entry per parameter, and one Jacobian entry per (data point, parameter) pair
+    mp_float_t *grad = m_new(mp_float_t, lenp);
+    mp_float_t *jacobi = m_new(mp_float_t, len*lenp);
+    mp_obj_t *fargs = m_new(mp_obj_t, lenp+1);
+    // TODO: the Levenberg-Marquardt iteration itself (xatol, fatol, and maxiter are parsed, but not yet used)
+    m_del(mp_float_t, p0, lenp);
+    m_del(mp_float_t, x, len);
+    m_del(mp_float_t, y, len);
+    m_del(mp_float_t, grad, lenp);
+    m_del(mp_float_t, jacobi, len*lenp);
+    m_del(mp_obj_t, fargs, lenp+1);
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(optimize_curve_fit_obj, 2, optimize_curve_fit);
+#endif
+
+#if ULAB_SCIPY_OPTIMIZE_HAS_NEWTON
+//| def newton(
+//|     fun: Callable[[float], float],
+//|     x0: float,
+//|     *,
+//|     tol: float = 2.4e-7,
+//|     rtol: float = 0.0,
+//|     maxiter: int = 50
+//| ) -> float:
+//|     """
+//|     :param callable fun: The function whose root is sought
+//|     :param float x0: The initial x value
+//|     :param float tol: The absolute tolerance value
+//|     :param float rtol: The relative tolerance value
+//|     :param int maxiter: The maximum number of iterations to perform
+//|
+//|     Find a solution (zero) of the function ``fun(x)`` using Newton's method (secant variant).
+//|     Iteration stops when the last step is smaller than ``tol + rtol * |x|``, or after
+//|     ``maxiter`` steps."""
+//|     ...
+//|
+
+static mp_obj_t optimize_newton(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Finds a root of the single-variable python function f(x), starting out from x0. This is actually
+    // the secant method, as the first derivative of the function is not accepted as an argument.
+    // Returns the last iterate as a float, whether or not the tolerance was met within maxiter steps.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_tol, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(xtolerance) } },
+        { MP_QSTR_rtol, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = ULAB_REFERENCE_FLOAT_CONST(rtolerance) } },
+        { MP_QSTR_maxiter, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = 50 } },
+    };
+    // args[0]: the function, args[1]: the initial guess, args[2]: tol, args[3]: rtol, args[4]: maxiter
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t fun = args[0].u_obj;
+    const mp_obj_type_t *type = mp_obj_get_type(fun);
+    if(!MP_OBJ_TYPE_HAS_SLOT(type, call)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("first argument must be a function"));
+    }
+    mp_float_t x = mp_obj_get_float(args[1].u_obj);
+    mp_float_t tol = mp_obj_get_float(args[2].u_obj);
+    mp_float_t rtol = mp_obj_get_float(args[3].u_obj);
+    // first finite-difference step: relative to |x|, or absolute at x == 0, where a zero step would give 0/0
+    mp_float_t df, fx, dx = x != MICROPY_FLOAT_CONST(0.0) ? OPTIMIZE_EPS * MICROPY_FLOAT_C_FUN(fabs)(x) : OPTIMIZE_EPS;
+    mp_obj_t fargs[1];
+    if(args[4].u_int <= 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("maxiter must be > 0"));
+    }
+    for(mp_int_t i=0; i < args[4].u_int; i++) { // mp_int_t: a 16-bit counter could never reach maxiter > 65535
+        fx = optimize_python_call(type, fun, x, fargs, 0);
+        df = (optimize_python_call(type, fun, x + dx, fargs, 0) - fx) / dx;
+        dx = fx / df;
+        x -= dx;
+        if(MICROPY_FLOAT_C_FUN(fabs)(dx) < (tol + rtol * MICROPY_FLOAT_C_FUN(fabs)(x))) break;
+    }
+    return mp_obj_new_float(x);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(optimize_newton_obj, 2, optimize_newton);
+#endif
+
+static const mp_rom_map_elem_t ulab_scipy_optimize_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_optimize) },
+    #if ULAB_SCIPY_OPTIMIZE_HAS_BISECT
+        { MP_ROM_QSTR(MP_QSTR_bisect), MP_ROM_PTR(&optimize_bisect_obj) },
+    #endif
+    #if ULAB_SCIPY_OPTIMIZE_HAS_CURVE_FIT
+        { MP_ROM_QSTR(MP_QSTR_curve_fit), MP_ROM_PTR(&optimize_curve_fit_obj) },
+    #endif
+    #if ULAB_SCIPY_OPTIMIZE_HAS_FMIN
+        { MP_ROM_QSTR(MP_QSTR_fmin), MP_ROM_PTR(&optimize_fmin_obj) },
+    #endif
+    #if ULAB_SCIPY_OPTIMIZE_HAS_NEWTON
+        { MP_ROM_QSTR(MP_QSTR_newton), MP_ROM_PTR(&optimize_newton_obj) },
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_scipy_optimize_globals, ulab_scipy_optimize_globals_table);
+
+const mp_obj_module_t ulab_scipy_optimize_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_scipy_optimize_globals,
+};
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_scipy_dot_optimize, ulab_scipy_optimize_module);
+#endif
diff --git a/tulip/shared/ulab/code/scipy/optimize/optimize.h b/tulip/shared/ulab/code/scipy/optimize/optimize.h
new file mode 100644
index 000000000..174b38636
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/optimize/optimize.h
@@ -0,0 +1,41 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+ *
+*/
+
+#ifndef _SCIPY_OPTIMIZE_
+#define _SCIPY_OPTIMIZE_
+
+#include "../../ulab_tools.h"
+
+#ifndef     OPTIMIZE_EPSILON // fmin: |x0| at or below this is treated as zero; can be pre-defined by the build
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+#define     OPTIMIZE_EPSILON      MICROPY_FLOAT_CONST(1.2e-7)
+#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
+#define     OPTIMIZE_EPSILON      MICROPY_FLOAT_CONST(2.3e-16)
+#endif
+#endif
+// step sizes, and simplex coefficients used by newton and fmin in optimize.c
+#define     OPTIMIZE_EPS          MICROPY_FLOAT_CONST(1.0e-4) // newton: first finite-difference step, relative to x
+#define     OPTIMIZE_NONZDELTA    MICROPY_FLOAT_CONST(0.05) // fmin: relative offset of the second start point, if x0 is non-zero
+#define     OPTIMIZE_ZDELTA       MICROPY_FLOAT_CONST(0.00025) // fmin: the second start point, if x0 is zero
+#define     OPTIMIZE_ALPHA        MICROPY_FLOAT_CONST(1.0) // fmin: reflection coefficient
+#define     OPTIMIZE_BETA         MICROPY_FLOAT_CONST(2.0) // fmin: expansion coefficient, multiplies OPTIMIZE_ALPHA
+#define     OPTIMIZE_GAMMA        MICROPY_FLOAT_CONST(0.5) // fmin: contraction coefficient
+#define     OPTIMIZE_DELTA        MICROPY_FLOAT_CONST(0.5) // fmin: shrink coefficient
+
+extern const mp_obj_module_t ulab_scipy_optimize_module;
+
+MP_DECLARE_CONST_FUN_OBJ_KW(optimize_bisect_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(optimize_curve_fit_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(optimize_fmin_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(optimize_newton_obj);
+
+#endif /* _SCIPY_OPTIMIZE_ */
diff --git a/tulip/shared/ulab/code/scipy/scipy.c b/tulip/shared/ulab/code/scipy/scipy.c
new file mode 100644
index 000000000..e4ad306c2
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/scipy.c
@@ -0,0 +1,58 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020-2021 Zoltán Vörös
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include "py/runtime.h"
+
+#include "../ulab.h"
+#include "optimize/optimize.h"
+#include "signal/signal.h"
+#include "special/special.h"
+#include "linalg/linalg.h"
+#include "integrate/integrate.h"
+
+
+#if ULAB_HAS_SCIPY
+
+//| """Compatibility layer for scipy"""
+//|
+
+static const mp_rom_map_elem_t ulab_scipy_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_scipy) },
+    #if ULAB_SCIPY_HAS_INTEGRATE_MODULE
+        { MP_ROM_QSTR(MP_QSTR_integrate), MP_ROM_PTR(&ulab_scipy_integrate_module) },
+    #endif
+    #if ULAB_SCIPY_HAS_LINALG_MODULE
+        { MP_ROM_QSTR(MP_QSTR_linalg), MP_ROM_PTR(&ulab_scipy_linalg_module) },
+    #endif
+    #if ULAB_SCIPY_HAS_OPTIMIZE_MODULE
+        { MP_ROM_QSTR(MP_QSTR_optimize), MP_ROM_PTR(&ulab_scipy_optimize_module) },
+    #endif
+    #if ULAB_SCIPY_HAS_SIGNAL_MODULE
+        { MP_ROM_QSTR(MP_QSTR_signal), MP_ROM_PTR(&ulab_scipy_signal_module) },
+    #endif
+    #if ULAB_SCIPY_HAS_SPECIAL_MODULE
+        { MP_ROM_QSTR(MP_QSTR_special), MP_ROM_PTR(&ulab_scipy_special_module) },
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_scipy_globals, ulab_scipy_globals_table);
+
+const mp_obj_module_t ulab_scipy_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_scipy_globals,
+};
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_scipy, ulab_scipy_module);
+#endif
+#endif /* ULAB_HAS_SCIPY */
diff --git a/tulip/shared/ulab/code/scipy/scipy.h b/tulip/shared/ulab/code/scipy/scipy.h
new file mode 100644
index 000000000..ec8c8042c
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/scipy.h
@@ -0,0 +1,21 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+ *               
+*/
+
+#ifndef _SCIPY_
+#define _SCIPY_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+extern const mp_obj_module_t ulab_scipy_module;
+
+#endif /* _SCIPY_ */
diff --git a/tulip/shared/ulab/code/scipy/signal/signal.c b/tulip/shared/ulab/code/scipy/signal/signal.c
new file mode 100644
index 000000000..f930a9435
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/signal/signal.c
@@ -0,0 +1,138 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020-2021 Zoltán Vörös
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include <string.h>
+#include "py/runtime.h"
+
+#include "../../ulab.h"
+#include "../../ndarray.h"
+#include "../../numpy/carray/carray_tools.h"
+
+#if ULAB_SCIPY_SIGNAL_HAS_SOSFILT && ULAB_MAX_DIMS > 1
+static void signal_sosfilt_array(mp_float_t *x, const mp_float_t *coeffs, mp_float_t *zf, const size_t len) {
+    // Runs a single second-order section over the len samples of x, overwriting them with the output.
+    // coeffs holds the 6 section coefficients (index 3 is not read); zf holds the 2 delay states, and is updated.
+    for(size_t i=0; i < len; i++) {
+        mp_float_t xn = x[i];
+        x[i] = coeffs[0] * xn + zf[0];
+        zf[0] = zf[1] + coeffs[1] * xn - coeffs[4] * x[i];
+        zf[1] = coeffs[2] * xn - coeffs[5] * x[i];
+    }
+}
+
+mp_obj_t signal_sosfilt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_sos, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_x, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_zi, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE } },
+    };
+    // sos: the filter sections, 6 coefficients each; x: the input data; zi: optional initial delay states
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+    // returns the filtered data, or, if zi was supplied, the tuple (filtered data, final delay states)
+    if(!ndarray_object_is_array_like(args[0].u_obj) || !ndarray_object_is_array_like(args[1].u_obj)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("sosfilt requires iterable arguments"));
+    }
+    #if ULAB_SUPPORTS_COMPLEX
+    if(mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type)) {
+        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[1].u_obj);
+        COMPLEX_DTYPE_NOT_IMPLEMENTED(ndarray->dtype) // presumably raises for complex input; macro defined elsewhere
+    }
+    #endif
+    size_t lenx = (size_t)mp_obj_get_int(mp_obj_len_maybe(args[1].u_obj));
+    ndarray_obj_t *y = ndarray_new_linear_array(lenx, NDARRAY_FLOAT); // the output; the sections filter it in place
+    mp_float_t *yarray = (mp_float_t *)y->array;
+    mp_float_t coeffs[6];
+    if(mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type)) {
+        ndarray_obj_t *inarray = MP_OBJ_TO_PTR(args[1].u_obj);
+        #if ULAB_MAX_DIMS > 1
+        if(inarray->ndim > 1) {
+            mp_raise_ValueError(MP_ERROR_TEXT("input must be one-dimensional"));
+        }
+        #endif
+        uint8_t *iarray = (uint8_t *)inarray->array;
+        for(size_t i=0; i < lenx; i++) { // copy x into y as floats, stepping by the stride of the last axis
+            *yarray++ = ndarray_get_float_value(iarray, inarray->dtype);
+            iarray += inarray->strides[ULAB_MAX_DIMS - 1];
+        }
+        yarray -= lenx; // rewind to the first sample
+    } else {
+        fill_array_iterable(yarray, args[1].u_obj);
+    }
+    // iterate over the sections of sos; each one gets its own row of 2 delay states in zf
+    mp_obj_iter_buf_t iter_buf;
+    mp_obj_t item, iterable = mp_getiter(args[0].u_obj, &iter_buf);
+    size_t lensos = (size_t)mp_obj_get_int(mp_obj_len_maybe(args[0].u_obj));
+
+    size_t *shape = ndarray_shape_vector(0, 0, lensos, 2);
+    ndarray_obj_t *zf = ndarray_new_dense_ndarray(2, shape, NDARRAY_FLOAT);
+    mp_float_t *zf_array = (mp_float_t *)zf->array;
+
+    if(args[2].u_obj != mp_const_none) {
+        if(!mp_obj_is_type(args[2].u_obj, &ulab_ndarray_type)) {
+            mp_raise_TypeError(MP_ERROR_TEXT("zi must be an ndarray"));
+        } else {
+            ndarray_obj_t *zi = MP_OBJ_TO_PTR(args[2].u_obj);
+            if((zi->shape[ULAB_MAX_DIMS - 2] != lensos) || (zi->shape[ULAB_MAX_DIMS - 1] != 2)) {
+                mp_raise_ValueError(MP_ERROR_TEXT("zi must be of shape (n_section, 2)"));
+            }
+            if(zi->dtype != NDARRAY_FLOAT) {
+                mp_raise_ValueError(MP_ERROR_TEXT("zi must be of float type"));
+            }
+            // TODO: this won't work with sparse arrays (the memcpy below ignores the strides of zi)
+            memcpy(zf_array, zi->array, 2*lensos*sizeof(mp_float_t));
+        }
+    }
+    while((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
+        if(mp_obj_get_int(mp_obj_len_maybe(item)) != 6) {
+            mp_raise_ValueError(MP_ERROR_TEXT("sos array must be of shape (n_section, 6)"));
+        } else {
+            fill_array_iterable(coeffs, item);
+            if(coeffs[3] != MICROPY_FLOAT_CONST(1.0)) {
+                mp_raise_ValueError(MP_ERROR_TEXT("sos[:, 3] should be all ones"));
+            }
+            signal_sosfilt_array(yarray, coeffs, zf_array, lenx);
+            zf_array += 2; // move on to the delay states of the next section
+        }
+    }
+    if(args[2].u_obj == mp_const_none) {
+        return MP_OBJ_FROM_PTR(y);
+    } else {
+        mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(mp_obj_new_tuple(2, NULL));
+        tuple->items[0] = MP_OBJ_FROM_PTR(y);
+        tuple->items[1] = MP_OBJ_FROM_PTR(zf);
+        return MP_OBJ_FROM_PTR(tuple);
+    }
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(signal_sosfilt_obj, 2, signal_sosfilt);
+#endif /* ULAB_SCIPY_SIGNAL_HAS_SOSFILT */
+
+static const mp_rom_map_elem_t ulab_scipy_signal_globals_table[] = { // the names exported by the signal module
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_signal) },
+    #if ULAB_SCIPY_SIGNAL_HAS_SOSFILT && ULAB_MAX_DIMS > 1 // logical and: the bitwise & binds looser than >
+        { MP_ROM_QSTR(MP_QSTR_sosfilt), MP_ROM_PTR(&signal_sosfilt_obj) },
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_scipy_signal_globals, ulab_scipy_signal_globals_table);
+
+const mp_obj_module_t ulab_scipy_signal_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_scipy_signal_globals,
+};
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_scipy_dot_signal, ulab_scipy_signal_module);
+#endif
diff --git a/tulip/shared/ulab/code/scipy/signal/signal.h b/tulip/shared/ulab/code/scipy/signal/signal.h
new file mode 100644
index 000000000..033f6e4cf
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/signal/signal.h
@@ -0,0 +1,23 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+ *
+*/
+
+#ifndef _SCIPY_SIGNAL_
+#define _SCIPY_SIGNAL_
+
+#include "../../ulab.h"
+#include "../../ndarray.h"
+
+extern const mp_obj_module_t ulab_scipy_signal_module;
+
+MP_DECLARE_CONST_FUN_OBJ_KW(signal_sosfilt_obj);
+
+#endif /* _SCIPY_SIGNAL_ */
diff --git a/tulip/shared/ulab/code/scipy/special/special.c b/tulip/shared/ulab/code/scipy/special/special.c
new file mode 100644
index 000000000..0e561b6f4
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/special/special.c
@@ -0,0 +1,45 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020 Jeff Epler for Adafruit Industries
+ *               2020 Scott Shawcroft for Adafruit Industries
+ *               2020-2021 Zoltán Vörös
+ *               2020 Taku Fukada
+*/
+
+#include <math.h>
+#include "py/runtime.h"
+
+#include "../../ulab.h"
+#include "../../numpy/vector.h"
+
+static const mp_rom_map_elem_t ulab_scipy_special_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_special) },
+    #if ULAB_SCIPY_SPECIAL_HAS_ERF
+		{ MP_ROM_QSTR(MP_QSTR_erf), MP_ROM_PTR(&vector_erf_obj) },
+    #endif
+	#if ULAB_SCIPY_SPECIAL_HAS_ERFC
+		{ MP_ROM_QSTR(MP_QSTR_erfc), MP_ROM_PTR(&vector_erfc_obj) },
+	#endif
+	#if ULAB_SCIPY_SPECIAL_HAS_GAMMA
+		{ MP_ROM_QSTR(MP_QSTR_gamma), MP_ROM_PTR(&vector_gamma_obj) },
+	#endif
+	#if ULAB_SCIPY_SPECIAL_HAS_GAMMALN
+		{ MP_ROM_QSTR(MP_QSTR_gammaln), MP_ROM_PTR(&vector_lgamma_obj) },
+	#endif
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_scipy_special_globals, ulab_scipy_special_globals_table);
+
+const mp_obj_module_t ulab_scipy_special_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_scipy_special_globals,
+};
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_scipy_dot_special, ulab_scipy_special_module);
+#endif
diff --git a/tulip/shared/ulab/code/scipy/special/special.h b/tulip/shared/ulab/code/scipy/special/special.h
new file mode 100644
index 000000000..bb34e27ed
--- /dev/null
+++ b/tulip/shared/ulab/code/scipy/special/special.h
@@ -0,0 +1,21 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+ *               
+*/
+
+#ifndef _SCIPY_SPECIAL_
+#define _SCIPY_SPECIAL_
+
+#include "../../ulab.h"
+#include "../../ndarray.h"
+
+extern const mp_obj_module_t ulab_scipy_special_module;
+
+#endif /* _SCIPY_SPECIAL_ */
diff --git a/tulip/shared/ulab/code/ulab.c b/tulip/shared/ulab/code/ulab.c
new file mode 100644
index 000000000..be148c450
--- /dev/null
+++ b/tulip/shared/ulab/code/ulab.c
@@ -0,0 +1,234 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2021 Zoltán Vörös
+ *               2020 Jeff Epler for Adafruit Industries
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/runtime.h"
+#include "py/binary.h"
+#include "py/obj.h"
+#include "py/objarray.h"
+
+#include "ulab.h"
+#include "ndarray.h"
+#include "ndarray_properties.h"
+#include "numpy/create.h"
+#include "numpy/ndarray/ndarray_iter.h"
+
+#include "numpy/numpy.h"
+#include "scipy/scipy.h"
+// TODO: we should get rid of this; array.sort depends on it
+#include "numpy/numerical.h"
+
+#include "user/user.h"
+#include "utils/utils.h"
+
+#define ULAB_VERSION 6.7.0
+#define xstr(s) str(s)
+#define str(s) #s
+
+#if ULAB_SUPPORTS_COMPLEX
+#define ULAB_VERSION_STRING xstr(ULAB_VERSION) xstr(-) xstr(ULAB_MAX_DIMS) xstr(D-c)
+#else
+#define ULAB_VERSION_STRING xstr(ULAB_VERSION) xstr(-) xstr(ULAB_MAX_DIMS) xstr(D)
+#endif
+
+static MP_DEFINE_STR_OBJ(ulab_version_obj, ULAB_VERSION_STRING);
+
+#ifdef ULAB_HASH
+static MP_DEFINE_STR_OBJ(ulab_sha_obj, xstr(ULAB_HASH));
+#endif
+
+static const mp_rom_map_elem_t ulab_ndarray_locals_dict_table[] = {
+    #if ULAB_MAX_DIMS > 1
+        #if NDARRAY_HAS_RESHAPE
+            { MP_ROM_QSTR(MP_QSTR_reshape), MP_ROM_PTR(&ndarray_reshape_obj) },
+        #endif
+        #if NDARRAY_HAS_TRANSPOSE
+            { MP_ROM_QSTR(MP_QSTR_transpose), MP_ROM_PTR(&ndarray_transpose_obj) },
+        #endif
+    #endif
+    #if NDARRAY_HAS_BYTESWAP
+        { MP_ROM_QSTR(MP_QSTR_byteswap), MP_ROM_PTR(&ndarray_byteswap_obj) },
+    #endif
+    #if NDARRAY_HAS_COPY
+        { MP_ROM_QSTR(MP_QSTR_copy), MP_ROM_PTR(&ndarray_copy_obj) },
+    #endif
+    #if NDARRAY_HAS_FLATTEN
+        { MP_ROM_QSTR(MP_QSTR_flatten), MP_ROM_PTR(&ndarray_flatten_obj) },
+    #endif
+    #if NDARRAY_HAS_TOBYTES
+        { MP_ROM_QSTR(MP_QSTR_tobytes), MP_ROM_PTR(&ndarray_tobytes_obj) },
+    #endif
+    #if NDARRAY_HAS_TOLIST
+        { MP_ROM_QSTR(MP_QSTR_tolist), MP_ROM_PTR(&ndarray_tolist_obj) },
+    #endif
+    #if NDARRAY_HAS_SORT
+        { MP_ROM_QSTR(MP_QSTR_sort), MP_ROM_PTR(&numerical_sort_inplace_obj) },
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT(ulab_ndarray_locals_dict, ulab_ndarray_locals_dict_table);
+
+#if defined(MP_DEFINE_CONST_OBJ_TYPE)
+// MicroPython after-b41aaaa (Sept 19 2022).
+
+#if NDARRAY_IS_SLICEABLE
+#define NDARRAY_TYPE_SUBSCR subscr, ndarray_subscr,
+#else
+#define NDARRAY_TYPE_SUBSCR
+#endif
+#if NDARRAY_IS_ITERABLE
+#define NDARRAY_TYPE_ITER iter, ndarray_getiter,
+#define NDARRAY_TYPE_ITER_FLAGS MP_TYPE_FLAG_ITER_IS_GETITER
+#else
+#define NDARRAY_TYPE_ITER
+#define NDARRAY_TYPE_ITER_FLAGS 0
+#endif
+#if NDARRAY_HAS_UNARY_OPS
+#define NDARRAY_TYPE_UNARY_OP unary_op, ndarray_unary_op,
+#else
+#define NDARRAY_TYPE_UNARY_OP
+#endif
+#if NDARRAY_HAS_BINARY_OPS
+#define NDARRAY_TYPE_BINARY_OP binary_op, ndarray_binary_op,
+#else
+#define NDARRAY_TYPE_BINARY_OP
+#endif
+
+MP_DEFINE_CONST_OBJ_TYPE(
+    ulab_ndarray_type,
+    MP_QSTR_ndarray,
+    MP_TYPE_FLAG_EQ_CHECKS_OTHER_TYPE | MP_TYPE_FLAG_EQ_HAS_NEQ_TEST | NDARRAY_TYPE_ITER_FLAGS,
+    print, ndarray_print,
+    make_new, ndarray_make_new,
+    locals_dict, &ulab_ndarray_locals_dict,
+    NDARRAY_TYPE_SUBSCR
+    NDARRAY_TYPE_ITER
+    NDARRAY_TYPE_UNARY_OP
+    NDARRAY_TYPE_BINARY_OP
+    attr, ndarray_properties_attr,
+    buffer, ndarray_get_buffer
+);
+
+#else
+// CircuitPython and earlier MicroPython revisions.
+const mp_obj_type_t ulab_ndarray_type = {
+    { &mp_type_type },
+    .flags = MP_TYPE_FLAG_EXTENDED
+    #if defined(MP_TYPE_FLAG_EQ_CHECKS_OTHER_TYPE) && defined(MP_TYPE_FLAG_EQ_HAS_NEQ_TEST)
+        | MP_TYPE_FLAG_EQ_CHECKS_OTHER_TYPE | MP_TYPE_FLAG_EQ_HAS_NEQ_TEST
+    #endif
+        ,
+    .name = MP_QSTR_ndarray,
+    .print = ndarray_print,
+    .make_new = ndarray_make_new,
+    .locals_dict = (mp_obj_dict_t*)&ulab_ndarray_locals_dict,
+    MP_TYPE_EXTENDED_FIELDS(
+    #if NDARRAY_IS_SLICEABLE
+    .subscr = ndarray_subscr,
+    #endif
+    #if NDARRAY_IS_ITERABLE
+    .getiter = ndarray_getiter,
+    #endif
+    #if NDARRAY_HAS_UNARY_OPS
+    .unary_op = ndarray_unary_op,
+    #endif
+    #if NDARRAY_HAS_BINARY_OPS
+    .binary_op = ndarray_binary_op,
+    #endif
+    .attr = ndarray_properties_attr,
+    .buffer_p = { .get_buffer = ndarray_get_buffer, },
+    )
+};
+#endif
+
+#if ULAB_HAS_DTYPE_OBJECT
+
+#if defined(MP_DEFINE_CONST_OBJ_TYPE)
+MP_DEFINE_CONST_OBJ_TYPE(
+    ulab_dtype_type,
+    MP_QSTR_dtype,
+    MP_TYPE_FLAG_NONE,
+    print, ndarray_dtype_print,
+    make_new, ndarray_dtype_make_new
+);
+#else
+const mp_obj_type_t ulab_dtype_type = {
+    { &mp_type_type },
+    .name = MP_QSTR_dtype,
+    .print = ndarray_dtype_print,
+    .make_new = ndarray_dtype_make_new,
+};
+#endif
+#endif
+
+#if NDARRAY_HAS_FLATITER
+#if defined(MP_DEFINE_CONST_OBJ_TYPE)
+MP_DEFINE_CONST_OBJ_TYPE(
+    ndarray_flatiter_type,
+    MP_QSTR_flatiter,
+    MP_TYPE_FLAG_ITER_IS_GETITER,
+    iter, ndarray_get_flatiterator
+);
+#else
+const mp_obj_type_t ndarray_flatiter_type = {
+    { &mp_type_type },
+    .name = MP_QSTR_flatiter,
+    MP_TYPE_EXTENDED_FIELDS(
+    .getiter = ndarray_get_flatiterator,
+    )
+};
+#endif
+#endif
+
+static const mp_rom_map_elem_t ulab_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_ulab) },
+    { MP_ROM_QSTR(MP_QSTR___version__), MP_ROM_PTR(&ulab_version_obj) },
+    #ifdef ULAB_HASH
+    { MP_ROM_QSTR(MP_QSTR___sha__), MP_ROM_PTR(&ulab_sha_obj) },
+    #endif
+    #if ULAB_HAS_DTYPE_OBJECT
+        { MP_ROM_QSTR(MP_QSTR_dtype), MP_ROM_PTR(&ulab_dtype_type) },
+    #else
+        #if NDARRAY_HAS_DTYPE
+        { MP_ROM_QSTR(MP_QSTR_dtype), MP_ROM_PTR(&ndarray_dtype_obj) },
+        #endif /* NDARRAY_HAS_DTYPE */
+    #endif /* ULAB_HAS_DTYPE_OBJECT */
+        { MP_ROM_QSTR(MP_QSTR_numpy), MP_ROM_PTR(&ulab_numpy_module) },
+    #if ULAB_HAS_SCIPY
+        { MP_ROM_QSTR(MP_QSTR_scipy), MP_ROM_PTR(&ulab_scipy_module) },
+    #endif
+    #if ULAB_HAS_USER_MODULE
+        { MP_ROM_QSTR(MP_QSTR_user), MP_ROM_PTR(&ulab_user_module) },
+    #endif
+    #if ULAB_HAS_UTILS_MODULE
+        { MP_ROM_QSTR(MP_QSTR_utils), MP_ROM_PTR(&ulab_utils_module) },
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT (
+    mp_module_ulab_globals,
+    ulab_globals_table
+);
+
+#ifdef OPENMV
+const struct _mp_obj_module_t ulab_user_cmodule = {
+#else
+const mp_obj_module_t ulab_user_cmodule = {
+#endif
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_globals,
+};
+
+MP_REGISTER_MODULE(MP_QSTR_ulab, ulab_user_cmodule);
diff --git a/tulip/shared/ulab/code/ulab.h b/tulip/shared/ulab/code/ulab.h
new file mode 100644
index 000000000..3eb301316
--- /dev/null
+++ b/tulip/shared/ulab/code/ulab.h
@@ -0,0 +1,836 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2022 Zoltán Vörös
+*/
+
+#ifndef __ULAB__
+#define __ULAB__
+
+
+
+// The pre-processor constants in this file determine how ulab behaves:
+//
+// - how many dimensions ulab can handle
+// - which functions are included in the compiled firmware
+// - whether arrays can be sliced and iterated over
+// - which binary/unary operators are supported
+// - whether ulab can deal with complex numbers
+//
+// A considerable amount of flash space can be saved by removing (setting
+// the corresponding constants to 0) the unnecessary functions and features.
+
+// Values defined here can be overridden by your own config file as
+// make -DULAB_CONFIG_FILE="my_ulab_config.h"
+#if defined(ULAB_CONFIG_FILE)
+#include ULAB_CONFIG_FILE
+#endif
+
+// Adds support for complex ndarrays
+#ifndef ULAB_SUPPORTS_COMPLEX
+#define ULAB_SUPPORTS_COMPLEX               (1)
+#endif
+
+// Determines, whether scipy is defined in ulab. The sub-modules and functions
+// of scipy have to be defined separately
+#ifndef ULAB_HAS_SCIPY
+#define ULAB_HAS_SCIPY                      (1)
+#endif
+
+// The maximum number of dimensions the firmware should be able to support
+// Possible values lie between 1, and 4, inclusive
+#ifndef ULAB_MAX_DIMS
+#define ULAB_MAX_DIMS                       2
+#endif
+
+// By setting this constant to 1, iteration over array dimensions will be implemented
+// as a function (ndarray_rewind_array), instead of writing out the loops in macros
+// This reduces firmware size at the expense of speed
+#ifndef ULAB_HAS_FUNCTION_ITERATOR
+#define ULAB_HAS_FUNCTION_ITERATOR          (0)
+#endif
+
+// If NDARRAY_IS_ITERABLE is 1, the ndarray object defines its own iterator function
+// Setting this option to 0 saves approx. 250 bytes of flash space
+#ifndef NDARRAY_IS_ITERABLE
+#define NDARRAY_IS_ITERABLE                 (1)
+#endif
+
+// Slicing can be switched off by setting this variable to 0
+#ifndef NDARRAY_IS_SLICEABLE
+#define NDARRAY_IS_SLICEABLE                (1)
+#endif
+
+// The default threshold for pretty printing. These variables can be overwritten
+// at run-time via the set_printoptions() function
+#ifndef ULAB_HAS_PRINTOPTIONS
+#define ULAB_HAS_PRINTOPTIONS               (1)
+#endif
+#define NDARRAY_PRINT_THRESHOLD             10
+#define NDARRAY_PRINT_EDGEITEMS             3
+
+// determines, whether the dtype is an object, or simply a character
+// the object implementation is numpythonic, but requires more space
+#ifndef ULAB_HAS_DTYPE_OBJECT
+#define ULAB_HAS_DTYPE_OBJECT               (0)
+#endif
+
+// the ndarray binary operators
+#ifndef NDARRAY_HAS_BINARY_OPS
+#define NDARRAY_HAS_BINARY_OPS              (1)
+#endif
+
+// Firmware size can be reduced at the expense of speed by using function
+// pointers in iterations. For each operator, the function pointer saves around
+// 2 kB in the two-dimensional case, and around 4 kB in the four-dimensional case.
+
+#ifndef NDARRAY_BINARY_USES_FUN_POINTER
+#define NDARRAY_BINARY_USES_FUN_POINTER     (0)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_ADD
+#define NDARRAY_HAS_BINARY_OP_ADD           (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_AND
+#define NDARRAY_HAS_BINARY_OP_AND           (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_EQUAL
+#define NDARRAY_HAS_BINARY_OP_EQUAL         (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE
+#define NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE  (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_LESS
+#define NDARRAY_HAS_BINARY_OP_LESS          (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_LESS_EQUAL
+#define NDARRAY_HAS_BINARY_OP_LESS_EQUAL    (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_MORE
+#define NDARRAY_HAS_BINARY_OP_MORE          (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_MORE_EQUAL
+#define NDARRAY_HAS_BINARY_OP_MORE_EQUAL    (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_MULTIPLY
+#define NDARRAY_HAS_BINARY_OP_MULTIPLY      (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_NOT_EQUAL
+#define NDARRAY_HAS_BINARY_OP_NOT_EQUAL     (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_OR
+#define NDARRAY_HAS_BINARY_OP_OR            (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_POWER
+#define NDARRAY_HAS_BINARY_OP_POWER         (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_SUBTRACT
+#define NDARRAY_HAS_BINARY_OP_SUBTRACT      (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_TRUE_DIVIDE
+#define NDARRAY_HAS_BINARY_OP_TRUE_DIVIDE   (1)
+#endif
+
+#ifndef NDARRAY_HAS_BINARY_OP_XOR
+#define NDARRAY_HAS_BINARY_OP_XOR           (1)
+#endif
+
+#ifndef NDARRAY_HAS_INPLACE_OPS
+#define NDARRAY_HAS_INPLACE_OPS             (1)
+#endif
+
+#ifndef NDARRAY_HAS_INPLACE_ADD
+#define NDARRAY_HAS_INPLACE_ADD             (1)
+#endif
+
+#ifndef NDARRAY_HAS_INPLACE_MULTIPLY
+#define NDARRAY_HAS_INPLACE_MULTIPLY        (1)
+#endif
+
+#ifndef NDARRAY_HAS_INPLACE_POWER
+#define NDARRAY_HAS_INPLACE_POWER           (1)
+#endif
+
+#ifndef NDARRAY_HAS_INPLACE_SUBTRACT
+#define NDARRAY_HAS_INPLACE_SUBTRACT        (1)
+#endif
+
+#ifndef NDARRAY_HAS_INPLACE_TRUE_DIVIDE
+#define NDARRAY_HAS_INPLACE_TRUE_DIVIDE     (1)
+#endif
+
+// bitwise operators
+#ifndef ULAB_NUMPY_HAS_BITWISE_AND
+#define ULAB_NUMPY_HAS_BITWISE_AND          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_BITWISE_OR
+#define ULAB_NUMPY_HAS_BITWISE_OR           (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_BITWISE_XOR
+#define ULAB_NUMPY_HAS_BITWISE_XOR          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_LEFT_SHIFT
+#define ULAB_NUMPY_HAS_LEFT_SHIFT           (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_RIGHT_SHIFT
+#define ULAB_NUMPY_HAS_RIGHT_SHIFT          (1)
+#endif
+
+// the ndarray unary operators
+#ifndef NDARRAY_HAS_UNARY_OPS
+#define NDARRAY_HAS_UNARY_OPS               (1)
+#endif
+
+#ifndef NDARRAY_HAS_UNARY_OP_ABS
+#define NDARRAY_HAS_UNARY_OP_ABS            (1)
+#endif
+
+#ifndef NDARRAY_HAS_UNARY_OP_INVERT
+#define NDARRAY_HAS_UNARY_OP_INVERT         (1)
+#endif
+
+#ifndef NDARRAY_HAS_UNARY_OP_LEN
+#define NDARRAY_HAS_UNARY_OP_LEN            (1)
+#endif
+
+#ifndef NDARRAY_HAS_UNARY_OP_NEGATIVE
+#define NDARRAY_HAS_UNARY_OP_NEGATIVE       (1)
+#endif
+
+#ifndef NDARRAY_HAS_UNARY_OP_POSITIVE
+#define NDARRAY_HAS_UNARY_OP_POSITIVE       (1)
+#endif
+
+
+// determines, which ndarray methods are available
+#ifndef NDARRAY_HAS_BYTESWAP
+#define NDARRAY_HAS_BYTESWAP            (1)
+#endif
+
+#ifndef NDARRAY_HAS_COPY
+#define NDARRAY_HAS_COPY                (1)
+#endif
+
+#ifndef NDARRAY_HAS_DTYPE
+#define NDARRAY_HAS_DTYPE               (1)
+#endif
+
+#ifndef NDARRAY_HAS_FLATTEN
+#define NDARRAY_HAS_FLATTEN             (1)
+#endif
+
+#ifndef NDARRAY_HAS_ITEMSIZE
+#define NDARRAY_HAS_ITEMSIZE            (1)
+#endif
+
+#ifndef NDARRAY_HAS_RESHAPE
+#define NDARRAY_HAS_RESHAPE             (1)
+#endif
+
+#ifndef NDARRAY_HAS_SHAPE
+#define NDARRAY_HAS_SHAPE               (1)
+#endif
+
+#ifndef NDARRAY_HAS_SIZE
+#define NDARRAY_HAS_SIZE                (1)
+#endif
+
+#ifndef NDARRAY_HAS_SORT
+#define NDARRAY_HAS_SORT                (1)
+#endif
+
+#ifndef NDARRAY_HAS_STRIDES
+#define NDARRAY_HAS_STRIDES             (1)
+#endif
+
+#ifndef NDARRAY_HAS_TOBYTES
+#define NDARRAY_HAS_TOBYTES             (1)
+#endif
+
+#ifndef NDARRAY_HAS_TOLIST
+#define NDARRAY_HAS_TOLIST              (1)
+#endif
+
+#ifndef NDARRAY_HAS_TRANSPOSE
+#define NDARRAY_HAS_TRANSPOSE           (1)
+#endif
+
+// Firmware size can be reduced at the expense of speed by using a function
+// pointer in iterations. Setting ULAB_VECTORISE_USES_FUN_POINTER to 1 saves
+// around 800 bytes in the four-dimensional case, and around 200 in two dimensions.
+#ifndef ULAB_VECTORISE_USES_FUN_POINTER
+#define ULAB_VECTORISE_USES_FUN_POINTER (1)
+#endif
+
+// determines, whether e is defined in ulab.numpy itself
+#ifndef ULAB_NUMPY_HAS_E
+#define ULAB_NUMPY_HAS_E                (1)
+#endif
+
+// ulab defines infinite as a class constant in ulab.numpy
+#ifndef ULAB_NUMPY_HAS_INF
+#define ULAB_NUMPY_HAS_INF              (1)
+#endif
+
+// ulab defines NaN as a class constant in ulab.numpy
+#ifndef ULAB_NUMPY_HAS_NAN
+#define ULAB_NUMPY_HAS_NAN              (1)
+#endif
+
+// determines, whether pi is defined in ulab.numpy itself
+#ifndef ULAB_NUMPY_HAS_PI
+#define ULAB_NUMPY_HAS_PI               (1)
+#endif
+
+// determines, whether the ndinfo function is available
+#ifndef ULAB_NUMPY_HAS_NDINFO
+#define ULAB_NUMPY_HAS_NDINFO           (1)
+#endif
+
+// if this constant is set to 1, the interpreter can iterate
+// over the flat array without copying any data
+#ifndef NDARRAY_HAS_FLATITER
+#define NDARRAY_HAS_FLATITER            (1)
+#endif
+
+// frombuffer adds 600 bytes to the firmware
+#ifndef ULAB_NUMPY_HAS_FROMBUFFER
+#define ULAB_NUMPY_HAS_FROMBUFFER       (1)
+#endif
+
+// functions that create an array
+#ifndef ULAB_NUMPY_HAS_ARANGE
+#define ULAB_NUMPY_HAS_ARANGE           (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_CONCATENATE
+#define ULAB_NUMPY_HAS_CONCATENATE      (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_DIAG
+#define ULAB_NUMPY_HAS_DIAG             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_EMPTY
+#define ULAB_NUMPY_HAS_EMPTY            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_EYE
+#define ULAB_NUMPY_HAS_EYE              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_FULL
+#define ULAB_NUMPY_HAS_FULL             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_LINSPACE
+#define ULAB_NUMPY_HAS_LINSPACE         (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_LOGSPACE
+#define ULAB_NUMPY_HAS_LOGSPACE         (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ONES
+#define ULAB_NUMPY_HAS_ONES             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ZEROS
+#define ULAB_NUMPY_HAS_ZEROS            (1)
+#endif
+
+// functions that compare arrays
+#ifndef ULAB_NUMPY_HAS_CLIP
+#define ULAB_NUMPY_HAS_CLIP             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_EQUAL
+#define ULAB_NUMPY_HAS_EQUAL            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ISFINITE
+#define ULAB_NUMPY_HAS_ISFINITE         (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ISINF
+#define ULAB_NUMPY_HAS_ISINF            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_MAXIMUM
+#define ULAB_NUMPY_HAS_MAXIMUM          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_MINIMUM
+#define ULAB_NUMPY_HAS_MINIMUM          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_NONZERO
+#define ULAB_NUMPY_HAS_NONZERO          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_NOTEQUAL
+#define ULAB_NUMPY_HAS_NOTEQUAL         (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_WHERE
+#define ULAB_NUMPY_HAS_WHERE            (1)
+#endif
+
+// the integrate module; functions of the integrate module still have
+// to be defined separately
+#ifndef ULAB_SCIPY_HAS_INTEGRATE_MODULE
+#define ULAB_SCIPY_HAS_INTEGRATE_MODULE        (1)
+#endif
+
+#ifndef ULAB_INTEGRATE_HAS_TANHSINH
+#define ULAB_INTEGRATE_HAS_TANHSINH			(1)
+#endif
+
+#ifndef ULAB_INTEGRATE_HAS_ROMBERG
+#define ULAB_INTEGRATE_HAS_ROMBERG			(1)
+#endif
+
+#ifndef ULAB_INTEGRATE_HAS_SIMPSON
+#define ULAB_INTEGRATE_HAS_SIMPSON			(1)
+#endif
+
+#ifndef ULAB_INTEGRATE_HAS_QUAD
+#define ULAB_INTEGRATE_HAS_QUAD				(1)
+#endif
+
+// the linalg module; functions of the linalg module still have
+// to be defined separately
+#ifndef ULAB_NUMPY_HAS_LINALG_MODULE
+#define ULAB_NUMPY_HAS_LINALG_MODULE    (1)
+#endif
+
+#ifndef ULAB_LINALG_HAS_CHOLESKY
+#define ULAB_LINALG_HAS_CHOLESKY        (1)
+#endif
+
+#ifndef ULAB_LINALG_HAS_DET
+#define ULAB_LINALG_HAS_DET             (1)
+#endif
+
+#ifndef ULAB_LINALG_HAS_EIG
+#define ULAB_LINALG_HAS_EIG             (1)
+#endif
+
+#ifndef ULAB_LINALG_HAS_INV
+#define ULAB_LINALG_HAS_INV             (1)
+#endif
+
+#ifndef ULAB_LINALG_HAS_NORM
+#define ULAB_LINALG_HAS_NORM            (1)
+#endif
+
+#ifndef ULAB_LINALG_HAS_QR
+#define ULAB_LINALG_HAS_QR              (1)
+#endif
+
+// the FFT module; functions of the fft module still have
+// to be defined separately
+#ifndef ULAB_NUMPY_HAS_FFT_MODULE
+#define ULAB_NUMPY_HAS_FFT_MODULE       (1)
+#endif
+
+// By setting this constant to 1, the FFT routine will behave in a
+// numpy-compatible way, i.e., it will output a complex array
+// This setting has no effect, if ULAB_SUPPORTS_COMPLEX is 0
+// Note that in this case, the input also must be numpythonic,
+// i.e., the real and imaginary parts cannot be passed as two arguments
+#ifndef ULAB_FFT_IS_NUMPY_COMPATIBLE
+#define ULAB_FFT_IS_NUMPY_COMPATIBLE    (1)
+#endif
+
+#ifndef ULAB_FFT_HAS_FFT
+#define ULAB_FFT_HAS_FFT                (1)
+#endif
+
+#ifndef ULAB_FFT_HAS_IFFT
+#define ULAB_FFT_HAS_IFFT               (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ALL
+#define ULAB_NUMPY_HAS_ALL              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ANY
+#define ULAB_NUMPY_HAS_ANY              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ARGMINMAX
+#define ULAB_NUMPY_HAS_ARGMINMAX        (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ARGSORT
+#define ULAB_NUMPY_HAS_ARGSORT          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ASARRAY
+#define ULAB_NUMPY_HAS_ASARRAY          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_COMPRESS
+#define ULAB_NUMPY_HAS_COMPRESS         (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_CONVOLVE
+#define ULAB_NUMPY_HAS_CONVOLVE         (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_CROSS
+#define ULAB_NUMPY_HAS_CROSS            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_DELETE
+#define ULAB_NUMPY_HAS_DELETE           (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_DIFF
+#define ULAB_NUMPY_HAS_DIFF             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_DOT
+#define ULAB_NUMPY_HAS_DOT              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_FLIP
+#define ULAB_NUMPY_HAS_FLIP             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_INTERP
+#define ULAB_NUMPY_HAS_INTERP           (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_LOAD
+#define ULAB_NUMPY_HAS_LOAD             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_LOADTXT
+#define ULAB_NUMPY_HAS_LOADTXT          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_MEAN
+#define ULAB_NUMPY_HAS_MEAN             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_MEDIAN
+#define ULAB_NUMPY_HAS_MEDIAN           (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_MINMAX
+#define ULAB_NUMPY_HAS_MINMAX           (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_POLYFIT
+#define ULAB_NUMPY_HAS_POLYFIT          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_POLYVAL
+#define ULAB_NUMPY_HAS_POLYVAL          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ROLL
+#define ULAB_NUMPY_HAS_ROLL             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SAVE
+#define ULAB_NUMPY_HAS_SAVE             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SAVETXT
+#define ULAB_NUMPY_HAS_SAVETXT          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SIZE
+#define ULAB_NUMPY_HAS_SIZE             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SORT
+#define ULAB_NUMPY_HAS_SORT             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_STD
+#define ULAB_NUMPY_HAS_STD              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SUM
+#define ULAB_NUMPY_HAS_SUM              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_TAKE
+#define ULAB_NUMPY_HAS_TAKE             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_TRACE
+#define ULAB_NUMPY_HAS_TRACE            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_TRAPZ
+#define ULAB_NUMPY_HAS_TRAPZ            (1)
+#endif
+
+// vectorised versions of the functions of the math python module, with
+// the exception of the functions listed in scipy.special
+
+// if this constant is set, math functions support the out keyword argument
+#ifndef ULAB_MATH_FUNCTIONS_OUT_KEYWORD
+#define ULAB_MATH_FUNCTIONS_OUT_KEYWORD (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ACOS
+#define ULAB_NUMPY_HAS_ACOS             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ACOSH
+#define ULAB_NUMPY_HAS_ACOSH            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ARCTAN2
+#define ULAB_NUMPY_HAS_ARCTAN2          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_AROUND
+#define ULAB_NUMPY_HAS_AROUND           (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ASIN
+#define ULAB_NUMPY_HAS_ASIN             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ASINH
+#define ULAB_NUMPY_HAS_ASINH            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ATAN
+#define ULAB_NUMPY_HAS_ATAN             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_ATANH
+#define ULAB_NUMPY_HAS_ATANH            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_CEIL
+#define ULAB_NUMPY_HAS_CEIL             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_COS
+#define ULAB_NUMPY_HAS_COS              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_COSH
+#define ULAB_NUMPY_HAS_COSH             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_DEGREES
+#define ULAB_NUMPY_HAS_DEGREES          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_EXP
+#define ULAB_NUMPY_HAS_EXP              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_EXPM1
+#define ULAB_NUMPY_HAS_EXPM1            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_FLOOR
+#define ULAB_NUMPY_HAS_FLOOR            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_LOG
+#define ULAB_NUMPY_HAS_LOG              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_LOG10
+#define ULAB_NUMPY_HAS_LOG10            (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_LOG2
+#define ULAB_NUMPY_HAS_LOG2             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_RADIANS
+#define ULAB_NUMPY_HAS_RADIANS          (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SIN
+#define ULAB_NUMPY_HAS_SIN              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SINC
+#define ULAB_NUMPY_HAS_SINC             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SINH
+#define ULAB_NUMPY_HAS_SINH             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SQRT
+#define ULAB_NUMPY_HAS_SQRT             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_TAN
+#define ULAB_NUMPY_HAS_TAN              (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_TANH
+#define ULAB_NUMPY_HAS_TANH             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_VECTORIZE
+#define ULAB_NUMPY_HAS_VECTORIZE        (1)
+#endif
+
+// Complex functions. The implementations are compiled into
+// the firmware, only if ULAB_SUPPORTS_COMPLEX is set to 1
+#ifndef ULAB_NUMPY_HAS_CONJUGATE
+#define ULAB_NUMPY_HAS_CONJUGATE        (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_IMAG
+#define ULAB_NUMPY_HAS_IMAG             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_REAL
+#define ULAB_NUMPY_HAS_REAL             (1)
+#endif
+
+#ifndef ULAB_NUMPY_HAS_SORT_COMPLEX
+#define ULAB_NUMPY_HAS_SORT_COMPLEX     (1)
+#endif
+
+// random module
+#ifndef ULAB_NUMPY_HAS_RANDOM_MODULE
+#define ULAB_NUMPY_HAS_RANDOM_MODULE    (1)
+#endif
+
+#ifndef ULAB_NUMPY_RANDOM_HAS_NORMAL
+#define ULAB_NUMPY_RANDOM_HAS_NORMAL    (1)
+#endif
+
+#ifndef ULAB_NUMPY_RANDOM_HAS_RANDOM
+#define ULAB_NUMPY_RANDOM_HAS_RANDOM    (1)
+#endif
+
+#ifndef ULAB_NUMPY_RANDOM_HAS_UNIFORM
+#define ULAB_NUMPY_RANDOM_HAS_UNIFORM   (1)
+#endif
+
+
+// scipy modules
+#ifndef ULAB_SCIPY_HAS_LINALG_MODULE
+#define ULAB_SCIPY_HAS_LINALG_MODULE        (1)
+#endif
+
+#ifndef ULAB_SCIPY_LINALG_HAS_CHO_SOLVE
+#define ULAB_SCIPY_LINALG_HAS_CHO_SOLVE     (1)
+#endif
+
+#ifndef ULAB_SCIPY_LINALG_HAS_SOLVE_TRIANGULAR
+#define ULAB_SCIPY_LINALG_HAS_SOLVE_TRIANGULAR  (1)
+#endif
+
+#ifndef ULAB_SCIPY_HAS_SIGNAL_MODULE
+#define ULAB_SCIPY_HAS_SIGNAL_MODULE        (1)
+#endif
+
+#ifndef ULAB_SCIPY_SIGNAL_HAS_SOSFILT
+#define ULAB_SCIPY_SIGNAL_HAS_SOSFILT       (1)
+#endif
+
+#ifndef ULAB_SCIPY_HAS_OPTIMIZE_MODULE
+#define ULAB_SCIPY_HAS_OPTIMIZE_MODULE      (1)
+#endif
+
+#ifndef ULAB_SCIPY_OPTIMIZE_HAS_BISECT
+#define ULAB_SCIPY_OPTIMIZE_HAS_BISECT      (1)
+#endif
+
+#ifndef ULAB_SCIPY_OPTIMIZE_HAS_CURVE_FIT
+#define ULAB_SCIPY_OPTIMIZE_HAS_CURVE_FIT   (0) // not fully implemented
+#endif
+
+#ifndef ULAB_SCIPY_OPTIMIZE_HAS_FMIN
+#define ULAB_SCIPY_OPTIMIZE_HAS_FMIN        (1)
+#endif
+
+#ifndef ULAB_SCIPY_OPTIMIZE_HAS_NEWTON
+#define ULAB_SCIPY_OPTIMIZE_HAS_NEWTON      (1)
+#endif
+
+#ifndef ULAB_SCIPY_HAS_SPECIAL_MODULE
+#define ULAB_SCIPY_HAS_SPECIAL_MODULE       (1)
+#endif
+
+#ifndef ULAB_SCIPY_SPECIAL_HAS_ERF
+#define ULAB_SCIPY_SPECIAL_HAS_ERF          (1)
+#endif
+
+#ifndef ULAB_SCIPY_SPECIAL_HAS_ERFC
+#define ULAB_SCIPY_SPECIAL_HAS_ERFC         (1)
+#endif
+
+#ifndef ULAB_SCIPY_SPECIAL_HAS_GAMMA
+#define ULAB_SCIPY_SPECIAL_HAS_GAMMA        (1)
+#endif
+
+#ifndef ULAB_SCIPY_SPECIAL_HAS_GAMMALN
+#define ULAB_SCIPY_SPECIAL_HAS_GAMMALN      (1)
+#endif
+
+// functions of the utils module
+#ifndef ULAB_HAS_UTILS_MODULE
+#define ULAB_HAS_UTILS_MODULE               (1)
+#endif
+
+#ifndef ULAB_UTILS_HAS_FROM_INT16_BUFFER
+#define ULAB_UTILS_HAS_FROM_INT16_BUFFER    (1)
+#endif
+
+#ifndef ULAB_UTILS_HAS_FROM_UINT16_BUFFER
+#define ULAB_UTILS_HAS_FROM_UINT16_BUFFER   (1)
+#endif
+
+#ifndef ULAB_UTILS_HAS_FROM_INT32_BUFFER
+#define ULAB_UTILS_HAS_FROM_INT32_BUFFER    (1)
+#endif
+
+#ifndef ULAB_UTILS_HAS_FROM_UINT32_BUFFER
+#define ULAB_UTILS_HAS_FROM_UINT32_BUFFER   (1)
+#endif
+
+#ifndef ULAB_UTILS_HAS_SPECTROGRAM
+#define ULAB_UTILS_HAS_SPECTROGRAM          (1)
+#endif
+
+// user-defined module; source of the module and
+// its sub-modules should be placed in code/user/
+#ifndef ULAB_HAS_USER_MODULE
+#define ULAB_HAS_USER_MODULE                (0)
+#endif
+
+#endif
diff --git a/tulip/shared/ulab/code/ulab_tools.c b/tulip/shared/ulab/code/ulab_tools.c
new file mode 100644
index 000000000..05ed1ede7
--- /dev/null
+++ b/tulip/shared/ulab/code/ulab_tools.c
@@ -0,0 +1,304 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2022 Zoltán Vörös
+ */
+
+
+#include <string.h>
+#include "py/runtime.h"
+
+#include "ulab.h"
+#include "ndarray.h"
+#include "ulab_tools.h"
+
+// The following five functions return a float from a void pointer
+// The value in question is supposed to be located at the head of the pointer
+
+mp_float_t ndarray_get_float_uint8(void *data) {
+    uint8_t item = *(uint8_t *)data;  // the uint8_t found at the head of data
+    return (mp_float_t)item;          // handed back as an mp_float_t
+}
+
+mp_float_t ndarray_get_float_int8(void *data) {
+    int8_t item = *(int8_t *)data;    // the int8_t found at the head of data
+    return (mp_float_t)item;          // handed back as an mp_float_t
+}
+
+mp_float_t ndarray_get_float_uint16(void *data) {
+    uint16_t item = *(uint16_t *)data; // the uint16_t found at the head of data
+    return (mp_float_t)item;           // handed back as an mp_float_t
+}
+
+mp_float_t ndarray_get_float_int16(void *data) {
+    int16_t item = *(int16_t *)data;  // the int16_t found at the head of data
+    return (mp_float_t)item;          // handed back as an mp_float_t
+}
+
+
+mp_float_t ndarray_get_float_float(void *data) {
+    mp_float_t *item = (mp_float_t *)data; // data already points to an mp_float_t
+    return item[0];                        // hence, no conversion is required
+}
+
+// Returns the reader function that belongs to dtype, as a void pointer:
+// one of the ndarray_get_float_* functions defined above. Every dtype
+// other than the four integer types listed here gets the float reader.
+// The caller has to cast the result to the proper function pointer type.
+void *ndarray_get_float_function(uint8_t dtype) {
+    // each test leaves the function right away, hence no else branches
+    if(dtype == NDARRAY_UINT8) return ndarray_get_float_uint8;
+    if(dtype == NDARRAY_INT8) return ndarray_get_float_int8;
+    if(dtype == NDARRAY_UINT16) return ndarray_get_float_uint16;
+    if(dtype == NDARRAY_INT16) return ndarray_get_float_int16;
+
+    // fallback for everything else
+    return ndarray_get_float_float;
+}
+
+mp_float_t ndarray_get_float_index(void *data, uint8_t dtype, size_t index) {
+    // Treats data as an array of items of type dtype, and returns the item
+    // at position index as an mp_float_t. The position is counted in items,
+    // not in bytes, and is not checked against the length of the array.
+    // Every dtype other than the four integer types is read as mp_float_t.
+    if(dtype == NDARRAY_UINT8) return (mp_float_t)((uint8_t *)data)[index];
+    if(dtype == NDARRAY_INT8) return (mp_float_t)((int8_t *)data)[index];
+    if(dtype == NDARRAY_UINT16) return (mp_float_t)((uint16_t *)data)[index];
+    if(dtype == NDARRAY_INT16) return (mp_float_t)((int16_t *)data)[index];
+
+    // fallback: no conversion is required
+    mp_float_t *items = (mp_float_t *)data;
+    return items[index];
+}
+
+mp_float_t ndarray_get_float_value(void *data, uint8_t dtype) {
+    // Reads the single item located at the head of data, interprets it as
+    // dictated by dtype, and returns it as an mp_float_t.
+    // This is the same lookup as in ndarray_get_float_index, with the
+    // position fixed at 0.
+    // Every dtype other than the four integer types is read as mp_float_t.
+    if(dtype == NDARRAY_UINT8) return (mp_float_t)(*(uint8_t *)data);
+    if(dtype == NDARRAY_INT8) return (mp_float_t)(*(int8_t *)data);
+    if(dtype == NDARRAY_UINT16) return (mp_float_t)(*(uint16_t *)data);
+    if(dtype == NDARRAY_INT16) return (mp_float_t)(*(int16_t *)data);
+
+    // fallback: no conversion is required
+    mp_float_t *item = (mp_float_t *)data;
+    return *item;
+}
+
+#if NDARRAY_BINARY_USES_FUN_POINTER | ULAB_NUMPY_HAS_WHERE
+uint8_t ndarray_upcast_dtype(uint8_t ldtype, uint8_t rdtype) {
+    // Returns the dtype of the result, when operands of type ldtype, and
+    // rdtype are combined. The rules do not depend on the order of the
+    // arguments:
+    // - identical types are preserved
+    // - uint8 + int8 => int16
+    // - uint8 + int16 => int16
+    // - int8 + int16 => int16
+    // - uint8 + uint16 => uint16
+    // - int8 + uint16 => uint16
+    // - everything else is float, e.g., uint16 + int16, or
+    //   if one of the operands is a float
+
+    if(ldtype == rdtype) return ldtype;
+
+    // From here on the two dtypes differ, so two flags that are set at the
+    // same time mean that there is exactly one operand of each of the two types
+    bool has_uint8 = (ldtype == NDARRAY_UINT8) || (rdtype == NDARRAY_UINT8);
+    bool has_int8 = (ldtype == NDARRAY_INT8) || (rdtype == NDARRAY_INT8);
+    bool has_uint16 = (ldtype == NDARRAY_UINT16) || (rdtype == NDARRAY_UINT16);
+    bool has_int16 = (ldtype == NDARRAY_INT16) || (rdtype == NDARRAY_INT16);
+
+    if((has_uint8 && has_int8) || (has_uint8 && has_int16) || (has_int8 && has_int16)) {
+        return NDARRAY_INT16;
+    }
+    if((has_uint8 && has_uint16) || (has_int8 && has_uint16)) {
+        return NDARRAY_UINT16;
+    }
+    return NDARRAY_FLOAT;
+}
+
+// The following five functions are the inverse of the ndarray_get_... functions,
+// and write a floating point datum into a void pointer
+
+void ndarray_set_float_uint8(void *data, mp_float_t datum) {
+    ((uint8_t *)data)[0] = (uint8_t)datum; // plain C cast to uint8_t, stored at the head of data
+}
+
+void ndarray_set_float_int8(void *data, mp_float_t datum) {
+    ((int8_t *)data)[0] = (int8_t)datum; // plain C cast to int8_t, stored at the head of data
+}
+
+void ndarray_set_float_uint16(void *data, mp_float_t datum) {
+    ((uint16_t *)data)[0] = (uint16_t)datum; // plain C cast to uint16_t, stored at the head of data
+}
+
+void ndarray_set_float_int16(void *data, mp_float_t datum) {
+    ((int16_t *)data)[0] = (int16_t)datum; // plain C cast to int16_t, stored at the head of data
+}
+
+void ndarray_set_float_float(void *data, mp_float_t datum) {
+    ((mp_float_t *)data)[0] = datum; // stored as is, at the head of data
+}
+
+// Returns the writer function that belongs to dtype, as a void pointer:
+// one of the ndarray_set_float_* functions defined above. Every dtype
+// other than the four integer types listed here gets the float writer.
+// The caller has to cast the result to the proper function pointer type.
+void *ndarray_set_float_function(uint8_t dtype) {
+    // each test leaves the function right away, hence no else branches
+    if(dtype == NDARRAY_UINT8) return ndarray_set_float_uint8;
+    if(dtype == NDARRAY_INT8) return ndarray_set_float_int8;
+    if(dtype == NDARRAY_UINT16) return ndarray_set_float_uint16;
+    if(dtype == NDARRAY_INT16) return ndarray_set_float_int16;
+
+    // fallback for everything else
+    return ndarray_set_float_float;
+}
+#endif /* NDARRAY_BINARY_USES_FUN_POINTER | ULAB_NUMPY_HAS_WHERE */
+
+shape_strides tools_reduce_axes(ndarray_obj_t *ndarray, mp_obj_t axis) {
+    // TODO: replace numerical_reduce_axes with this function, wherever applicable
+    // This function should be used, whenever a tensor is contracted. It returns
+    // newly allocated copies of the shape, and strides of ndarray. For axis == None,
+    // the copies are returned as they are, with ndim, and increment set to 0.
+    // For an integer axis (negative values count from the last axis), the entries
+    // at `axis` are moved to the zeroth position, the entries in front of them are
+    // shifted by one to the right, ndim is ndarray->ndim - 1, and increment is 1,
+    // unless ndim is 0. Raises TypeError, or ValueError for an unusable axis.
+    if(!mp_obj_is_int(axis) && (axis != mp_const_none)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("axis must be None, or an integer"));
+    }
+    shape_strides _shape_strides;
+
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS + 1);
+    _shape_strides.shape = shape;
+    int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS + 1);
+    _shape_strides.strides = strides;
+
+    _shape_strides.increment = 0;
+    // this is the contracted dimension (won't be overwritten for axis == None)
+    _shape_strides.ndim = 0;
+
+    memcpy(_shape_strides.shape, ndarray->shape, sizeof(size_t) * ULAB_MAX_DIMS);
+    memcpy(_shape_strides.strides, ndarray->strides, sizeof(int32_t) * ULAB_MAX_DIMS);
+
+    if(axis == mp_const_none) {
+        return _shape_strides;
+    }
+
+    // axis is an integer from here on; it is range-checked at full width,
+    // because narrowing it to int8_t first can wrap a large value into range
+    mp_int_t ax = mp_obj_get_int(axis);
+    if(ax < 0) ax += ndarray->ndim;
+    if((ax < 0) || (ax > ndarray->ndim - 1)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("index out of range"));
+    }
+    uint8_t index = (uint8_t)(ULAB_MAX_DIMS - ndarray->ndim + ax);
+    _shape_strides.ndim = ndarray->ndim - 1;
+
+    // move the value stored at index to the leftmost position
+    _shape_strides.shape[0] = ndarray->shape[index];
+    _shape_strides.strides[0] = ndarray->strides[index];
+    for(uint8_t i = 0; i < index; i++) {
+        // entries to the left of index are shifted by one position to the right
+        _shape_strides.shape[i + 1] = ndarray->shape[i];
+        _shape_strides.strides[i + 1] = ndarray->strides[i];
+    }
+    if(_shape_strides.ndim != 0) {
+        _shape_strides.increment = 1;
+    }
+    return _shape_strides;
+}
+
+int8_t tools_get_axis(mp_obj_t axis, uint8_t ndim) { // converts the integer object axis to a position in [0, ndim)
+    mp_int_t ax = mp_obj_get_int(axis); // kept at full width: narrowing to int8_t before the check can wrap a large value into range
+    if(ax < 0) ax += ndim; // negative values count from the last axis
+    if((ax < 0) || (ax > ndim - 1)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("axis is out of bounds"));
+    }
+    return (int8_t)ax; // narrowed only after the range check has passed
+}
+
+#if ULAB_MAX_DIMS > 1
+ndarray_obj_t *tools_object_is_square(mp_obj_t obj) {
+    // Hands back obj as an ndarray pointer, if it is a two-dimensional ndarray with
+    // two axes of equal length; raises TypeError, or ValueError otherwise
+    if(!mp_obj_is_type(obj, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("size is defined for ndarrays only"));
+    }
+    ndarray_obj_t *matrix = MP_OBJ_TO_PTR(obj);
+    if((matrix->ndim != 2) || (matrix->shape[ULAB_MAX_DIMS - 2] != matrix->shape[ULAB_MAX_DIMS - 1])) {
+        mp_raise_ValueError(MP_ERROR_TEXT("input must be square matrix"));
+    }
+    return matrix;
+}
+#endif
+
+uint8_t ulab_binary_get_size(uint8_t dtype) {
+    // Returns the size of a single item of type dtype
+    #if ULAB_SUPPORTS_COMPLEX
+    if(dtype == NDARRAY_COMPLEX) return 2 * (uint8_t)sizeof(mp_float_t); // two mp_float_t values per item
+    #endif
+    if(dtype == NDARRAY_BOOL) return 1; // booleans are not looked up via mp_binary_get_size
+    return mp_binary_get_size('@', dtype, NULL);
+}
+
+#if ULAB_SUPPORTS_COMPLEX
+void ulab_rescale_float_strides(int32_t *strides) {
+    // divides each of the ULAB_MAX_DIMS strides in place by sizeof(mp_float_t), so that we can iterate over floats
+    int32_t *end = strides + ULAB_MAX_DIMS;
+    for(int32_t *stride = strides; stride < end; stride++) {
+        *stride /= (uint8_t)sizeof(mp_float_t);
+    }
+}
+#endif
+
+bool ulab_tools_mp_obj_is_scalar(mp_obj_t obj) {
+    // Tells, whether obj is a scalar: integers, and floats always qualify,
+    // complex numbers qualify only, if ULAB_SUPPORTS_COMPLEX is set.
+    // Everything else is reported as false.
+    bool is_scalar = mp_obj_is_int(obj) || mp_obj_is_float(obj);
+
+    #if ULAB_SUPPORTS_COMPLEX
+    // the complex test is skipped, if obj is already known to be a scalar
+    if(!is_scalar) {
+        is_scalar = mp_obj_is_type(obj, &mp_type_complex);
+    }
+    #endif
+
+    return is_scalar;
+}
+
+ndarray_obj_t *ulab_tools_inspect_out(mp_obj_t out, uint8_t dtype, uint8_t ndim, size_t *shape, bool dense_only) {
+    // Checks, whether out is an ndarray with the given dtype, number of dimensions, and shape, and returns
+    // it as an ndarray pointer. Raises TypeError, if out is not an ndarray, and ValueError for any other failure.
+    if(!mp_obj_is_type(out, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("out has wrong type"));
+    }
+    ndarray_obj_t *target = MP_OBJ_TO_PTR(out);
+    if(target->dtype != dtype) {
+        mp_raise_ValueError(MP_ERROR_TEXT("out array has wrong dtype"));
+    }
+    if(target->ndim != ndim) {
+        mp_raise_ValueError(MP_ERROR_TEXT("out array has wrong dimension"));
+    }
+    // all ULAB_MAX_DIMS entries of the two shapes are compared
+    uint8_t axis = 0;
+    while(axis < ULAB_MAX_DIMS) {
+        if(target->shape[axis] != shape[axis]) {
+            mp_raise_ValueError(MP_ERROR_TEXT("out array has wrong shape"));
+        }
+        axis++;
+    }
+    // arrays that ndarray_is_dense rejects are refused only, if dense_only is set
+    if(dense_only && !ndarray_is_dense(target)) {
+        mp_raise_ValueError(MP_ERROR_TEXT("output array must be contiguous"));
+    }
+    return target;
+}
\ No newline at end of file
diff --git a/tulip/shared/ulab/code/ulab_tools.h b/tulip/shared/ulab/code/ulab_tools.h
new file mode 100644
index 000000000..62170fb85
--- /dev/null
+++ b/tulip/shared/ulab/code/ulab_tools.h
@@ -0,0 +1,49 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2022 Zoltán Vörös
+*/
+
+#ifndef _TOOLS_
+#define _TOOLS_
+
+#include "ndarray.h"
+
+// Exchange the values of `a` and `b` through a temporary of type `t`.
+// The macro expands to a braced block, and names each argument more than
+// once, so arguments with side effects should be avoided.
+#define SWAP(t, a, b) { t tmp = a; a = b; b = tmp; }
+
+// Bundle of an increment, a dimension count, and pointers to a shape and a
+// strides array; this is the return type of tools_reduce_axes.
+// NOTE(review): the exact meaning of `increment` is not visible from this
+// header -- confirm against tools_reduce_axes.
+typedef struct _shape_strides_t {
+    uint8_t increment;
+    uint8_t ndim;
+    size_t *shape;
+    int32_t *strides;
+} shape_strides;
+
+mp_float_t ndarray_get_float_uint8(void *);
+mp_float_t ndarray_get_float_int8(void *);
+mp_float_t ndarray_get_float_uint16(void *);
+mp_float_t ndarray_get_float_int16(void *);
+mp_float_t ndarray_get_float_float(void *);
+void *ndarray_get_float_function(uint8_t );
+
+uint8_t ndarray_upcast_dtype(uint8_t , uint8_t );
+void *ndarray_set_float_function(uint8_t );
+
+shape_strides tools_reduce_axes(ndarray_obj_t *, mp_obj_t );
+int8_t tools_get_axis(mp_obj_t , uint8_t );
+ndarray_obj_t *tools_object_is_square(mp_obj_t );
+
+// returns the size of a single element of the given dtype
+uint8_t ulab_binary_get_size(uint8_t );
+
+#if ULAB_SUPPORTS_COMPLEX
+// divides the ULAB_MAX_DIMS strides in place by sizeof(mp_float_t)
+void ulab_rescale_float_strides(int32_t *);
+#endif
+
+// true, if the object is an int, a float, or (with complex support) a complex
+bool ulab_tools_mp_obj_is_scalar(mp_obj_t );
+
+// checks the type, dtype, ndim, shape, and optionally the denseness of an
+// `out` argument, and returns it as an ndarray; raises an exception on mismatch
+ndarray_obj_t *ulab_tools_inspect_out(mp_obj_t , uint8_t , uint8_t , size_t *, bool );
+
+#endif
diff --git a/tulip/shared/ulab/code/user/user.c b/tulip/shared/ulab/code/user/user.c
new file mode 100644
index 000000000..c372092f9
--- /dev/null
+++ b/tulip/shared/ulab/code/user/user.c
@@ -0,0 +1,98 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+#include "user.h"
+
+#if ULAB_HAS_USER_MODULE
+
+//| """This module should hold arbitrary user-defined functions."""
+//|
+
+static mp_obj_t user_square(mp_obj_t arg) {
+    // Calculates the element-wise square of a single dense ndarray.
+    //
+    // arg:     a dense ndarray of dtype uint8, int8, uint16, int16, or float
+    // returns: a new dense ndarray with the same number of dimensions,
+    //          shape, and dtype as `arg`; integer results are stored back
+    //          into the dtype of the input, i.e., they are truncated
+    // raises:  TypeError, if `arg` is not an ndarray, is not dense, or its
+    //          dtype is not one of those listed above
+
+    // raise a TypeError exception, if the input is not an ndarray
+    if(!mp_obj_is_type(arg, &ulab_ndarray_type)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input must be an ndarray"));
+    }
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(arg);
+
+    // make sure that the input is a dense array
+    if(!ndarray_is_dense(ndarray)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input must be a dense ndarray"));
+    }
+
+    // Reject every dtype that is not handled explicitly below (e.g., bool or
+    // complex arrays): treating their data as mp_float_t would read and
+    // write with the wrong element size. The check comes before `results`
+    // is allocated.
+    if((ndarray->dtype != NDARRAY_UINT8) && (ndarray->dtype != NDARRAY_INT8) &&
+        (ndarray->dtype != NDARRAY_UINT16) && (ndarray->dtype != NDARRAY_INT16) &&
+        (ndarray->dtype != NDARRAY_FLOAT)) {
+        mp_raise_TypeError(MP_ERROR_TEXT("input dtype is not supported"));
+    }
+
+    // if the input is a dense array, create `results` with the same number of
+    // dimensions, shape, and dtype
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, ndarray->shape, ndarray->dtype);
+
+    // since in a dense array the iteration over the elements is trivial, we
+    // can cast the data arrays ndarray->array and results->array to the actual type
+    if(ndarray->dtype == NDARRAY_UINT8) {
+        uint8_t *array = (uint8_t *)ndarray->array;
+        uint8_t *rarray = (uint8_t *)results->array;
+        for(size_t i=0; i < ndarray->len; i++, array++) {
+            *rarray++ = (*array) * (*array);
+        }
+    } else if(ndarray->dtype == NDARRAY_INT8) {
+        int8_t *array = (int8_t *)ndarray->array;
+        int8_t *rarray = (int8_t *)results->array;
+        for(size_t i=0; i < ndarray->len; i++, array++) {
+            *rarray++ = (*array) * (*array);
+        }
+    } else if(ndarray->dtype == NDARRAY_UINT16) {
+        uint16_t *array = (uint16_t *)ndarray->array;
+        uint16_t *rarray = (uint16_t *)results->array;
+        for(size_t i=0; i < ndarray->len; i++, array++) {
+            // multiply as uint32_t: uint16_t operands would be promoted to
+            // (signed) int, and the product could overflow
+            *rarray++ = (uint16_t)((uint32_t)(*array) * (uint32_t)(*array));
+        }
+    } else if(ndarray->dtype == NDARRAY_INT16) {
+        int16_t *array = (int16_t *)ndarray->array;
+        int16_t *rarray = (int16_t *)results->array;
+        for(size_t i=0; i < ndarray->len; i++, array++) {
+            *rarray++ = (*array) * (*array);
+        }
+    } else { // the only dtype left after the check above is NDARRAY_FLOAT
+        mp_float_t *array = (mp_float_t *)ndarray->array;
+        mp_float_t *rarray = (mp_float_t *)results->array;
+        for(size_t i=0; i < ndarray->len; i++, array++) {
+            *rarray++ = (*array) * (*array);
+        }
+    }
+    // at the end, return a micropython object
+    return MP_OBJ_FROM_PTR(results);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(user_square_obj, user_square);
+
+// Globals of the `user` module: its name, and the `square` function.
+// The entries of an mp_rom_map_elem_t table are written with the MP_ROM_*
+// macros, as in the globals table of the utils module.
+static const mp_rom_map_elem_t ulab_user_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_user) },
+    { MP_ROM_QSTR(MP_QSTR_square), MP_ROM_PTR(&user_square_obj) },
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_user_globals, ulab_user_globals_table);
+
+const mp_obj_module_t ulab_user_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_user_globals,
+};
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_user, ulab_user_module);
+#endif
+#endif
+
diff --git a/tulip/shared/ulab/code/user/user.h b/tulip/shared/ulab/code/user/user.h
new file mode 100644
index 000000000..ff274f438
--- /dev/null
+++ b/tulip/shared/ulab/code/user/user.h
@@ -0,0 +1,20 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+*/
+
+#ifndef _USER_
+#define _USER_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+extern const mp_obj_module_t ulab_user_module;
+
+#endif
diff --git a/tulip/shared/ulab/code/utils/utils.c b/tulip/shared/ulab/code/utils/utils.c
new file mode 100644
index 000000000..17e6ca0a7
--- /dev/null
+++ b/tulip/shared/ulab/code/utils/utils.c
@@ -0,0 +1,414 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2024 Zoltán Vörös
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+#include "utils.h"
+
+#include "../ulab_tools.h"
+#include "../numpy/fft/fft_tools.h"
+
+#if ULAB_HAS_UTILS_MODULE
+
+enum UTILS_BUFFER_TYPE {
+    UTILS_INT16_BUFFER,
+    UTILS_UINT16_BUFFER,
+    UTILS_INT32_BUFFER,
+    UTILS_UINT32_BUFFER,
+};
+
+#if ULAB_UTILS_HAS_FROM_INT16_BUFFER | ULAB_UTILS_HAS_FROM_UINT16_BUFFER | ULAB_UTILS_HAS_FROM_INT32_BUFFER | ULAB_UTILS_HAS_FROM_UINT32_BUFFER
+static mp_obj_t utils_from_intbuffer_helper(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args, uint8_t buffer_type) {
+    // Shared implementation of from_int16_buffer, from_uint16_buffer,
+    // from_int32_buffer, and from_uint32_buffer: interprets the bytes of a
+    // buffer as integers of the type given by `buffer_type` (one of the
+    // UTILS_*_BUFFER constants), and converts them to floats.
+    //
+    // Python-side arguments:
+    //   (positional) an object supporting the buffer protocol
+    //   count:       keyword-only; number of elements to convert; values
+    //                that are not positive (default -1) mean "all"
+    //   offset:      keyword-only; number of bytes to skip at the beginning
+    //                of the buffer (default 0)
+    //   out:         a dense float ndarray receiving the results, or None
+    //                (default), in which case a new linear array is created
+    //   byteswap:    if True, the bytes of each element are reversed before
+    //                the conversion (default False)
+    //
+    // Returns the ndarray holding the converted values, or None, if the
+    // first argument does not support the buffer protocol.
+    // Raises TypeError, if `out` is not a dense float ndarray, and
+    // ValueError, if offset, count, or the size of `out` do not fit the buffer.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } } ,
+        { MP_QSTR_count, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_INT(-1) } },
+        { MP_QSTR_offset, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_INT(0) } },
+        { MP_QSTR_out, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_byteswap, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_FALSE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    ndarray_obj_t *ndarray = NULL;
+
+    if(args[3].u_obj != mp_const_none) {
+        // the type has to be verified before the object can be treated as an ndarray
+        if(!mp_obj_is_type(args[3].u_obj, &ulab_ndarray_type)) {
+            mp_raise_TypeError(MP_ERROR_TEXT("out must be a float dense array"));
+        }
+        ndarray = MP_OBJ_TO_PTR(args[3].u_obj);
+        if((ndarray->dtype != NDARRAY_FLOAT) || !ndarray_is_dense(ndarray)) {
+            mp_raise_TypeError(MP_ERROR_TEXT("out must be a float dense array"));
+        }
+    }
+
+    // a negative offset wraps around to a huge size_t, and is rejected below
+    size_t offset = mp_obj_get_int(args[2].u_obj);
+
+    mp_buffer_info_t bufinfo;
+    if(mp_get_buffer(args[0].u_obj, &bufinfo, MP_BUFFER_READ)) {
+        if(bufinfo.len < offset) {
+            mp_raise_ValueError(MP_ERROR_TEXT("offset is too large"));
+        }
+        // size of a single element in the buffer
+        uint8_t sz = sizeof(int16_t);
+        #if ULAB_UTILS_HAS_FROM_INT32_BUFFER | ULAB_UTILS_HAS_FROM_UINT32_BUFFER
+        if((buffer_type == UTILS_INT32_BUFFER) ||  (buffer_type == UTILS_UINT32_BUFFER)) {
+            sz = sizeof(int32_t);
+        }
+        #endif
+
+        // number of whole elements after the offset
+        size_t len = (bufinfo.len - offset) / sz;
+        if((len * sz) != (bufinfo.len - offset)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("buffer size must be a multiple of element size"));
+        }
+        if(mp_obj_get_int(args[1].u_obj) > 0) {
+            size_t count = mp_obj_get_int(args[1].u_obj);
+            if(len < count) {
+                mp_raise_ValueError(MP_ERROR_TEXT("buffer is smaller than requested size"));
+            } else {
+                len = count;
+            }
+        }
+        if(args[3].u_obj == mp_const_none) {
+            ndarray = ndarray_new_linear_array(len, NDARRAY_FLOAT);
+        } else {
+            if(ndarray->len < len) {
+                mp_raise_ValueError(MP_ERROR_TEXT("out array is too small"));
+            }
+        }
+        // start reading at the requested offset; `len` has already been
+        // calculated with the offset taken into account
+        uint8_t *buffer = (uint8_t *)bufinfo.buf + offset;
+
+        mp_float_t *array = (mp_float_t *)ndarray->array;
+        // Each element is copied with memcpy into a properly typed variable,
+        // because buffer + offset is not necessarily aligned for the
+        // integer type.
+        if(args[4].u_obj == mp_const_true) {
+            // swap the bytes before conversion; the scratch space is large
+            // enough for the biggest supported element
+            uint8_t swapped[sizeof(int32_t)];
+            #if ULAB_UTILS_HAS_FROM_INT16_BUFFER | ULAB_UTILS_HAS_FROM_UINT16_BUFFER
+            if((buffer_type == UTILS_INT16_BUFFER) || (buffer_type == UTILS_UINT16_BUFFER)) {
+                for(size_t i = 0; i < len; i++) {
+                    for(uint8_t j = 0; j < sz; j++) {
+                        swapped[sz - 1 - j] = *buffer++;
+                    }
+                    if(buffer_type == UTILS_INT16_BUFFER) {
+                        int16_t value;
+                        memcpy(&value, swapped, sizeof(value));
+                        *array++ = (mp_float_t)value;
+                    } else {
+                        uint16_t value;
+                        memcpy(&value, swapped, sizeof(value));
+                        *array++ = (mp_float_t)value;
+                    }
+                }
+            }
+            #endif
+            #if ULAB_UTILS_HAS_FROM_INT32_BUFFER | ULAB_UTILS_HAS_FROM_UINT32_BUFFER
+            if((buffer_type == UTILS_INT32_BUFFER) || (buffer_type == UTILS_UINT32_BUFFER)) {
+                for(size_t i = 0; i < len; i++) {
+                    for(uint8_t j = 0; j < sz; j++) {
+                        swapped[sz - 1 - j] = *buffer++;
+                    }
+                    if(buffer_type == UTILS_INT32_BUFFER) {
+                        int32_t value;
+                        memcpy(&value, swapped, sizeof(value));
+                        *array++ = (mp_float_t)value;
+                    } else {
+                        uint32_t value;
+                        memcpy(&value, swapped, sizeof(value));
+                        *array++ = (mp_float_t)value;
+                    }
+                }
+            }
+            #endif
+        } else {
+            #if ULAB_UTILS_HAS_FROM_INT16_BUFFER
+            if(buffer_type == UTILS_INT16_BUFFER) {
+                for(size_t i = 0; i < len; i++) {
+                    int16_t value;
+                    memcpy(&value, buffer, sizeof(value));
+                    *array++ = (mp_float_t)value;
+                    buffer += sz;
+                }
+            }
+            #endif
+            #if ULAB_UTILS_HAS_FROM_UINT16_BUFFER
+            if(buffer_type == UTILS_UINT16_BUFFER) {
+                for(size_t i = 0; i < len; i++) {
+                    uint16_t value;
+                    memcpy(&value, buffer, sizeof(value));
+                    *array++ = (mp_float_t)value;
+                    buffer += sz;
+                }
+            }
+            #endif
+            #if ULAB_UTILS_HAS_FROM_INT32_BUFFER
+            if(buffer_type == UTILS_INT32_BUFFER) {
+                for(size_t i = 0; i < len; i++) {
+                    int32_t value;
+                    memcpy(&value, buffer, sizeof(value));
+                    *array++ = (mp_float_t)value;
+                    buffer += sz;
+                }
+            }
+            #endif
+            #if ULAB_UTILS_HAS_FROM_UINT32_BUFFER
+            if(buffer_type == UTILS_UINT32_BUFFER) {
+                for(size_t i = 0; i < len; i++) {
+                    uint32_t value;
+                    memcpy(&value, buffer, sizeof(value));
+                    *array++ = (mp_float_t)value;
+                    buffer += sz;
+                }
+            }
+            #endif
+        }
+        return MP_OBJ_FROM_PTR(ndarray);
+    }
+    return mp_const_none;
+}
+
+// `#if`, and not `#ifdef`: the function is added to the module globals under
+// `#if ULAB_UTILS_HAS_FROM_INT16_BUFFER`, so it should not be compiled, when
+// the constant is defined as 0.
+#if ULAB_UTILS_HAS_FROM_INT16_BUFFER
+// from_int16_buffer: converts a buffer of signed 16-bit integers to floats
+static mp_obj_t utils_from_int16_buffer(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    return utils_from_intbuffer_helper(n_args, pos_args, kw_args, UTILS_INT16_BUFFER);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(utils_from_int16_buffer_obj, 1, utils_from_int16_buffer);
+#endif
+
+// `#if`, and not `#ifdef`: the function is added to the module globals under
+// `#if ULAB_UTILS_HAS_FROM_UINT16_BUFFER`, so it should not be compiled, when
+// the constant is defined as 0.
+#if ULAB_UTILS_HAS_FROM_UINT16_BUFFER
+// from_uint16_buffer: converts a buffer of unsigned 16-bit integers to floats
+static mp_obj_t utils_from_uint16_buffer(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    return utils_from_intbuffer_helper(n_args, pos_args, kw_args, UTILS_UINT16_BUFFER);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(utils_from_uint16_buffer_obj, 1, utils_from_uint16_buffer);
+#endif
+
+// `#if`, and not `#ifdef`: the function is added to the module globals under
+// `#if ULAB_UTILS_HAS_FROM_INT32_BUFFER`, so it should not be compiled, when
+// the constant is defined as 0.
+#if ULAB_UTILS_HAS_FROM_INT32_BUFFER
+// from_int32_buffer: converts a buffer of signed 32-bit integers to floats
+static mp_obj_t utils_from_int32_buffer(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    return utils_from_intbuffer_helper(n_args, pos_args, kw_args, UTILS_INT32_BUFFER);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(utils_from_int32_buffer_obj, 1, utils_from_int32_buffer);
+#endif
+
+// `#if`, and not `#ifdef`: the function is added to the module globals under
+// `#if ULAB_UTILS_HAS_FROM_UINT32_BUFFER`, so it should not be compiled, when
+// the constant is defined as 0.
+#if ULAB_UTILS_HAS_FROM_UINT32_BUFFER
+// from_uint32_buffer: converts a buffer of unsigned 32-bit integers to floats
+static mp_obj_t utils_from_uint32_buffer(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    return utils_from_intbuffer_helper(n_args, pos_args, kw_args, UTILS_UINT32_BUFFER);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(utils_from_uint32_buffer_obj, 1, utils_from_uint32_buffer);
+#endif
+
+#endif /* ULAB_UTILS_HAS_FROM_INT16_BUFFER | ULAB_UTILS_HAS_FROM_UINT16_BUFFER | ULAB_UTILS_HAS_FROM_INT32_BUFFER | ULAB_UTILS_HAS_FROM_UINT32_BUFFER */
+
+#if ULAB_UTILS_HAS_SPECTROGRAM
+//| import ulab.numpy
+//|
+//| def spectrogram(r: ulab.numpy.ndarray) -> ulab.numpy.ndarray:
+//|     """
+//|     :param ulab.numpy.ndarray r: A 1-dimension array of values whose size is a power of 2
+//|
+//|     Computes the spectrum of the input signal.  This is the absolute value of the (complex-valued) fft of the signal.
+//|     This function is similar to scipy's ``scipy.signal.welch`` https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html."""
+//|     ...
+//|
+
+mp_obj_t utils_spectrogram(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Calculates the absolute value of the Fourier transform of the input.
+    //
+    // Python-side arguments:
+    //   (positional) a 1D ndarray, whose length is a power of 2
+    //   (positional) only if ULAB_FFT_IS_NUMPY_COMPATIBLE is 0: an optional
+    //                second 1D ndarray of the same length, holding the
+    //                imaginary part of the input
+    //   scratchpad:  keyword-only; a dense 1D float ndarray of twice the
+    //                input length used as work space, or None (default), in
+    //                which case temporary storage is allocated and released
+    //   out:         keyword-only; a 1D float ndarray of the input length
+    //                receiving the results, or None (default), in which case
+    //                a new array is created
+    //   log:         keyword-only; if True, the logarithm of the spectrum
+    //                is returned (default False)
+    //
+    // Returns the ndarray holding the spectrum.
+    // Raises NotImplementedError, TypeError, or ValueError, if the
+    // arguments do not meet the requirements above.
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE }} ,
+        #if !ULAB_FFT_IS_NUMPY_COMPATIBLE
+        { MP_QSTR_, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        #endif
+        { MP_QSTR_scratchpad, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_out, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_NONE } },
+        { MP_QSTR_log, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = MP_ROM_FALSE } },
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type)) {
+        mp_raise_NotImplementedError(MP_ERROR_TEXT("spectrogram is defined for ndarrays only"));
+    }
+    ndarray_obj_t *in = MP_OBJ_TO_PTR(args[0].u_obj);
+
+    #if ULAB_MAX_DIMS > 1
+    if(in->ndim != 1) {
+        mp_raise_TypeError(MP_ERROR_TEXT("spectrogram is implemented for 1D arrays only"));
+    }
+    #endif
+
+    size_t len = in->len;
+    // Check if input is of length of power of 2
+    if((len & (len-1)) != 0) {
+        mp_raise_ValueError(MP_ERROR_TEXT("input array length must be power of 2"));
+    }
+
+    ndarray_obj_t *out = NULL;
+
+    // the indices of the keyword arguments depend on whether the second
+    // positional argument exists
+    #if ULAB_FFT_IS_NUMPY_COMPATIBLE
+    mp_obj_t scratchpad_object = args[1].u_obj;
+    mp_obj_t out_object = args[2].u_obj;
+    mp_obj_t log_object = args[3].u_obj;
+    #else
+    mp_obj_t scratchpad_object = args[2].u_obj;
+    mp_obj_t out_object = args[3].u_obj;
+    mp_obj_t log_object = args[4].u_obj;
+    #endif
+
+    if(out_object != mp_const_none) {
+        if(!mp_obj_is_type(out_object, &ulab_ndarray_type)) {
+            mp_raise_TypeError(MP_ERROR_TEXT("out must be an ndarray"));
+        }
+
+        out = MP_OBJ_TO_PTR(out_object);
+        if((out->dtype != NDARRAY_FLOAT) || (out->ndim != 1)){
+            mp_raise_TypeError(MP_ERROR_TEXT("out array must be a 1D array of float type"));
+        }
+        if(len != out->len) {
+            mp_raise_ValueError(MP_ERROR_TEXT("input and out arrays must have same length"));
+        }
+    } else {
+        out = ndarray_new_linear_array(len, NDARRAY_FLOAT);
+    }
+
+    ndarray_obj_t *scratchpad = NULL;
+    mp_float_t *tmp = NULL;
+
+    if(scratchpad_object != mp_const_none) {
+        if(!mp_obj_is_type(scratchpad_object, &ulab_ndarray_type)) {
+            mp_raise_TypeError(MP_ERROR_TEXT("scratchpad must be an ndarray"));
+        }
+
+        scratchpad = MP_OBJ_TO_PTR(scratchpad_object);
+        if(!ndarray_is_dense(scratchpad) || (scratchpad->ndim != 1) || (scratchpad->dtype != NDARRAY_FLOAT)) {
+            mp_raise_ValueError(MP_ERROR_TEXT("scratchpad must be a 1D dense float array"));
+        }
+        if(scratchpad->len != 2 * len) {
+            mp_raise_ValueError(MP_ERROR_TEXT("scratchpad must be twice as long as input"));
+        }
+
+        tmp = (mp_float_t *)scratchpad->array;
+    } else {
+        // no scratchpad was supplied: allocate the work space here; it is
+        // released at the end of the function
+        tmp = m_new0(mp_float_t, 2 * len);
+    }
+
+    uint8_t *array = (uint8_t *)in->array;
+
+    #if ULAB_FFT_IS_NUMPY_COMPATIBLE & ULAB_SUPPORTS_COMPLEX
+    // the work space holds interleaved (real, imaginary) pairs
+    if(in->dtype == NDARRAY_COMPLEX) {
+        uint8_t sz = 2 * sizeof(mp_float_t);
+        for(size_t i = 0; i < len; i++) {
+            memcpy(tmp, array, sz);
+            tmp += 2;
+            array += in->strides[ULAB_MAX_DIMS - 1];
+        }
+    } else {
+        mp_float_t (*func)(void *) = ndarray_get_float_function(in->dtype);
+        for(size_t i = 0; i < len; i++) {
+            *tmp++ = func(array);   // real part
+            *tmp++ = 0;             // imaginary part, clear
+            array += in->strides[ULAB_MAX_DIMS - 1];
+        }
+    }
+
+    // rewind to the beginning of the work space
+    tmp -= 2 * len;
+    fft_kernel(tmp, len, 1);
+    #else // we might have two real input vectors
+
+    // the work space holds `len` real parts, followed by `len` imaginary parts
+    ndarray_obj_t *in2 = NULL;
+
+    if(n_args == 2) {
+        if(!mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type)) {
+            mp_raise_NotImplementedError(MP_ERROR_TEXT("spectrogram is defined for ndarrays only"));
+        }
+        in2 = MP_OBJ_TO_PTR(args[1].u_obj);
+
+        #if ULAB_MAX_DIMS > 1
+        if(in2->ndim != 1) {
+            mp_raise_TypeError(MP_ERROR_TEXT("spectrogram is implemented for 1D arrays only"));
+        }
+        #endif
+        if(len != in2->len) {
+            mp_raise_TypeError(MP_ERROR_TEXT("input arrays are not compatible"));
+        }
+    }
+
+    mp_float_t (*func)(void *) = ndarray_get_float_function(in->dtype);
+
+    for(size_t i = 0; i < len; i++) {
+        *tmp++ = func(array);       // real part; imaginary will be cleared later
+        array += in->strides[ULAB_MAX_DIMS - 1];
+    }
+
+    if(n_args == 2) {
+        mp_float_t (*func2)(void *) = ndarray_get_float_function(in2->dtype);
+        array = (uint8_t *)in2->array;
+        for(size_t i = 0; i < len; i++) {
+            *tmp++ = func2(array);
+            array += in2->strides[ULAB_MAX_DIMS - 1];
+        }
+        // back to the beginning of the imaginary half
+        tmp -= len;
+    } else {
+        // if there is only one input argument, clear the imaginary part
+        memset(tmp, 0, len * sizeof(mp_float_t));
+    }
+
+    // back to the beginning of the work space
+    tmp -= len;
+
+    fft_kernel(tmp, tmp + len, len, 1);
+    #endif /* ULAB_FFT_IS_NUMPY_COMPATIBLE */
+
+    mp_float_t *spectrum = (mp_float_t *)out->array;
+    // Step between consecutive elements of `out`, in units of mp_float_t.
+    // The stride is a signed 32-bit quantity: keep it signed, and do not
+    // narrow it, otherwise negative or large strides of an `out` view would
+    // lead to writes outside of the array. The cast of sizeof keeps the
+    // division signed.
+    int32_t spectrum_sz = out->strides[ULAB_MAX_DIMS - 1] / (int32_t)sizeof(mp_float_t);
+
+    for(size_t i = 0; i < len; i++) {
+        #if ULAB_FFT_IS_NUMPY_COMPATIBLE
+        *spectrum = MICROPY_FLOAT_C_FUN(sqrt)(*tmp * *tmp + *(tmp + 1) * *(tmp + 1));
+        tmp += 2;
+        #else
+        *spectrum = MICROPY_FLOAT_C_FUN(sqrt)(*tmp * *tmp + *(tmp + len) * *(tmp + len));
+        tmp++;
+        #endif
+        if(log_object == mp_const_true) {
+            *spectrum = MICROPY_FLOAT_C_FUN(log)(*spectrum);
+        }
+        spectrum += spectrum_sz;
+    }
+
+    if(scratchpad_object == mp_const_none) {
+        // rewind to the beginning of the locally allocated work space
+        // (the loop above advanced tmp by len, or by 2 * len), and release it
+        tmp -= len;
+        #if ULAB_FFT_IS_NUMPY_COMPATIBLE
+        tmp -= len;
+        #endif
+        m_del(mp_float_t, tmp, 2 * len);
+    }
+    return MP_OBJ_FROM_PTR(out);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(utils_spectrogram_obj, 1, utils_spectrogram);
+
+#endif /* ULAB_UTILS_HAS_SPECTROGRAM */
+
+
+// Globals of the `utils` module: the module name, and one entry for each
+// function that is enabled through its ULAB_UTILS_HAS_* pre-processor constant.
+static const mp_rom_map_elem_t ulab_utils_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_utils) },
+    #if ULAB_UTILS_HAS_FROM_INT16_BUFFER
+        { MP_ROM_QSTR(MP_QSTR_from_int16_buffer), MP_ROM_PTR(&utils_from_int16_buffer_obj) },
+    #endif
+    #if ULAB_UTILS_HAS_FROM_UINT16_BUFFER
+        { MP_ROM_QSTR(MP_QSTR_from_uint16_buffer), MP_ROM_PTR(&utils_from_uint16_buffer_obj) },
+    #endif
+    #if ULAB_UTILS_HAS_FROM_INT32_BUFFER
+        { MP_ROM_QSTR(MP_QSTR_from_int32_buffer), MP_ROM_PTR(&utils_from_int32_buffer_obj) },
+    #endif
+    #if ULAB_UTILS_HAS_FROM_UINT32_BUFFER
+        { MP_ROM_QSTR(MP_QSTR_from_uint32_buffer), MP_ROM_PTR(&utils_from_uint32_buffer_obj) },
+    #endif
+    #if ULAB_UTILS_HAS_SPECTROGRAM
+        { MP_ROM_QSTR(MP_QSTR_spectrogram), MP_ROM_PTR(&utils_spectrogram_obj) },
+    #endif
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_utils_globals, ulab_utils_globals_table);
+
+const mp_obj_module_t ulab_utils_module = {
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_utils_globals,
+};
+#if CIRCUITPY_ULAB
+MP_REGISTER_MODULE(MP_QSTR_ulab_dot_utils, ulab_utils_module);
+#endif
+
+#endif /* ULAB_HAS_UTILS_MODULE */
diff --git a/tulip/shared/ulab/code/utils/utils.h b/tulip/shared/ulab/code/utils/utils.h
new file mode 100644
index 000000000..b2155c383
--- /dev/null
+++ b/tulip/shared/ulab/code/utils/utils.h
@@ -0,0 +1,19 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+*/
+
+#ifndef _UTILS_
+#define _UTILS_
+
+#include "../ulab.h"
+#include "../ndarray.h"
+
+extern const mp_obj_module_t ulab_utils_module;
+
+#endif
diff --git a/tulip/shared/ulab/docs/manual/Makefile b/tulip/shared/ulab/docs/manual/Makefile
new file mode 100644
index 000000000..a97f7258d
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/Makefile
@@ -0,0 +1,24 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# None of these targets produces a file of the same name; "clean" is listed,
+# too, so that a stray file called "clean" cannot prevent the rule from running.
+.PHONY: help clean Makefile
+
+# Remove the complete build directory.
+clean:
+	rm -rf "$(BUILDDIR)"
+
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/tulip/shared/ulab/docs/manual/make.bat b/tulip/shared/ulab/docs/manual/make.bat
new file mode 100644
index 000000000..6247f7e23
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+REM Work from the directory that contains this script; restored by popd at the end
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+REM SPHINXBUILD can be overridden from the environment
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+REM Without a target on the command line, print the Sphinx help
+if "%1" == "" goto help
+
+REM Probe run with all output discarded: errorlevel 9009 means that the command was not found
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+REM Pass the requested target to Sphinx in "make mode"
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/tulip/shared/ulab/docs/manual/source/conf.py b/tulip/shared/ulab/docs/manual/source/conf.py
new file mode 100644
index 000000000..c4a27e26a
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/conf.py
@@ -0,0 +1,111 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+#import sphinx_rtd_theme
+
+from sphinx.transforms import SphinxTransform
+from docutils import nodes
+from sphinx import addnodes
+
+# -- Project information -----------------------------------------------------
+
+project = 'The ulab book'
+copyright = '2019-2024, Zoltán Vörös and contributors'
+author = 'Zoltán Vörös'
+
+# The full version, including alpha/beta/rc tags
+release = '6.6.0'
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+latex_maketitle = r'''
+\begin{titlepage}
+\begin{flushright}
+\Huge\textbf{The $\mu$lab book}
+\vskip 0.5em
+\LARGE
+\textbf{Release %s}
+\vskip 5em
+\huge\textbf{Zoltán Vörös}
+\end{flushright}
+\begin{flushright}
+\LARGE
+\vskip 2em
+with contributions by
+\vskip 2em
+\textbf{Roberto Colistete Jr.}
+\vskip 0.2em
+\textbf{Jeff Epler}
+\vskip 0.2em
+\textbf{Taku Fukada}
+\vskip 0.2em
+\textbf{Diego Elio Pettenò}
+\vskip 0.2em
+\textbf{Scott Shawcroft}
+\vskip 5em
+\today
+\end{flushright}
+\end{titlepage}
+'''%release
+
+latex_elements = {
+    'maketitle': latex_maketitle
+}
+
+
+# Name of the root document (index.rst)
+master_doc = 'index'
+
+# NOTE(review): the following two assignments overwrite the `author` and
+# `copyright` values set in the "Project information" section above, so the
+# copyright string ends up being the author's name only -- confirm that this
+# is intended.
+author=u'Zoltán Vörös'
+copyright=author
+language='en'
+
+latex_documents = [
+(master_doc, 'the-ulab-book.tex', 'The $\mu$lab book',
+'Zoltán Vörös', 'manual'),
+]
+
+# Read the docs theme
+# READTHEDOCS is 'True' in the environment, when the build runs on Read the Docs.
+on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+if not on_rtd:
+    # local build: use sphinx_rtd_theme, if it is installed, otherwise fall
+    # back to the 'default' theme
+    try:
+        import sphinx_rtd_theme
+        html_theme = 'sphinx_rtd_theme'
+        html_theme_path = [sphinx_rtd_theme.get_html_theme_path(), '.']
+    except ImportError:
+        html_theme = 'default'
+        html_theme_path = ['.']
+else:
+    # on Read the Docs only the theme path is set; html_theme is not assigned here
+    html_theme_path = ['.']
diff --git a/tulip/shared/ulab/docs/manual/source/index.rst b/tulip/shared/ulab/docs/manual/source/index.rst
new file mode 100644
index 000000000..40fbc00df
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/index.rst
@@ -0,0 +1,39 @@
+
+.. ulab-manual documentation master file, created by
+   sphinx-quickstart on Sat Oct 19 12:48:00 2019.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to the ulab book!
+=======================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Introduction
+
+   ulab-intro
+
+.. toctree::
+   :maxdepth: 2
+   :caption: User's guide:
+
+   ulab-ndarray
+   numpy-functions
+   numpy-universal
+   numpy-fft
+   numpy-linalg
+   numpy-random
+   scipy-linalg
+   scipy-optimize
+   scipy-signal
+   scipy-special
+   ulab-utils
+   ulab-tricks
+   ulab-programming
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/tulip/shared/ulab/docs/manual/source/numpy-fft.rst b/tulip/shared/ulab/docs/manual/source/numpy-fft.rst
new file mode 100644
index 000000000..7da9b60e2
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/numpy-fft.rst
@@ -0,0 +1,197 @@
+
+numpy.fft
+=========
+
+Functions related to Fourier transforms can be called by prepending them
+with ``numpy.fft.``. The module defines the following two functions:
+
+1. `numpy.fft.fft <#fft>`__
+2. `numpy.fft.ifft <#ifft>`__
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.fft.ifft.html
+
+fft
+---
+
+Unless ``ulab`` is compiled with complex support (see below), its
+``ndarray`` cannot hold complex numbers, and therefore, the invocation of
+the Fourier transform differs from that in ``numpy``. In ``numpy``, you
+can simply pass an array or iterable to the function, and it will be
+treated as a complex array:
+
+.. code::
+
+    # code to be run in CPython
+    
+    fft.fft([1, 2, 3, 4, 1, 2, 3, 4])
+
+
+
+.. parsed-literal::
+
+    array([20.+0.j,  0.+0.j, -4.+4.j,  0.+0.j, -4.+0.j,  0.+0.j, -4.-4.j,
+            0.+0.j])
+
+
+
+**WARNING:** The array returned is also complex, i.e., the real and
+imaginary components are cast together. In ``ulab``, the real and
+imaginary parts are treated separately: you pass up to two
+``ndarray``\ s to the function. The first one holds the real part, and
+the optional second one holds the imaginary part. If the second argument
+is omitted, the imaginary part is assumed to be zero.
+
+**WARNING:** The function, as opposed to ``numpy``, returns a 2-tuple,
+whose elements are two ``ndarray``\ s, holding the real and imaginary
+parts of the transform separately.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    x = np.linspace(0, 10, num=1024)
+    y = np.sin(x)
+    z = np.zeros(len(x))
+    
+    a, b = np.fft.fft(x)
+    print('real part:\t', a)
+    print('\nimaginary part:\t', b)
+    
+    c, d = np.fft.fft(x, z)
+    print('\nreal part:\t', c)
+    print('\nimaginary part:\t', d)
+
+.. parsed-literal::
+
+    real part:	 array([5119.996, -5.004663, -5.004798, ..., -5.005482, -5.005643, -5.006577], dtype=float)
+    
+    imaginary part:	 array([0.0, 1631.333, 815.659, ..., -543.764, -815.6588, -1631.333], dtype=float)
+    
+    real part:	 array([5119.996, -5.004663, -5.004798, ..., -5.005482, -5.005643, -5.006577], dtype=float)
+    
+    imaginary part:	 array([0.0, 1631.333, 815.659, ..., -543.764, -815.6588, -1631.333], dtype=float)
+    
+
+
+ulab with complex support
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the ``ULAB_SUPPORTS_COMPLEX``, and ``ULAB_FFT_IS_NUMPY_COMPATIBLE``
+pre-processor constants are set to 1 in
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__
+as
+
+.. code:: c
+
+   // Adds support for complex ndarrays
+   #ifndef ULAB_SUPPORTS_COMPLEX
+   #define ULAB_SUPPORTS_COMPLEX               (1)
+   #endif
+
+.. code:: c
+
+   #ifndef ULAB_FFT_IS_NUMPY_COMPATIBLE
+   #define ULAB_FFT_IS_NUMPY_COMPATIBLE    (1)
+   #endif
+
+then the FFT routine will behave in a ``numpy``-compatible way: the
+single input array can either be real, in which case the imaginary part
+is assumed to be zero, or complex. The output is also complex.
+
+While ``numpy``-compatibility might be a desired feature, it has one
+side effect, namely, the FFT routine consumes approx. 50% more RAM. The
+reason for this lies in the implementation details.
+
+ifft
+----
+
+The above-mentioned rules apply to the inverse Fourier transform. The
+inverse is also normalised by ``N``, the number of elements, as is
+customary in ``numpy``. With the normalisation, we can ascertain that
+the inverse of the transform is equal to the original array.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    x = np.linspace(0, 10, num=1024)
+    y = np.sin(x)
+    
+    a, b = np.fft.fft(y)
+    
+    print('original vector:\t', y)
+    
+    y, z = np.fft.ifft(a, b)
+    # the real part should be equal to y
+    print('\nreal part of inverse:\t', y)
+    # the imaginary part should be equal to zero
+    print('\nimaginary part of inverse:\t', z)
+
+.. parsed-literal::
+
+    original vector:	 array([0.0, 0.009775016, 0.0195491, ..., -0.5275068, -0.5357859, -0.5440139], dtype=float)
+    
+    real part of inverse:	 array([-2.980232e-08, 0.0097754, 0.0195494, ..., -0.5275064, -0.5357857, -0.5440133], dtype=float)
+    
+    imaginary part of inverse:	 array([-2.980232e-08, -1.451171e-07, 3.693752e-08, ..., 6.44871e-08, 9.34986e-08, 2.18336e-07], dtype=float)
+    
+
+
+Note that unlike in ``numpy``, the length of the array on which the
+Fourier transform is carried out must be a power of 2. If this is not
+the case, the function raises a ``ValueError`` exception.
+
+ulab with complex support
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``fft.ifft`` function can also be made ``numpy``-compatible by
+setting the ``ULAB_SUPPORTS_COMPLEX``, and
+``ULAB_FFT_IS_NUMPY_COMPATIBLE`` pre-processor constants to 1.
+
+Computation and storage costs
+-----------------------------
+
+RAM
+~~~
+
+The FFT routine of ``ulab`` calculates the transform in place. This
+means that beyond reserving space for the two ``ndarray``\ s that will
+be returned (the computation uses these two as intermediate storage
+space), only a handful of temporary variables, all floats or 32-bit
+integers, are required.
+
+Speed of FFTs
+~~~~~~~~~~~~~
+
+A comment on the speed: a 1024-point transform implemented in python
+would cost around 90 ms, and 13 ms in assembly, if the code runs on the
+pyboard, v.1.1. You can gain a factor of four by moving to the D series;
+see https://github.com/peterhinch/micropython-fourier/blob/master/README.md#8-performance.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    x = np.linspace(0, 10, num=1024)
+    y = np.sin(x)
+    
+    @timeit
+    def np_fft(y):
+        return np.fft.fft(y)
+    
+    a, b = np_fft(y)
+
+.. parsed-literal::
+
+    execution time:  1985  us
+    
+
+
+The C implementation runs in less than 2 ms on the pyboard (we have just
+measured that), and has been reported to run in under 0.8 ms on the D
+series board. That is an improvement of at least a factor of four.
diff --git a/tulip/shared/ulab/docs/manual/source/numpy-functions.rst b/tulip/shared/ulab/docs/manual/source/numpy-functions.rst
new file mode 100644
index 000000000..252f5a574
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/numpy-functions.rst
@@ -0,0 +1,2195 @@
+
+Numpy functions
+===============
+
+This section of the manual discusses those functions that were adapted
+from ``numpy``. Functions with an asterisk accept complex arrays as
+arguments, if the firmware was compiled with complex support.
+
+1.  `numpy.all\* <#all>`__
+2.  `numpy.any\* <#any>`__
+3.  `numpy.argmax <#argmax>`__
+4.  `numpy.argmin <#argmin>`__
+5.  `numpy.argsort <#argsort>`__
+6.  `numpy.asarray\* <#asarray>`__
+7.  `numpy.bitwise_and <#bitwise_and>`__
+8.  `numpy.bitwise_or <#bitwise_and>`__
+9.  `numpy.bitwise_xor <#bitwise_and>`__
+10. `numpy.clip <#clip>`__
+11. `numpy.compress\* <#compress>`__
+12. `numpy.conjugate\* <#conjugate>`__
+13. `numpy.convolve\* <#convolve>`__
+14. `numpy.delete <#delete>`__
+15. `numpy.diff <#diff>`__
+16. `numpy.dot <#dot>`__
+17. `numpy.equal <#equal>`__
+18. `numpy.flip\* <#flip>`__
+19. `numpy.imag\* <#imag>`__
+20. `numpy.interp <#interp>`__
+21. `numpy.isfinite <#isfinite>`__
+22. `numpy.isinf <#isinf>`__
+23. `numpy.left_shift <#left_shift>`__
+24. `numpy.load <#load>`__
+25. `numpy.loadtxt <#loadtxt>`__
+26. `numpy.max <#max>`__
+27. `numpy.maximum <#maximum>`__
+28. `numpy.mean <#mean>`__
+29. `numpy.median <#median>`__
+30. `numpy.min <#min>`__
+31. `numpy.minimum <#minimum>`__
+32. `numpy.nonzero <#nonzero>`__
+33. `numpy.not_equal <#equal>`__
+34. `numpy.polyfit <#polyfit>`__
+35. `numpy.polyval <#polyval>`__
+36. `numpy.real\* <#real>`__
+37. `numpy.right_shift <#right_shift>`__
+38. `numpy.roll <#roll>`__
+39. `numpy.save <#save>`__
+40. `numpy.savetxt <#savetxt>`__
+41. `numpy.size <#size>`__
+42. `numpy.sort <#sort>`__
+43. `numpy.sort_complex\* <#sort_complex>`__
+44. `numpy.std <#std>`__
+45. `numpy.sum <#sum>`__
+46. `numpy.take\* <#take>`__
+47. `numpy.trace <#trace>`__
+48. `numpy.trapz <#trapz>`__
+49. `numpy.where <#where>`__
+
+all
+---
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.all.html
+
+The function takes one positional, and one keyword argument, the
+``axis``, with a default value of ``None``, and tests, whether *all*
+array elements along the given axis evaluate to ``True``. If the keyword
+argument is ``None``, the flattened array is inspected.
+
+Elements of an array evaluate to ``True``, if they are not equal to
+zero, or the Boolean ``False``. The return value is a Boolean
+``ndarray``.
+
+If the firmware was compiled with complex support, the function can
+accept complex arrays.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(12)).reshape((3, 4))
+    
+    print('\na:\n', a)
+    
+    b = np.all(a)
+    print('\nall of the flattened array:\n', b)
+    
+    c = np.all(a, axis=0)
+    print('\nall of a along 0th axis:\n', c)
+    
+    d = np.all(a, axis=1)
+    print('\nall of a along 1st axis:\n', d)
+
+.. parsed-literal::
+
+    
+    a:
+     array([[0.0, 1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0]], dtype=float64)
+    
+    all of the flattened array:
+     False
+    
+    all of a along 0th axis:
+     array([False, True, True, True], dtype=bool)
+    
+    all of a along 1st axis:
+     array([False, True, True], dtype=bool)
+    
+    
+
+
+any
+---
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.any.html
+
+The function takes one positional, and one keyword argument, the
+``axis``, with a default value of ``None``, and tests, whether *any*
+array element along the given axis evaluates to ``True``. If the keyword
+argument is ``None``, the flattened array is inspected.
+
+Elements of an array evaluate to ``True``, if they are not equal to
+zero, or the Boolean ``False``. The return value is a Boolean
+``ndarray``.
+
+If the firmware was compiled with complex support, the function can
+accept complex arrays.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(12)).reshape((3, 4))
+    
+    print('\na:\n', a)
+    
+    b = np.any(a)
+    print('\nany of the flattened array:\n', b)
+    
+    c = np.any(a, axis=0)
+    print('\nany of a along 0th axis:\n', c)
+    
+    d = np.any(a, axis=1)
+    print('\nany of a along 1st axis:\n', d)
+
+.. parsed-literal::
+
+    
+    a:
+     array([[0.0, 1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0]], dtype=float64)
+    
+    any of the flattened array:
+     True
+    
+    any of a along 0th axis:
+     array([True, True, True, True], dtype=bool)
+    
+    any of a along 1st axis:
+     array([True, True, True], dtype=bool)
+    
+    
+
+
+argmax
+------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html
+
+See `numpy.max <#max>`__.
+
+argmin
+------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmin.html
+
+See `numpy.max <#max>`__.
+
+argsort
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.argsort.html
+
+Similarly to `sort <#sort>`__, ``argsort`` takes a positional, and a
+keyword argument, and returns an unsigned short index array of type
+``ndarray`` with the same dimensions as the input, or, if ``axis=None``,
+as a row vector with length equal to the number of elements in the input
+(i.e., the flattened array). The indices in the output sort the input in
+ascending order. The routine in ``argsort`` is the same as in ``sort``,
+therefore, the comments on computational expenses (time and RAM) also
+apply. In particular, since no copy of the original data is required,
+virtually no RAM beyond the output array is used.
+
+Since the underlying container of the output array is of type
+``uint16_t``, neither of the output dimensions should be larger than
+65535. If that happens to be the case, the function will bail out with a
+``ValueError``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.float)
+    print('\na:\n', a)
+    b = np.argsort(a, axis=0)
+    print('\na sorted along vertical axis:\n', b)
+    
+    c = np.argsort(a, axis=1)
+    print('\na sorted along horizontal axis:\n', c)
+    
+    c = np.argsort(a, axis=None)
+    print('\nflattened a sorted:\n', c)
+
+.. parsed-literal::
+
+    
+    a:
+     array([[1.0, 12.0, 3.0, 0.0],
+           [5.0, 3.0, 4.0, 1.0],
+           [9.0, 11.0, 1.0, 8.0],
+           [7.0, 10.0, 0.0, 1.0]], dtype=float64)
+    
+    a sorted along vertical axis:
+     array([[0, 1, 3, 0],
+           [1, 3, 2, 1],
+           [3, 2, 0, 3],
+           [2, 0, 1, 2]], dtype=uint16)
+    
+    a sorted along horizontal axis:
+     array([[3, 0, 2, 1],
+           [3, 1, 2, 0],
+           [2, 3, 0, 1],
+           [2, 3, 0, 1]], dtype=uint16)
+    
+    Traceback (most recent call last):
+      File "/dev/shm/micropython.py", line 12, in <module>
+    NotImplementedError: argsort is not implemented for flattened arrays
+    
+
+
+Since during the sorting, only the indices are shuffled, ``argsort``
+does not modify the input array, as one can verify this by the following
+example:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([0, 5, 1, 3, 2, 4], dtype=np.uint8)
+    print('\na:\n', a)
+    b = np.argsort(a, axis=0)
+    print('\nsorting indices:\n', b)
+    print('\nthe original array:\n', a)
+
+.. parsed-literal::
+
+    
+    a:
+     array([0, 5, 1, 3, 2, 4], dtype=uint8)
+    
+    sorting indices:
+     array([0, 2, 4, 3, 5, 1], dtype=uint16)
+    
+    the original array:
+     array([0, 5, 1, 3, 2, 4], dtype=uint8)
+    
+    
+
+
+asarray
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.asarray.html
+
+The function takes a single positional argument, and an optional keyword
+argument, ``dtype``, with a default value of ``None``.
+
+If the positional argument is an ``ndarray``, and its ``dtype`` is
+identical to the value of the keyword argument, or if the keyword
+argument is ``None``, then the positional argument is simply returned.
+If the original ``dtype``, and the value of the keyword argument are
+different, then a copy is returned, with appropriate ``dtype``
+conversion.
+
+If the positional argument is an iterable, then the function is simply
+an alias for ``array``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9), dtype=np.uint8)
+    b = np.asarray(a)
+    c = np.asarray(a, dtype=np.int8)
+    print('a:{}'.format(a))
+    print('b:{}'.format(b))
+    print('a == b: {}'.format(a is b))
+    
+    print('\nc:{}'.format(c))
+    print('a == c: {}'.format(a is c))
+
+.. parsed-literal::
+
+    a:array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    b:array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    a == b: True
+    
+    c:array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int8)
+    a == c: False
+    
+    
+
+
+bitwise_and
+-----------
+
+``numpy``: https://numpy.org/doc/stable/reference/routines.bitwise.html
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.bitwise_and.html
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.bitwise_or.html
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.bitwise_xor.html
+
+Each of ``bitwise_and``, ``bitwise_or``, and ``bitwise_xor`` takes two
+integer-type ``ndarray``\ s as arguments, and returns the element-wise
+results of the ``AND``, ``OR``, and ``XOR`` operators. Broadcasting is
+supported. If the ``dtype`` of the input arrays is not an integer, an
+exception will be raised.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(8), dtype=np.uint8)
+    b = a + 1
+    
+    print(a)
+    print(b)
+    print('\nbitwise_and:\n', np.bitwise_and(a, b))
+    print('\nbitwise_or:\n', np.bitwise_or(a, b))
+    print('\nbitwise_xor:\n', np.bitwise_xor(a, b))
+
+.. parsed-literal::
+
+    array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint8)
+    array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    
+    bitwise_and:
+     array([0, 0, 2, 0, 4, 4, 6, 0], dtype=uint8)
+    
+    bitwise_or:
+     array([1, 3, 3, 7, 5, 7, 7, 15], dtype=uint8)
+    
+    bitwise_xor:
+     array([1, 3, 1, 7, 1, 3, 1, 15], dtype=uint8)
+    
+    
+
+
+clip
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.clip.html
+
+Clips an array, i.e., values that are outside of an interval are clipped
+to the interval edges. The function is equivalent to
+``maximum(a_min, minimum(a, a_max))``; broadcasting takes place exactly
+as in `minimum <#minimum>`__. If the arrays are of different ``dtype``,
+the output is upcast as in `Binary operators <#Binary-operators>`__.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9), dtype=np.uint8)
+    print('a:\t\t', a)
+    print('clipped:\t', np.clip(a, 3, 7))
+    
+    b = 3 * np.ones(len(a), dtype=np.float)
+    print('\na:\t\t', a)
+    print('b:\t\t', b)
+    print('clipped:\t', np.clip(a, b, 7))
+
+.. parsed-literal::
+
+    a:		 array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    clipped:	 array([3, 3, 3, 3, 4, 5, 6, 7, 7], dtype=uint8)
+    
+    a:		 array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    b:		 array([3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0], dtype=float64)
+    clipped:	 array([3.0, 3.0, 3.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.0], dtype=float64)
+    
+    
+
+
+compress
+--------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.compress.html
+
+The function returns selected slices of an array along a given axis. If
+the axis keyword is ``None``, the flattened array is used.
+
+If the firmware was compiled with complex support, the function can
+accept complex arguments.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(6)).reshape((2, 3))
+    
+    print('a:\n', a)
+    print('\ncompress(a):\n', np.compress([0, 1], a, axis=0))
+
+.. parsed-literal::
+
+    a:
+     array([[0.0, 1.0, 2.0],
+           [3.0, 4.0, 5.0]], dtype=float64)
+    
+    compress(a):
+     array([[3.0, 4.0, 5.0]], dtype=float64)
+    
+    
+
+
+conjugate
+---------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.conjugate.html
+
+If the firmware was compiled with complex support, the function
+calculates the complex conjugate of the input array. If the input array
+is of real ``dtype``, then the output is simply a copy, preserving the
+``dtype``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4], dtype=np.uint8)
+    b = np.array([1+1j, 2-2j, 3+3j, 4-4j], dtype=np.complex)
+    
+    print('a:\t\t', a)
+    print('conjugate(a):\t', np.conjugate(a))
+    print()
+    print('b:\t\t', b)
+    print('conjugate(b):\t', np.conjugate(b))
+
+.. parsed-literal::
+
+    a:		 array([1, 2, 3, 4], dtype=uint8)
+    conjugate(a):	 array([1, 2, 3, 4], dtype=uint8)
+    
+    b:		 array([1.0+1.0j, 2.0-2.0j, 3.0+3.0j, 4.0-4.0j], dtype=complex)
+    conjugate(b):	 array([1.0-1.0j, 2.0+2.0j, 3.0-3.0j, 4.0+4.0j], dtype=complex)
+    
+    
+
+
+convolve
+--------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html
+
+Returns the discrete, linear convolution of two one-dimensional arrays.
+
+Only the ``full`` mode is supported, and the ``mode`` named parameter is
+not accepted. Note that all other modes can be had by slicing a ``full``
+result.
+
+If the firmware was compiled with complex support, the function can
+accept complex arrays.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    x = np.array((1, 2, 3))
+    y = np.array((1, 10, 100, 1000))
+    
+    print(np.convolve(x, y))
+
+.. parsed-literal::
+
+    array([1.0, 12.0, 123.0, 1230.0, 2300.0, 3000.0], dtype=float64)
+    
+    
+
+
+delete
+------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.delete.html
+
+The function returns a new array with sub-arrays along an axis deleted.
+It takes two positional arguments, the array, and the indices, which
+will be removed, as well as the ``axis`` keyword argument with a default
+value of ``None``. If the ``axis`` is ``None``, the array will be flattened
+first.
+
+The second positional argument can be a scalar, or any ``micropython``
+iterable. Since ``range`` can also be passed in place of the indices,
+slicing can be emulated. If the indices are negative, the elements are
+counted from the end of the axis.
+
+Note that the function creates a copy of the indices first, because it
+is not guaranteed that the indices are ordered. Keep this in mind, when
+working with large arrays.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(25), dtype=np.uint8).reshape((5,5))
+    print('a:\n', a)
+    print('\naxis = 0\n', np.delete(a, 2, axis=0))
+    print('\naxis = 1\n', np.delete(a, -2, axis=1))
+    print('\naxis = None\n', np.delete(a, [0, 1, 2, 22]))
+
+.. parsed-literal::
+
+    a:
+     array([[0, 1, 2, 3, 4],
+           [5, 6, 7, 8, 9],
+           [10, 11, 12, 13, 14],
+           [15, 16, 17, 18, 19],
+           [20, 21, 22, 23, 24]], dtype=uint8)
+    
+    axis = 0
+     array([[0, 1, 2, 3, 4],
+           [5, 6, 7, 8, 9],
+           [15, 16, 17, 18, 19],
+           [20, 21, 22, 23, 24]], dtype=uint8)
+    
+    axis = 1
+     array([[0, 1, 2, 4],
+           [5, 6, 7, 9],
+           [10, 11, 12, 14],
+           [15, 16, 17, 19],
+           [20, 21, 22, 24]], dtype=uint8)
+    
+    axis = None
+     array([3, 4, 5, ..., 21, 23, 24], dtype=uint8)
+    
+    
+
+
+diff
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.diff.html
+
+The ``diff`` function returns the numerical derivative of the forward
+scheme, or more accurately, the differences of an ``ndarray`` along a
+given axis. The order of derivative can be stipulated with the ``n``
+keyword argument, which should be between 0, and 9. Default is 1. If
+higher order derivatives are required, they can be gotten by repeated
+calls to the function. The ``axis`` keyword argument should be -1 (last
+axis, in ``ulab`` equivalent to the second axis, and this also happens
+to be the default value), 0, or 1.
+
+Beyond the output array, the function requires only a couple of bytes of
+extra RAM for the differentiation stencil. (The stencil is an ``int8``
+array, one byte longer than ``n``. This also explains, why the highest
+order is 9: the coefficients of a ninth-order stencil all fit in signed
+bytes, while 10 would require ``int16``.) Note that as usual in
+numerical differentiation (and also in ``numpy``), the length of the
+respective axis will be reduced by ``n`` after the operation. If ``n``
+is larger than, or equal to the length of the axis, an empty array will
+be returned.
+
+**WARNING**: the ``diff`` function does not implement the ``prepend``
+and ``append`` keywords that can be found in ``numpy``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9), dtype=np.uint8)
+    a[3] = 10
+    print('a:\n', a)
+    
+    print('\nfirst derivative:\n', np.diff(a, n=1))
+    print('\nsecond derivative:\n', np.diff(a, n=2))
+    
+    c = np.array([[1, 2, 3, 4], [4, 3, 2, 1], [1, 4, 9, 16], [0, 0, 0, 0]])
+    print('\nc:\n', c)
+    print('\nfirst derivative, first axis:\n', np.diff(c, axis=0))
+    print('\nfirst derivative, second axis:\n', np.diff(c, axis=1))
+
+.. parsed-literal::
+
+    a:
+     array([0, 1, 2, 10, 4, 5, 6, 7, 8], dtype=uint8)
+    
+    first derivative:
+     array([1, 1, 8, 250, 1, 1, 1, 1], dtype=uint8)
+    
+    second derivative:
+     array([0, 249, 14, 249, 0, 0, 0], dtype=uint8)
+    
+    c:
+     array([[1.0, 2.0, 3.0, 4.0],
+           [4.0, 3.0, 2.0, 1.0],
+           [1.0, 4.0, 9.0, 16.0],
+           [0.0, 0.0, 0.0, 0.0]], dtype=float64)
+    
+    first derivative, first axis:
+     array([[3.0, 1.0, -1.0, -3.0],
+           [-3.0, 1.0, 7.0, 15.0],
+           [-1.0, -4.0, -9.0, -16.0]], dtype=float64)
+    
+    first derivative, second axis:
+     array([[1.0, 1.0, 1.0],
+           [-1.0, -1.0, -1.0],
+           [3.0, 5.0, 7.0],
+           [0.0, 0.0, 0.0]], dtype=float64)
+    
+    
+
+
+dot
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html
+
+**WARNING:** numpy applies upcasting rules for the multiplication of
+matrices, while ``ulab`` simply returns a float matrix.
+
+Once you can invert a matrix, you might want to know, whether the
+inversion is correct. You can simply take the original matrix and its
+inverse, and multiply them by calling the ``dot`` function, which takes
+the two matrices as its arguments. If the matrix dimensions do not
+match, the function raises a ``ValueError``. The result of the
+multiplication is expected to be the unit matrix, which is demonstrated
+below.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    m = np.array([[1, 2, 3], [4, 5, 6], [7, 10, 9]], dtype=np.uint8)
+    n = np.linalg.inv(m)
+    print("m:\n", m)
+    print("\nm^-1:\n", n)
+    # this should be the unit matrix
+    print("\nm*m^-1:\n", np.dot(m, n))
+
+.. parsed-literal::
+
+    m:
+     array([[1, 2, 3],
+           [4, 5, 6],
+           [7, 10, 9]], dtype=uint8)
+    
+    m^-1:
+     array([[-1.25, 1.0, -0.25],
+           [0.4999999999999998, -1.0, 0.5],
+           [0.4166666666666668, 0.3333333333333333, -0.25]], dtype=float64)
+    
+    m*m^-1:
+     array([[1.0, 0.0, 0.0],
+           [4.440892098500626e-16, 1.0, 0.0],
+           [8.881784197001252e-16, 0.0, 1.0]], dtype=float64)
+    
+    
+
+
+Note that for matrix multiplication you don’t necessarily need square
+matrices, it is enough, if their dimensions are compatible (i.e., the
+left-hand-side matrix has as many columns as the
+right-hand-side matrix has rows):
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    m = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.uint8)
+    n = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.uint8)
+    print(m)
+    print(n)
+    print(np.dot(m, n))
+
+.. parsed-literal::
+
+    array([[1, 2, 3, 4],
+           [5, 6, 7, 8]], dtype=uint8)
+    array([[1, 2],
+           [3, 4],
+           [5, 6],
+           [7, 8]], dtype=uint8)
+    array([[50.0, 60.0],
+           [114.0, 140.0]], dtype=float64)
+    
+    
+
+
+equal
+-----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.equal.html
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.not_equal.html
+
+In ``micropython``, equality of arrays or scalars can be established by
+utilising the ``==``, ``!=``, ``<``, ``>``, ``<=``, or ``>=`` binary
+operators. In ``circuitpython``, ``==`` and ``!=`` will produce
+unexpected results. In order to avoid this discrepancy, and to maintain
+compatibility with ``numpy``, ``ulab`` implements the ``equal`` and
+``not_equal`` operators that return the same results, irrespective of
+the ``python`` implementation.
+
+These two functions take two ``ndarray``\ s, or scalars as their
+arguments. No keyword arguments are implemented.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9))
+    b = np.zeros(9)
+    
+    print('a: ', a)
+    print('b: ', b)
+    print('\na == b: ', np.equal(a, b))
+    print('a != b: ', np.not_equal(a, b))
+    
+    # comparison with scalars
+    print('a == 2: ', np.equal(a, 2))
+
+.. parsed-literal::
+
+    a:  array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)
+    b:  array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=float64)
+    
+    a == b:  array([True, False, False, False, False, False, False, False, False], dtype=bool)
+    a != b:  array([False, True, True, True, True, True, True, True, True], dtype=bool)
+    a == 2:  array([False, False, True, False, False, False, False, False, False], dtype=bool)
+    
+    
+
+
+flip
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.flip.html
+
+The ``flip`` function takes one positional, an ``ndarray``, and one
+keyword argument, ``axis = None``, and reverses the order of elements
+along the given axis. If the keyword argument is ``None``, the matrix’
+entries are flipped along all axes. ``flip`` returns a new copy of the
+array.
+
+If the firmware was compiled with complex support, the function can
+accept complex arrays.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4, 5])
+    print("a: \t", a)
+    print("a flipped:\t", np.flip(a))
+    
+    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8)
+    print("\na flipped horizontally\n", np.flip(a, axis=1))
+    print("\na flipped vertically\n", np.flip(a, axis=0))
+    print("\na flipped horizontally+vertically\n", np.flip(a))
+
+.. parsed-literal::
+
+    a: 	 array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=float64)
+    a flipped:	 array([5.0, 4.0, 3.0, 2.0, 1.0], dtype=float64)
+    
+    a flipped horizontally
+     array([[3, 2, 1],
+           [6, 5, 4],
+           [9, 8, 7]], dtype=uint8)
+    
+    a flipped vertically
+     array([[7, 8, 9],
+           [4, 5, 6],
+           [1, 2, 3]], dtype=uint8)
+    
+    a flipped horizontally+vertically
+     array([9, 8, 7, 6, 5, 4, 3, 2, 1], dtype=uint8)
+    
+    
+
+
+imag
+----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.imag.html
+
+The ``imag`` function returns the imaginary part of an array, or scalar.
+It cannot accept a generic iterable as its argument. The function is
+defined only, if the firmware was compiled with complex support.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3], dtype=np.uint16)
+    print("a:\t\t", a)
+    print("imag(a):\t", np.imag(a))
+    
+    b = np.array([1, 2+1j, 3-1j], dtype=np.complex)
+    print("\nb:\t\t", b)
+    print("imag(b):\t", np.imag(b))
+
+.. parsed-literal::
+
+    a:		 array([1, 2, 3], dtype=uint16)
+    imag(a):	 array([0, 0, 0], dtype=uint16)
+    
+    b:		 array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=complex)
+    imag(b):	 array([0.0, 1.0, -1.0], dtype=float64)
+    
+    
+
+
+interp
+------
+
+``numpy``: https://docs.scipy.org/doc/numpy/reference/generated/numpy.interp.html
+
+The ``interp`` function returns the linearly interpolated values of a
+one-dimensional numerical array. It requires three positional
+arguments, ``x``, at which the interpolated values are evaluated,
+``xp``, the array of the independent data variable, and ``fp``, the
+array of the dependent values of the data. ``xp`` must be a
+monotonically increasing sequence of numbers.
+
+Two keyword arguments, ``left``, and ``right`` can also be supplied;
+these determine the return values, if ``x < xp[0]``, and ``x > xp[-1]``,
+respectively. If these arguments are not supplied, ``left``, and
+``right`` default to ``fp[0]``, and ``fp[-1]``, respectively.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    x = np.array([1, 2, 3, 4, 5]) - 0.2
+    xp = np.array([1, 2, 3, 4])
+    fp = np.array([1, 2, 3, 5])
+    
+    print(x)
+    print(np.interp(x, xp, fp))
+    print(np.interp(x, xp, fp, left=0.0))
+    print(np.interp(x, xp, fp, right=10.0))
+
+.. parsed-literal::
+
+    array([0.8, 1.8, 2.8, 3.8, 4.8], dtype=float64)
+    array([1.0, 1.8, 2.8, 4.6, 5.0], dtype=float64)
+    array([0.0, 1.8, 2.8, 4.6, 5.0], dtype=float64)
+    array([1.0, 1.8, 2.8, 4.6, 10.0], dtype=float64)
+    
+    
+
+
+isfinite
+--------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.isfinite.html
+
+Returns a Boolean array of the same shape as the input, or a
+``True/False``, if the input is a scalar. In the return value, all
+elements are ``True`` at positions, where the input value was finite.
+Integer types are automatically finite, therefore, if the input is of
+integer type, the output will be the ``True`` tensor.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print('isfinite(0): ', np.isfinite(0))
+    
+    a = np.array([1, 2, np.nan])
+    print('\n' + '='*20)
+    print('a:\n', a)
+    print('\nisfinite(a):\n', np.isfinite(a))
+    
+    b = np.array([1, 2, np.inf])
+    print('\n' + '='*20)
+    print('b:\n', b)
+    print('\nisfinite(b):\n', np.isfinite(b))
+    
+    c = np.array([1, 2, 3], dtype=np.uint16)
+    print('\n' + '='*20)
+    print('c:\n', c)
+    print('\nisfinite(c):\n', np.isfinite(c))
+
+.. parsed-literal::
+
+    isfinite(0):  True
+    
+    ====================
+    a:
+     array([1.0, 2.0, nan], dtype=float64)
+    
+    isfinite(a):
+     array([True, True, False], dtype=bool)
+    
+    ====================
+    b:
+     array([1.0, 2.0, inf], dtype=float64)
+    
+    isfinite(b):
+     array([True, True, False], dtype=bool)
+    
+    ====================
+    c:
+     array([1, 2, 3], dtype=uint16)
+    
+    isfinite(c):
+     array([True, True, True], dtype=bool)
+    
+    
+
+
+isinf
+-----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.isinf.html
+
+Similar to `isfinite <#isfinite>`__, but the output is ``True`` at
+positions, where the input is infinite. Integer types return the
+``False`` tensor.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print('isinf(0): ', np.isinf(0))
+    
+    a = np.array([1, 2, np.nan])
+    print('\n' + '='*20)
+    print('a:\n', a)
+    print('\nisinf(a):\n', np.isinf(a))
+    
+    b = np.array([1, 2, np.inf])
+    print('\n' + '='*20)
+    print('b:\n', b)
+    print('\nisinf(b):\n', np.isinf(b))
+    
+    c = np.array([1, 2, 3], dtype=np.uint16)
+    print('\n' + '='*20)
+    print('c:\n', c)
+    print('\nisinf(c):\n', np.isinf(c))
+
+.. parsed-literal::
+
+    isinf(0):  False
+    
+    ====================
+    a:
+     array([1.0, 2.0, nan], dtype=float64)
+    
+    isinf(a):
+     array([False, False, False], dtype=bool)
+    
+    ====================
+    b:
+     array([1.0, 2.0, inf], dtype=float64)
+    
+    isinf(b):
+     array([False, False, True], dtype=bool)
+    
+    ====================
+    c:
+     array([1, 2, 3], dtype=uint16)
+    
+    isinf(c):
+     array([False, False, False], dtype=bool)
+    
+    
+
+
+left_shift
+----------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.left_shift.html
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.right_shift.html
+
+``left_shift``, and ``right_shift`` both take two integer-type
+``ndarray``\ s, and bit-wise shift the elements of the first array by an
+amount given by the second array to the left, and right, respectively.
+Broadcasting is supported. If the ``dtype`` of the input arrays is not
+an integer, an exception will be raised.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.ones(7, dtype=np.uint8)
+    b = np.zeros(7, dtype=np.uint8) + 255
+    c = np.array(range(7), dtype=np.uint8) + 1
+    
+    print('a: ', a)
+    print('b: ', b)
+    print('c: ', c)
+    print('\na left shifted by c:\n', np.left_shift(a, c))
+    print('\nb right shifted by c:\n', np.right_shift(b, c))
+
+.. parsed-literal::
+
+    a:  array([1, 1, 1, 1, 1, 1, 1], dtype=uint8)
+    b:  array([255, 255, 255, 255, 255, 255, 255], dtype=uint8)
+    c:  array([1, 2, 3, 4, 5, 6, 7], dtype=uint8)
+    
+    a left shifted by c:
+     array([2, 4, 8, 16, 32, 64, 128], dtype=uint8)
+    
+    b right shifted by c:
+     array([127, 63, 31, 15, 7, 3, 1], dtype=uint8)
+    
+    
+
+
+load
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.load.html
+
+The function reads data from a file in ``numpy``\ ’s
+`platform-independent
+format <https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format>`__,
+and returns the generated array. If the endianness of the data in the
+file and the microcontroller differ, the bytes are automatically
+swapped.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.load('a.npy')
+    print(a)
+
+.. parsed-literal::
+
+    array([[0.0, 1.0, 2.0, 3.0, 4.0],
+           [5.0, 6.0, 7.0, 8.0, 9.0],
+           [10.0, 11.0, 12.0, 13.0, 14.0],
+           [15.0, 16.0, 17.0, 18.0, 19.0],
+           [20.0, 21.0, 22.0, 23.0, 24.0]], dtype=float64)
+    
+    
+
+
+loadtxt
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html
+
+The function reads data from a text file, and returns the generated
+array. It takes a file name as the single positional argument, and the
+following keyword arguments:
+
+1. ``comments='#'``
+2. ``dtype=float``
+3. ``delimiter=','``
+4. ``max_rows`` (with a default of all rows)
+5. ``skiprows=0``
+6. ``usecols`` (with a default of all columns)
+
+If ``dtype`` is supplied and is not ``float``, the data entries will be
+converted to the appropriate integer type by rounding the values.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print('read all data')
+    print(np.loadtxt('loadtxt.dat'))
+    
+    print('\nread maximum 5 rows (first row is a comment line)')
+    print(np.loadtxt('loadtxt.dat', max_rows=5))
+    
+    print('\nread maximum 5 rows, convert dtype (first row is a comment line)')
+    print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))
+    
+    print('\nskip the first 3 rows, convert dtype (first row is a comment line)')
+    print(np.loadtxt('loadtxt.dat', skiprows=3, dtype=np.uint8))
+
+.. parsed-literal::
+
+    read all data
+    array([[0.0, 1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0],
+           [12.0, 13.0, 14.0, 15.0],
+           [16.0, 17.0, 18.0, 19.0],
+           [20.0, 21.0, 22.0, 23.0],
+           [24.0, 25.0, 26.0, 27.0],
+           [28.00000000000001, 29.0, 30.0, 31.0],
+           [32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64)
+    
+    read maximum 5 rows (first row is a comment line)
+    array([[0.0, 1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0],
+           [12.0, 13.0, 14.0, 15.0]], dtype=float64)
+    
+    read maximum 5 rows, convert dtype (first row is a comment line)
+    array([[0, 1, 2, 3],
+           [4, 5, 6, 7],
+           [8, 9, 10, 11],
+           [12, 13, 14, 15]], dtype=uint8)
+    
+    skip the first 3 rows, convert dtype (first row is a comment line)
+    array([[8, 9, 10, 11],
+           [12, 13, 14, 15],
+           [16, 17, 18, 19],
+           [20, 21, 22, 23],
+           [24, 25, 26, 27],
+           [28, 29, 30, 31],
+           [32, 33, 34, 35]], dtype=uint8)
+    
+    
+
+
+mean
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.mean.html
+
+If the axis keyword is not specified, it assumes the default value of
+``None``, and returns the result of the computation for the flattened
+array. Otherwise, the calculation is along the given axis.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    print('a: \n', a)
+    print('mean, flat: ', np.mean(a))
+    print('mean, horizontal: ', np.mean(a, axis=1))
+    print('mean, vertical: ', np.mean(a, axis=0))
+
+.. parsed-literal::
+
+    a: 
+     array([[1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0],
+           [7.0, 8.0, 9.0]], dtype=float64)
+    mean, flat:  5.0
+    mean, horizontal:  array([2.0, 5.0, 8.0], dtype=float64)
+    mean, vertical:  array([4.0, 5.0, 6.0], dtype=float64)
+    
+    
+
+
+max
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.max.html
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.min.html
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmin.html
+
+**WARNING:** Difference to ``numpy``: the ``out`` keyword argument is
+not implemented.
+
+These functions follow the same pattern, and work with generic
+iterables, and ``ndarray``\ s. ``min``, and ``max`` return the minimum
+or maximum of a sequence. If the input array is two-dimensional, the
+``axis`` keyword argument can be supplied, in which case the
+minimum/maximum along the given axis will be returned. If ``axis=None``
+(this is also the default value), the minimum/maximum of the flattened
+array will be determined.
+
+``argmin/argmax`` return the position (index) of the minimum/maximum in
+the sequence.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 0, 1, 10])
+    print('a:', a)
+    print('min of a:', np.min(a))
+    print('argmin of a:', np.argmin(a))
+    
+    b = np.array([[1, 2, 0], [1, 10, -1]])
+    print('\nb:\n', b)
+    print('min of b (flattened):', np.min(b))
+    print('min of b (axis=0):', np.min(b, axis=0))
+    print('min of b (axis=1):', np.min(b, axis=1))
+
+.. parsed-literal::
+
+    a: array([1.0, 2.0, 0.0, 1.0, 10.0], dtype=float64)
+    min of a: 0.0
+    argmin of a: 2
+    
+    b:
+     array([[1.0, 2.0, 0.0],
+           [1.0, 10.0, -1.0]], dtype=float64)
+    min of b (flattened): -1.0
+    min of b (axis=0): array([1.0, 2.0, -1.0], dtype=float64)
+    min of b (axis=1): array([0.0, -1.0], dtype=float64)
+    
+    
+
+
+median
+------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.median.html
+
+The function computes the median along the specified axis, and returns
+the median of the array elements. If the ``axis`` keyword argument is
+``None``, the array is flattened first. The ``dtype`` of the result is
+always float.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(12), dtype=np.int8).reshape((3, 4))
+    print('a:\n', a)
+    print('\nmedian of the flattened array: ', np.median(a))
+    print('\nmedian along the vertical axis: ', np.median(a, axis=0))
+    print('\nmedian along the horizontal axis: ', np.median(a, axis=1))
+
+.. parsed-literal::
+
+    a:
+     array([[0, 1, 2, 3],
+           [4, 5, 6, 7],
+           [8, 9, 10, 11]], dtype=int8)
+    
+    median of the flattened array:  5.5
+    
+    median along the vertical axis:  array([4.0, 5.0, 6.0, 7.0], dtype=float64)
+    
+    median along the horizontal axis:  array([1.5, 5.5, 9.5], dtype=float64)
+    
+    
+
+
+min
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.min.html
+
+See `numpy.max <#max>`__.
+
+minimum
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.minimum.html
+
+See `numpy.maximum <#maximum>`__.
+
+maximum
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.maximum.html
+
+Returns the maximum of two arrays, or two scalars, or an array, and a
+scalar. If the arrays are of different ``dtype``, the output is upcast
+as in `Binary operators <#Binary-operators>`__. If both inputs are
+scalars, a scalar is returned. Only positional arguments are
+implemented.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)
+    b = np.array([5, 4, 3, 2, 1], dtype=np.float)
+    print('minimum of a, and b:')
+    print(np.minimum(a, b))
+    
+    print('\nmaximum of a, and b:')
+    print(np.maximum(a, b))
+    
+    print('\nmaximum of 1, and 5.5:')
+    print(np.maximum(1, 5.5))
+
+.. parsed-literal::
+
+    minimum of a, and b:
+    array([1.0, 2.0, 3.0, 2.0, 1.0], dtype=float64)
+    
+    maximum of a, and b:
+    array([5.0, 4.0, 3.0, 4.0, 5.0], dtype=float64)
+    
+    maximum of 1, and 5.5:
+    5.5
+    
+    
+
+
+nonzero
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.nonzero.html
+
+``nonzero`` returns the indices of the elements of an array that are not
+zero. If the number of dimensions of the array is larger than one, a
+tuple of arrays is returned, one for each dimension, containing the
+indices of the non-zero elements in that dimension.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9)) - 5
+    print('a:\n', a)
+    print(np.nonzero(a))
+    
+    a = a.reshape((3,3))
+    print('\na:\n', a)
+    print(np.nonzero(a))
+
+.. parsed-literal::
+
+    a:
+     array([-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=float64)
+    (array([0, 1, 2, 3, 4, 6, 7, 8], dtype=uint16),)
+    
+    a:
+     array([[-5.0, -4.0, -3.0],
+           [-2.0, -1.0, 0.0],
+           [1.0, 2.0, 3.0]], dtype=float64)
+    (array([0, 0, 0, 1, 1, 2, 2, 2], dtype=uint16), array([0, 1, 2, 0, 1, 0, 1, 2], dtype=uint16))
+    
+    
+
+
+not_equal
+---------
+
+See `numpy.equal <#equal>`__.
+
+polyfit
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.polyfit.html
+
+``polyfit`` takes two, or three arguments. The last one is the degree of
+the polynomial that will be fitted, the last but one is an array or
+iterable with the ``y`` (dependent) values, and the first one, an array
+or iterable with the ``x`` (independent) values, can be dropped. If that
+is the case, ``x`` will be generated in the function as
+``range(len(y))``.
+
+If the lengths of ``x``, and ``y`` are not the same, the function raises
+a ``ValueError``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    x = np.array([0, 1, 2, 3, 4, 5, 6])
+    y = np.array([9, 4, 1, 0, 1, 4, 9])
+    print('independent values:\t', x)
+    print('dependent values:\t', y)
+    print('fitted values:\t\t', np.polyfit(x, y, 2))
+    
+    # the same with missing x
+    print('\ndependent values:\t', y)
+    print('fitted values:\t\t', np.polyfit(y, 2))
+
+.. parsed-literal::
+
+    independent values:	 array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0], dtype=float64)
+    dependent values:	 array([9.0, 4.0, 1.0, 0.0, 1.0, 4.0, 9.0], dtype=float64)
+    fitted values:		 array([1.0, -6.0, 9.000000000000004], dtype=float64)
+    
+    dependent values:	 array([9.0, 4.0, 1.0, 0.0, 1.0, 4.0, 9.0], dtype=float64)
+    fitted values:		 array([1.0, -6.0, 9.000000000000004], dtype=float64)
+    
+    
+
+
+Execution time
+~~~~~~~~~~~~~~
+
+``polyfit`` is based on the inversion of a matrix (there is more on the
+background in https://en.wikipedia.org/wiki/Polynomial_regression), and
+it requires the intermediate storage of ``2*N*(deg+1)`` floats, where
+``N`` is the number of entries in the input array, and ``deg`` is the
+fit’s degree. The additional computation costs of the matrix inversion
+discussed in `linalg.inv <#inv>`__ also apply. The example from above
+needs around 150 microseconds to return:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    @timeit
+    def time_polyfit(x, y, n):
+        return np.polyfit(x, y, n)
+    
+    x = np.array([0, 1, 2, 3, 4, 5, 6])
+    y = np.array([9, 4, 1, 0, 1, 4, 9])
+    
+    time_polyfit(x, y, 2)
+
+.. parsed-literal::
+
+    execution time:  153  us
+
+
+polyval
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.polyval.html
+
+``polyval`` takes two arguments, both arrays or generic ``micropython``
+iterables returning scalars.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    p = [1, 1, 1, 0]
+    x = [0, 1, 2, 3, 4]
+    print('coefficients: ', p)
+    print('independent values: ', x)
+    print('\nvalues of p(x): ', np.polyval(p, x))
+    
+    # the same works with one-dimensional ndarrays
+    a = np.array(x)
+    print('\nndarray (a): ', a)
+    print('value of p(a): ', np.polyval(p, a))
+
+.. parsed-literal::
+
+    coefficients:  [1, 1, 1, 0]
+    independent values:  [0, 1, 2, 3, 4]
+    
+    values of p(x):  array([0.0, 3.0, 14.0, 39.0, 84.0], dtype=float64)
+    
+    ndarray (a):  array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=float64)
+    value of p(a):  array([0.0, 3.0, 14.0, 39.0, 84.0], dtype=float64)
+    
+    
+
+
+real
+----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.real.html
+
+The ``real`` function returns the real part of an array, or scalar. It
+cannot accept a generic iterable as its argument. The function is
+defined only if the firmware was compiled with complex support.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3], dtype=np.uint16)
+    print("a:\t\t", a)
+    print("real(a):\t", np.real(a))
+    
+    b = np.array([1, 2+1j, 3-1j], dtype=np.complex)
+    print("\nb:\t\t", b)
+    print("real(b):\t", np.real(b))
+
+.. parsed-literal::
+
+    a:		 array([1, 2, 3], dtype=uint16)
+    real(a):	 array([1, 2, 3], dtype=uint16)
+    
+    b:		 array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=complex)
+    real(b):	 array([1.0, 2.0, 3.0], dtype=float64)
+    
+    
+
+
+roll
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.roll.html
+
+The roll function shifts the content of a vector by the positions given
+as the second argument. If the ``axis`` keyword is supplied, the shift
+is applied to the given axis.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4, 5, 6, 7, 8])
+    print("a:\t\t\t", a)
+    
+    a = np.roll(a, 2)
+    print("a rolled to the left:\t", a)
+    
+    # this should be the original vector
+    a = np.roll(a, -2)
+    print("a rolled to the right:\t", a)
+
+.. parsed-literal::
+
+    a:			 array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)
+    a rolled to the left:	 array([7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0], dtype=float64)
+    a rolled to the right:	 array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)
+    
+    
+
+
+Rolling works with matrices, too. If the ``axis`` keyword is 0, the
+matrix is rolled along its vertical axis, otherwise, horizontally.
+
+Horizontal rolls are faster, because they require fewer steps, and
+larger memory chunks are copied, however, they also require more RAM:
+basically the whole row must be stored internally. Most expensive are
+the ``None`` keyword values, because with ``axis = None``, the array is
+flattened first, hence the row’s length is the size of the whole matrix.
+
+Vertical rolls require two internal copies of single columns.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(12)).reshape((3, 4))
+    print("a:\n", a)
+    a = np.roll(a, 2, axis=0)
+    print("\na rolled up:\n", a)
+    
+    a = np.array(range(12)).reshape((3, 4))
+    print("a:\n", a)
+    a = np.roll(a, -1, axis=1)
+    print("\na rolled to the left:\n", a)
+    
+    a = np.array(range(12)).reshape((3, 4))
+    print("a:\n", a)
+    a = np.roll(a, 1, axis=None)
+    print("\na rolled with None:\n", a)
+
+.. parsed-literal::
+
+    a:
+     array([[0.0, 1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0]], dtype=float64)
+    
+    a rolled up:
+     array([[4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0],
+           [0.0, 1.0, 2.0, 3.0]], dtype=float64)
+    a:
+     array([[0.0, 1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0]], dtype=float64)
+    
+    a rolled to the left:
+     array([[1.0, 2.0, 3.0, 0.0],
+           [5.0, 6.0, 7.0, 4.0],
+           [9.0, 10.0, 11.0, 8.0]], dtype=float64)
+    a:
+     array([[0.0, 1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0]], dtype=float64)
+    
+    a rolled with None:
+     array([[11.0, 0.0, 1.0, 2.0],
+           [3.0, 4.0, 5.0, 6.0],
+           [7.0, 8.0, 9.0, 10.0]], dtype=float64)
+    
+    
+
+
+save
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.save.html
+
+With the help of this function, a numerical array can be saved in
+``numpy``\ ’s `platform-independent
+format <https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format>`__.
+
+The function takes two positional arguments, the name of the output
+file, and the array.
+
+.. code::
+
+    # code to be run in CPython
+    
+    a = np.array(range(25)).reshape((5, 5))
+    np.save('a.npy', a)
+
+savetxt
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.savetxt.html
+
+With the help of this function, a numerical array can be saved in a text
+file. The function takes two positional arguments, the name of the
+output file, and the array, and also implements the ``comments='#'``,
+``delimiter=' '``, ``header=''``, and ``footer=''`` keyword
+arguments. The input is treated as of type ``float``, i.e., the output
+is always in the floating point representation.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(12), dtype=np.uint8).reshape((3, 4))
+    np.savetxt('savetxt.dat', a)
+    
+    with open('savetxt.dat', 'r') as fin:
+        print(fin.read())
+        
+    np.savetxt('savetxt.dat', a, 
+               comments='!', 
+               delimiter=';', 
+               header='col1;col2;col3;col4', 
+               footer='saved data')
+    
+    with open('savetxt.dat', 'r') as fin:
+        print(fin.read())
+
+.. parsed-literal::
+
+    0.000000000000000 1.000000000000000 2.000000000000000 3.000000000000000
+    4.000000000000000 5.000000000000000 6.000000000000000 7.000000000000000
+    8.000000000000000 9.000000000000000 10.000000000000000 11.000000000000000
+    
+    !col1;col2;col3;col4
+    0.000000000000000;1.000000000000000;2.000000000000000;3.000000000000000
+    4.000000000000000;5.000000000000000;6.000000000000000;7.000000000000000
+    8.000000000000000;9.000000000000000;10.000000000000000;11.000000000000000
+    !saved data
+    
+    
+    
+
+
+size
+----
+
+The function takes a single positional argument, and an optional keyword
+argument, ``axis``, with a default value of ``None``, and returns the
+size of an array along that axis. If ``axis`` is ``None``, the total
+length of the array (the product of the elements of its shape) is
+returned.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.ones((2, 3))
+    
+    print(a)
+    print('size(a, axis=0): ', np.size(a, axis=0))
+    print('size(a, axis=1): ', np.size(a, axis=1))
+    print('size(a, axis=None): ', np.size(a, axis=None))
+
+.. parsed-literal::
+
+    array([[1.0, 1.0, 1.0],
+           [1.0, 1.0, 1.0]], dtype=float64)
+    size(a, axis=0):  2
+    size(a, axis=1):  3
+    size(a, axis=None):  6
+    
+    
+
+
+sort
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.sort.html
+
+The sort function takes an ndarray, and sorts its elements in ascending
+order along the specified axis using a heap sort algorithm. As opposed
+to the ``.sort()`` method discussed earlier, this function creates a
+copy of its input before sorting, and at the end, returns this copy.
+Sorting takes place in place, without auxiliary storage. The ``axis``
+keyword argument takes on the possible values of -1 (the last axis, in
+``ulab`` equivalent to the second axis, and this also happens to be the
+default value), 0, 1, or ``None``. The first three cases are identical
+to those in `diff <#diff>`__, while the last one flattens the array
+before sorting.
+
+If descending order is required, the result can simply be ``flip``\ ped,
+see `flip <#flip>`__.
+
+**WARNING:** ``numpy`` defines the ``kind``, and ``order`` keyword
+arguments that are not implemented here. The function in ``ulab`` always
+uses heap sort, and since ``ulab`` does not have the concept of data
+fields, the ``order`` keyword argument would have no meaning.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.float)
+    print('\na:\n', a)
+    b = np.sort(a, axis=0)
+    print('\na sorted along vertical axis:\n', b)
+    
+    c = np.sort(a, axis=1)
+    print('\na sorted along horizontal axis:\n', c)
+    
+    c = np.sort(a, axis=None)
+    print('\nflattened a sorted:\n', c)
+
+.. parsed-literal::
+
+    
+    a:
+     array([[1.0, 12.0, 3.0, 0.0],
+           [5.0, 3.0, 4.0, 1.0],
+           [9.0, 11.0, 1.0, 8.0],
+           [7.0, 10.0, 0.0, 1.0]], dtype=float64)
+    
+    a sorted along vertical axis:
+     array([[1.0, 3.0, 0.0, 0.0],
+           [5.0, 10.0, 1.0, 1.0],
+           [7.0, 11.0, 3.0, 1.0],
+           [9.0, 12.0, 4.0, 8.0]], dtype=float64)
+    
+    a sorted along horizontal axis:
+     array([[0.0, 1.0, 3.0, 12.0],
+           [1.0, 3.0, 4.0, 5.0],
+           [1.0, 8.0, 9.0, 11.0],
+           [0.0, 1.0, 7.0, 10.0]], dtype=float64)
+    
+    flattened a sorted:
+     array([0.0, 0.0, 1.0, ..., 10.0, 11.0, 12.0], dtype=float64)
+    
+    
+
+
+Heap sort requires :math:`\sim N\log N` operations, and notably, the
+worst case costs only 20% more time than the average. In order to get an
+order-of-magnitude estimate, we will take the sine of 1000 uniformly
+spaced numbers between 0, and two pi, and sort them:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    @timeit
+    def sort_time(array):
+        return np.sort(array)
+    
+    b = np.sin(np.linspace(0, 6.28, num=1000))
+    print('b: ', b)
+    sort_time(b)
+    print('\nb sorted:\n', b)
+
+sort_complex
+------------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.sort_complex.html
+
+If the firmware was compiled with complex support, the function sorts
+the input array first according to its real part, and then the imaginary
+part. The input must be a one-dimensional array. The output is always of
+``dtype`` complex, even if the input was real integer.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([5, 4, 3, 2, 1], dtype=np.int16)
+    print('a:\t\t\t', a)
+    print('sort_complex(a):\t', np.sort_complex(a))
+    print()
+    
+    b = np.array([5, 4+3j, 4-2j, 0, 1j], dtype=np.complex)
+    print('b:\t\t\t', b)
+    print('sort_complex(b):\t', np.sort_complex(b))
+
+.. parsed-literal::
+
+    a:			 array([5, 4, 3, 2, 1], dtype=int16)
+    sort_complex(a):	 array([1.0+0.0j, 2.0+0.0j, 3.0+0.0j, 4.0+0.0j, 5.0+0.0j], dtype=complex)
+    
+    b:			 array([5.0+0.0j, 4.0+3.0j, 4.0-2.0j, 0.0+0.0j, 0.0+1.0j], dtype=complex)
+    sort_complex(b):	 array([0.0+0.0j, 0.0+1.0j, 4.0-2.0j, 4.0+3.0j, 5.0+0.0j], dtype=complex)
+    
+    
+
+
+std
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html
+
+If the axis keyword is not specified, it assumes the default value of
+``None``, and returns the result of the computation for the flattened
+array. Otherwise, the calculation is along the given axis.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    print('a: \n', a)
+    print('sum, flat array: ', np.std(a))
+    print('std, vertical: ', np.std(a, axis=0))
+    print('std, horizonal: ', np.std(a, axis=1))
+
+.. parsed-literal::
+
+    a: 
+     array([[1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0],
+           [7.0, 8.0, 9.0]], dtype=float64)
+    sum, flat array:  2.581988897471611
+    std, vertical:  array([2.449489742783178, 2.449489742783178, 2.449489742783178], dtype=float64)
+    std, horizonal:  array([0.8164965809277261, 0.8164965809277261, 0.8164965809277261], dtype=float64)
+    
+    
+
+
+sum
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.sum.html
+
+If the axis keyword is not specified, it assumes the default value of
+``None``, and returns the result of the computation for the flattened
+array. Otherwise, the calculation is along the given axis.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    print('a: \n', a)
+    
+    print('sum, flat array: ', np.sum(a))
+    print('sum, horizontal: ', np.sum(a, axis=1))
+    print('std, vertical: ', np.sum(a, axis=0))
+
+.. parsed-literal::
+
+    a: 
+     array([[1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0],
+           [7.0, 8.0, 9.0]], dtype=float64)
+    sum, flat array:  45.0
+    sum, horizontal:  array([6.0, 15.0, 24.0], dtype=float64)
+    std, vertical:  array([12.0, 15.0, 18.0], dtype=float64)
+    
+    
+
+
+take
+----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.take.html
+
+The ``take`` method takes elements from an array along an axis. The
+function accepts two positional arguments, the array, and the indices,
+which is either a ``python`` iterable, or a one-dimensional ``ndarray``,
+as well as three keyword arguments, the ``axis``, which can be ``None``,
+or an integer, ``out``, which can be ``None``, or an ``ndarray`` with
+the proper dimensions, and ``mode``, which can be one of the strings
+``raise``, ``wrap``, or ``clip``. This last argument determines how
+out-of-bounds indices will be treated. The default value is ``raise``,
+which raises an exception. ``wrap`` takes the indices modulo the length
+of the ``axis``, while ``clip`` pegs the values at the 0, and the length
+of the ``axis``. If ``axis`` is ``None``, then ``take`` operates on the
+flattened array.
+
+The function can be regarded as a method of advanced slicing: as opposed
+to standard slicing, where the indices are distributed uniformly and in
+either increasing or decreasing order, ``take`` can take indices in an
+arbitrary order.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(12)).reshape((3, 4))
+    print('\na:', a)
+    
+    print('\nslices taken along first axis')
+    print(np.take(a, (0, 2, 2, 1), axis=0))
+    
+    print('\nslices taken along second axis')
+    print(np.take(a, (0, 2, 2, 1), axis=1))
+
+.. parsed-literal::
+
+    
+    a: array([[0.0, 1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0, 7.0],
+           [8.0, 9.0, 10.0, 11.0]], dtype=float64)
+    
+    slices taken along first axis
+    array([[0.0, 1.0, 2.0, 3.0],
+           [8.0, 9.0, 10.0, 11.0],
+           [8.0, 9.0, 10.0, 11.0],
+           [4.0, 5.0, 6.0, 7.0]], dtype=float64)
+    
+    slices taken along second axis
+    array([[0.0, 2.0, 2.0, 1.0],
+           [2.0, 3.0, 4.0, 5.0],
+           [6.0, 7.0, 8.0, 9.0]], dtype=float64)
+    
+    
+
+
+trace
+-----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.trace.html
+
+The ``trace`` function returns the sum of the diagonal elements of a
+square matrix. If the input argument is not a square matrix, an
+exception will be raised.
+
+The scalar so returned will inherit the type of the input array, i.e.,
+integer arrays have integer trace, and floating point arrays a floating
+point trace.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[25, 15, -5], [15, 18,  0], [-5,  0, 11]], dtype=np.int8)
+    print('a: ', a)
+    print('\ntrace of a: ', np.trace(a))
+    
+    b = np.array([[25, 15, -5], [15, 18,  0], [-5,  0, 11]], dtype=np.float)
+    
+    print('='*20 + '\nb: ', b)
+    print('\ntrace of b: ', np.trace(b))
+
+.. parsed-literal::
+
+    a:  array([[25, 15, -5],
+           [15, 18, 0],
+           [-5, 0, 11]], dtype=int8)
+    
+    trace of a:  54
+    ====================
+    b:  array([[25.0, 15.0, -5.0],
+           [15.0, 18.0, 0.0],
+           [-5.0, 0.0, 11.0]], dtype=float64)
+    
+    trace of b:  54.0
+    
+    
+
+
+trapz
+-----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.trapz.html
+
+The function takes one or two one-dimensional ``ndarray``\ s, and
+integrates the dependent values (``y``) using the trapezoidal rule. If
+the independent variable (``x``) is given, that is taken as the sample
+points corresponding to ``y``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    x = np.linspace(0, 9, num=10)
+    y = x*x
+    
+    print('x: ',  x)
+    print('y: ',  y)
+    print('============================')
+    print('integral of y: ', np.trapz(y))
+    print('integral of y at x: ', np.trapz(y, x=x))
+
+.. parsed-literal::
+
+    x:  array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], dtype=float64)
+    y:  array([0.0, 1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 49.0, 64.0, 81.0], dtype=float64)
+    ============================
+    integral of y:  244.5
+    integral of y at x:  244.5
+    
+    
+
+
+where
+-----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.where.html
+
+The function takes three positional arguments, ``condition``, ``x``, and
+``y``, and returns a new ``ndarray``, whose values are taken from either
+``x``, or ``y``, depending on the truthiness of ``condition``. The three
+arguments are broadcast together, and the function raises a
+``ValueError`` exception, if broadcasting is not possible.
+
+The function is implemented for ``ndarray``\ s only: other iterable
+types can be passed after casting them to an ``ndarray`` by calling the
+``array`` constructor.
+
+If the ``dtype``\ s of ``x``, and ``y`` differ, the output is upcast as
+discussed earlier.
+
+Note that the ``condition`` is expanded into a Boolean ``ndarray``.
+This means that the storage required to hold the condition should be
+taken into account, whenever the function is called.
+
+The following example returns an ``ndarray`` of length 4, with 1 at
+positions, where ``condition`` is smaller than 3, and with -1 otherwise.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    
+    from ulab import numpy as np
+    
+    condition = np.array([1, 2, 3, 4], dtype=np.uint8)
+    print(np.where(condition < 3, 1, -1))
+
+.. parsed-literal::
+
+    array([1, 1, -1, -1], dtype=int16)
+    
+    
+
+
+The next snippet shows, how values from two arrays can be fed into the
+output:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    
+    from ulab import numpy as np
+    
+    condition = np.array([1, 2, 3, 4], dtype=np.uint8)
+    x = np.array([11, 22, 33, 44], dtype=np.uint8)
+    y = np.array([1, 2, 3, 4], dtype=np.uint8)
+    print(np.where(condition < 3, x, y))
+
+.. parsed-literal::
+
+    array([11, 22, 3, 4], dtype=uint8)
+    
+    
+
diff --git a/tulip/shared/ulab/docs/manual/source/numpy-linalg.rst b/tulip/shared/ulab/docs/manual/source/numpy-linalg.rst
new file mode 100644
index 000000000..8439f337f
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/numpy-linalg.rst
@@ -0,0 +1,386 @@
+
+numpy.linalg
+============
+
+Functions in the ``linalg`` module can be called by prefixing them with
+``numpy.linalg.``. The module defines the following six functions:
+
+1. `numpy.linalg.cholesky <#cholesky>`__
+2. `numpy.linalg.det <#det>`__
+3. `numpy.linalg.eig <#eig>`__
+4. `numpy.linalg.inv <#inv>`__
+5. `numpy.linalg.norm <#norm>`__
+6. `numpy.linalg.qr <#qr>`__
+
+cholesky
+--------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.linalg.cholesky.html
+
+The function of the Cholesky decomposition takes a positive definite,
+symmetric square matrix as its single argument, and returns the *square
+root matrix* in the lower triangular form. If the input argument does
+not fulfill the positivity or symmetry condition, a ``ValueError`` is
+raised.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[25, 15, -5], [15, 18,  0], [-5,  0, 11]])
+    print('a: ', a)
+    print('\n' + '='*20 + '\nCholesky decomposition\n', np.linalg.cholesky(a))
+
+.. parsed-literal::
+
+    a:  array([[25.0, 15.0, -5.0],
+    	 [15.0, 18.0, 0.0],
+    	 [-5.0, 0.0, 11.0]], dtype=float)
+    
+    ====================
+    Cholesky decomposition
+     array([[5.0, 0.0, 0.0],
+    	 [3.0, 3.0, 0.0],
+    	 [-1.0, 1.0, 3.0]], dtype=float)
+    
+    
+
+
+det
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.det.html
+
+The ``det`` function takes a square matrix as its single argument, and
+calculates the determinant. The calculation is based on successive
+elimination of the matrix elements, and the return value is a float,
+even if the input array was of integer type.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2], [3, 4]], dtype=np.uint8)
+    print(np.linalg.det(a))
+
+.. parsed-literal::
+
+    -2.0
+    
+
+
+Benchmark
+~~~~~~~~~
+
+Since the routine for calculating the determinant is pretty much the
+same as for finding the `inverse of a matrix <#inv>`__, the execution
+times are similar:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    @timeit
+    def matrix_det(m):
+        return np.linalg.inv(m)
+    
+    m = np.array([[1, 2, 3, 4, 5, 6, 7, 8], [0, 5, 6, 4, 5, 6, 4, 5], 
+                  [0, 0, 9, 7, 8, 9, 7, 8], [0, 0, 0, 10, 11, 12, 11, 12], 
+                 [0, 0, 0, 0, 4, 6, 7, 8], [0, 0, 0, 0, 0, 5, 6, 7], 
+                 [0, 0, 0, 0, 0, 0, 7, 6], [0, 0, 0, 0, 0, 0, 0, 2]])
+    
+    matrix_det(m)
+
+.. parsed-literal::
+
+    execution time:  294  us
+    
+
+
+eig
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html
+
+The ``eig`` function calculates the eigenvalues and the eigenvectors of
+a real, symmetric square matrix. If the matrix is not symmetric, a
+``ValueError`` will be raised. The function takes a single argument, and
+returns a tuple with the eigenvalues, and eigenvectors. With the help of
+the eigenvectors, amongst other things, you can implement sophisticated
+stabilisation routines for robots.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2, 1, 4], [2, 5, 3, 5], [1, 3, 6, 1], [4, 5, 1, 7]], dtype=np.uint8)
+    x, y = np.linalg.eig(a)
+    print('eigenvectors of a:\n', y)
+    print('\neigenvalues of a:\n', x)
+
+.. parsed-literal::
+
+    eigenvectors of a:
+     array([[0.8151560042509081, -0.4499411232970823, -0.1644660242574522, 0.3256141906686505],
+           [0.2211334179893007, 0.7846992598235538, 0.08372081379922657, 0.5730077734355189],
+           [-0.1340114162071679, -0.3100776411558949, 0.8742786816656, 0.3486109343758527],
+           [-0.5183258053659028, -0.292663481927148, -0.4489749870391468, 0.6664142156731531]], dtype=float)
+    
+    eigenvalues of a:
+     array([-1.165288365404889, 0.8029365530314914, 5.585625756072663, 13.77672605630074], dtype=float)
+    
+    
+
+
+The same matrix diagonalised with ``numpy`` yields:
+
+.. code::
+
+    # code to be run in CPython
+    
+    a = array([[1, 2, 1, 4], [2, 5, 3, 5], [1, 3, 6, 1], [4, 5, 1, 7]], dtype=np.uint8)
+    x, y = eig(a)
+    print('eigenvectors of a:\n', y)
+    print('\neigenvalues of a:\n', x)
+
+.. parsed-literal::
+
+    eigenvectors of a:
+     [[ 0.32561419  0.815156    0.44994112 -0.16446602]
+     [ 0.57300777  0.22113342 -0.78469926  0.08372081]
+     [ 0.34861093 -0.13401142  0.31007764  0.87427868]
+     [ 0.66641421 -0.51832581  0.29266348 -0.44897499]]
+    
+    eigenvalues of a:
+     [13.77672606 -1.16528837  0.80293655  5.58562576]
+
+
+When comparing results, we should keep two things in mind:
+
+1. the eigenvalues and eigenvectors are not necessarily sorted in the
+   same way
+2. an eigenvector can be multiplied by an arbitrary non-zero scalar, and
+   it is still an eigenvector with the same eigenvalue. This is why all
+   signs of the eigenvector belonging to 0.80 are flipped in ``ulab``
+   with respect to ``numpy``. This difference, however, is of
+   absolutely no consequence.
+
+Computation expenses
+~~~~~~~~~~~~~~~~~~~~
+
+Since the function is based on `Givens
+rotations <https://en.wikipedia.org/wiki/Givens_rotation>`__ and runs
+till convergence is achieved, or till the maximum number of allowed
+rotations is exhausted, there is no universal estimate for the time
+required to find the eigenvalues. However, an order of magnitude can, at
+least, be guessed based on the measurement below:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    @timeit
+    def matrix_eig(a):
+        return np.linalg.eig(a)
+    
+    a = np.array([[1, 2, 1, 4], [2, 5, 3, 5], [1, 3, 6, 1], [4, 5, 1, 7]], dtype=np.uint8)
+    
+    matrix_eig(a)
+
+.. parsed-literal::
+
+    execution time:  111  us
+    
+
+
+inv
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.linalg.inv.html
+
+A square matrix, provided that it is not singular, can be inverted by
+calling the ``inv`` function that takes a single argument. The inversion
+is based on successive elimination of elements in the lower left
+triangle, and raises a ``ValueError`` exception, if the matrix turns out
+to be singular (i.e., one of the diagonal entries is zero).
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    m = np.array([[1, 2, 3, 4], [4, 5, 6, 4], [7, 8.6, 9, 4], [3, 4, 5, 6]])
+    
+    print(np.linalg.inv(m))
+
+.. parsed-literal::
+
+    array([[-2.166666666666667, 1.500000000000001, -0.8333333333333337, 1.0],
+           [1.666666666666667, -3.333333333333335, 1.666666666666668, -0.0],
+           [0.1666666666666666, 2.166666666666668, -0.8333333333333337, -1.0],
+           [-0.1666666666666667, -0.3333333333333333, 0.0, 0.5]], dtype=float64)
+    
+    
+
+
+Computation expenses
+~~~~~~~~~~~~~~~~~~~~
+
+Note that the cost of inverting a matrix is approximately twice as many
+floats (RAM), as the number of entries in the original matrix, and
+approximately as many operations, as the number of entries. Here are a
+couple of numbers:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    @timeit
+    def invert_matrix(m):
+        return np.linalg.inv(m)
+    
+    m = np.array([[1, 2,], [4, 5]])
+    print('2 by 2 matrix:')
+    invert_matrix(m)
+    
+    m = np.array([[1, 2, 3, 4], [4, 5, 6, 4], [7, 8.6, 9, 4], [3, 4, 5, 6]])
+    print('\n4 by 4 matrix:')
+    invert_matrix(m)
+    
+    m = np.array([[1, 2, 3, 4, 5, 6, 7, 8], [0, 5, 6, 4, 5, 6, 4, 5], 
+                  [0, 0, 9, 7, 8, 9, 7, 8], [0, 0, 0, 10, 11, 12, 11, 12], 
+                 [0, 0, 0, 0, 4, 6, 7, 8], [0, 0, 0, 0, 0, 5, 6, 7], 
+                 [0, 0, 0, 0, 0, 0, 7, 6], [0, 0, 0, 0, 0, 0, 0, 2]])
+    print('\n8 by 8 matrix:')
+    invert_matrix(m)
+
+.. parsed-literal::
+
+    2 by 2 matrix:
+    execution time:  65  us
+    
+    4 by 4 matrix:
+    execution time:  105  us
+    
+    8 by 8 matrix:
+    execution time:  299  us
+    
+
+
+The above-mentioned scaling is not obeyed strictly. The reason for the
+discrepancy is that the function call is still the same for all three
+cases: the input must be inspected, the output array must be created,
+and so on.
+
+norm
+----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html
+
+The function takes a vector or matrix without options, and returns its
+2-norm, i.e., the square root of the sum of the square of the elements.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4, 5])
+    b = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    
+    print('norm of a:', np.linalg.norm(a))
+    print('norm of b:', np.linalg.norm(b))
+
+.. parsed-literal::
+
+    norm of a: 7.416198487095663
+    norm of b: 16.88194301613414
+    
+    
+
+
+qr
+--
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.linalg.qr.html
+
+The function computes the QR decomposition of a matrix ``m`` of
+dimensions ``(M, N)``, i.e., it returns two such matrices, ``q``, and
+``r``, that ``m = qr``, where ``q`` is orthonormal, and ``r`` is upper
+triangular. In addition to the input matrix, which is the first
+positional argument, the function accepts the ``mode`` keyword argument
+with a default value of ``reduced``. If ``mode`` is ``reduced``, ``q``,
+and ``r`` are returned in the reduced representation. Otherwise, the
+outputs will have dimensions ``(M, M)``, and ``(M, N)``, respectively.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    A = np.arange(6).reshape((3, 2))
+    print('A: \n', A)
+    
+    print('complete decomposition')
+    q, r = np.linalg.qr(A, mode='complete')
+    print('q: \n', q)
+    print()
+    print('r: \n', r)
+    
+    print('\n\nreduced decomposition')
+    q, r = np.linalg.qr(A, mode='reduced')
+    print('q: \n', q)
+    print()
+    print('r: \n', r)
+
+.. parsed-literal::
+
+    A: 
+     array([[0, 1],
+           [2, 3],
+           [4, 5]], dtype=int16)
+    complete decomposition
+    q: 
+     array([[0.0, -0.9128709291752768, 0.408248290463863],
+           [-0.447213595499958, -0.3651483716701107, -0.8164965809277261],
+           [-0.8944271909999159, 0.1825741858350553, 0.408248290463863]], dtype=float64)
+    
+    r: 
+     array([[-4.47213595499958, -5.813776741499454],
+           [0.0, -1.095445115010332],
+           [0.0, 0.0]], dtype=float64)
+    
+    
+    reduced decomposition
+    q: 
+     array([[0.0, -0.9128709291752768],
+           [-0.447213595499958, -0.3651483716701107],
+           [-0.8944271909999159, 0.1825741858350553]], dtype=float64)
+    
+    r: 
+     array([[-4.47213595499958, -5.813776741499454],
+           [0.0, -1.095445115010332]], dtype=float64)
+    
+    
+
diff --git a/tulip/shared/ulab/docs/manual/source/numpy-random.rst b/tulip/shared/ulab/docs/manual/source/numpy-random.rst
new file mode 100644
index 000000000..2a3a8ca93
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/numpy-random.rst
@@ -0,0 +1,183 @@
+
+numpy.random
+============
+
+Random numbers drawn from specific distributions can be generated by
+instantiating a ``Generator`` object, and calling its methods. The
+module defines the following three functions:
+
+1. `numpy.random.Generator.normal <#normal>`__
+2. `numpy.random.Generator.random <#random>`__
+3. `numpy.random.Generator.uniform <#uniform>`__
+
+The ``Generator`` object, when instantiated, takes a single integer as
+its argument. This integer is the seed, which will be fed to the 32-bit
+or 64-bit routine. More details can be found under
+https://www.pcg-random.org/index.html. The generator is a standard
+``python`` object that keeps track of its state.
+
+``numpy``: https://numpy.org/doc/stable/reference/random/index.html
+
+normal
+------
+
+A random set of numbers from the ``normal`` distribution can be generated
+by calling the generator’s ``normal`` method. The method takes three
+optional arguments, ``loc=0.0``, the centre of the distribution,
+``scale=1.0``, the width of the distribution, and ``size=None``, a tuple
+containing the shape of the returned array. In case ``size`` is
+``None``, a single floating point number is returned.
+
+The ``normal`` method of the ``Generator`` object is based on the
+`Box-Muller
+transform <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>`__.
+
+``numpy``:
+https://numpy.org/doc/stable/reference/random/generated/numpy.random.Generator.normal.html
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    rng = np.random.Generator(123456)
+    print(rng)
+    
+    # return single number from a distribution of scale 1, and location 0
+    print(rng.normal())
+    
+    print(rng.normal(loc=20.0, scale=10.0, size=(3,3)))
+    # same as above, with positional arguments
+    print(rng.normal(20.0, 10.0, (3,3)))
+
+.. parsed-literal::
+
+    Gnerator() at 0x7fa9dae05340
+    -6.285246229407202
+    array([[24.95816273705659, 15.2670302229426, 14.81001577336041],
+           [20.17589833056986, 23.14539083787544, 26.37772041367461],
+           [41.94894234387275, 37.11027030608206, 25.65889562100477]], dtype=float64)
+    array([[21.52562779033434, 12.74685887865834, 24.08404670765186],
+           [4.728112596365396, 7.667757906857082, 21.61576094228444],
+           [2.432338873595267, 27.75945683572574, 5.730827584659245]], dtype=float64)
+    
+    
+
+
+random
+------
+
+A random set of numbers from the uniform distribution in the interval [0,
+1] can be generated by calling the generator’s ``random`` method. The
+method takes two optional arguments, ``size=None``, a tuple containing
+the shape of the returned array, and ``out``. In case ``size`` is
+``None``, a single floating point number is returned.
+
+``out`` can be used, if a floating point array is available. An
+exception will be raised, if the array is not of ``float`` ``dtype``, or
+if both ``size`` and ``out`` are supplied, and there is a conflict in
+their shapes.
+
+If ``size`` is ``None``, a single floating point number will be
+returned.
+
+``numpy``:
+https://numpy.org/doc/stable/reference/random/generated/numpy.random.Generator.random.html
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    rng = np.random.Generator(123456)
+    print(rng)
+    
+    # returning new objects
+    print(rng.random())
+    print('\n', rng.random(size=(3,3)))
+    
+    # supplying a buffer
+    a = np.array(range(9), dtype=np.float).reshape((3,3))
+    print('\nbuffer array before:\n', a)
+    rng.random(out=a)
+    print('\nbuffer array after:\n', a)
+
+.. parsed-literal::
+
+    Gnerator() at 0x7f299de05340
+    6.384615058863119e-11
+    
+     array([[0.4348157846574171, 0.7906325931024071, 0.878697619856133],
+           [0.8738606263361598, 0.4946080034142021, 0.7765890156101152],
+           [0.1770783715717074, 0.02080447648492112, 0.1053837559005948]], dtype=float64)
+    
+    buffer array before:
+     array([[0.0, 1.0, 2.0],
+           [3.0, 4.0, 5.0],
+           [6.0, 7.0, 8.0]], dtype=float64)
+    
+    buffer array after:
+     array([[0.8508024287393201, 0.9848489829156055, 0.7598167589604003],
+           [0.782995698302952, 0.2866337782847831, 0.7915884498022229],
+           [0.4614071706315902, 0.4792657443088592, 0.1581582066230718]], dtype=float64)
+    
+    
+
+
+uniform
+-------
+
+``uniform`` is similar to ``random``, except that the interval over
+which the numbers are distributed can be specified, while the ``out``
+argument cannot. In addition to ``size`` specifying the shape of the
+output, ``low=0.0``, and ``high=1.0`` are accepted arguments. With the
+indicated defaults, ``uniform`` is identical to ``random``, which can be
+seen from the fact that the first 3-by-3 tensor below is the same as the
+one produced by ``rng.random(size=(3,3))`` above.
+
+If ``size`` is ``None``, a single floating point number will be
+returned.
+
+``numpy``:
+https://numpy.org/doc/stable/reference/random/generated/numpy.random.Generator.uniform.html
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    rng = np.random.Generator(123456)
+    print(rng)
+    
+    print(rng.uniform())
+    # returning numbers between 0, and 1
+    print('\n', rng.uniform(size=(3,3)))
+    
+    # returning numbers between 10, and 20
+    print('\n', rng.uniform(low=10, high=20, size=(3,3)))
+    
+    # same as above, without the keywords
+    print('\n', rng.uniform(10, 20, (3,3)))
+
+.. parsed-literal::
+
+    Gnerator() at 0x7f1891205340
+    6.384615058863119e-11
+    
+     array([[0.4348157846574171, 0.7906325931024071, 0.878697619856133],
+           [0.8738606263361598, 0.4946080034142021, 0.7765890156101152],
+           [0.1770783715717074, 0.02080447648492112, 0.1053837559005948]], dtype=float64)
+    
+     array([[18.5080242873932, 19.84848982915605, 17.598167589604],
+           [17.82995698302952, 12.86633778284783, 17.91588449802223],
+           [14.6140717063159, 14.79265744308859, 11.58158206623072]], dtype=float64)
+    
+     array([[14.3380400319162, 12.72487657409978, 15.77119643621117],
+           [13.61835831436355, 18.96062889255558, 15.78847796795966],
+           [12.59435855187034, 17.68262037443622, 14.77943040598734]], dtype=float64)
+    
+    
+
diff --git a/tulip/shared/ulab/docs/manual/source/numpy-universal.rst b/tulip/shared/ulab/docs/manual/source/numpy-universal.rst
new file mode 100644
index 000000000..f39ee173a
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/numpy-universal.rst
@@ -0,0 +1,510 @@
+
+Universal functions
+===================
+
+Standard mathematical functions can be calculated on any scalar,
+scalar-valued iterable (ranges, lists, tuples containing numbers), and
+on ``ndarray``\ s without having to change the call signature. In all
+cases the functions return a new ``ndarray`` of typecode ``float``
+(since these functions usually generate float values, anyway). The only
+exceptions to this rule are the ``exp``, and ``sqrt`` functions, which,
+if ``ULAB_SUPPORTS_COMPLEX`` is set to 1 in
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__,
+can return complex arrays, depending on the argument. All functions
+execute faster with ``ndarray`` arguments than with iterables, because
+the values of the input vector can be extracted faster.
+
+At present, the following functions are supported (starred functions can
+operate on, or can return complex arrays):
+
+``acos``, ``acosh``, ``arctan2``, ``around``, ``asin``, ``asinh``,
+``atan``, ``atanh``, ``ceil``, ``cos``, ``degrees``,
+``exp*``, ``expm1``, ``floor``, ``log``, ``log10``, ``log2``,
+``radians``, ``sin``, ``sinc``, ``sinh``, ``sqrt*``, ``tan``, ``tanh``.
+
+These functions are applied element-wise to the arguments, thus, e.g.,
+the exponential of a matrix cannot be calculated in this way, only the
+exponential of the matrix entries.
+
+In order to avoid repeated memory allocations, functions can take the
+``out=None`` optional argument, which must be a floating point
+``ndarray`` of the same size as the input ``array``. If these conditions
+are not fulfilled, an exception will be raised. If ``out=None``, a new
+array will be created upon each invocation of the function.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = range(9)
+    b = np.array(a)
+    
+    # works with ranges, lists, tuples etc.
+    print('a:\t', a)
+    print('exp(a):\t', np.exp(a))
+    
+    # with 1D arrays
+    print('\n=============\nb:\n', b)
+    print('exp(b):\n', np.exp(b))
+    
+    # as well as with matrices
+    c = np.array(range(9)).reshape((3, 3))
+    print('\n=============\nc:\n', c)
+    print('exp(c):\n', np.exp(c))
+    
+    # using the `out` argument
+    d = np.array(range(9)).reshape((3, 3))
+    
+    print('\nd before invoking the function:\n', d)
+    np.exp(c, out=d)
+    print('\nd afteri nvoking the function:\n', d)
+
+.. parsed-literal::
+
+    a:	 range(0, 9)
+    exp(a):	 array([1.0, 2.718281828459045, 7.38905609893065, 20.08553692318767, 54.59815003314424, 148.4131591025766, 403.4287934927351, 1096.633158428459, 2980.957987041728], dtype=float64)
+    
+    =============
+    b:
+     array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)
+    exp(b):
+     array([1.0, 2.718281828459045, 7.38905609893065, 20.08553692318767, 54.59815003314424, 148.4131591025766, 403.4287934927351, 1096.633158428459, 2980.957987041728], dtype=float64)
+    
+    =============
+    c:
+     array([[0.0, 1.0, 2.0],
+           [3.0, 4.0, 5.0],
+           [6.0, 7.0, 8.0]], dtype=float64)
+    exp(c):
+     array([[1.0, 2.718281828459045, 7.38905609893065],
+           [20.08553692318767, 54.59815003314424, 148.4131591025766],
+           [403.4287934927351, 1096.633158428459, 2980.957987041728]], dtype=float64)
+    
+    d before invoking the function:
+     array([[0.0, 1.0, 2.0],
+           [3.0, 4.0, 5.0],
+           [6.0, 7.0, 8.0]], dtype=float64)
+    
+    d afteri nvoking the function:
+     array([[1.0, 2.718281828459045, 7.38905609893065],
+           [20.08553692318767, 54.59815003314424, 148.4131591025766],
+           [403.4287934927351, 1096.633158428459, 2980.957987041728]], dtype=float64)
+    
+    
+
+
+Computation expenses
+--------------------
+
+The overhead for calculating with micropython iterables is quite
+significant: for the 1000 samples below, the difference is more than 800
+microseconds, because internally the function has to create the
+``ndarray`` for the output, has to fetch the iterable’s items of unknown
+type, and then convert them to floats. All these steps are skipped for
+``ndarray``\ s, because these pieces of information are already known.
+
+Doing the same with ``list`` comprehension requires 30 times more time
+than with the ``ndarray``, which would become even more, if we converted
+the resulting list to an ``ndarray``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    import math
+    
+    a = [0]*1000
+    b = np.array(a)
+    
+    @timeit
+    def timed_vector(iterable):
+        return np.exp(iterable)
+    
+    @timeit
+    def timed_list(iterable):
+        return [math.exp(i) for i in iterable]
+    
+    print('iterating over ndarray in ulab')
+    timed_vector(b)
+    
+    print('\niterating over list in ulab')
+    timed_vector(a)
+    
+    print('\niterating over list in python')
+    timed_list(a)
+
+.. parsed-literal::
+
+    iterating over ndarray in ulab
+    execution time:  441  us
+    
+    iterating over list in ulab
+    execution time:  1266  us
+    
+    iterating over list in python
+    execution time:  11379  us
+    
+
+
+arctan2
+-------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.arctan2.html
+
+The two-argument inverse tangent function is also part of the ``vector``
+sub-module. The function implements broadcasting as discussed in the
+section on ``ndarray``\ s. Scalars (``micropython`` integers or floats)
+are also allowed.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2.2, 33.33, 444.444])
+    print('a:\n', a)
+    print('\narctan2(a, 1.0)\n', np.arctan2(a, 1.0))
+    print('\narctan2(1.0, a)\n', np.arctan2(1.0, a))
+    print('\narctan2(a, a): \n', np.arctan2(a, a))
+
+.. parsed-literal::
+
+    a:
+     array([1.0, 2.2, 33.33, 444.444], dtype=float64)
+    
+    arctan2(a, 1.0)
+     array([0.7853981633974483, 1.14416883366802, 1.5408023243361, 1.568546328341769], dtype=float64)
+    
+    arctan2(1.0, a)
+     array([0.7853981633974483, 0.426627493126876, 0.02999400245879636, 0.002249998453127392], dtype=float64)
+    
+    arctan2(a, a): 
+     array([0.7853981633974483, 0.7853981633974483, 0.7853981633974483, 0.7853981633974483], dtype=float64)
+    
+    
+
+
+around
+------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.around.html
+
+``numpy``\ ’s ``around`` function can also be found in the ``vector``
+sub-module. The function implements the ``decimals`` keyword argument
+with default value ``0``. The first argument must be an ``ndarray``. If
+this is not the case, the function raises a ``TypeError`` exception.
+Note that ``numpy`` accepts general iterables. The ``out`` keyword
+argument known from ``numpy`` is not accepted. The function always
+returns an ndarray of type ``mp_float_t``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2.2, 33.33, 444.444])
+    print('a:\t\t', a)
+    print('\ndecimals = 0\t', np.around(a, decimals=0))
+    print('\ndecimals = 1\t', np.around(a, decimals=1))
+    print('\ndecimals = -1\t', np.around(a, decimals=-1))
+
+.. parsed-literal::
+
+    a:		 array([1.0, 2.2, 33.33, 444.444], dtype=float64)
+    
+    decimals = 0	 array([1.0, 2.0, 33.0, 444.0], dtype=float64)
+    
+    decimals = 1	 array([1.0, 2.2, 33.3, 444.4], dtype=float64)
+    
+    decimals = -1	 array([0.0, 0.0, 30.0, 440.0], dtype=float64)
+    
+    
+
+
+exp
+---
+
+If ``ULAB_SUPPORTS_COMPLEX`` is set to 1 in
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__,
+the exponential function can also take complex arrays as its argument,
+in which case the return value is also complex.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3])
+    print('a:\t\t', a)
+    print('exp(a):\t\t', np.exp(a))
+    print()
+    
+    b = np.array([1+1j, 2+2j, 3+3j], dtype=np.complex)
+    print('b:\t\t', b)
+    print('exp(b):\t\t', np.exp(b))
+
+.. parsed-literal::
+
+    a:		 array([1.0, 2.0, 3.0], dtype=float64)
+    exp(a):		 array([2.718281828459045, 7.38905609893065, 20.08553692318767], dtype=float64)
+    
+    b:		 array([1.0+1.0j, 2.0+2.0j, 3.0+3.0j], dtype=complex)
+    exp(b):		 array([1.468693939915885+2.287355287178842j, -3.074932320639359+6.71884969742825j, -19.88453084414699+2.834471132487004j], dtype=complex)
+    
+    
+
+
+sqrt
+----
+
+If ``ULAB_SUPPORTS_COMPLEX`` is set to 1 in
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__,
+the square root function can also take complex arrays as its argument,
+in which case the return value is also complex. If the input is real,
+but the results might be complex, the user is supposed to specify the
+output ``dtype`` in the function call. Otherwise, the square roots of
+negative numbers will result in ``NaN``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, -1])
+    print('a:\t\t', a)
+    print('sqrt(a):\t\t', np.sqrt(a))
+    print('sqrt(a):\t\t', np.sqrt(a, dtype=np.complex))
+
+.. parsed-literal::
+
+    a:		 array([1.0, -1.0], dtype=float64)
+    sqrt(a):		 array([1.0, nan], dtype=float64)
+    sqrt(a):		 array([1.0+0.0j, 0.0+1.0j], dtype=complex)
+    
+    
+
+
+Vectorising generic python functions
+------------------------------------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.vectorize.html
+
+The examples above use factory functions. In fact, they are nothing but
+the vectorised versions of the standard mathematical functions.
+User-defined ``python`` functions can also be vectorised by help of
+``vectorize``. This function takes a positional argument, namely, the
+``python`` function that you want to vectorise, and a non-mandatory
+keyword argument, ``otypes``, which determines the ``dtype`` of the
+output array. The ``otypes`` must be ``None`` (default), or any of the
+``dtypes`` defined in ``ulab``. With ``None``, the output is
+automatically turned into a float array.
+
+The return value of ``vectorize`` is a ``micropython`` object that can
+be called as a standard function, but which now accepts either a scalar,
+an ``ndarray``, or a generic ``micropython`` iterable as its sole
+argument. Note that the function that is to be vectorised must have a
+single argument.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    def f(x):
+        return x*x
+    
+    vf = np.vectorize(f)
+    
+    # calling with a scalar
+    print('{:20}'.format('f on a scalar: '), vf(44.0))
+    
+    # calling with an ndarray
+    a = np.array([1, 2, 3, 4])
+    print('{:20}'.format('f on an ndarray: '), vf(a))
+    
+    # calling with a list
+    print('{:20}'.format('f on a list: '), vf([2, 3, 4]))
+
+.. parsed-literal::
+
+    f on a scalar:       array([1936.0], dtype=float64)
+    f on an ndarray:     array([1.0, 4.0, 9.0, 16.0], dtype=float64)
+    f on a list:         array([4.0, 9.0, 16.0], dtype=float64)
+    
+    
+
+
+As mentioned, the ``dtype`` of the resulting ``ndarray`` can be
+specified via the ``otypes`` keyword. The value is bound to the function
+object that ``vectorize`` returns, therefore, if the same function is to
+be vectorised with different output types, then for each type a new
+function object must be created.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    l = [1, 2, 3, 4]
+    def f(x):
+        return x*x
+    
+    vf1 = np.vectorize(f, otypes=np.uint8)
+    vf2 = np.vectorize(f, otypes=np.float)
+    
+    print('{:20}'.format('output is uint8: '), vf1(l))
+    print('{:20}'.format('output is float: '), vf2(l))
+
+.. parsed-literal::
+
+    output is uint8:     array([1, 4, 9, 16], dtype=uint8)
+    output is float:     array([1.0, 4.0, 9.0, 16.0], dtype=float64)
+    
+    
+
+
+The ``otypes`` keyword argument cannot be used for type coercion: if the
+function evaluates to a float, but ``otypes`` would dictate an integer
+type, an exception will be raised:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    int_list = [1, 2, 3, 4]
+    float_list = [1.0, 2.0, 3.0, 4.0]
+    def f(x):
+        return x*x
+    
+    vf = np.vectorize(f, otypes=np.uint8)
+    
+    print('{:20}'.format('integer list: '), vf(int_list))
+    # this will raise a TypeError exception
+    print(vf(float_list))
+
+.. parsed-literal::
+
+    integer list:        array([1, 4, 9, 16], dtype=uint8)
+    
+    Traceback (most recent call last):
+      File "/dev/shm/micropython.py", line 14, in <module>
+    TypeError: can't convert float to int
+    
+
+
+Benchmarks
+~~~~~~~~~~
+
+It should be pointed out that the ``vectorize`` function produces the
+pseudo-vectorised version of the ``python`` function that is fed into
+it, i.e., on the C level, the same ``python`` function is called, with
+the all-encompassing ``mp_obj_t`` type arguments, and all that happens
+is that the ``for`` loop in ``[f(i) for i in iterable]`` runs purely in
+C. Since type checking and type conversion in ``f()`` is expensive, the
+speed-up is not so spectacular as when iterating over an ``ndarray``
+with a factory function: a gain of approximately 30% can be expected,
+when a native ``python`` type (e.g., ``list``) is returned by the
+function, and this becomes around 50% (a factor of 2), if conversion to
+an ``ndarray`` is also counted.
+
+The following code snippet calculates the square of 1000 numbers with
+the vectorised function (which returns an ``ndarray``), with ``list``
+comprehension, and with ``list`` comprehension followed by conversion to
+an ``ndarray``. For comparison, the execution time is measured also for
+the case, when the square is calculated entirely in ``ulab``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    def f(x):
+        return x*x
+    
+    vf = np.vectorize(f)
+    
+    @timeit
+    def timed_vectorised_square(iterable):
+        return vf(iterable)
+    
+    @timeit
+    def timed_python_square(iterable):
+        return [f(i) for i in iterable]
+    
+    @timeit
+    def timed_ndarray_square(iterable):
+        return np.array([f(i) for i in iterable])
+    
+    @timeit
+    def timed_ulab_square(ndarray):
+        return ndarray**2
+    
+    print('vectorised function')
+    squares = timed_vectorised_square(range(1000))
+    
+    print('\nlist comprehension')
+    squares = timed_python_square(range(1000))
+    
+    print('\nlist comprehension + ndarray conversion')
+    squares = timed_ndarray_square(range(1000))
+    
+    print('\nsquaring an ndarray entirely in ulab')
+    a = np.array(range(1000))
+    squares = timed_ulab_square(a)
+
+.. parsed-literal::
+
+    vectorised function
+    execution time:  7237  us
+    
+    list comprehension
+    execution time:  10248  us
+    
+    list comprehension + ndarray conversion
+    execution time:  12562  us
+    
+    squaring an ndarray entirely in ulab
+    execution time:  560  us
+    
+
+
+From the comparisons above, it is obvious that ``python`` functions
+should only be vectorised, when the same effect cannot be gotten in
+``ulab`` only. However, although the time savings are not significant,
+there is still a good reason for caring about vectorised functions.
+Namely, user-defined ``python`` functions become universal, i.e., they
+can accept generic iterables as well as ``ndarray``\ s as their
+arguments. A vectorised function is still a one-liner, resulting in
+transparent and elegant code.
+
+A final comment on this subject: the ``f(x)`` that we defined is a
+*generic* ``python`` function. This means that it is not required that
+it just crunches some numbers. It has to return a number object, but it
+can still access the hardware in the meantime. So, e.g.,
+
+.. code:: python
+
+
+   led = pyb.LED(2)
+
+   def f(x):
+       if x < 100:
+           led.toggle()
+       return x*x
+
+is perfectly valid code.
diff --git a/tulip/shared/ulab/docs/manual/source/scipy-linalg.rst b/tulip/shared/ulab/docs/manual/source/scipy-linalg.rst
new file mode 100644
index 000000000..525968207
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/scipy-linalg.rst
@@ -0,0 +1,151 @@
+
+scipy.linalg
+============
+
+``scipy``\ ’s ``linalg`` module contains two functions,
+``solve_triangular``, and ``cho_solve``. The functions can be called by
+prepending them by ``scipy.linalg.``.
+
+1. `scipy.linalg.cho_solve <#cho_solve>`__
+2. `scipy.linalg.solve_triangular <#solve_triangular>`__
+
+cho_solve
+---------
+
+``scipy``:
+https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.cho_solve.html
+
+Solve the linear equations
+
+:raw-latex:`\begin{equation}
+\mathbf{A}\cdot\mathbf{x} = \mathbf{b}
+\end{equation}`
+
+given the Cholesky factorization of :math:`\mathbf{A}`. As opposed to
+``scipy``, the function simply takes the Cholesky-factorised matrix,
+:math:`\mathbf{A}`, and :math:`\mathbf{b}` as inputs.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import scipy as spy
+    
+    A = np.array([[3, 0, 0, 0], [2, 1, 0, 0], [1, 0, 1, 0], [1, 2, 1, 8]])
+    b = np.array([4, 2, 4, 2])
+    
+    print(spy.linalg.cho_solve(A, b))
+
+.. parsed-literal::
+
+    array([-0.01388888888888906, -0.6458333333333331, 2.677083333333333, -0.01041666666666667], dtype=float64)
+    
+    
+
+
+solve_triangular
+----------------
+
+``scipy``:
+https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.solve_triangular.html
+
+Solve the linear equation
+
+:raw-latex:`\begin{equation}
+\mathbf{a}\cdot\mathbf{x} = \mathbf{b}
+\end{equation}`
+
+with the assumption that :math:`\mathbf{a}` is a triangular matrix. The
+two positional arguments are :math:`\mathbf{a}`, and :math:`\mathbf{b}`,
+and the optional keyword argument is ``lower`` with a default value of
+``False``. ``lower`` determines, whether data are taken from the lower,
+or upper triangle of :math:`\mathbf{a}`.
+
+Note that :math:`\mathbf{a}` itself does not have to be a triangular
+matrix: if it is not, then the values are simply taken to be 0 in the
+upper or lower triangle, as dictated by ``lower``. However,
+:math:`\mathbf{a}\cdot\mathbf{x}` will yield :math:`\mathbf{b}` only,
+when :math:`\mathbf{a}` is triangular. You should keep this in mind,
+when trying to establish the validity of the solution by back
+substitution.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import scipy as spy
+    
+    a = np.array([[3, 0, 0, 0], [2, 1, 0, 0], [1, 0, 1, 0], [1, 2, 1, 8]])
+    b = np.array([4, 2, 4, 2])
+    
+    print('a:\n')
+    print(a)
+    print('\nb: ', b)
+    
+    x = spy.linalg.solve_triangular(a, b, lower=True)
+    
+    print('='*20)
+    print('x: ', x)
+    print('\ndot(a, x): ', np.dot(a, x))
+
+.. parsed-literal::
+
+    a:
+    
+    array([[3.0, 0.0, 0.0, 0.0],
+           [2.0, 1.0, 0.0, 0.0],
+           [1.0, 0.0, 1.0, 0.0],
+           [1.0, 2.0, 1.0, 8.0]], dtype=float64)
+    
+    b:  array([4.0, 2.0, 4.0, 2.0], dtype=float64)
+    ====================
+    x:  array([1.333333333333333, -0.6666666666666665, 2.666666666666667, -0.08333333333333337], dtype=float64)
+    
+    dot(a, x):  array([4.0, 2.0, 4.0, 2.0], dtype=float64)
+    
+    
+
+
+We get the same solution, :math:`\mathbf{x}`, with the following
+matrix, but the dot product of :math:`\mathbf{a}`, and
+:math:`\mathbf{x}` is no longer :math:`\mathbf{b}`:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import scipy as spy
+    
+    a = np.array([[3, 2, 1, 0], [2, 1, 0, 1], [1, 0, 1, 4], [1, 2, 1, 8]])
+    b = np.array([4, 2, 4, 2])
+    
+    print('a:\n')
+    print(a)
+    print('\nb: ', b)
+    
+    x = spy.linalg.solve_triangular(a, b, lower=True)
+    
+    print('='*20)
+    print('x: ', x)
+    print('\ndot(a, x): ', np.dot(a, x))
+
+.. parsed-literal::
+
+    a:
+    
+    array([[3.0, 2.0, 1.0, 0.0],
+           [2.0, 1.0, 0.0, 1.0],
+           [1.0, 0.0, 1.0, 4.0],
+           [1.0, 2.0, 1.0, 8.0]], dtype=float64)
+    
+    b:  array([4.0, 2.0, 4.0, 2.0], dtype=float64)
+    ====================
+    x:  array([1.333333333333333, -0.6666666666666665, 2.666666666666667, -0.08333333333333337], dtype=float64)
+    
+    dot(a, x):  array([5.333333333333334, 1.916666666666666, 3.666666666666667, 2.0], dtype=float64)
+    
+    
+
diff --git a/tulip/shared/ulab/docs/manual/source/scipy-optimize.rst b/tulip/shared/ulab/docs/manual/source/scipy-optimize.rst
new file mode 100644
index 000000000..63d60ddac
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/scipy-optimize.rst
@@ -0,0 +1,173 @@
+
+scipy.optimize
+==============
+
+Functions in the ``optimize`` module can be called by prepending them by
+``scipy.optimize.``. The module defines the following three functions:
+
+1. `scipy.optimize.bisect <#bisect>`__
+2. `scipy.optimize.fmin <#fmin>`__
+3. `scipy.optimize.newton <#newton>`__
+
+Note that routines that work with user-defined functions still have to
+call the underlying ``python`` code, and therefore, gains in speed are
+not as significant as with other vectorised operations. As a rule of
+thumb, a factor of two can be expected, when compared to an optimised
+``python`` implementation.
+
+bisect
+------
+
+``scipy``:
+https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.bisect.html
+
+``bisect`` finds the root of a function of one variable using a simple
+bisection routine. It takes three positional arguments, the function
+itself, and two starting points. The function must have opposite signs
+at the starting points. Returned is the position of the root.
+
+Two keyword arguments, ``xtol``, and ``maxiter`` can be supplied to
+control the accuracy, and the number of bisections, respectively.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import scipy as spy
+        
+    def f(x):
+        return x*x - 1
+    
+    print(spy.optimize.bisect(f, 0, 4))
+    
+    print('only 8 bisections: ',  spy.optimize.bisect(f, 0, 4, maxiter=8))
+    
+    print('with 0.1 accuracy: ',  spy.optimize.bisect(f, 0, 4, xtol=0.1))
+
+.. parsed-literal::
+
+    0.9999997615814209
+    only 8 bisections:  0.984375
+    with 0.1 accuracy:  0.9375
+    
+    
+
+
+Performance
+~~~~~~~~~~~
+
+Since the ``bisect`` routine calls user-defined ``python`` functions,
+the speed gain is only about a factor of two, if compared to a purely
+``python`` implementation.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import scipy as spy
+    
+    def f(x):
+        return (x-1)*(x-1) - 2.0
+    
+    def bisect(f, a, b, xtol=2.4e-7, maxiter=100):
+        if f(a) * f(b) > 0:
+            raise ValueError
+    
+        rtb = a if f(a) < 0.0 else b
+        dx = b - a if f(a) < 0.0 else a - b
+        for i in range(maxiter):
+            dx *= 0.5
+            x_mid = rtb + dx
+            mid_value = f(x_mid)
+            if mid_value < 0:
+                rtb = x_mid
+            if abs(dx) < xtol:
+                break
+    
+        return rtb
+    
+    @timeit
+    def bisect_scipy(f, a, b):
+        return spy.optimize.bisect(f, a, b)
+    
+    @timeit
+    def bisect_timed(f, a, b):
+        return bisect(f, a, b)
+    
+    print('bisect running in python')
+    bisect_timed(f, 3, 2)
+    
+    print('bisect running in C')
+    bisect_scipy(f, 3, 2)
+
+.. parsed-literal::
+
+    bisect running in python
+    execution time:  1270  us
+    bisect running in C
+    execution time:  642  us
+    
+
+
+fmin
+----
+
+``scipy``:
+https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin.html
+
+The ``fmin`` function finds the position of the minimum of a
+user-defined function by using the downhill simplex method. Requires two
+positional arguments, the function, and the initial value. Three keyword
+arguments, ``xatol``, ``fatol``, and ``maxiter`` stipulate conditions
+for stopping.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import scipy as spy
+    
+    def f(x):
+        return (x-1)**2 - 1
+    
+    print(spy.optimize.fmin(f, 3.0))
+    print(spy.optimize.fmin(f, 3.0, xatol=0.1))
+
+.. parsed-literal::
+
+    0.9996093749999952
+    1.199999999999996
+    
+    
+
+
+newton
+------
+
+``scipy``: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.newton.html
+
+``newton`` finds a zero of a real, user-defined function using the
+Newton-Raphson (or secant or Halley’s) method. The routine requires two
+positional arguments, the function, and the initial value. Three keyword
+arguments can be supplied to control the iteration. These are the
+absolute and relative tolerances ``tol``, and ``rtol``, respectively,
+and the number of iterations before stopping, ``maxiter``. The function
+returns a single scalar, the position of the root.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import scipy as spy
+        
+    def f(x):
+        return x*x*x - 2.0
+    
+    print(spy.optimize.newton(f, 3., tol=0.001, rtol=0.01))
+
+.. parsed-literal::
+
+    1.260135727246117
+    
+    
+
diff --git a/tulip/shared/ulab/docs/manual/source/scipy-signal.rst b/tulip/shared/ulab/docs/manual/source/scipy-signal.rst
new file mode 100644
index 000000000..d1f34818a
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/scipy-signal.rst
@@ -0,0 +1,69 @@
+
+scipy.signal
+============
+
+This module defines the single function:
+
+1. `scipy.signal.sosfilt <#sosfilt>`__
+
+sosfilt
+-------
+
+``scipy``:
+https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sosfilt.html
+
+Filter data along one dimension using cascaded second-order sections.
+
+The function takes two positional arguments, ``sos``, the filter
+segments of length 6, and the one-dimensional, uniformly sampled data
+set to be filtered. Returns the filtered data, or the filtered data and
+the final filter delays, if the ``zi`` keyword argument is supplied.
+The keyword argument must be a float ``ndarray`` of shape
+``(n_sections, 2)``. If ``zi`` is not passed to the function, the
+initial values are assumed to be 0.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import scipy as spy
+    
+    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    sos = [[1, 2, 3, 1, 5, 6], [1, 2, 3, 1, 5, 6]]
+    y = spy.signal.sosfilt(sos, x)
+    print('y: ', y)
+
+.. parsed-literal::
+
+    y:  array([0.0, 1.0, -4.0, 24.0, -104.0, 440.0, -1728.0, 6532.000000000001, -23848.0, 84864.0], dtype=float)
+    
+    
+
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import scipy as spy
+    
+    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    sos = [[1, 2, 3, 1, 5, 6], [1, 2, 3, 1, 5, 6]]
+    # initial conditions of the filter
+    zi = np.array([[1, 2], [3, 4]])
+    
+    y, zf = spy.signal.sosfilt(sos, x, zi=zi)
+    print('y: ', y)
+    print('\n' + '='*40 + '\nzf: ', zf)
+
+.. parsed-literal::
+
+    y:  array([4.0, -16.0, 63.00000000000001, -227.0, 802.9999999999999, -2751.0, 9271.000000000001, -30775.0, 101067.0, -328991.0000000001], dtype=float)
+    
+    ========================================
+    zf:  array([[37242.0, 74835.0],
+    	 [1026187.0, 1936542.0]], dtype=float)
+    
+    
+
diff --git a/tulip/shared/ulab/docs/manual/source/scipy-special.rst b/tulip/shared/ulab/docs/manual/source/scipy-special.rst
new file mode 100644
index 000000000..755a5359b
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/scipy-special.rst
@@ -0,0 +1,44 @@
+
+scipy.special
+=============
+
+``scipy``\ ’s ``special`` module defines several functions that behave
+as do the standard mathematical functions of ``numpy``, i.e., they
+can be called on any scalar, scalar-valued iterable (ranges, lists,
+tuples containing numbers), and on ``ndarray``\ s without having to
+change the call signature. In all cases the functions return a new
+``ndarray`` of typecode ``float`` (since these functions usually
+generate float values, anyway).
+
+At present, ``ulab``\ ’s ``special`` module contains the following
+functions:
+
+``erf``, ``erfc``, ``gamma``, and ``gammaln``, and they can be called by
+prepending them by ``scipy.special.``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import scipy as spy
+    
+    a = range(9)
+    b = np.array(a)
+    
+    print('a: ', a)
+    print(spy.special.erf(a))
+    
+    print('\nb: ', b)
+    print(spy.special.erfc(b))
+
+.. parsed-literal::
+
+    a:  range(0, 9)
+    array([0.0, 0.8427007929497149, 0.9953222650189527, 0.9999779095030014, 0.9999999845827421, 1.0, 1.0, 1.0, 1.0], dtype=float64)
+    
+    b:  array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)
+    array([1.0, 0.1572992070502851, 0.004677734981047265, 2.209049699858544e-05, 1.541725790028002e-08, 1.537459794428035e-12, 2.151973671249892e-17, 4.183825607779414e-23, 1.122429717298293e-29], dtype=float64)
+    
+    
+
diff --git a/tulip/shared/ulab/docs/manual/source/ulab-intro.rst b/tulip/shared/ulab/docs/manual/source/ulab-intro.rst
new file mode 100644
index 000000000..42e5b260c
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/ulab-intro.rst
@@ -0,0 +1,625 @@
+
+Introduction
+============
+
+Enter ulab
+----------
+
+``ulab`` is a ``numpy``-like module for ``micropython`` and its
+derivatives, meant to simplify and speed up common mathematical
+operations on arrays. ``ulab`` implements a small subset of ``numpy``
+and ``scipy``, as well as a number of functions manipulating byte
+arrays. The functions were chosen such that they might be useful in the
+context of a microcontroller. However, the project is a living one, and
+suggestions for new features are always welcome.
+
+This document discusses how you can use the library, starting from
+building your own firmware, through questions like what affects the
+firmware size, what are the trade-offs, and what are the most important
+differences to ``numpy`` and ``scipy``, respectively. The document is
+organised as follows:
+
+The chapter after this one helps you with firmware customisation.
+
+The third chapter gives a very concise summary of the ``ulab`` functions
+and array methods. This chapter can be used as a quick reference.
+
+The chapters after that are an in-depth review of most functions. Here
+you can find usage examples, benchmarks, as well as a thorough
+discussion of such concepts as broadcasting, and views versus copies.
+
+The final chapter of this book can be regarded as the programming
+manual. The inner working of ``ulab`` is dissected here, and you will
+also find hints as to how to implement your own ``numpy``-compatible
+functions.
+
+Purpose
+-------
+
+Of course, the first question that one has to answer is, why on Earth
+one would need a fast math library on a microcontroller. After all, it
+is not expected that heavy number crunching is going to take place on
+bare metal. It is not meant to. On a PC, the main reason for writing
+fast code is the sheer amount of data that one wants to process. On a
+microcontroller, the data volume is probably small, but it might lead to
+catastrophic system failure, if these data are not processed in time,
+because the microcontroller is supposed to interact with the outside
+world in a timely fashion. In fact, this latter objective was the
+initiator of this project: I needed the Fourier transform of a signal
+coming from the ADC of the ``pyboard``, and all available options were
+simply too slow.
+
+In addition to speed, another issue that one has to keep in mind when
+working with embedded systems is the amount of available RAM: I believe,
+everything here could be implemented in pure ``python`` with relatively
+little effort (in fact, there are a couple of ``python``-only
+implementations of ``numpy`` functions out there), but the price we
+would have to pay for that is not only speed, but RAM, too. ``python``
+code, if it is not frozen, and compiled into the firmware, has to be
+compiled at runtime, which is not exactly a cheap process. On top of
+that, if numbers are stored in a list or tuple, which would be the
+high-level container, then they occupy 8 bytes, no matter, whether they
+are all smaller than 100, or larger than one hundred million. This is
+obviously a waste of resources in an environment, where resources are
+scarce.
+
+Finally, there is a reason for using ``micropython`` in the first place.
+Namely, that a microcontroller can be programmed in a very elegant, and
+*pythonic* way. But if it is so, why should we not extend this idea to
+other tasks and concepts that might come up in this context? If there
+was no other reason than this *elegance*, I would find that convincing
+enough.
+
+Based on the above-mentioned considerations, all functions in ``ulab``
+are implemented in a way that
+
+1. conforms to ``numpy`` as much as possible
+2. is as frugal with RAM as possible,
+3. and yet, fast. Much faster than pure python. Think of speed-ups of
+   30-50!
+
+The main points of ``ulab`` are
+
+-  compact, iterable and slicable containers of numerical data in one to
+   four dimensions. These containers support all the relevant unary and
+   binary operators (e.g., ``len``, ==, +, \*, etc.)
+-  vectorised computations on ``micropython`` iterables and numerical
+   arrays (in ``numpy``-speak, universal functions)
+-  computing statistical properties (mean, standard deviation etc.) on
+   arrays
+-  basic linear algebra routines (matrix inversion, multiplication,
+   reshaping, transposition, determinant, and eigenvalues, Cholesky
+   decomposition and so on)
+-  polynomial fits to numerical data, and evaluation of polynomials
+-  fast Fourier transforms
+-  filtering of data (convolution and second-order filters)
+-  function minimisation, fitting, and numerical approximation routines
+-  interfacing between numerical data and peripheral hardware devices
+
+``ulab`` implements close to a hundred functions and array methods. At
+the time of writing this manual (for version 4.0.0), the library adds
+approximately 120 kB of extra compiled code to the ``micropython``
+(pyboard.v.1.17) firmware. However, if you are tight with flash space,
+you can easily shave tens of kB off the firmware. In fact, if only a
+small sub-set of functions are needed, you can get away with less than
+10 kB of flash space. See the section on `customising
+ulab <#Customising-the-firmware>`__.
+
+Resources and legal matters
+---------------------------
+
+The source code of the module can be found under
+https://github.com/v923z/micropython-ulab/tree/master/code, while the
+source of this user manual is under
+https://github.com/v923z/micropython-ulab/tree/master/docs.
+
+The MIT licence applies to all material.
+
+Friendly request
+----------------
+
+If you use ``ulab``, and bump into a bug, or think that a particular
+function is missing, or its behaviour does not conform to ``numpy``,
+please, raise a `ulab
+issue <https://github.com/v923z/micropython-ulab/issues>`__ on github,
+so that the community can profit from your experiences.
+
+Even better, if you find the project to be useful, and think that it
+could be made better, faster, tighter, and shinier, please, consider
+contributing, and issue a pull request with the implementation of your
+improvements and new features. ``ulab`` can only become successful, if
+it offers what the community needs.
+
+These last comments apply to the documentation, too. If, in your
+opinion, the documentation is obscure, misleading, or not detailed
+enough, please, let us know, so that *we* can fix it.
+
+Differences between micropython-ulab and circuitpython-ulab
+-----------------------------------------------------------
+
+``ulab`` has originally been developed for ``micropython``, but has
+since been integrated into a number of its flavours. Most of these are
+simply forks of ``micropython`` itself, with some additional
+functionality. One of the notable exceptions is ``circuitpython``, which
+has slightly diverged at the core level, and this has some minor
+consequences. Some of these concern the C implementation details only,
+which all have been sorted out with the generous and enthusiastic
+support of Jeff Epler from `Adafruit
+Industries <http://www.adafruit.com>`__.
+
+There are, however, a couple of instances, where the two environments
+differ at the python level in how the class properties can be accessed.
+We will point out the differences and possible workarounds at the
+relevant places in this document.
+
+Customising the firmware
+========================
+
+As mentioned above, ``ulab`` has considerably grown since its
+conception, which also means that it might no longer fit on the
+microcontroller of your choice. There are, however, a couple of ways of
+customising the firmware, and thereby reducing its size.
+
+All ``ulab`` options are listed in a single header file,
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__,
+which contains pre-processor flags for each feature that can be
+fine-tuned. The first couple of lines of the file look like this
+
+.. code:: c
+
+   // The pre-processor constants in this file determine how ulab behaves:
+   //
+   // - how many dimensions ulab can handle
+   // - which functions are included in the compiled firmware
+   // - whether the python syntax is numpy-like, or modular
+   // - whether arrays can be sliced and iterated over
+   // - which binary/unary operators are supported
+   //
+   // A considerable amount of flash space can be saved by removing (setting
+   // the corresponding constants to 0) the unnecessary functions and features.
+
+   // Values defined here can be overridden by your own config file as
+   // make -DULAB_CONFIG_FILE="my_ulab_config.h"
+   #if defined(ULAB_CONFIG_FILE)
+   #include ULAB_CONFIG_FILE
+   #endif
+
+   // Adds support for complex ndarrays
+   #ifndef ULAB_SUPPORTS_COMPLEX
+   #define ULAB_SUPPORTS_COMPLEX               (1)
+   #endif
+
+   // Determines, whether scipy is defined in ulab. The sub-modules and functions
+   // of scipy have to be defined separately
+   #define ULAB_HAS_SCIPY                      (1)
+
+   // The maximum number of dimensions the firmware should be able to support
+   // Possible values lie between 1, and 4, inclusive
+   #define ULAB_MAX_DIMS                       2
+
+   // By setting this constant to 1, iteration over array dimensions will be implemented
+   // as a function (ndarray_rewind_array), instead of writing out the loops in macros
+   // This reduces firmware size at the expense of speed
+   #define ULAB_HAS_FUNCTION_ITERATOR          (0)
+
+   // If NDARRAY_IS_ITERABLE is 1, the ndarray object defines its own iterator function
+   // This option saves approx. 250 bytes of flash space
+   #define NDARRAY_IS_ITERABLE                 (1)
+
+   // Slicing can be switched off by setting this variable to 0
+   #define NDARRAY_IS_SLICEABLE                (1)
+
+   // The default threshold for pretty printing. These variables can be overwritten
+   // at run-time via the set_printoptions() function
+   #define ULAB_HAS_PRINTOPTIONS               (1)
+   #define NDARRAY_PRINT_THRESHOLD             10
+   #define NDARRAY_PRINT_EDGEITEMS             3
+
+   // determines, whether the dtype is an object, or simply a character
+   // the object implementation is numpythonic, but requires more space
+   #define ULAB_HAS_DTYPE_OBJECT               (0)
+
+   // the ndarray binary operators
+   #define NDARRAY_HAS_BINARY_OPS              (1)
+
+   // Firmware size can be reduced at the expense of speed by using function
+   // pointers in iterations. For each operator, the function pointer saves around
+   // 2 kB in the two-dimensional case, and around 4 kB in the four-dimensional case.
+
+   #define NDARRAY_BINARY_USES_FUN_POINTER     (0)
+
+   #define NDARRAY_HAS_BINARY_OP_ADD           (1)
+   #define NDARRAY_HAS_BINARY_OP_EQUAL         (1)
+   #define NDARRAY_HAS_BINARY_OP_LESS          (1)
+   #define NDARRAY_HAS_BINARY_OP_LESS_EQUAL    (1)
+   #define NDARRAY_HAS_BINARY_OP_MORE          (1)
+   #define NDARRAY_HAS_BINARY_OP_MORE_EQUAL    (1)
+   #define NDARRAY_HAS_BINARY_OP_MULTIPLY      (1)
+   #define NDARRAY_HAS_BINARY_OP_NOT_EQUAL     (1)
+   #define NDARRAY_HAS_BINARY_OP_POWER         (1)
+   #define NDARRAY_HAS_BINARY_OP_SUBTRACT      (1)
+   #define NDARRAY_HAS_BINARY_OP_TRUE_DIVIDE   (1)
+   ...     
+
+The meaning of flags with names ``_HAS_`` should be obvious, so we will
+just explain the other options.
+
+To see how much you can gain by un-setting the functions that you do not
+need, here are some pointers. In four dimensions, including all
+functions adds around 120 kB to the ``micropython`` firmware. On the
+other hand, if you are interested in Fourier transforms only, and strip
+everything else, you get away with less than 5 kB extra.
+
+Compatibility with numpy
+------------------------
+
+The functions implemented in ``ulab`` are organised in four sub-modules
+at the C level, namely, ``numpy``, ``scipy``, ``utils``, and ``user``.
+This modularity is elevated to ``python``, meaning that in order to use
+functions that are part of ``numpy``, you have to import ``numpy`` as
+
+.. code:: python
+
+   from ulab import numpy as np
+
+   x = np.array([4, 5, 6])
+   p = np.array([1, 2, 3])
+   np.polyval(p, x)
+
+There are a couple of exceptions to this rule, namely ``fft``,
+``linalg``, and ``random``, which are sub-modules even in ``numpy``,
+thus you have to write them out as
+
+.. code:: python
+
+   from ulab import numpy as np
+
+   A = np.array([1, 2, 3, 4]).reshape((2, 2))
+   np.linalg.trace(A)
+
+Some of the functions in ``ulab`` are re-implementations of ``scipy``
+functions, and they are to be imported as
+
+.. code:: python
+
+   from ulab import numpy as np
+   from ulab import scipy as spy
+
+
+   x = np.array([1, 2, 3])
+   spy.special.erf(x)
+
+``numpy``-compatibility has an enormous benefit: namely, by
+``try``\ ing to ``import``, we can guarantee that the same, unmodified
+code runs in ``CPython``, as in ``micropython``. The following snippet
+is platform-independent, thus, the ``python`` code can be tested and
+debugged on a computer before loading it onto the microcontroller.
+
+.. code:: python
+
+
+   try:
+       from ulab import numpy as np
+       from ulab import scipy as spy
+   except ImportError:
+       import numpy as np
+       import scipy as spy
+       
+   x = np.array([1, 2, 3])
+   spy.special.erf(x)    
+
+The impact of dimensionality
+----------------------------
+
+Reducing the number of dimensions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``ulab`` supports tensors of rank four, but this is expensive in terms
+of flash: with all available functions and options, the library adds
+around 100 kB to the firmware. However, if such high dimensions are not
+required, significant reductions in size can be gotten by changing the
+value of
+
+.. code:: c
+
+   #define ULAB_MAX_DIMS                   2
+
+Two dimensions cost a bit more than half of four, while you can get away
+with around 20 kB of flash in one dimension, because all those functions
+that don’t make sense (e.g., matrix inversion, eigenvalues etc.) are
+automatically stripped from the firmware.
+
+Using the function iterator
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In higher dimensions, the firmware size increases, because each
+dimension (axis) adds another level of nested loops. An example of this
+is the macro of the binary operator in three dimensions
+
+.. code:: c
+
+   #define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR)
+       type_out *array = (type_out *)results->array;
+       size_t j = 0;
+       do {
+           size_t k = 0;
+           do {
+               size_t l = 0;
+               do {
+                   *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));
+                   (larray) += (lstrides)[ULAB_MAX_DIMS - 1];
+                   (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];
+                   l++;
+               } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);
+               (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];
+               (larray) += (lstrides)[ULAB_MAX_DIMS - 2];
+               (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];
+               (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];
+               k++;
+           } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);
+           (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+           (larray) += (lstrides)[ULAB_MAX_DIMS - 3];
+           (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];
+           (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];
+           j++;
+       } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);
+
+In order to reduce firmware size, it *might* make sense in higher
+dimensions to make use of the function iterator by setting the
+
+.. code:: c
+
+   #define ULAB_HAS_FUNCTION_ITERATOR      (1)
+
+constant to 1. This allows the compiler to call the
+``ndarray_rewind_array`` function, so that it doesn’t have to unwrap the
+loops for ``k``, and ``j``. Instead of the macro above, we now have
+
+.. code:: c
+
+   #define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR)
+       type_out *array = (type_out *)(results)->array;
+       size_t *lcoords = ndarray_new_coords((results)->ndim);
+       size_t *rcoords = ndarray_new_coords((results)->ndim);
+       for(size_t i=0; i < (results)->len/(results)->shape[ULAB_MAX_DIMS -1]; i++) {
+           size_t l = 0;
+           do {
+               *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));
+               (larray) += (lstrides)[ULAB_MAX_DIMS - 1];
+               (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];
+               l++;
+           } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);
+           ndarray_rewind_array((results)->ndim, larray, (results)->shape, lstrides, lcoords);
+           ndarray_rewind_array((results)->ndim, rarray, (results)->shape, rstrides, rcoords);
+       } while(0)
+
+Since the ``ndarray_rewind_array`` function is implemented only once, a
+lot of space can be saved. Obviously, function calls cost time, thus
+such trade-offs must be evaluated for each application. The gain also
+depends on which functions and features you include. Operators and
+functions that involve two arrays are expensive, because at the C level,
+the number of cases that must be handled scales with the squares of the
+number of data types. As an example, the innocent-looking expression
+
+.. code:: python
+
+
+   from ulab import numpy as np
+
+   a = np.array([1, 2, 3])
+   b = np.array([4, 5, 6])
+
+   c = a + b
+
+requires 25 loops in C, because the ``dtypes`` of both ``a``, and ``b``
+can assume 5 different values, and the addition has to be resolved for
+all possible cases. Hint: each binary operator costs between 3 and 4 kB
+in two dimensions.
+
+The ulab version string
+-----------------------
+
+As is customary with ``python`` packages, information on the package
+version can be found by querying the ``__version__`` string.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    import ulab
+    
+    print('you are running ulab version', ulab.__version__)
+
+.. parsed-literal::
+
+    you are running ulab version 2.1.0-2D
+    
+    
+
+
+The first three numbers indicate the major, minor, and sub-minor
+versions of ``ulab`` (defined by the ``ULAB_VERSION`` constant in
+`ulab.c <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.c>`__).
+We usually change the minor version, whenever a new function is added to
+the code, and the sub-minor version will be incremented, if a bug fix is
+implemented.
+
+``2D`` tells us that the particular firmware supports tensors of rank 2
+(defined by ``ULAB_MAX_DIMS`` in
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__).
+
+If you find a bug, please, include the version string in your report!
+
+Should you need the numerical value of ``ULAB_MAX_DIMS``, you can get it
+from the version string in the following way:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    import ulab
+    
+    version = ulab.__version__
+    version_dims = version.split('-')[1]
+    version_num = int(version_dims.replace('D', ''))
+    
+    print('version string: ', version)
+    print('version dimensions: ', version_dims)
+    print('numerical value of dimensions: ', version_num)
+
+.. parsed-literal::
+
+    version string:  2.1.0-2D
+    version dimensions:  2D
+    numerical value of dimensions:  2
+    
+    
+
+
+ulab with complex arrays
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the firmware supports complex arrays, ``-c`` is appended to the
+version string as can be seen below.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    import ulab
+    
+    version = ulab.__version__
+    
+    print('version string: ', version)
+
+.. parsed-literal::
+
+    version string:  4.0.0-2D-c
+    
+    
+
+
+Finding out what your firmware supports
+---------------------------------------
+
+``ulab`` implements a number of array operators and functions, but this
+does not mean that all of these functions and methods are actually
+compiled into the firmware. You can fine-tune your firmware by
+setting/unsetting any of the ``_HAS_`` constants in
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__.
+
+Functions included in the firmware
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The version string will not tell you everything about your firmware,
+because the supported functions and sub-modules can still arbitrarily be
+included or excluded. One way of finding out what is compiled into the
+firmware is calling ``dir`` with ``ulab`` as its argument.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import scipy as spy
+    
+    
+    print('===== constants, functions, and modules of numpy =====\n\n', dir(np))
+    
+    # since fft and linalg are sub-modules, print them separately
+    print('\nfunctions included in the fft module:\n', dir(np.fft))
+    print('\nfunctions included in the linalg module:\n', dir(np.linalg))
+    
+    print('\n\n===== modules of scipy =====\n\n', dir(spy))
+    print('\nfunctions included in the optimize module:\n', dir(spy.optimize))
+    print('\nfunctions included in the signal module:\n', dir(spy.signal))
+    print('\nfunctions included in the special module:\n', dir(spy.special))
+
+.. parsed-literal::
+
+    ===== constants, functions, and modules of numpy =====
+    
+     ['__class__', '__name__', 'bool', 'sort', 'sum', 'acos', 'acosh', 'arange', 'arctan2', 'argmax', 'argmin', 'argsort', 'around', 'array', 'asin', 'asinh', 'atan', 'atanh', 'ceil', 'clip', 'concatenate', 'convolve', 'cos', 'cosh', 'cross', 'degrees', 'diag', 'diff', 'e', 'equal', 'exp', 'expm1', 'eye', 'fft', 'flip', 'float', 'floor', 'frombuffer', 'full', 'get_printoptions', 'inf', 'int16', 'int8', 'interp', 'linalg', 'linspace', 'log', 'log10', 'log2', 'logspace', 'max', 'maximum', 'mean', 'median', 'min', 'minimum', 'nan', 'ndinfo', 'not_equal', 'ones', 'pi', 'polyfit', 'polyval', 'radians', 'roll', 'set_printoptions', 'sin', 'sinh', 'sqrt', 'std', 'tan', 'tanh', 'trapz', 'uint16', 'uint8', 'vectorize', 'zeros']
+    
+    functions included in the fft module:
+     ['__class__', '__name__', 'fft', 'ifft']
+    
+    functions included in the linalg module:
+     ['__class__', '__name__', 'cholesky', 'det', 'dot', 'eig', 'inv', 'norm', 'trace']
+    
+    
+    ===== modules of scipy =====
+    
+     ['__class__', '__name__', 'optimize', 'signal', 'special']
+    
+    functions included in the optimize module:
+     ['__class__', '__name__', 'bisect', 'fmin', 'newton']
+    
+    functions included in the signal module:
+     ['__class__', '__name__', 'sosfilt', 'spectrogram']
+    
+    functions included in the special module:
+     ['__class__', '__name__', 'erf', 'erfc', 'gamma', 'gammaln']
+    
+    
+
+
+Methods included in the firmware
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``dir`` function applied to the module or its sub-modules gives
+information on what the module and sub-modules include, but is not
+enough to find out which methods the ``ndarray`` class supports. We can
+list the methods by calling ``dir`` with the ``array`` object itself:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print(dir(np.array))
+
+.. parsed-literal::
+
+    ['__class__', '__name__', 'copy', 'sort', '__bases__', '__dict__', 'dtype', 'flatten', 'itemsize', 'reshape', 'shape', 'size', 'strides', 'tobytes', 'transpose']
+    
+    
+
+
+Operators included in the firmware
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A list of operators cannot be generated as shown above. If you really
+need to find out, whether, e.g., the ``**`` operator is supported by the
+firmware, you have to ``try`` it:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3])
+    b = np.array([4, 5, 6])
+    
+    try:
+        print(a ** b)
+    except Exception as e:
+        print('operator is not supported: ', e)
+
+.. parsed-literal::
+
+    operator is not supported:  unsupported types for __pow__: 'ndarray', 'ndarray'
+    
+    
+
+
+The exception above would be raised, if the firmware was compiled with
+the
+
+.. code:: c
+
+   #define NDARRAY_HAS_BINARY_OP_POWER         (0)
+
+definition.
diff --git a/tulip/shared/ulab/docs/manual/source/ulab-ndarray.rst b/tulip/shared/ulab/docs/manual/source/ulab-ndarray.rst
new file mode 100644
index 000000000..040363385
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/ulab-ndarray.rst
@@ -0,0 +1,2657 @@
+
+ndarray, the base class
+=======================
+
+The ``ndarray`` is the underlying container of numerical data. It can be
+thought of as micropython’s own ``array`` object, but has a great number
+of extra features starting with how it can be initialised, which
+operations can be done on it, and which functions can accept it as an
+argument. One important property of an ``ndarray`` is that it is also a
+proper ``micropython`` iterable.
+
+The ``ndarray`` consists of a short header, and a pointer that holds the
+data. The pointer always points to a contiguous segment in memory
+(``numpy`` is more flexible in this regard), and the header tells the
+interpreter, how the data from this segment is to be read out, and what
+the bytes mean. Some operations, e.g., ``reshape``, are fast, because
+they do not operate on the data, they work on the header, and therefore,
+only a couple of bytes are manipulated, even if there are a million data
+entries. A more detailed exposition of how operators are implemented can
+be found in the section titled `Programming ulab <#Programming_ula>`__.
+
+Since the ``ndarray`` is a binary container, it is also compact, meaning
+that it takes only a couple of bytes of extra RAM in addition to what is
+required for storing the numbers themselves. ``ndarray``\ s are also
+type-aware, i.e., one can save RAM by specifying a data type, and using
+the smallest reasonable one. Five such types are defined, namely
+``uint8``, ``int8``, which occupy a single byte of memory per datum,
+``uint16``, and ``int16``, which occupy two bytes per datum, and
+``float``, which occupies four or eight bytes per datum. The
+precision/size of the ``float`` type depends on the definition of
+``mp_float_t``. Some platforms, e.g., the PYBD, implement ``double``\ s,
+but some, e.g., the pyboard v1.1, do not. You can find out, what type of
+float your particular platform implements by looking at the output of
+the `.itemsize <#.itemsize>`__ class property, or looking at the exact
+``dtype``, when you print out an array.
+
+In addition to the five above-mentioned numerical types, it is also
+possible to define Boolean arrays, which can be used in the indexing of
+data. However, Boolean arrays are really nothing but arrays of type
+``uint8`` with an extra flag.
+
+On the following pages, we will see how one can work with
+``ndarray``\ s. Those familiar with ``numpy`` should find that the
+nomenclature and naming conventions of ``numpy`` are adhered to as
+closely as possible. We will point out the few differences, where
+necessary.
+
+For the sake of comparison, in addition to the ``ulab`` code snippets,
+sometimes the equivalent ``numpy`` code is also presented. You can find
+out, where the snippet is supposed to run by looking at its first line,
+the header of the code block.
+
+The ndinfo function
+-------------------
+
+A concise summary of a couple of the properties of an ``ndarray`` can be
+printed out by calling the ``ndinfo`` function. In addition to finding
+out what the *shape* and *strides* of the array are, we also get the
+``itemsize``, as well as the type. An interesting piece of information
+is the *data pointer*, which tells us, what the address of the data
+segment of the ``ndarray`` is. We will see the significance of this in
+the section `Slicing and indexing <#Slicing-and-indexing>`__.
+
+Note that this function simply prints some information, but does not
+return anything. If you need to get a handle of the data contained in
+the printout, you should call the dedicated ``shape``, ``strides``, or
+``itemsize`` functions directly.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(5), dtype=np.float)
+    b = np.array(range(25), dtype=np.uint8).reshape((5, 5))
+    np.ndinfo(a)
+    print('\n')
+    np.ndinfo(b)
+
+.. parsed-literal::
+
+    class: ndarray
+    shape: (5,)
+    strides: (8,)
+    itemsize: 8
+    data pointer: 0x7f8f6fa2e240
+    type: float
+    
+    
+    class: ndarray
+    shape: (5, 5)
+    strides: (5, 1)
+    itemsize: 1
+    data pointer: 0x7f8f6fa2e2e0
+    type: uint8
+    
+    
+
+
+Initialising an array
+---------------------
+
+A new array can be created by passing either a standard micropython
+iterable, or another ``ndarray`` into the constructor.
+
+Initialising by passing iterables
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the iterable is one-dimensional, i.e., one whose elements are
+numbers, then a row vector will be created and returned. If the iterable
+is two-dimensional, i.e., one whose elements are again iterables, a
+matrix will be created. If the lengths of the iterables are not
+consistent, a ``ValueError`` will be raised. Iterables of different
+types can be mixed in the initialisation function.
+
+If the ``dtype`` keyword with the possible
+``uint8/int8/uint16/int16/float`` values is supplied, the new
+``ndarray`` will have that type, otherwise, it assumes ``float`` as
+default. In addition, if ``ULAB_SUPPORTS_COMPLEX`` is set to 1 in
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__,
+the ``dtype`` can also take on the value of ``complex``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = [1, 2, 3, 4, 5, 6, 7, 8]
+    b = np.array(a)
+    
+    print("a:\t", a)
+    print("b:\t", b)
+    
+    # a two-dimensional array with mixed-type initialisers
+    c = np.array([range(5), range(20, 25, 1), [44, 55, 66, 77, 88]], dtype=np.uint8)
+    print("\nc:\t", c)
+    
+    # and now we throw an exception
+    d = np.array([range(5), range(10), [44, 55, 66, 77, 88]], dtype=np.uint8)
+    print("\nd:\t", d)
+
+.. parsed-literal::
+
+    a:	 [1, 2, 3, 4, 5, 6, 7, 8]
+    b:	 array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)
+    
+    c:	 array([[0, 1, 2, 3, 4],
+           [20, 21, 22, 23, 24],
+           [44, 55, 66, 77, 88]], dtype=uint8)
+    
+    Traceback (most recent call last):
+      File "/dev/shm/micropython.py", line 15, in <module>
+    ValueError: iterables are not of the same length
+    
+
+
+Initialising by passing arrays
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+An ``ndarray`` can be initialised by supplying another array. This
+statement is almost trivial, since ``ndarray``\ s are iterables
+themselves, though it should be pointed out that initialising through
+arrays is a bit faster. This statement is especially true, if the
+``dtype``\ s of the source and output arrays are the same, because then
+the contents can simply be copied without further ado. While type
+conversion is also possible, it will always be slower than straight
+copying.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = [1, 2, 3, 4, 5, 6, 7, 8]
+    b = np.array(a)
+    c = np.array(b)
+    d = np.array(b, dtype=np.uint8)
+    
+    print("a:\t", a)
+    print("\nb:\t", b)
+    print("\nc:\t", c)
+    print("\nd:\t", d)
+
+.. parsed-literal::
+
+    a:	 [1, 2, 3, 4, 5, 6, 7, 8]
+    
+    b:	 array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)
+    
+    c:	 array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)
+    
+    d:	 array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    
+    
+
+
+Note that the default type of the ``ndarray`` is ``float``. Hence, if
+the array is initialised from another array, type conversion will always
+take place, except, when the output type is specifically supplied. I.e.,
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(5), dtype=np.uint8)
+    b = np.array(a)
+    print("a:\t", a)
+    print("\nb:\t", b)
+
+.. parsed-literal::
+
+    a:	 array([0, 1, 2, 3, 4], dtype=uint8)
+    
+    b:	 array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=float64)
+    
+    
+
+
+will iterate over the elements in ``a``, since in the assignment
+``b = np.array(a)``, no output type was given, therefore, ``float`` was
+assumed. On the other hand,
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(5), dtype=np.uint8)
+    b = np.array(a, dtype=np.uint8)
+    print("a:\t", a)
+    print("\nb:\t", b)
+
+.. parsed-literal::
+
+    a:	 array([0, 1, 2, 3, 4], dtype=uint8)
+    
+    b:	 array([0, 1, 2, 3, 4], dtype=uint8)
+    
+    
+
+
+will simply copy the content of ``a`` into ``b`` without any iteration,
+and will, therefore, be faster. Keep this in mind, whenever the output
+type, or performance is important.
+
+Array initialisation functions
+==============================
+
+There are eleven functions that can be used for initialising an array.
+Starred functions accept ``complex`` as the value of the ``dtype``, if
+the firmware was compiled with complex support.
+
+1.  `numpy.arange <#arange>`__
+2.  `numpy.concatenate <#concatenate>`__
+3.  `numpy.diag\* <#diag>`__
+4.  `numpy.empty\* <#empty>`__
+5.  `numpy.eye\* <#eye>`__
+6.  `numpy.frombuffer <#frombuffer>`__
+7.  `numpy.full\* <#full>`__
+8.  `numpy.linspace\* <#linspace>`__
+9.  `numpy.logspace <#logspace>`__
+10. `numpy.ones\* <#ones>`__
+11. `numpy.zeros\* <#zeros>`__
+
+arange
+------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.arange.html
+
+The function returns a one-dimensional array with evenly spaced values.
+Takes 3 positional arguments (two are optional), and the ``dtype``
+keyword argument.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print(np.arange(10))
+    print(np.arange(2, 10))
+    print(np.arange(2, 10, 3))
+    print(np.arange(2, 10, 3, dtype=np.float))
+
+.. parsed-literal::
+
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int16)
+    array([2, 3, 4, 5, 6, 7, 8, 9], dtype=int16)
+    array([2, 5, 8], dtype=int16)
+    array([2.0, 5.0, 8.0], dtype=float64)
+    
+    
+
+
+concatenate
+-----------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.concatenate.html
+
+The function joins a sequence of arrays, if they are compatible in
+shape, i.e., if all shapes except the one along the joining axis are
+equal.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(25), dtype=np.uint8).reshape((5, 5))
+    b = np.array(range(15), dtype=np.uint8).reshape((3, 5))
+    
+    c = np.concatenate((a, b), axis=0)
+    print(c)
+
+.. parsed-literal::
+
+    array([[0, 1, 2, 3, 4],
+           [5, 6, 7, 8, 9],
+           [10, 11, 12, 13, 14],
+           [15, 16, 17, 18, 19],
+           [20, 21, 22, 23, 24],
+           [0, 1, 2, 3, 4],
+           [5, 6, 7, 8, 9],
+           [10, 11, 12, 13, 14]], dtype=uint8)
+    
+    
+
+
+**WARNING**: ``numpy`` accepts arbitrary ``dtype``\ s in the sequence of
+arrays, in ``ulab`` the ``dtype``\ s must be identical. If you want to
+concatenate different types, you have to convert all arrays to the same
+type first. Here ``b`` is of ``float`` type, so it cannot directly be
+concatenated to ``a``. However, if we cast the ``dtype`` of ``b``, the
+concatenation works:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(25), dtype=np.uint8).reshape((5, 5))
+    b = np.array(range(15), dtype=np.float).reshape((5, 3))
+    d = np.array(b+1, dtype=np.uint8)
+    print('a: ', a)
+    print('='*20 + '\nd: ', d)
+    c = np.concatenate((d, a), axis=1)
+    print('='*20 + '\nc: ', c)
+
+.. parsed-literal::
+
+    a:  array([[0, 1, 2, 3, 4],
+           [5, 6, 7, 8, 9],
+           [10, 11, 12, 13, 14],
+           [15, 16, 17, 18, 19],
+           [20, 21, 22, 23, 24]], dtype=uint8)
+    ====================
+    d:  array([[1, 2, 3],
+           [4, 5, 6],
+           [7, 8, 9],
+           [10, 11, 12],
+           [13, 14, 15]], dtype=uint8)
+    ====================
+    c:  array([[1, 2, 3, 0, 1, 2, 3, 4],
+           [4, 5, 6, 5, 6, 7, 8, 9],
+           [7, 8, 9, 10, 11, 12, 13, 14],
+           [10, 11, 12, 15, 16, 17, 18, 19],
+           [13, 14, 15, 20, 21, 22, 23, 24]], dtype=uint8)
+    
+    
+
+
+diag
+----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.diag.html
+
+Extract a diagonal, or construct a diagonal array.
+
+The function takes a positional argument, an ``ndarray``, or any
+``micropython`` iterable, and an optional keyword argument, a shift,
+with a default value of 0. If the first argument is a two-dimensional
+array (or a two-dimensional iterable, e.g., a list of lists), the
+function returns a one-dimensional array containing the diagonal
+entries. The diagonal can be shifted by an amount given in the second
+argument. If the shift is larger than the length of the corresponding
+axis, an empty array is returned.
+
+If the first argument is a one-dimensional array, the function returns a
+two-dimensional square tensor with its diagonal elements given by the
+first argument. Again, the diagonal can be shifted by an amount given by the
+keyword argument.
+
+The ``diag`` function can accept a complex array, if the firmware was
+compiled with complex support.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3], dtype=np.uint8)
+    print(np.diag(a))
+    
+    print('\ndiagonal shifted by 2')
+    print(np.diag(a, k=2))
+    
+    print('\ndiagonal shifted by -2')
+    print(np.diag(a, k=-2))
+
+.. parsed-literal::
+
+    array([[1, 0, 0],
+           [0, 2, 0],
+           [0, 0, 3]], dtype=uint8)
+    
+    diagonal shifted by 2
+    array([[0, 0, 1, 0, 0],
+           [0, 0, 0, 2, 0],
+           [0, 0, 0, 0, 3],
+           [0, 0, 0, 0, 0],
+           [0, 0, 0, 0, 0]], dtype=uint8)
+    
+    diagonal shifted by -2
+    array([[0, 0, 0, 0, 0],
+           [0, 0, 0, 0, 0],
+           [1, 0, 0, 0, 0],
+           [0, 2, 0, 0, 0],
+           [0, 0, 3, 0, 0]], dtype=uint8)
+    
+    
+
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.arange(16).reshape((4, 4))
+    print(a)
+    print('\ndiagonal of a:')
+    print(np.diag(a))
+    
+    print('\ndiagonal of a:')
+    print(np.diag(a))
+    
+    print('\ndiagonal of a, shifted by 2')
+    print(np.diag(a, k=2))
+    
+    print('\ndiagonal of a, shifted by 5')
+    print(np.diag(a, k=5))
+
+.. parsed-literal::
+
+    array([[0, 1, 2, 3],
+           [4, 5, 6, 7],
+           [8, 9, 10, 11],
+           [12, 13, 14, 15]], dtype=int16)
+    
+    diagonal of a:
+    array([0, 5, 10, 15], dtype=int16)
+    
+    diagonal of a:
+    array([0, 5, 10, 15], dtype=int16)
+    
+    diagonal of a, shifted by 2
+    array([2, 7], dtype=int16)
+    
+    diagonal of a, shifted by 5
+    array([], dtype=int16)
+    
+    
+
+
+empty
+-----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.empty.html
+
+``empty`` is simply an alias for ``zeros``, i.e., as opposed to
+``numpy``, the entries of the tensor will be initialised to zero.
+
+The ``empty`` function can accept complex as the value of the dtype, if
+the firmware was compiled with complex support.
+
+eye
+---
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.eye.html
+
+Another special array method is the ``eye`` function, whose call
+signature is
+
+.. code:: python
+
+   eye(N, M, k=0, dtype=float)
+
+where ``N`` (``M``) specify the dimensions of the matrix (if only ``N``
+is supplied, then we get a square matrix, otherwise one with ``N`` rows,
+and ``M`` columns), and ``k`` is the shift of the ones (the main
+diagonal corresponds to ``k=0``). Here are a couple of examples.
+
+The ``eye`` function can accept ``complex`` as the value of the
+``dtype``, if the firmware was compiled with complex support.
+
+With a single argument
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print(np.eye(5))
+
+.. parsed-literal::
+
+    array([[1.0, 0.0, 0.0, 0.0, 0.0],
+           [0.0, 1.0, 0.0, 0.0, 0.0],
+           [0.0, 0.0, 1.0, 0.0, 0.0],
+           [0.0, 0.0, 0.0, 1.0, 0.0],
+           [0.0, 0.0, 0.0, 0.0, 1.0]], dtype=float64)
+    
+    
+
+
+Specifying the dimensions of the matrix
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print(np.eye(4, M=6, k=-1, dtype=np.int16))
+
+.. parsed-literal::
+
+    array([[0, 0, 0, 0, 0, 0],
+           [1, 0, 0, 0, 0, 0],
+           [0, 1, 0, 0, 0, 0],
+           [0, 0, 1, 0, 0, 0]], dtype=int16)
+    
+    
+
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print(np.eye(4, M=6, dtype=np.int8))
+
+.. parsed-literal::
+
+    array([[1, 0, 0, 0, 0, 0],
+           [0, 1, 0, 0, 0, 0],
+           [0, 0, 1, 0, 0, 0],
+           [0, 0, 0, 1, 0, 0]], dtype=int8)
+    
+    
+
+
+frombuffer
+----------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.frombuffer.html
+
+The function interprets a contiguous buffer as a one-dimensional array,
+and thus can be used for piping buffered data directly into an array.
+This method of analysing, e.g., ADC data is much more efficient than
+passing the ADC buffer into the ``array`` constructor, because
+``frombuffer`` simply creates the ``ndarray`` header and blindly copies
+the memory segment, without inspecting the underlying data.
+
+The function takes a single positional argument, the buffer, and three
+keyword arguments. These are the ``dtype`` with a default value of
+``float``, the ``offset``, with a default of 0, and the ``count``, with
+a default of -1, meaning that all data are taken in.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    buffer = b'\x01\x02\x03\x04\x05\x06\x07\x08'
+    print('buffer: ', buffer)
+    
+    a = np.frombuffer(buffer, dtype=np.uint8)
+    print('a, all data read: ', a)
+    
+    b = np.frombuffer(buffer, dtype=np.uint8, offset=2)
+    print('b, all data with an offset: ', b)
+    
+    c = np.frombuffer(buffer, dtype=np.uint8, offset=2, count=3)
+    print('c, only 3 items with an offset: ', c)
+
+.. parsed-literal::
+
+    buffer:  b'\x01\x02\x03\x04\x05\x06\x07\x08'
+    a, all data read:  array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    b, all data with an offset:  array([3, 4, 5, 6, 7, 8], dtype=uint8)
+    c, only 3 items with an offset:  array([3, 4, 5], dtype=uint8)
+    
+    
+
+
+full
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.full.html
+
+The function returns an array of arbitrary dimension, whose elements are
+all equal to the second positional argument. The first argument is a
+tuple describing the shape of the tensor. The ``dtype`` keyword argument
+with a default value of ``float`` can also be supplied.
+
+The ``full`` function can accept a complex scalar, or ``complex`` as the
+value of ``dtype``, if the firmware was compiled with complex support.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    # create an array with the default type
+    print(np.full((2, 4), 3))
+    
+    print('\n' + '='*20 + '\n')
+    # the array type is uint8 now
+    print(np.full((2, 4), 3, dtype=np.uint8))
+
+.. parsed-literal::
+
+    array([[3.0, 3.0, 3.0, 3.0],
+           [3.0, 3.0, 3.0, 3.0]], dtype=float64)
+    
+    ====================
+    
+    array([[3, 3, 3, 3],
+           [3, 3, 3, 3]], dtype=uint8)
+    
+    
+
+
+linspace
+--------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.linspace.html
+
+This function returns an array, whose elements are uniformly spaced
+between the ``start``, and ``stop`` points. The number of elements is
+determined by the ``num`` keyword argument, whose default value is 50.
+With the ``endpoint`` keyword argument (defaults to ``True``) one can
+include ``stop`` in the sequence. In addition, the ``dtype`` keyword can
+be supplied to force type conversion of the output. The default is
+``float``. Note that, when ``dtype`` is of integer type, the sequence is
+not necessarily evenly spaced. This is not an error, rather a
+consequence of rounding. (This is also the ``numpy`` behaviour.)
+
+The ``linspace`` function can accept ``complex`` as the value of the
+``dtype``, if the firmware was compiled with complex support. The output
+``dtype`` is automatically complex, if either of the endpoints is a
+complex scalar.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    # generate a sequence with defaults
+    print('default sequence:\t', np.linspace(0, 10))
+    
+    # num=5
+    print('num=5:\t\t\t', np.linspace(0, 10, num=5))
+    
+    # num=5, endpoint=False
+    print('num=5:\t\t\t', np.linspace(0, 10, num=5, endpoint=False))
+    
+    # num=5, endpoint=False, dtype=uint8
+    print('num=5:\t\t\t', np.linspace(0, 5, num=7, endpoint=False, dtype=np.uint8))
+
+.. parsed-literal::
+
+    default sequence:	 array([0.0, 0.2040816326530612, 0.4081632653061225, ..., 9.591836734693871, 9.795918367346932, 9.999999999999993], dtype=float64)
+    num=5:			 array([0.0, 2.5, 5.0, 7.5, 10.0], dtype=float64)
+    num=5:			 array([0.0, 2.0, 4.0, 6.0, 8.0], dtype=float64)
+    num=5:			 array([0, 0, 1, 2, 2, 3, 4], dtype=uint8)
+    
+    
+
+
+logspace
+--------
+
+``linspace``\ ’s equivalent for logarithmically spaced data is
+``logspace``. This function produces a sequence of numbers, in which the
+quotient of consecutive numbers is constant. This is a geometric
+sequence.
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.logspace.html
+
+This function returns an array, whose elements are uniformly spaced on
+a logarithmic scale between ``base**start``, and ``base**stop``. The
+number of elements is determined by the ``num`` keyword argument, whose
+default value is 50.
+With the ``endpoint`` keyword argument (defaults to ``True``) one can
+include ``stop`` in the sequence. In addition, the ``dtype`` keyword can
+be supplied to force type conversion of the output. The default is
+``float``. Note that, exactly as in ``linspace``, when ``dtype`` is of
+integer type, the sequence is not necessarily evenly spaced in log
+space.
+
+In addition to the keyword arguments found in ``linspace``, ``logspace``
+also accepts the ``base`` argument. The default value is 10.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    # generate a sequence with defaults
+    print('default sequence:\t', np.logspace(0, 3))
+    
+    # num=5
+    print('num=5:\t\t\t', np.logspace(1, 10, num=5))
+    
+    # num=5, endpoint=False
+    print('num=5:\t\t\t', np.logspace(1, 10, num=5, endpoint=False))
+    
+    # num=5, endpoint=False
+    print('num=5:\t\t\t', np.logspace(1, 10, num=5, endpoint=False, base=2))
+
+.. parsed-literal::
+
+    default sequence:	 array([1.0, 1.151395399326447, 1.325711365590109, ..., 754.3120063354646, 868.5113737513561, 1000.000000000004], dtype=float64)
+    num=5:			 array([10.0, 1778.279410038923, 316227.766016838, 56234132.5190349, 10000000000.0], dtype=float64)
+    num=5:			 array([10.0, 630.9573444801933, 39810.71705534974, 2511886.431509581, 158489319.2461114], dtype=float64)
+    num=5:			 array([2.0, 6.964404506368993, 24.25146506416637, 84.44850628946524, 294.066778879241], dtype=float64)
+    
+    
+
+
+ones, zeros
+-----------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.zeros.html
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.ones.html
+
+A couple of special arrays and matrices can easily be initialised by
+calling one of the ``ones``, or ``zeros`` functions. ``ones`` and
+``zeros`` follow the same pattern, and have the call signature
+
+.. code:: python
+
+   ones(shape, dtype=float)
+   zeros(shape, dtype=float)
+
+where shape is either an integer, or a tuple specifying the shape.
+
+The ``ones/zeros`` functions can accept complex as the value of the
+dtype, if the firmware was compiled with complex support.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    print(np.ones(6, dtype=np.uint8))
+    
+    print(np.zeros((6, 4)))
+
+.. parsed-literal::
+
+    array([1, 1, 1, 1, 1, 1], dtype=uint8)
+    array([[0.0, 0.0, 0.0, 0.0],
+           [0.0, 0.0, 0.0, 0.0],
+           [0.0, 0.0, 0.0, 0.0],
+           [0.0, 0.0, 0.0, 0.0],
+           [0.0, 0.0, 0.0, 0.0],
+           [0.0, 0.0, 0.0, 0.0]], dtype=float64)
+    
+    
+
+
+When specifying the shape, make sure that the length of the tuple is not
+larger than the maximum dimension of your firmware.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    import ulab
+    
+    print('maximum number of dimensions: ', ulab.__version__)
+    
+    print(np.zeros((2, 2, 2)))
+
+.. parsed-literal::
+
+    maximum number of dimensions:  2.1.0-2D
+    
+    Traceback (most recent call last):
+      File "/dev/shm/micropython.py", line 7, in <module>
+    TypeError: too many dimensions
+    
+
+
+Customising array printouts
+===========================
+
+``ndarray``\ s are pretty-printed, i.e., if the number of entries along
+the last axis is larger than 10 (default value), then only the first and
+last three entries will be printed. Also note that, as opposed to
+``numpy``, the printout always contains the ``dtype``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(200))
+    print("a:\t", a)
+
+.. parsed-literal::
+
+    a:	 array([0.0, 1.0, 2.0, ..., 197.0, 198.0, 199.0], dtype=float64)
+    
+    
+
+
+set_printoptions
+----------------
+
+The default values can be overwritten by means of the
+``set_printoptions`` function
+`numpy.set_printoptions <https://numpy.org/doc/1.18/reference/generated/numpy.set_printoptions.html>`__,
+which accepts two keyword arguments, the ``threshold``, and the
+``edgeitems``. The first of these arguments determines the length of the
+longest array that will be printed in full, while the second is the
+number of items that will be printed on the left and right hand side of
+the ellipsis, if the array is longer than ``threshold``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(20))
+    print("a printed with defaults:\t", a)
+    
+    np.set_printoptions(threshold=200)
+    print("\na printed in full:\t\t", a)
+    
+    np.set_printoptions(threshold=10, edgeitems=2)
+    print("\na truncated with 2 edgeitems:\t", a)
+
+.. parsed-literal::
+
+    a printed with defaults:	 array([0.0, 1.0, 2.0, ..., 17.0, 18.0, 19.0], dtype=float64)
+    
+    a printed in full:		 array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0], dtype=float64)
+    
+    a truncated with 2 edgeitems:	 array([0.0, 1.0, ..., 18.0, 19.0], dtype=float64)
+    
+    
+
+
+get_printoptions
+----------------
+
+The set value of the ``threshold`` and ``edgeitems`` can be retrieved by
+calling the ``get_printoptions`` function with no arguments. The
+function returns a *dictionary* with two keys.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    np.set_printoptions(threshold=100, edgeitems=20)
+    print(np.get_printoptions())
+
+.. parsed-literal::
+
+    {'threshold': 100, 'edgeitems': 20}
+    
+    
+
+
+Methods and properties of ndarrays
+==================================
+
+Arrays have several *properties* that can be queried, and some methods
+that can be called. With the exception of the flatten and transpose
+operators, properties return an object that describes some feature of
+the array, while the methods return a new array-like object. The
+``imag`` and ``real`` properties are included in the firmware only if it
+was compiled with complex support.
+
+1.  `.byteswap <#.byteswap>`__
+2.  `.copy <#.copy>`__
+3.  `.dtype <#.dtype>`__
+4.  `.flat <#.flat>`__
+5.  `.flatten <#.flatten>`__
+6.  `.imag\* <#.imag>`__
+7.  `.itemsize <#.itemsize>`__
+8.  `.real\* <#.real>`__
+9.  `.reshape <#.reshape>`__
+10. `.shape <#.shape>`__
+11. `.size <#.size>`__
+12. `.T <#.transpose>`__
+13. `.tobytes <#.tobytes>`__
+14. `.tolist <#.tolist>`__
+15. `.transpose <#.transpose>`__
+16. `.sort <#.sort>`__
+
+.byteswap
+---------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.ndarray.byteswap.html
+
+The method takes a single keyword argument, ``inplace``, with values
+``True`` or ``False``, and swaps the bytes in the array. If
+``inplace = False``, a new ``ndarray`` is returned, otherwise the
+original values are overwritten.
+
+The ``frombuffer`` function is a convenient way of receiving data from
+peripheral devices that work with buffers. However, it is not guaranteed
+that the byte order (in other words, the *endianness*) of the peripheral
+device matches that of the microcontroller. The ``.byteswap`` method
+makes it possible to change the endianness of the incoming data stream.
+
+Obviously, byteswapping makes sense only for those cases, when a datum
+occupies more than one byte, i.e., for the ``uint16``, ``int16``, and
+``float`` ``dtype``\ s. When ``dtype`` is either ``uint8``, or ``int8``,
+the method simply returns a view or copy of self, depending upon the
+value of ``inplace``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    buffer = b'\x01\x02\x03\x04\x05\x06\x07\x08'
+    print('buffer: ', buffer)
+    
+    a = np.frombuffer(buffer, dtype=np.uint16)
+    print('a: ', a)
+    b = a.byteswap()
+    print('b: ', b)
+
+.. parsed-literal::
+
+    buffer:  b'\x01\x02\x03\x04\x05\x06\x07\x08'
+    a:  array([513, 1027, 1541, 2055], dtype=uint16)
+    b:  array([258, 772, 1286, 1800], dtype=uint16)
+    
+    
+
+
+.copy
+-----
+
+The ``.copy`` method creates a new *deep copy* of an array, i.e., the
+entries of the source array are *copied* into the target array.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4], dtype=np.int8)
+    b = a.copy()
+    print('a: ', a)
+    print('='*20)
+    print('b: ', b)
+
+.. parsed-literal::
+
+    a:  array([1, 2, 3, 4], dtype=int8)
+    ====================
+    b:  array([1, 2, 3, 4], dtype=int8)
+    
+    
+
+
+.dtype
+------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.dtype.html
+
+The ``.dtype`` property is the ``dtype`` of an array. This can then be
+used for initialising another array with the matching type. ``ulab``
+implements two versions of ``dtype``; one that is ``numpy``-like, i.e.,
+one, which returns a ``dtype`` object, and one that is significantly
+cheaper in terms of flash space, but does not define a ``dtype`` object,
+and holds a single character (number) instead.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4], dtype=np.int8)
+    b = np.array([5, 6, 7], dtype=a.dtype)
+    print('a: ', a)
+    print('dtype of a: ', a.dtype)
+    print('\nb: ', b)
+
+.. parsed-literal::
+
+    a:  array([1, 2, 3, 4], dtype=int8)
+    dtype of a:  dtype('int8')
+    
+    b:  array([5, 6, 7], dtype=int8)
+    
+    
+
+
+If the ``ulab.h`` header file sets the pre-processor constant
+``ULAB_HAS_DTYPE_OBJECT`` to 0 as
+
+.. code:: c
+
+   #define ULAB_HAS_DTYPE_OBJECT               (0)
+
+then the output of the previous snippet will be
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4], dtype=np.int8)
+    b = np.array([5, 6, 7], dtype=a.dtype)
+    print('a: ', a)
+    print('dtype of a: ', a.dtype)
+    print('\nb: ', b)
+
+.. parsed-literal::
+
+    a:  array([1, 2, 3, 4], dtype=int8)
+    dtype of a:  98
+    
+    b:  array([5, 6, 7], dtype=int8)
+    
+    
+
+
+Here 98 is nothing but the ASCII value of the character ``b``, which is
+the type code for signed 8-bit integers. The object definition adds
+around 600 bytes to the firmware.
+
+.flat
+-----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.flat.html
+
+``.flat`` returns the array’s flat iterator. For one-dimensional objects
+the flat iterator is equivalent to the standard iterator, while for
+higher dimensional tensors, it amounts to first flattening the array,
+and then iterating over it. Note, however, that the flat iterator does
+not consume RAM beyond what is required for holding the position of the
+iterator itself, while flattening produces a new copy.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4], dtype=np.int8)
+    for _a in a:
+        print(_a)
+    
+    a = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.int8)
+    print('a:\n', a)
+    
+    for _a in a:
+        print(_a)
+    
+    for _a in a.flat:
+        print(_a)
+
+.. parsed-literal::
+
+    1
+    2
+    3
+    4
+    a:
+     array([[1, 2, 3, 4],
+           [5, 6, 7, 8]], dtype=int8)
+    array([1, 2, 3, 4], dtype=int8)
+    array([5, 6, 7, 8], dtype=int8)
+    1
+    2
+    3
+    4
+    5
+    6
+    7
+    8
+    
+    
+
+
+.flatten
+--------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.flatten.html
+
+``.flatten`` returns the flattened array. The array can be flattened in
+``C`` style (i.e., moving along the last axis in the tensor), or in
+``fortran`` style (i.e., moving along the first axis in the tensor).
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4], dtype=np.int8)
+    print("a: \t\t", a)
+    print("a flattened: \t", a.flatten())
+    
+    b = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int8)
+    print("\nb:", b)
+    
+    print("b flattened (C): \t", b.flatten())
+    print("b flattened (F): \t", b.flatten(order='F'))
+
+.. parsed-literal::
+
+    a: 		 array([1, 2, 3, 4], dtype=int8)
+    a flattened: 	 array([1, 2, 3, 4], dtype=int8)
+    
+    b: array([[1, 2, 3],
+           [4, 5, 6]], dtype=int8)
+    b flattened (C): 	 array([1, 2, 3, 4, 5, 6], dtype=int8)
+    b flattened (F): 	 array([1, 4, 2, 5, 3, 6], dtype=int8)
+    
+    
+
+
+.imag
+-----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.ndarray.imag.html
+
+The ``.imag`` property is defined only, if the firmware was compiled
+with complex support, and returns a copy with the imaginary part of an
+array. If the array is real, then the output is straight zeros with the
+``dtype`` of the input. If the input is complex, the output ``dtype`` is
+always ``float``, irrespective of the values.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3], dtype=np.uint16)
+    print("a:\t", a)
+    print("a.imag:\t", a.imag)
+    
+    b = np.array([1, 2+1j, 3-1j], dtype=np.complex)
+    print("\nb:\t", b)
+    print("b.imag:\t", b.imag)
+
+.. parsed-literal::
+
+    a:	 array([1, 2, 3], dtype=uint16)
+    a.imag:	 array([0, 0, 0], dtype=uint16)
+    
+    b:	 array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=complex)
+    b.imag:	 array([0.0, 1.0, -1.0], dtype=float64)
+    
+    
+
+
+.itemsize
+---------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.ndarray.itemsize.html
+
+The ``.itemsize`` property is an integer with the size of elements in
+the array.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3], dtype=np.int8)
+    print("a:\n", a)
+    print("itemsize of a:", a.itemsize)
+    
+    b= np.array([[1, 2], [3, 4]], dtype=np.float)
+    print("\nb:\n", b)
+    print("itemsize of b:", b.itemsize)
+
+.. parsed-literal::
+
+    a:
+     array([1, 2, 3], dtype=int8)
+    itemsize of a: 1
+    
+    b:
+     array([[1.0, 2.0],
+           [3.0, 4.0]], dtype=float64)
+    itemsize of b: 8
+    
+    
+
+
+.real
+-----
+
+numpy:
+https://numpy.org/doc/stable/reference/generated/numpy.ndarray.real.html
+
+The ``.real`` property is defined only, if the firmware was compiled
+with complex support, and returns a copy with the real part of an array.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3], dtype=np.uint16)
+    print("a:\t", a)
+    print("a.real:\t", a.real)
+    
+    b = np.array([1, 2+1j, 3-1j], dtype=np.complex)
+    print("\nb:\t", b)
+    print("b.real:\t", b.real)
+
+.. parsed-literal::
+
+    a:	 array([1, 2, 3], dtype=uint16)
+    a.real:	 array([1, 2, 3], dtype=uint16)
+    
+    b:	 array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=complex)
+    b.real:	 array([1.0, 2.0, 3.0], dtype=float64)
+    
+    
+
+
+.reshape
+--------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html
+
+``reshape`` re-writes the shape properties of an ``ndarray``, but the
+array will not be modified in any other way. The function takes a single
+2-tuple with two integers as its argument. The 2-tuple should specify
+the desired number of rows and columns. If the new shape is not
+consistent with the old, a ``ValueError`` exception will be raised.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]], dtype=np.uint8)
+    print('a (4 by 4):', a)
+    print('a (2 by 8):', a.reshape((2, 8)))
+    print('a (1 by 16):', a.reshape((1, 16)))
+
+.. parsed-literal::
+
+    a (4 by 4): array([[1, 2, 3, 4],
+           [5, 6, 7, 8],
+           [9, 10, 11, 12],
+           [13, 14, 15, 16]], dtype=uint8)
+    a (2 by 8): array([[1, 2, 3, 4, 5, 6, 7, 8],
+           [9, 10, 11, 12, 13, 14, 15, 16]], dtype=uint8)
+    a (1 by 16): array([[1, 2, 3, ..., 14, 15, 16]], dtype=uint8)
+    
+    
+
+
+Note that ``ndarray.reshape()`` can also be called by assigning to
+``ndarray.shape``.
+
+.shape
+------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.ndarray.shape.html
+
+The ``.shape`` property is a tuple whose elements are the length of the
+array along each axis.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4], dtype=np.int8)
+    print("a:\n", a)
+    print("shape of a:", a.shape)
+    
+    b= np.array([[1, 2], [3, 4]], dtype=np.int8)
+    print("\nb:\n", b)
+    print("shape of b:", b.shape)
+
+.. parsed-literal::
+
+    a:
+     array([1, 2, 3, 4], dtype=int8)
+    shape of a: (4,)
+    
+    b:
+     array([[1, 2],
+           [3, 4]], dtype=int8)
+    shape of b: (2, 2)
+    
+    
+
+
+By assigning a tuple to the ``.shape`` property, the array can be
+``reshape``\ d:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
+    print('a:\n', a)
+    
+    a.shape = (3, 3)
+    print('\na:\n', a)
+
+.. parsed-literal::
+
+    a:
+     array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], dtype=float64)
+    
+    a:
+     array([[1.0, 2.0, 3.0],
+           [4.0, 5.0, 6.0],
+           [7.0, 8.0, 9.0]], dtype=float64)
+    
+    
+
+
+.size
+-----
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.ndarray.size.html
+
+The ``.size`` property is an integer specifying the number of elements
+in the array.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3], dtype=np.int8)
+    print("a:\n", a)
+    print("size of a:", a.size)
+    
+    b= np.array([[1, 2], [3, 4]], dtype=np.int8)
+    print("\nb:\n", b)
+    print("size of b:", b.size)
+
+.. parsed-literal::
+
+    a:
+     array([1, 2, 3], dtype=int8)
+    size of a: 3
+    
+    b:
+     array([[1, 2],
+    	 [3, 4]], dtype=int8)
+    size of b: 4
+    
+    
+
+
+.T
+--
+
+The ``.T`` property of the ``ndarray`` is equivalent to
+`.transpose <#.transpose>`__.
+
+.tobytes
+--------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.ndarray.tobytes.html
+
+The ``.tobytes`` method can be used for acquiring a handle of the
+underlying data pointer of an array, and it returns a new ``bytearray``
+that can be fed into any method that can accept a ``bytearray``, e.g.,
+ADC data can be buffered into this ``bytearray``, or the ``bytearray``
+can be fed into a DAC. Since the ``bytearray`` is really nothing but the
+bare data container of the array, any manipulation on the ``bytearray``
+automatically modifies the array itself.
+
+Note that the method raises a ``ValueError`` exception, if the array is
+not dense (i.e., it has already been sliced).
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(8), dtype=np.uint8)
+    print('a: ', a)
+    b = a.tobytes()
+    print('b: ', b)
+    
+    # modify b
+    b[0] = 13
+    
+    print('='*20)
+    print('b: ', b)
+    print('a: ', a)
+
+.. parsed-literal::
+
+    a:  array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint8)
+    b:  bytearray(b'\x00\x01\x02\x03\x04\x05\x06\x07')
+    ====================
+    b:  bytearray(b'\r\x01\x02\x03\x04\x05\x06\x07')
+    a:  array([13, 1, 2, 3, 4, 5, 6, 7], dtype=uint8)
+    
+    
+
+
+.tolist
+-------
+
+``numpy``:
+https://numpy.org/doc/stable/reference/generated/numpy.ndarray.tolist.html
+
+The ``.tolist`` method can be used for converting the numerical array
+into a (nested) ``python`` list.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(4), dtype=np.uint8)
+    print('a: ', a)
+    b = a.tolist()
+    print('b: ', b)
+    
+    c = a.reshape((2, 2))
+    print('='*20)
+    print('c: ', c)
+    d = c.tolist()
+    print('d: ', d)
+
+.. parsed-literal::
+
+    a:  array([0, 1, 2, 3], dtype=uint8)
+    b:  [0, 1, 2, 3]
+    ====================
+    c:  array([[0, 1],
+           [2, 3]], dtype=uint8)
+    d:  [[0, 1], [2, 3]]
+    
+    
+
+
+.transpose
+----------
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.transpose.html
+
+Returns the transposed array. Only defined if the maximum number of
+dimensions is larger than 1.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=np.uint8)
+    print('a:\n', a)
+    print('shape of a:', a.shape)
+    a.transpose()
+    print('\ntranspose of a:\n', a)
+    print('shape of a:', a.shape)
+
+.. parsed-literal::
+
+    a:
+     array([[1, 2, 3],
+    	 [4, 5, 6],
+    	 [7, 8, 9],
+    	 [10, 11, 12]], dtype=uint8)
+    shape of a: (4, 3)
+    
+    transpose of a:
+     array([[1, 4, 7, 10],
+    	 [2, 5, 8, 11],
+    	 [3, 6, 9, 12]], dtype=uint8)
+    shape of a: (3, 4)
+    
+    
+
+
+The transpose of the array can also be gotten through the ``T``
+property:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8)
+    print('a:\n', a)
+    print('\ntranspose of a:\n', a.T)
+
+.. parsed-literal::
+
+    a:
+     array([[1, 2, 3],
+           [4, 5, 6],
+           [7, 8, 9]], dtype=uint8)
+    
+    transpose of a:
+     array([[1, 4, 7],
+           [2, 5, 8],
+           [3, 6, 9]], dtype=uint8)
+    
+    
+
+
+.sort
+-----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.sort.html
+
+In-place sorting of an ``ndarray``. For a more detailed exposition, see
+`sort <#sort>`__.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.uint8)
+    print('\na:\n', a)
+    a.sort(axis=0)
+    print('\na sorted along vertical axis:\n', a)
+    
+    a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.uint8)
+    a.sort(axis=1)
+    print('\na sorted along horizontal axis:\n', a)
+    
+    a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.uint8)
+    a.sort(axis=None)
+    print('\nflattened a sorted:\n', a)
+
+.. parsed-literal::
+
+    
+    a:
+     array([[1, 12, 3, 0],
+           [5, 3, 4, 1],
+           [9, 11, 1, 8],
+           [7, 10, 0, 1]], dtype=uint8)
+    
+    a sorted along vertical axis:
+     array([[1, 3, 0, 0],
+           [5, 10, 1, 1],
+           [7, 11, 3, 1],
+           [9, 12, 4, 8]], dtype=uint8)
+    
+    a sorted along horizontal axis:
+     array([[0, 1, 3, 12],
+           [1, 3, 4, 5],
+           [1, 8, 9, 11],
+           [0, 1, 7, 10]], dtype=uint8)
+    
+    flattened a sorted:
+     array([0, 0, 1, ..., 10, 11, 12], dtype=uint8)
+    
+    
+
+
+Unary operators
+===============
+
+With the exception of ``len``, which returns a single number, all unary
+operators manipulate the underlying data element-wise.
+
+len
+---
+
+This operator takes a single argument, the array, and returns the
+length of the first axis.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)
+    b = np.array([range(5), range(5), range(5), range(5)], dtype=np.uint8)
+    
+    print("a:\t", a)
+    print("length of a: ", len(a))
+    print("shape of a: ", a.shape)
+    print("\nb:\t", b)
+    print("length of b: ", len(b))
+    print("shape of b: ", b.shape)
+
+.. parsed-literal::
+
+    a:	 array([1, 2, 3, 4, 5], dtype=uint8)
+    length of a:  5
+    shape of a:  (5,)
+    
+    b:	 array([[0, 1, 2, 3, 4],
+           [0, 1, 2, 3, 4],
+           [0, 1, 2, 3, 4],
+           [0, 1, 2, 3, 4]], dtype=uint8)
+    length of b:  4
+    shape of b:  (4, 5)
+    
+    
+
+
+The number returned by ``len`` is also the length of the iterations,
+when the array supplies the elements for an iteration (see later).
+
+invert
+------
+
+The function is defined for integer data types (``uint8``, ``int8``,
+``uint16``, and ``int16``) only, takes a single argument, and returns
+the element-by-element, bit-wise inverse of the array. If a ``float`` is
+supplied, the function raises a ``ValueError`` exception.
+
+With signed integers (``int8``, and ``int16``), the results might be
+unexpected, as in the example below:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([0, -1, -100], dtype=np.int8)
+    print("a:\t\t", a)
+    print("inverse of a:\t", ~a)
+    
+    a = np.array([0, 1, 254, 255], dtype=np.uint8)
+    print("\na:\t\t", a)
+    print("inverse of a:\t", ~a)
+
+.. parsed-literal::
+
+    a:		 array([0, -1, -100], dtype=int8)
+    inverse of a:	 array([-1, 0, 99], dtype=int8)
+    
+    a:		 array([0, 1, 254, 255], dtype=uint8)
+    inverse of a:	 array([255, 254, 1, 0], dtype=uint8)
+    
+    
+
+
+abs
+---
+
+This function takes a single argument, and returns the
+element-by-element absolute value of the array. When the data type is
+unsigned (``uint8``, or ``uint16``), a copy of the array will be
+returned immediately, and no calculation takes place.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([0, -1, -100], dtype=np.int8)
+    print("a:\t\t\t ", a)
+    print("absolute value of a:\t ", abs(a))
+
+.. parsed-literal::
+
+    a:			  array([0, -1, -100], dtype=int8)
+    absolute value of a:	  array([0, 1, 100], dtype=int8)
+    
+    
+
+
+neg
+---
+
+This operator takes a single argument, and changes the sign of each
+element in the array. Unsigned values are wrapped.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([10, -1, 1], dtype=np.int8)
+    print("a:\t\t", a)
+    print("negative of a:\t", -a)
+    
+    b = np.array([0, 100, 200], dtype=np.uint8)
+    print("\nb:\t\t", b)
+    print("negative of b:\t", -b)
+
+.. parsed-literal::
+
+    a:		 array([10, -1, 1], dtype=int8)
+    negative of a:	 array([-10, 1, -1], dtype=int8)
+    
+    b:		 array([0, 100, 200], dtype=uint8)
+    negative of b:	 array([0, 156, 56], dtype=uint8)
+    
+    
+
+
+pos
+---
+
+This function takes a single argument, and simply returns a copy of the
+array.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([10, -1, 1], dtype=np.int8)
+    print("a:\t\t", a)
+    print("positive of a:\t", +a)
+
+.. parsed-literal::
+
+    a:		 array([10, -1, 1], dtype=int8)
+    positive of a:	 array([10, -1, 1], dtype=int8)
+    
+    
+
+
+Binary operators
+================
+
+``ulab`` implements the ``+``, ``-``, ``*``, ``/``, ``**``, ``<``,
+``>``, ``<=``, ``>=``, ``==``, ``!=``, ``+=``, ``-=``, ``*=``, ``/=``,
+``**=`` binary operators, as well as the ``AND``, ``OR``, ``XOR``
+bit-wise operators that work element-wise. Note that the bit-wise
+operators will raise an exception, if either of the operands is of
+``float`` or ``complex`` type.
+
+Broadcasting is available, meaning that the two operands do not even
+have to have the same shape. If the lengths along the respective axes
+are equal, or one of them is 1, or the axis is missing, the element-wise
+operation can still be carried out. A thorough explanation of
+broadcasting can be found under
+https://numpy.org/doc/stable/user/basics.broadcasting.html.
+
+**WARNING**: note that relational operators (``<``, ``>``, ``<=``,
+``>=``, ``==``, ``!=``) should have the ``ndarray`` on their left hand
+side, when compared to scalars. This means that the following works
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3])
+    print(a > 2)
+
+.. parsed-literal::
+
+    array([False, False, True], dtype=bool)
+    
+    
+
+
+while the equivalent statement, ``2 < a``, will raise a ``TypeError``
+exception:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3])
+    print(2 < a)
+
+.. parsed-literal::
+
+    
+    Traceback (most recent call last):
+      File "/dev/shm/micropython.py", line 5, in <module>
+    TypeError: unsupported types for __lt__: 'int', 'ndarray'
+    
+
+
+**WARNING:** ``circuitpython`` users should use the ``equal``, and
+``not_equal`` operators instead of ``==``, and ``!=``. See the section
+on `array comparison <#Comparison-of-arrays>`__ for details.
+
+Upcasting
+---------
+
+Binary operations require special attention, because two arrays with
+different typecodes can be the operands of an operation, in which case
+it is not trivial, what the typecode of the result is. This decision on
+the result’s typecode is called upcasting. Since the number of typecodes
+in ``ulab`` is significantly smaller than in ``numpy``, we have to
+define new upcasting rules. Where possible, I followed ``numpy``\ ’s
+conventions.
+
+``ulab`` observes the following upcasting rules:
+
+1. Operations on two ``ndarray``\ s of the same ``dtype`` preserve their
+   ``dtype``, even when the results overflow.
+
+2. If either of the operands is a float, the result is automatically a
+   float.
+
+3. When one of the operands is a scalar, it will internally be turned
+   into a single-element ``ndarray`` with the *smallest* possible
+   ``dtype``. Thus, e.g., if the scalar is 123, it will be converted
+   into an array of ``dtype`` ``uint8``, while -1000 will be converted
+   into ``int16``. An ``mp_obj_float``, will always be promoted to
+   ``dtype`` ``float``. Similarly, if ``ulab`` supports complex arrays,
+   the result of a binary operation involving a ``complex`` array is
+   always complex. Other ``micropython`` types (e.g., lists, tuples,
+   etc.) raise a ``TypeError`` exception.
+
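+4. Operations on two integer arrays of different ``dtype`` are upcast
+   according to the following table: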
+
+============== =============== =========== ============
+left hand side right hand side ulab result numpy result
+============== =============== =========== ============
+``uint8``      ``int8``        ``int16``   ``int16``
+``uint8``      ``int16``       ``int16``   ``int16``
+``uint8``      ``uint16``      ``uint16``  ``uint16``
+``int8``       ``int16``       ``int16``   ``int16``
+``int8``       ``uint16``      ``uint16``  ``int32``
+``uint16``     ``int16``       ``float``   ``int32``
+============== =============== =========== ============
+
+Note that the last two operations are promoted to ``int32`` in
+``numpy``.
+
+**WARNING:** Due to the lower number of available data types, the
+upcasting rules of ``ulab`` are slightly different to those of
+``numpy``. Watch out for this, when porting code!
+
+Upcasting can be seen in action in the following snippet:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4], dtype=np.uint8)
+    b = np.array([1, 2, 3, 4], dtype=np.int8)
+    print("a:\t", a)
+    print("b:\t", b)
+    print("a+b:\t", a+b)
+    
+    c = np.array([1, 2, 3, 4], dtype=np.float)
+    print("\na:\t", a)
+    print("c:\t", c)
+    print("a*c:\t", a*c)
+
+.. parsed-literal::
+
+    a:	 array([1, 2, 3, 4], dtype=uint8)
+    b:	 array([1, 2, 3, 4], dtype=int8)
+    a+b:	 array([2, 4, 6, 8], dtype=int16)
+    
+    a:	 array([1, 2, 3, 4], dtype=uint8)
+    c:	 array([1.0, 2.0, 3.0, 4.0], dtype=float64)
+    a*c:	 array([1.0, 4.0, 9.0, 16.0], dtype=float64)
+    
+    
+
+
+Benchmarks
+----------
+
+The following snippet compares the performance of binary operations to a
+possible implementation in python. For the time measurement, we will
+take the following snippet from the micropython manual:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    import utime
+    
+    def timeit(f, *args, **kwargs):
+        func_name = str(f).split(' ')[1]
+        def new_func(*args, **kwargs):
+            t = utime.ticks_us()
+            result = f(*args, **kwargs)
+            print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')
+            return result
+        return new_func
+
+.. parsed-literal::
+
+    
+
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    @timeit
+    def py_add(a, b):
+        return [a[i]+b[i] for i in range(1000)]
+    
+    @timeit
+    def py_multiply(a, b):
+        return [a[i]*b[i] for i in range(1000)]
+    
+    @timeit
+    def ulab_add(a, b):
+        return a + b
+    
+    @timeit
+    def ulab_multiply(a, b):
+        return a * b
+    
+    a = [0.0]*1000
+    b = range(1000)
+    
+    print('python add:')
+    py_add(a, b)
+    
+    print('\npython multiply:')
+    py_multiply(a, b)
+    
+    a = np.linspace(0, 10, num=1000)
+    b = np.ones(1000)
+    
+    print('\nulab add:')
+    ulab_add(a, b)
+    
+    print('\nulab multiply:')
+    ulab_multiply(a, b)
+
+.. parsed-literal::
+
+    python add:
+    execution time:  10051  us
+    
+    python multiply:
+    execution time:  14175  us
+    
+    ulab add:
+    execution time:  222  us
+    
+    ulab multiply:
+    execution time:  213  us
+    
+
+
+The python implementation above is not perfect, and certainly, there is
+much room for improvement. However, the factor of 50 difference in
+execution time is very spectacular. This is nothing but a consequence of
+the fact that the ``ulab`` functions run ``C`` code, with very little
+python overhead. The factor of 50 appears to be quite universal: the FFT
+routine obeys similar scaling (see `Speed of FFTs <#Speed-of-FFTs>`__),
+and this number came up with font rendering, too: `fast font rendering
+on graphical
+displays <https://forum.micropython.org/viewtopic.php?f=15&t=5815&p=33362&hilit=ufont#p33383>`__.
+
+Comparison operators
+====================
+
+The smaller than, greater than, smaller or equal, and greater or equal
+operators return a vector of Booleans indicating the positions
+(``True``), where the condition is satisfied.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype=np.uint8)
+    print(a < 5)
+
+.. parsed-literal::
+
+    array([True, True, True, True, False, False, False, False], dtype=bool)
+    
+    
+
+
+**WARNING**: at the moment, due to ``micropython``\ ’s implementation
+details, the ``ndarray`` must be on the left hand side of the relational
+operators.
+
+That is, while ``a < 5`` and ``5 > a`` have the same meaning, the
+following code will not work:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    import ulab as np
+    
+    a = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype=np.uint8)
+    print(5 > a)
+
+.. parsed-literal::
+
+    
+    Traceback (most recent call last):
+      File "/dev/shm/micropython.py", line 5, in <module>
+    TypeError: unsupported types for __gt__: 'int', 'ndarray'
+    
+
+
+Iterating over arrays
+=====================
+
+``ndarray``\ s are iterable, which means that their elements can be
+accessed in the same way as the elements of a list, tuple, etc. If the
+array is one-dimensional, the iterator returns scalars; otherwise, a new
+*view* with one fewer dimension is created and returned.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)
+    b = np.array([range(5), range(10, 15, 1), range(20, 25, 1), range(30, 35, 1)], dtype=np.uint8)
+    
+    print("a:\t", a)
+    
+    for i, _a in enumerate(a):
+        print("element %d in a:"%i, _a)
+        
+    print("\nb:\t", b)
+    
+    for i, _b in enumerate(b):
+        print("element %d in b:"%i, _b)
+
+.. parsed-literal::
+
+    a:	 array([1, 2, 3, 4, 5], dtype=uint8)
+    element 0 in a: 1
+    element 1 in a: 2
+    element 2 in a: 3
+    element 3 in a: 4
+    element 4 in a: 5
+    
+    b:	 array([[0, 1, 2, 3, 4],
+           [10, 11, 12, 13, 14],
+           [20, 21, 22, 23, 24],
+           [30, 31, 32, 33, 34]], dtype=uint8)
+    element 0 in b: array([0, 1, 2, 3, 4], dtype=uint8)
+    element 1 in b: array([10, 11, 12, 13, 14], dtype=uint8)
+    element 2 in b: array([20, 21, 22, 23, 24], dtype=uint8)
+    element 3 in b: array([30, 31, 32, 33, 34], dtype=uint8)
+    
+    
+
+
+Slicing and indexing
+====================
+
+Views vs. copies
+----------------
+
+``numpy`` has a very important concept called *views*, which is a
+powerful extension of ``python``\ ’s own notion of slicing. Slices are
+special python objects of the form
+
+.. code:: python
+
+   slice = start:end:step
+
+where ``start``, ``end``, and ``step`` are (not necessarily
+non-negative) integers. Not all of these three numbers must be specified
+in an index; in fact, all three of them can be missing. The interpreter
+takes care of filling in the missing values. (Note that a slice cannot
+be defined in this way on its own: the notation is valid only where an
+index is expected.) For a good
+explanation of how slices work in python, you can read the stackoverflow
+question
+https://stackoverflow.com/questions/509211/understanding-slice-notation.
+
+In order to see what slicing does, let us take the string
+``string = '0123456789'``! We can extract every second character by
+creating the slice ``::2``, which is equivalent to ``0:len(string):2``,
+i.e., it increments the character pointer by 2 starting from 0, and
+traverses the string up to the very end.
+
+.. code::
+
+    # code to be run in CPython
+    
+    string = '0123456789'
+    string[::2]
+
+
+
+.. parsed-literal::
+
+    '02468'
+
+
+
+Now, we can do the same with numerical arrays.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(10), dtype=np.uint8)
+    print('a:\t', a)
+    
+    print('a[::2]:\t', a[::2])
+
+.. parsed-literal::
+
+    a:	 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)
+    a[::2]:	 array([0, 2, 4, 6, 8], dtype=uint8)
+    
+    
+
+
+This looks similar to ``string`` above, but there is a very important
+difference that is not so obvious. Namely, ``string[::2]`` produces a
+partial copy of ``string``, while ``a[::2]`` only produces a *view* of
+``a``. What this means is that ``a``, and ``a[::2]`` share their data,
+and the only difference between the two is, how the data are read out.
+In other words, internally, ``a[::2]`` has the same data pointer as
+``a``. We can easily convince ourselves that this is indeed the case by
+calling the `ndinfo <#The_ndinfo_function>`__ function: the *data
+pointer* entry is the same in the two printouts.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(10), dtype=np.uint8)
+    print('a: ', a, '\n')
+    np.ndinfo(a)
+    print('\n' + '='*20)
+    print('a[::2]: ', a[::2], '\n')
+    np.ndinfo(a[::2])
+
+.. parsed-literal::
+
+    a:  array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8) 
+    
+    class: ndarray
+    shape: (10,)
+    strides: (1,)
+    itemsize: 1
+    data pointer: 0x7ff6c6193220
+    type: uint8
+    
+    ====================
+    a[::2]:  array([0, 2, 4, 6, 8], dtype=uint8) 
+    
+    class: ndarray
+    shape: (5,)
+    strides: (2,)
+    itemsize: 1
+    data pointer: 0x7ff6c6193220
+    type: uint8
+    
+    
+
+
+If you are still a bit confused about the meaning of *views*, the
+section `Slicing and assigning to
+slices <#Slicing-and-assigning-to-slices>`__ should clarify the issue.
+
+Indexing
+--------
+
+The simplest form of indexing is specifying a single integer between the
+square brackets as in
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(10), dtype=np.uint8)
+    print("a: ", a)
+    print("the first, and last element of a:\n", a[0], a[-1])
+    print("the second, and last but one element of a:\n", a[1], a[-2])
+
+.. parsed-literal::
+
+    a:  array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)
+    the first, and last element of a:
+     0 9
+    the second, and last but one element of a:
+     1 8
+    
+    
+
+
+Indexing can be applied to higher-dimensional tensors, too. When the
+length of the indexing sequences is smaller than the number of
+dimensions, a new *view* is returned, otherwise, we get a single number.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9), dtype=np.uint8).reshape((3, 3))
+    print("a:\n", a)
+    print("a[0]:\n", a[0])
+    print("a[1,1]: ", a[1,1])
+
+.. parsed-literal::
+
+    a:
+     array([[0, 1, 2],
+    	[3, 4, 5],
+    	[6, 7, 8]], dtype=uint8)
+    a[0]:
+     array([[0, 1, 2]], dtype=uint8)
+    a[1,1]:  4
+    
+    
+
+
+Indices can also be a list of Booleans. By using a Boolean list, we can
+select those elements of an array that satisfy a specific condition. At
+the moment, such indexing is defined for row vectors only; when the rank
+of the tensor is higher than 1, the function raises a
+``NotImplementedError`` exception, though this will be rectified in a
+future version of ``ulab``.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9), dtype=np.float)
+    print("a:\t", a)
+    print("a[a < 5]:\t", a[a < 5])
+
+.. parsed-literal::
+
+    a:	 array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float)
+    a[a < 5]:	 array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)
+    
+    
+
+
+Indexing with Boolean arrays can take more complicated expressions. This
+is a very concise way of comparing two vectors, e.g.:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9), dtype=np.uint8)
+    b = np.array([4, 4, 4, 3, 3, 3, 13, 13, 13], dtype=np.uint8)
+    print("a:\t", a)
+    print("\na**2:\t", a*a)
+    print("\nb:\t", b)
+    print("\n100*sin(b):\t", np.sin(b)*100.0)
+    print("\na[a*a > np.sin(b)*100.0]:\t", a[a*a > np.sin(b)*100.0])
+
+.. parsed-literal::
+
+    a:	 array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    
+    a**2:	 array([0, 1, 4, 9, 16, 25, 36, 49, 64], dtype=uint16)
+    
+    b:	 array([4, 4, 4, 3, 3, 3, 13, 13, 13], dtype=uint8)
+    
+    100*sin(b):	 array([-75.68024953079282, -75.68024953079282, -75.68024953079282, 14.11200080598672, 14.11200080598672, 14.11200080598672, 42.01670368266409, 42.01670368266409, 42.01670368266409], dtype=float)
+    
+    a[a*a > np.sin(b)*100.0]:	 array([0, 1, 2, 4, 5, 7, 8], dtype=uint8)
+    
+    
+
+
+Boolean indices can also be used in assignments, if the array is
+one-dimensional. The following example replaces the data in an array,
+wherever some condition is fulfilled.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9), dtype=np.uint8)
+    b = np.array(range(9)) + 12
+    
+    print(a[b < 15])
+    
+    a[b < 15] = 123
+    print(a)
+
+.. parsed-literal::
+
+    array([0, 1, 2], dtype=uint8)
+    array([123, 123, 123, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    
+    
+
+
+On the right hand side of the assignment we can even have another array.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(9), dtype=np.uint8)
+    b = np.array(range(9)) + 12
+    
+    print(a[b < 15], b[b < 15])
+    
+    a[b < 15] = b[b < 15]
+    print(a)
+
+.. parsed-literal::
+
+    array([0, 1, 2], dtype=uint8) array([12.0, 13.0, 14.0], dtype=float)
+    array([12, 13, 14, 3, 4, 5, 6, 7, 8], dtype=uint8)
+    
+    
+
+
+Slicing and assigning to slices
+-------------------------------
+
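+You can also generate sub-arrays by specifying slices as the index of an
+array. Slices are special python objects of the form ``start:end:step``,
+and the following example shows how they can be applied to a
+two-dimensional array: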
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8)
+    print('a:\n', a)
+    
+    # the first row
+    print('\na[0]:\n', a[0])
+    
+    # the first two elements of the first row
+    print('\na[0,:2]:\n', a[0,:2])
+    
+    # the zeroth element in each row (also known as the zeroth column)
+    print('\na[:,0]:\n', a[:,0])
+    
+    # the last row
+    print('\na[-1]:\n', a[-1])
+    
+    # the last two rows backwards
+    print('\na[-1:-3:-1]:\n', a[-1:-3:-1])
+
+.. parsed-literal::
+
+    a:
+     array([[1, 2, 3],
+    	[4, 5, 6],
+    	[7, 8, 9]], dtype=uint8)
+    
+    a[0]:
+     array([[1, 2, 3]], dtype=uint8)
+    
+    a[0,:2]:
+     array([[1, 2]], dtype=uint8)
+    
+    a[:,0]:
+     array([[1],
+    	[4],
+    	[7]], dtype=uint8)
+    
+    a[-1]:
+     array([[7, 8, 9]], dtype=uint8)
+    
+    a[-1:-3:-1]:
+     array([[7, 8, 9],
+    	[4, 5, 6]], dtype=uint8)
+    
+    
+
+
+Assignment to slices can be done for the whole slice, per row, and per
+column. A couple of examples should make these statements clearer:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.zeros((3, 3), dtype=np.uint8)
+    print('a:\n', a)
+    
+    # assigning to the whole row
+    a[0] = 1
+    print('\na[0] = 1\n', a)
+    
+    a = np.zeros((3, 3), dtype=np.uint8)
+    
+    # assigning to a column
+    a[:,2] = 3.0
+    print('\na[:,0]:\n', a)
+
+.. parsed-literal::
+
+    a:
+     array([[0, 0, 0],
+    	[0, 0, 0],
+    	[0, 0, 0]], dtype=uint8)
+    
+    a[0] = 1
+     array([[1, 1, 1],
+    	[0, 0, 0],
+    	[0, 0, 0]], dtype=uint8)
+    
+    a[:,0]:
+     array([[0, 0, 3],
+    	[0, 0, 3],
+    	[0, 0, 3]], dtype=uint8)
+    
+    
+
+
+Now, you should notice that we re-set the array ``a`` after the first
+assignment. Do you care to see what happens, if we do not do that? Well,
+here are the results:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.zeros((3, 3), dtype=np.uint8)
+    b = a[:,:]
+    # assign 1 to the first row
+    b[0] = 1
+    
+    # assigning to the last column
+    b[:,2] = 3
+    print('a: ', a)
+
+.. parsed-literal::
+
+    a:  array([[1, 1, 3],
+    	[0, 0, 3],
+    	[0, 0, 3]], dtype=uint8)
+    
+    
+
+
+Note that both assignments involved ``b``, and not ``a``, yet, when we
+print out ``a``, its entries are updated. This proves our earlier
+statement about the behaviour of *views*: in the statement
+``b = a[:,:]`` we simply created a *view* of ``a``, and not a *deep*
+copy of it, meaning that whenever we modify ``b``, we actually modify
+``a``, because the underlying data container is shared between ``a``
+and ``b``. Having a single data container for two
+seemingly different objects provides an extremely powerful way of
+manipulating sub-sets of numerical data.
+
+If you want to work on a *copy* of your data, you can use the ``.copy``
+method of the ``ndarray``. The following snippet should drive the point
+home:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.zeros((3, 3), dtype=np.uint8)
+    b = a.copy()
+    
+    # get the address of the underlying data pointer
+    
+    np.ndinfo(a)
+    print()
+    np.ndinfo(b)
+    
+    # assign 1 to the first row of b, and do not touch a
+    b[0] = 1
+    
+    print()
+    print('a: ', a)
+    print('='*20)
+    print('b: ', b)
+
+.. parsed-literal::
+
+    class: ndarray
+    shape: (3, 3)
+    strides: (3, 1)
+    itemsize: 1
+    data pointer: 0x7ff737ea3220
+    type: uint8
+    
+    class: ndarray
+    shape: (3, 3)
+    strides: (3, 1)
+    itemsize: 1
+    data pointer: 0x7ff737ea3340
+    type: uint8
+    
+    a:  array([[0, 0, 0],
+    	[0, 0, 0],
+    	[0, 0, 0]], dtype=uint8)
+    ====================
+    b:  array([[1, 1, 1],
+    	[0, 0, 0],
+    	[0, 0, 0]], dtype=uint8)
+    
+    
+
+
+The ``.copy`` method can also be applied to views: below, ``a[0]`` is a
+*view* of ``a``, out of which we create a *deep copy* called ``b``. This
+is a row vector now. We can then do whatever we want to with ``b``, and
+that leaves ``a`` unchanged.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.zeros((3, 3), dtype=np.uint8)
+    b = a[0].copy()
+    print('b: ', b)
+    print('='*20)
+    # assign 1 to the first entry of b, and do not touch a
+    b[0] = 1
+    print('a: ', a)
+    print('='*20)
+    print('b: ', b)
+
+.. parsed-literal::
+
+    b:  array([0, 0, 0], dtype=uint8)
+    ====================
+    a:  array([[0, 0, 0],
+    	[0, 0, 0],
+    	[0, 0, 0]], dtype=uint8)
+    ====================
+    b:  array([1, 0, 0], dtype=uint8)
+    
+    
+
+
+The fact that the underlying data of a view is the same as that of the
+original array has another important consequence, namely, that the
+creation of a view is cheap, both in terms of RAM and execution time. A
+view is really nothing but a short header that refers to a data array
+that already exists and is already filled. Hence, creating the view
+requires only the creation of its header. This operation is fast, and
+uses virtually no RAM.
diff --git a/tulip/shared/ulab/docs/manual/source/ulab-programming.rst b/tulip/shared/ulab/docs/manual/source/ulab-programming.rst
new file mode 100644
index 000000000..3abb3ac40
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/ulab-programming.rst
@@ -0,0 +1,911 @@
+
+Programming ulab
+================
+
+Earlier we have seen, how ``ulab``\ ’s functions and methods can be
+accessed in ``micropython``. This last section of the book explains, how
+these functions are implemented. By the end of this chapter, not only
+would you be able to extend ``ulab``, and write your own
+``numpy``-compatible functions, but through a deeper understanding of
+the inner workings of the functions, you would also be able to see what
+the trade-offs are at the ``python`` level.
+
+Code organisation
+-----------------
+
+As mentioned earlier, the ``python`` functions are organised into
+sub-modules at the C level. The C sub-modules can be found in
+``./ulab/code/``.
+
+The ``ndarray`` object
+----------------------
+
+General comments
+~~~~~~~~~~~~~~~~
+
+``ndarrays`` are efficient containers of numerical data of the same type
+(i.e., signed/unsigned chars, signed/unsigned integers or
+``mp_float_t``\ s, which, depending on the platform, are either C
+``float``\ s, or C ``double``\ s). Beyond storing the actual data in the
+void pointer ``*array``, the type definition has seven additional
+members (on top of the ``base`` type). Namely, the ``dtype``, which
+tells us how the bytes are to be interpreted; the ``itemsize``, which
+stores the size of a single entry in the array; ``boolean``, an unsigned
+integer, which determines whether the array is to be treated as a set
+of Booleans, or as numerical data; ``ndim``, the number of dimensions
+(``uint8_t``); ``len``, the length of the array (the number of entries);
+the ``shape`` (an array of ``size_t``); and the ``strides`` (an array of
+``int32_t``). The length is simply the product of the numbers in
+``shape``.
+
+The type definition is as follows:
+
+.. code:: c
+
+   typedef struct _ndarray_obj_t {
+       mp_obj_base_t base;
+       uint8_t dtype;
+       uint8_t itemsize;
+       uint8_t boolean;
+       uint8_t ndim;
+       size_t len;
+       size_t shape[ULAB_MAX_DIMS];
+       int32_t strides[ULAB_MAX_DIMS];
+       void *array;
+   } ndarray_obj_t;
+
+Memory layout
+~~~~~~~~~~~~~
+
+The values of an ``ndarray`` are stored in a contiguous segment in the
+RAM. The ``ndarray`` can be dense, meaning that all numbers in the
+linear memory segment belong to a linear combination of coordinates, and
+it can also be sparse, i.e., some elements of the linear storage space
+will be skipped, when the elements of the tensor are traversed.
+
+In the RAM, the position of the item
+:math:`M(n_1, n_2, ..., n_{k-1}, n_k)` in a dense tensor of rank
+:math:`k` is given by the linear combination
+
+:raw-latex:`\begin{equation}
+P(n_1, n_2, ..., n_{k-1}, n_k) = n_1 s_1 + n_2 s_2 + ... + n_{k-1}s_{k-1} + n_ks_k = \sum_{i=1}^{k}n_is_i
+\end{equation}` where :math:`s_i` are the strides of the tensor, defined
+as
+
+:raw-latex:`\begin{equation}
+s_i = \prod_{j=i+1}^k l_j
+\end{equation}`
+
+where :math:`l_j` is the length of the tensor along the :math:`j`\ th axis.
+When the tensor is sparse (e.g., when the tensor is sliced), the strides
+along a particular axis will be multiplied by a non-zero integer. If
+this integer is different from :math:`\pm 1`, the linear combination above
+cannot access all elements in the RAM, i.e., some numbers will be
+skipped. Note that :math:`|s_1| > |s_2| > ... > |s_{k-1}| > |s_k|`, even
+if the tensor is sparse. The statement is trivial for dense tensors, and
+it follows from the definition of :math:`s_i`. For sparse tensors, a
+slice cannot have a step larger than the shape along that axis. But for
+dense tensors, :math:`s_i/s_{i+1} = l_{i+1}`.
+
+When creating a *view*, we simply re-calculate the ``strides``, and
+re-set the ``*array`` pointer.
+
+Iterating over elements of a tensor
+-----------------------------------
+
+The ``shape`` and ``strides`` members of the array tell us how we have
+to move our pointer, when we want to read out the numbers. For technical
+reasons that will become clear later, the numbers in ``shape`` and in
+``strides`` are aligned to the right, and begin on the right hand side,
+i.e., if the number of possible dimensions is ``ULAB_MAX_DIMS``, then
+``shape[ULAB_MAX_DIMS-1]`` is the length of the last axis,
+``shape[ULAB_MAX_DIMS-2]`` is the length of the last but one axis, and
+so on. If the number of actual dimensions, ``ndim < ULAB_MAX_DIMS``, the
+first ``ULAB_MAX_DIMS - ndim`` entries in ``shape`` and ``strides`` will
+be equal to zero, but they could, in fact, be assigned any value,
+because these will never be accessed in an operation.
+
+With this definition of the strides, the linear combination in
+:math:`P(n_1, n_2, ..., n_{k-1}, n_k)` is a one-to-one mapping from the
+space of tensor coordinates, :math:`(n_1, n_2, ..., n_{k-1}, n_k)`, to
+the coordinate in the linear array,
+:math:`n_1s_1 + n_2s_2 + ... + n_{k-1}s_{k-1} + n_ks_k`, i.e., no two
+distinct sets of coordinates will result in the same position in the
+linear array.
+
+Since the ``strides`` are given in terms of bytes, when we iterate over
+an array, the void data pointer is usually cast to ``uint8_t``, and the
+values are converted using the proper data type stored in
+``ndarray->dtype``. However, there might be cases, when it makes perfect
+sense to cast ``*array`` to a different type, in which case the
+``strides`` have to be re-scaled by the value of ``ndarray->itemsize``.
+
+Iterating using the unwrapped loops
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following macro definition is taken from
+`vector.h <https://github.com/v923z/micropython-ulab/blob/master/code/numpy/vector/vector.h>`__,
+and demonstrates, how we can iterate over a single array in four
+dimensions.
+
+.. code:: c
+
+   #define ITERATE_VECTOR(type, array, source, sarray) do {
+       size_t i=0;
+       do {
+           size_t j = 0;
+           do {
+               size_t k = 0;
+               do {
+                   size_t l = 0;
+                   do {
+                       *(array)++ = f(*((type *)(sarray)));
+                       (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];
+                       l++;
+                   } while(l < (source)->shape[ULAB_MAX_DIMS-1]);
+                   (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1];
+                   (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];
+                   k++;
+               } while(k < (source)->shape[ULAB_MAX_DIMS-2]);
+               (sarray) -= (source)->strides[ULAB_MAX_DIMS - 2] * (source)->shape[ULAB_MAX_DIMS-2];
+               (sarray) += (source)->strides[ULAB_MAX_DIMS - 3];
+               j++;
+           } while(j < (source)->shape[ULAB_MAX_DIMS-3]);
+           (sarray) -= (source)->strides[ULAB_MAX_DIMS - 3] * (source)->shape[ULAB_MAX_DIMS-3];
+           (sarray) += (source)->strides[ULAB_MAX_DIMS - 4];
+           i++;
+       } while(i < (source)->shape[ULAB_MAX_DIMS-4]);
+   } while(0)
+
+We start with the innermost loop, the one iterating over ``l``. ``array`` is
+already of type ``mp_float_t``, while the source array, ``sarray``, has
+been cast to ``uint8_t`` in the calling function. The numbers contained
+in ``sarray`` have to be read out in the proper type dictated by
+``ndarray->dtype``. This is what happens in the statement
+``*((type *)(sarray))``, and this number is then fed into the function
+``f``. Vectorised mathematical functions produce *dense* arrays, and for
+this reason, we can simply advance the ``array`` pointer.
+
+The advancing of the ``sarray`` pointer is a bit more involved: first,
+in the innermost loop, we simply move forward by the amount given by the
+last stride, which is ``(source)->strides[ULAB_MAX_DIMS - 1]``, because
+the ``shape`` and the ``strides`` are aligned to the right. We move the
+pointer as many times as given by ``(source)->shape[ULAB_MAX_DIMS-1]``,
+which is the length of the very last axis. Hence the structure of
+the loop
+
+.. code:: c
+
+       size_t l = 0;
+       do {
+           ...
+           l++;
+       } while(l < (source)->shape[ULAB_MAX_DIMS-1]);
+
+Once we have exhausted the last axis, we have to re-wind the pointer,
+and advance it by an amount given by the last but one stride. Keep in
+mind that in the innermost loop we moved our pointer
+``(source)->shape[ULAB_MAX_DIMS-1]`` times by
+``(source)->strides[ULAB_MAX_DIMS - 1]``, i.e., we re-wind it by moving
+it backwards by
+``(source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1]``.
+In the next step, we move forward by
+``(source)->strides[ULAB_MAX_DIMS - 2]``, which is the last but one
+stride.
+
+.. code:: c
+
+       (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1];
+       (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];
+
+This pattern must be repeated for each axis of the array, and this is
+how we arrive at the four nested loops listed above.
+
+Re-winding arrays by means of a function
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In addition to un-wrapping the iteration loops by means of macros, there
+is another way of traversing all elements of a tensor: we note that,
+since :math:`|s_1| > |s_2| > ... > |s_{k-1}| > |s_k|`,
+:math:`P(n_1, n_2, ..., n_{k-1}, n_k)` changes most slowly in the last
+coordinate. Hence, if we start from the very beginning, (:math:`n_i = 0`
+for all :math:`i`), and walk along the linear RAM segment, we increment
+the value of :math:`n_k` as long as :math:`n_k < l_k`. Once
+:math:`n_k = l_k`, we have to reset :math:`n_k` to 0, and increment
+:math:`n_{k-1}` by one. After each such round, :math:`n_{k-1}` will be
+incremented by one, as long as :math:`n_{k-1} < l_{k-1}`. Once
+:math:`n_{k-1} = l_{k-1}`, we reset both :math:`n_k`, and
+:math:`n_{k-1}` to 0, and increment :math:`n_{k-2}` by one.
+
+Rewinding the arrays in this way is implemented in the function
+``ndarray_rewind_array`` in
+`ndarray.c <https://github.com/v923z/micropython-ulab/blob/master/code/ndarray.c>`__.
+
+.. code:: c
+
+   void ndarray_rewind_array(uint8_t ndim, uint8_t *array, size_t *shape, int32_t *strides, size_t *coords) {
+       // resets the data pointer of a single array, whenever an axis is full
+       // since we always iterate over the very last axis, we have to keep track of
+       // the last ndim-2 axes only
+       array -= shape[ULAB_MAX_DIMS - 1] * strides[ULAB_MAX_DIMS - 1];
+       array += strides[ULAB_MAX_DIMS - 2];
+       for(uint8_t i=1; i < ndim-1; i++) {
+           coords[ULAB_MAX_DIMS - 1 - i] += 1;
+           if(coords[ULAB_MAX_DIMS - 1 - i] == shape[ULAB_MAX_DIMS - 1 - i]) { // we are at a dimension boundary
+               array -= shape[ULAB_MAX_DIMS - 1 - i] * strides[ULAB_MAX_DIMS - 1 - i];
+               array += strides[ULAB_MAX_DIMS - 2 - i];
+               coords[ULAB_MAX_DIMS - 1 - i] = 0;
+               coords[ULAB_MAX_DIMS - 2 - i] += 1;
+           } else { // coordinates can change only, if the last coordinate changes
+               return;
+           }
+       }
+   }
+
+and the function would be called as in the snippet below. Note that the
+innermost loop is factored out, so that we can save the ``if(...)``
+statement for the last axis.
+
+.. code:: c
+
+       size_t *coords = ndarray_new_coords(results->ndim);
+       for(size_t i=0; i < results->len/results->shape[ULAB_MAX_DIMS -1]; i++) {
+           size_t l = 0;
+           do {
+               ...
+               l++;
+           } while(l < results->shape[ULAB_MAX_DIMS - 1]);
+           ndarray_rewind_array(results->ndim, array, results->shape, strides, coords);
+       } while(0)
+
+The advantage of this method is that the implementation is independent
+of the number of dimensions: the iteration requires more or less the
+same flash space for 2 dimensions as for 22. However, the price we have
+to pay for this convenience is the extra function call.
+
+Iterating over two ndarrays simultaneously: broadcasting
+--------------------------------------------------------
+
+Whenever we invoke a binary operator, call a function with two arguments
+of ``ndarray`` type, or assign something to an ``ndarray``, we have to
+iterate over two views at the same time. The task is trivial, if the two
+``ndarray``\ s in question have the same shape (but not necessarily the
+same set of strides), because in this case, we can still iterate in the
+same loop. All that happens is that we move two data pointers in sync.
+
+The problem becomes a bit more involved when the shapes of the two
+``ndarray``\ s are not identical. For such cases, ``numpy`` defines
+so-called broadcasting, which boils down to two rules.
+
+1. The shape of the tensor with lower rank has to be prepended with
+   axes of size 1 until the two ranks become equal.
+2. Along all axes the two tensors should have the same size, or one of
+   the sizes must be 1.
+
+If, after applying the first rule the second is not satisfied, the two
+``ndarray``\ s cannot be broadcast together.
+
+Now, let us suppose that we have two compatible ``ndarray``\ s, i.e.,
+after applying the first rule, the second is satisfied. How do we
+iterate over the elements in the tensors?
+
+We should recall, what exactly we do, when iterating over a single
+array: normally, we move the data pointer by the last stride, except,
+when we arrive at a dimension boundary (when the last axis is
+exhausted). At that point, we move the pointer by an amount dictated by
+the strides. And this is the key: *dictated by the strides*. Now, if we
+have two arrays that are originally not compatible, we define new
+strides for them, and use these in the iteration. With that, we are back
+to the case, where we had two compatible arrays.
+
+Now, let us look at the second broadcasting rule: if the two arrays have
+the same size, we take both ``ndarray``\ s’ strides along that axis. If,
+on the other hand, one of the ``ndarray``\ s is of length 1 along one of
+its axes, we set the corresponding strides to 0. This will ensure that
+the data pointer is not moved, when we iterate over both ``ndarray``\ s
+at the same time.
+
+Thus, in order to implement broadcasting, we first have to check,
+whether the two above-mentioned rules can be satisfied, and if so, we
+have to find the two new sets of strides.
+
+The ``ndarray_can_broadcast`` function from
+`ndarray.c <https://github.com/v923z/micropython-ulab/blob/master/code/ndarray.c>`__
+takes two ``ndarray``\ s, and returns ``true``, if the two arrays can be
+broadcast together. At the same time, it also calculates new strides for
+the two arrays, so that they can be iterated over at the same time.
+
+.. code:: c
+
+   bool ndarray_can_broadcast(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t *ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+       // returns True or False, depending on, whether the two arrays can be broadcast together
+       // numpy's broadcasting rules are as follows:
+       //
+       // 1. the two shapes are either equal
+       // 2. one of the shapes is 1
+       memset(lstrides, 0, sizeof(size_t)*ULAB_MAX_DIMS);
+       memset(rstrides, 0, sizeof(size_t)*ULAB_MAX_DIMS);
+       lstrides[ULAB_MAX_DIMS - 1] = lhs->strides[ULAB_MAX_DIMS - 1];
+       rstrides[ULAB_MAX_DIMS - 1] = rhs->strides[ULAB_MAX_DIMS - 1];
+       for(uint8_t i=ULAB_MAX_DIMS; i > 0; i--) {
+           if((lhs->shape[i-1] == rhs->shape[i-1]) || (lhs->shape[i-1] == 0) || (lhs->shape[i-1] == 1) ||
+           (rhs->shape[i-1] == 0) || (rhs->shape[i-1] == 1)) {
+               shape[i-1] = MAX(lhs->shape[i-1], rhs->shape[i-1]);
+               if(shape[i-1] > 0) (*ndim)++;
+               if(lhs->shape[i-1] < 2) {
+                   lstrides[i-1] = 0;
+               } else {
+                   lstrides[i-1] = lhs->strides[i-1];
+               }
+               if(rhs->shape[i-1] < 2) {
+                   rstrides[i-1] = 0;
+               } else {
+                   rstrides[i-1] = rhs->strides[i-1];
+               }
+           } else {
+               return false;
+           }
+       }
+       return true;
+   }
+
+A good example of how the function would be called can be found in
+`vector.c <https://github.com/v923z/micropython-ulab/blob/master/code/numpy/vector/vector.c>`__,
+in the ``vector_arctan2`` function:
+
+.. code:: c
+
+   mp_obj_t vector_arctan2(mp_obj_t y, mp_obj_t x) {
+       ...
+       uint8_t ndim = 0;
+       size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+       int32_t *xstrides = m_new(int32_t, ULAB_MAX_DIMS);
+       int32_t *ystrides = m_new(int32_t, ULAB_MAX_DIMS);
+       if(!ndarray_can_broadcast(ndarray_x, ndarray_y, &ndim, shape, xstrides, ystrides)) {
+           mp_raise_ValueError(translate("operands could not be broadcast together"));
+           m_del(size_t, shape, ULAB_MAX_DIMS);
+           m_del(int32_t, xstrides, ULAB_MAX_DIMS);
+           m_del(int32_t, ystrides, ULAB_MAX_DIMS);
+       }
+
+       uint8_t *xarray = (uint8_t *)ndarray_x->array;
+       uint8_t *yarray = (uint8_t *)ndarray_y->array;
+       
+       ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+       mp_float_t *rarray = (mp_float_t *)results->array;
+       ...
+
+After the new strides have been calculated, the iteration loop is
+identical to what we discussed in the previous section.
+
+Contracting an ``ndarray``
+--------------------------
+
+There are many operations that reduce the number of dimensions of an
+``ndarray`` by 1, i.e., that remove an axis from the tensor. The drill
+is the same as before, with the exception that first we have to remove
+the ``strides`` and ``shape`` that correspond to the axis along which
+we intend to contract. The ``numerical_reduce_axes`` function from
+`numerical.c <https://github.com/v923z/micropython-ulab/blob/master/code/numerical/numerical.c>`__
+does that.
+
+.. code:: c
+
+   static void numerical_reduce_axes(ndarray_obj_t *ndarray, int8_t axis, size_t *shape, int32_t *strides) {
+       // removes the values corresponding to a single axis from the shape and strides array
+       uint8_t index = ULAB_MAX_DIMS - ndarray->ndim + axis;
+       if((ndarray->ndim == 1) && (axis == 0)) {
+           index = 0;
+           shape[ULAB_MAX_DIMS - 1] = 0;
+           return;
+       }
+       for(uint8_t i = ULAB_MAX_DIMS - 1; i > 0; i--) {
+           if(i > index) {
+               shape[i] = ndarray->shape[i];
+               strides[i] = ndarray->strides[i];
+           } else {
+               shape[i] = ndarray->shape[i-1];
+               strides[i] = ndarray->strides[i-1];
+           }
+       }
+   }
+
+Once the reduced ``strides`` and ``shape`` are known, we place the axis
+in question in the innermost loop, and wrap it with the loops, whose
+coordinates are in the ``strides``, and ``shape`` arrays. The
+``RUN_STD`` macro from
+`numerical.h <https://github.com/v923z/micropython-ulab/blob/master/code/numpy/numerical/numerical.h>`__
+is a good example. The macro is expanded in the
+``numerical_sum_mean_std_ndarray`` function.
+
+.. code:: c
+
+   static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t axis, uint8_t optype, size_t ddof) {
+       uint8_t *array = (uint8_t *)ndarray->array;
+       size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+       memset(shape, 0, sizeof(size_t)*ULAB_MAX_DIMS);
+       int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);
+       memset(strides, 0, sizeof(uint32_t)*ULAB_MAX_DIMS);
+
+       int8_t ax = mp_obj_get_int(axis);
+       if(ax < 0) ax += ndarray->ndim;
+       if((ax < 0) || (ax > ndarray->ndim - 1)) {
+           mp_raise_ValueError(translate("index out of range"));
+       }
+       numerical_reduce_axes(ndarray, ax, shape, strides);
+       uint8_t index = ULAB_MAX_DIMS - ndarray->ndim + ax;
+       ndarray_obj_t *results = NULL;
+       uint8_t *rarray = NULL;
+       ...
+
+Here is the macro for the three-dimensional case:
+
+.. code:: c
+
+   #define RUN_STD(ndarray, type, array, results, r, shape, strides, index, div) do {
+       size_t k = 0;
+       do {
+           size_t l = 0;
+           do {
+               RUN_STD1((ndarray), type, (array), (results), (r), (index), (div));
+               (array) -= (ndarray)->strides[(index)] * (ndarray)->shape[(index)];
+               (array) += (strides)[ULAB_MAX_DIMS - 1];
+               l++;
+           } while(l < (shape)[ULAB_MAX_DIMS - 1]);
+           (array) -= (strides)[ULAB_MAX_DIMS - 2] * (shape)[ULAB_MAX_DIMS-2];
+           (array) += (strides)[ULAB_MAX_DIMS - 3];
+           k++;
+       } while(k < (shape)[ULAB_MAX_DIMS - 2]);
+   } while(0)
+
+In ``RUN_STD``, we simply move our pointers; the calculation itself
+happens in the ``RUN_STD1`` macro below. (Note that this is the
+implementation of the numerically stable Welford algorithm.)
+
+.. code:: c
+
+   #define RUN_STD1(ndarray, type, array, results, r, index, div)
+   ({
+       mp_float_t M, m, S = 0.0, s = 0.0;
+       M = m = *(mp_float_t *)((type *)(array));
+       for(size_t i=1; i < (ndarray)->shape[(index)]; i++) {
+           (array) += (ndarray)->strides[(index)];
+           mp_float_t value = *(mp_float_t *)((type *)(array));
+           m = M + (value - M) / (mp_float_t)i;
+           s = S + (value - M) * (value - m);
+           M = m;
+           S = s;
+       }
+       (array) += (ndarray)->strides[(index)];
+       *(r)++ = MICROPY_FLOAT_C_FUN(sqrt)((ndarray)->shape[(index)] * s / (div));
+   })
+
+Upcasting
+---------
+
+When in an operation the ``dtype``\ s of two arrays are different, the
+result’s ``dtype`` will be decided by the following upcasting rules:
+
+1. Operations with two ``ndarray``\ s of the same ``dtype`` preserve
+   their ``dtype``, even when the results overflow.
+
+2. if either of the operands is a float, the result automatically
+   becomes a float
+
+3. otherwise
+
+   -  ``uint8`` + ``int8`` => ``int16``,
+
+   -  ``uint8`` + ``int16`` => ``int16``
+
+   -  ``uint8`` + ``uint16`` => ``uint16``
+
+   -  ``int8`` + ``int16`` => ``int16``
+
+   -  ``int8`` + ``uint16`` => ``uint16`` (in numpy, the result is a
+      ``int32``)
+
+   -  ``uint16`` + ``int16`` => ``float`` (in numpy, the result is a
+      ``int32``)
+
+4. When one operand of a binary operation is a generic scalar
+   ``micropython`` variable, i.e., ``mp_obj_int``, or ``mp_obj_float``,
+   it will be converted to a linear array of length 1, and with the
+   smallest ``dtype`` that can accommodate the variable in question.
+   After that the broadcasting rules apply, as described in the section
+   `Iterating over two ndarrays simultaneously:
+   broadcasting <#Iterating_over_two_ndarrays_simultaneously:_broadcasting>`__
+
+Upcasting is resolved in place, wherever it is required. Notable
+examples can be found in
+`ndarray_operators.c <https://github.com/v923z/micropython-ulab/blob/master/code/ndarray_operators.c>`__
+
+Slicing and indexing
+--------------------
+
+An ``ndarray`` can be indexed with three types of objects: integer
+scalars, slices, and another ``ndarray``, whose elements are either
+integer scalars, or Booleans. Since slice and integer indices can be
+thought of as modifications of the ``strides``, these indices return a
+view of the ``ndarray``. This statement does not hold for ``ndarray``
+indices, and therefore, they return a copy of the array.
+
+Extending ulab
+--------------
+
+The ``user`` module is disabled by default, as can be seen from the last
+couple of lines of
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__
+
+.. code:: c
+
+   // user-defined module
+   #ifndef ULAB_USER_MODULE
+   #define ULAB_USER_MODULE                (0)
+   #endif
+
+The module contains a very simple function, ``user_dummy``, and this
+function is bound to the module itself. In other words, even if the
+module is enabled, one has to ``import``:
+
+.. code:: python
+
+
+   import ulab
+   from ulab import user
+
+   user.dummy_function(2.5)
+
+which should just return 5.0. Even if ``numpy``-compatibility is
+required (i.e., if most functions are bound at the top level to ``ulab``
+directly), having to ``import`` the module has a great advantage.
+Namely, only the
+`user.h <https://github.com/v923z/micropython-ulab/blob/master/code/user/user.h>`__
+and
+`user.c <https://github.com/v923z/micropython-ulab/blob/master/code/user/user.c>`__
+files have to be modified, thus it should be relatively straightforward
+to update your local copy from
+`github <https://github.com/v923z/micropython-ulab/blob/master/>`__.
+
+Now, let us see, how we can add a more meaningful function.
+
+Creating a new ndarray
+----------------------
+
+In the `General comments <#General_comments>`__ section we have seen
+the type definition of an ``ndarray``. This structure can be generated
+by means of a couple of functions listed in
+`ndarray.c <https://github.com/v923z/micropython-ulab/blob/master/code/ndarray.c>`__.
+
+ndarray_new_ndarray
+~~~~~~~~~~~~~~~~~~~
+
+The ``ndarray_new_ndarray`` function is called by all other
+array-generating functions. It takes the number of dimensions, ``ndim``,
+a ``uint8_t``, the ``shape``, a pointer to ``size_t``, the ``strides``,
+a pointer to ``int32_t``, and ``dtype``, another ``uint8_t`` as its
+arguments, and returns a new array with all entries initialised to 0.
+
+Assuming that ``ULAB_MAX_DIMS > 2``, a new dense array of dimension 3,
+of ``shape`` (3, 4, 5), of ``strides`` (1000, 200, 10), and ``dtype``
+``uint16_t`` can be generated by the following instructions
+
+.. code:: c
+
+   size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+   shape[ULAB_MAX_DIMS - 1] = 5;
+   shape[ULAB_MAX_DIMS - 2] = 4;
+   shape[ULAB_MAX_DIMS - 3] = 3;
+
+   int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);
+   strides[ULAB_MAX_DIMS - 1] = 10;
+   strides[ULAB_MAX_DIMS - 2] = 200;
+   strides[ULAB_MAX_DIMS - 3] = 1000;
+
+   ndarray_obj_t *new_ndarray = ndarray_new_ndarray(3, shape, strides, NDARRAY_UINT16);
+
+ndarray_new_dense_ndarray
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The function simply calculates the ``strides`` from the ``shape``, and
+calls ``ndarray_new_ndarray``. Assuming that ``ULAB_MAX_DIMS > 2``, a
+new dense array of dimension 3, of ``shape`` (3, 4, 5), and ``dtype``
+``mp_float_t`` can be generated by the following instructions
+
+.. code:: c
+
+   size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+   shape[ULAB_MAX_DIMS - 1] = 5;
+   shape[ULAB_MAX_DIMS - 2] = 4;
+   shape[ULAB_MAX_DIMS - 3] = 3;
+
+   ndarray_obj_t *new_ndarray = ndarray_new_dense_ndarray(3, shape, NDARRAY_FLOAT);
+
+ndarray_new_linear_array
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since the dimensions of a linear array are known (1), the
+``ndarray_new_linear_array`` takes the ``length``, a ``size_t``, and the
+``dtype``, an ``uint8_t``. Internally, ``ndarray_new_linear_array``
+generates the ``shape`` array, and calls ``ndarray_new_dense_ndarray``
+with ``ndim = 1``.
+
+A linear array of length 100, and ``dtype`` ``uint8`` could be created
+by the function call
+
+.. code:: c
+
+   ndarray_obj_t *new_ndarray = ndarray_new_linear_array(100, NDARRAY_UINT8)
+
+ndarray_new_ndarray_from_tuple
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This function takes a ``tuple``, which should hold the lengths of the
+axes (in other words, the ``shape``), and the ``dtype``, and calls
+internally ``ndarray_new_dense_ndarray``. A new ``ndarray`` can be
+generated by calling
+
+.. code:: c
+
+   ndarray_obj_t *new_ndarray = ndarray_new_ndarray_from_tuple(shape, NDARRAY_FLOAT);
+
+where ``shape`` is a tuple.
+
+ndarray_new_view
+~~~~~~~~~~~~~~~~
+
+This function creates a *view*, and takes the source, an ``ndarray``, the
+number of dimensions, an ``uint8_t``, the ``shape``, a pointer to
+``size_t``, the ``strides``, a pointer to ``int32_t``, and the offset,
+an ``int32_t`` as arguments. The offset is the number of bytes by which
+the void ``array`` pointer is shifted. E.g., the ``python`` statement
+
+.. code:: python
+
+   a = np.array([0, 1, 2, 3, 4, 5], dtype=uint8)
+   b = a[1::2]
+
+produces the array
+
+.. code:: python
+
+   array([1, 3, 5], dtype=uint8)
+
+which holds its data at position ``x0 + 1``, if ``a``\ ’s pointer is at
+``x0``. In this particular case, the offset is 1.
+
+The array ``b`` from the example above could be generated as
+
+.. code:: c
+
+   size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+   shape[ULAB_MAX_DIMS - 1] = 3;
+
+   int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);
+   strides[ULAB_MAX_DIMS - 1] = 2;
+
+   int32_t offset = 1;
+   uint8_t ndim = 1;
+
+   ndarray_obj_t *new_ndarray = ndarray_new_view(ndarray_a, ndim, shape, strides, offset);
+
+ndarray_copy_array
+~~~~~~~~~~~~~~~~~~
+
+The ``ndarray_copy_array`` function can be used for copying the contents
+of an array. Note that the target array has to be created beforehand.
+E.g., a one-to-one copy can be gotten by
+
+.. code:: c
+
+   ndarray_obj_t *new_ndarray = ndarray_new_ndarray(source->ndim, source->shape, source->strides, source->dtype);
+   ndarray_copy_array(source, new_ndarray);
+
+Note that the function cannot be used for forcing type conversion, i.e.,
+the input and output types must be identical, because the function
+simply calls the ``memcpy`` function. On the other hand, the input and
+output ``strides`` do not necessarily have to be equal.
+
+ndarray_copy_view
+~~~~~~~~~~~~~~~~~
+
+The ``ndarray_obj_t *new_ndarray = ...`` instruction can be saved by
+calling the ``ndarray_copy_view`` function with the single ``source``
+argument.
+
+Accessing data in the ndarray
+-----------------------------
+
+Having seen, how arrays can be generated and copied, it is time to look
+at how the data in an ``ndarray`` can be accessed and modified.
+
+For starters, let us suppose that the object in question comes from the
+user (i.e., via the ``micropython`` interface). First, we have to
+acquire a pointer to the ``ndarray`` by calling
+
+.. code:: c
+
+   ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(object_in);
+
+If it is not clear, whether the object is an ``ndarray`` (e.g., if we
+want to write a function that can take ``ndarray``\ s, and other
+iterables as its argument), we find this out by evaluating
+
+.. code:: c
+
+   mp_obj_is_type(object_in, &ulab_ndarray_type)
+
+which should return ``true``. Once the pointer is at our disposal, we
+can get a pointer to the underlying numerical array as discussed
+earlier, i.e.,
+
+.. code:: c
+
+   uint8_t *array = (uint8_t *)ndarray->array;
+
+If you need to find out the ``dtype`` of the array, you can get it by
+accessing the ``dtype`` member of the ``ndarray``, i.e.,
+
+.. code:: c
+
+   ndarray->dtype
+
+should be equal to ``B``, ``b``, ``H``, ``h``, or ``f``. The size of a
+single item is stored in the ``itemsize`` member. This number should be
+equal to 1, if the ``dtype`` is ``B``, or ``b``, 2, if the ``dtype`` is
+``H``, or ``h``, 4, if the ``dtype`` is ``f``, and 8 for ``d``.
+
+Boilerplate
+-----------
+
+In the next section, we will construct a function that generates the
+element-wise square of a dense array, otherwise, raises a ``TypeError``
+exception. Dense arrays can easily be iterated over, since we do not
+have to care about the ``shape`` and the ``strides``. If the array is
+sparse, the section `Iterating over elements of a
+tensor <#Iterating-over-elements-of-a-tensor>`__ should contain hints as
+to how the iteration can be implemented.
+
+The function is listed under
+`user.c <https://github.com/v923z/micropython-ulab/tree/master/code/user/>`__.
+The ``user`` module is bound to ``ulab`` in
+`ulab.c <https://github.com/v923z/micropython-ulab/tree/master/code/ulab.c>`__
+in the lines
+
+.. code:: c
+
+       #if ULAB_USER_MODULE
+           { MP_ROM_QSTR(MP_QSTR_user), MP_ROM_PTR(&ulab_user_module) },
+       #endif
+
+which assumes that at the very end of
+`ulab.h <https://github.com/v923z/micropython-ulab/tree/master/code/ulab.h>`__
+the
+
+.. code:: c
+
+   // user-defined module
+   #ifndef ULAB_USER_MODULE
+   #define ULAB_USER_MODULE                (1)
+   #endif
+
+constant has been set to 1. After compilation, you can call a particular
+``user`` function in ``python`` by importing the module first, i.e.,
+
+.. code:: python
+
+   from ulab import numpy as np
+   from ulab import user
+
+   user.some_function(...)
+
+This separation of user-defined functions from the rest of the code
+ensures that the integrity of the main module and all its functions is
+always preserved. Even in case of a catastrophic failure, you can
+exclude the ``user`` module, and start over.
+
+And now the function:
+
+.. code:: c
+
+   static mp_obj_t user_square(mp_obj_t arg) {
+       // the function takes a single dense ndarray, and calculates the 
+       // element-wise square of its entries
+       
+       // raise a TypeError exception, if the input is not an ndarray
+       if(!mp_obj_is_type(arg, &ulab_ndarray_type)) {
+           mp_raise_TypeError(translate("input must be an ndarray"));
+       }
+       ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(arg);
+       
+       // make sure that the input is a dense array
+       if(!ndarray_is_dense(ndarray)) {
+           mp_raise_TypeError(translate("input must be a dense ndarray"));
+       }
+       
+       // if the input is a dense array, create `results` with the same number of 
+       // dimensions, shape, and dtype
+       ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, ndarray->shape, ndarray->dtype);
+       
+       // since in a dense array the iteration over the elements is trivial, we 
+       // can cast the data arrays ndarray->array and results->array to the actual type
+       if(ndarray->dtype == NDARRAY_UINT8) {
+           uint8_t *array = (uint8_t *)ndarray->array;
+           uint8_t *rarray = (uint8_t *)results->array;
+           for(size_t i=0; i < ndarray->len; i++, array++) {
+               *rarray++ = (*array) * (*array);
+           }
+       } else if(ndarray->dtype == NDARRAY_INT8) {
+           int8_t *array = (int8_t *)ndarray->array;
+           int8_t *rarray = (int8_t *)results->array;
+           for(size_t i=0; i < ndarray->len; i++, array++) {
+               *rarray++ = (*array) * (*array);
+           }
+       } else if(ndarray->dtype == NDARRAY_UINT16) {
+           uint16_t *array = (uint16_t *)ndarray->array;
+           uint16_t *rarray = (uint16_t *)results->array;
+           for(size_t i=0; i < ndarray->len; i++, array++) {
+               *rarray++ = (*array) * (*array);
+           }
+       } else if(ndarray->dtype == NDARRAY_INT16) {
+           int16_t *array = (int16_t *)ndarray->array;
+           int16_t *rarray = (int16_t *)results->array;
+           for(size_t i=0; i < ndarray->len; i++, array++) {
+               *rarray++ = (*array) * (*array);
+           }
+       } else { // if we end up here, the dtype is NDARRAY_FLOAT
+           mp_float_t *array = (mp_float_t *)ndarray->array;
+           mp_float_t *rarray = (mp_float_t *)results->array;
+           for(size_t i=0; i < ndarray->len; i++, array++) {
+               *rarray++ = (*array) * (*array);
+           }        
+       }
+       // at the end, return a micropython object
+       return MP_OBJ_FROM_PTR(results);
+   }
+
+To summarise, the steps for *implementing* a function are
+
+1. If necessary, inspect the type of the input object, which is always a
+   ``mp_obj_t`` object
+2. If the input is an ``ndarray_obj_t``, acquire a pointer to it by
+   calling ``ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(arg);``
+3. Create a new array, or modify the existing one; get a pointer to the
+   data by calling ``uint8_t *array = (uint8_t *)ndarray->array;``, or
+   something equivalent
+4. Once the new data have been calculated, return a ``micropython``
+   object by calling ``MP_OBJ_FROM_PTR(...)``.
+
+The listing above contains the implementation of the function, but as
+such, it cannot be called from ``python``: it still has to be bound to
+the name space. This we do by first defining a function object in
+
+.. code:: c
+
+   MP_DEFINE_CONST_FUN_OBJ_1(user_square_obj, user_square);
+
+``micropython`` defines a number of ``MP_DEFINE_CONST_FUN_OBJ_N`` macros
+in
+`obj.h <https://github.com/micropython/micropython/blob/master/py/obj.h>`__.
+``N`` is always the number of arguments the function takes. We had a
+function definition ``static mp_obj_t user_square(mp_obj_t arg)``, i.e.,
+we dealt with a single argument.
+
+Finally, we have to bind this function object in the globals table of
+the ``user`` module:
+
+.. code:: c
+
+   static const mp_rom_map_elem_t ulab_user_globals_table[] = {
+       { MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_user) },
+       { MP_OBJ_NEW_QSTR(MP_QSTR_square), (mp_obj_t)&user_square_obj },
+   };
+
+Thus, the three steps required for the definition of a user-defined
+function are
+
+1. The low-level implementation of the function itself
+2. The definition of a function object by calling
+   ``MP_DEFINE_CONST_FUN_OBJ_N()``
+3. Binding this function object to the namespace in the
+   ``ulab_user_globals_table[]``
diff --git a/tulip/shared/ulab/docs/manual/source/ulab-tricks.rst b/tulip/shared/ulab/docs/manual/source/ulab-tricks.rst
new file mode 100644
index 000000000..4c3802ba9
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/ulab-tricks.rst
@@ -0,0 +1,268 @@
+
+Tricks
+======
+
+This section of the book discusses a couple of tricks that can be
+exploited to either speed up computations, or save on RAM. However,
+there is probably no silver bullet, and you have to evaluate your code
+in terms of execution speed (if the execution is time critical), or RAM
+used. You should also keep in mind that, if a particular code snippet is
+optimised on some hardware, there is no guarantee that on another piece
+of hardware, you will get similar improvements. Hardware implementations
+are vastly different. Some microcontrollers do not even have an FPU, so
+you should not be surprised that you get significantly different
+benchmarks. Just to underline this statement, you can study the
+`collection of benchmarks <https://github.com/thiagofe/ulab_samples>`__.
+
+Use an ``ndarray``, if you can
+------------------------------
+
+Many functions in ``ulab`` are implemented in a universal fashion,
+meaning that both generic ``micropython`` iterables, and ``ndarray``\ s
+can be passed as an argument. E.g., both
+
+.. code:: python
+
+   from ulab import numpy as np
+
+   np.sum([1, 2, 3, 4, 5])
+
+and
+
+.. code:: python
+
+   from ulab import numpy as np
+
+   a = np.array([1, 2, 3, 4, 5])
+   np.sum(a)
+
+will return the ``micropython`` variable 15 as the result. Still,
+``np.sum(a)`` is evaluated significantly faster, because in
+``np.sum([1, 2, 3, 4, 5])``, the interpreter has to fetch 5
+``micropython`` variables, convert them to ``float``, and sum the
+values, while the C type of ``a`` is known, thus the interpreter can
+invoke a single ``for`` loop for the evaluation of the ``sum``. In the
+``for`` loop, there are no function calls, the iteration simply walks
+through the pointer holding the values of ``a``, and adds the values to
+an accumulator. If the array ``a`` is already available, then you can
+gain a factor of 3 in speed by calling ``sum`` on the array, instead of
+using the list. Compared to the python implementation of the same
+functionality, the speed-up is around 40 (again, this might depend on
+the hardware).
+
+On the other hand, if the array is not available, then there is not much
+point in converting the list to an ``ndarray`` and passing that to the
+function. In fact, you should expect a slow-down: the constructor has to
+iterate over the list elements, and has to convert them to a numerical
+type. On top of that, it also has to reserve RAM for the ``ndarray``.
+
+Use a reasonable ``dtype``
+--------------------------
+
+Just as in ``numpy``, the default ``dtype`` is ``float``. But this does
+not mean that that is the most suitable one in all scenarios. If data
+are streamed from an 8-bit ADC, and you only want to know the maximum,
+or the sum, then it is quite reasonable to use ``uint8`` for the
+``dtype``. Storing the same data in ``float`` array would cost 4 or 8
+times as much RAM, with absolutely no gain. Do not rely on the default
+value of the constructor’s keyword argument, and choose one that fits!
+
+Beware the axis!
+----------------
+
+Whenever ``ulab`` iterates over multi-dimensional arrays, the outermost
+loop is the first axis, then the second axis, and so on. E.g., when the
+``sum`` of
+
+.. code:: python
+
+   a = array([[1, 2, 3, 4],
+              [5, 6, 7, 8], 
+              [9, 10, 11, 12]], dtype=uint8)
+
+is being calculated, first the data pointer walks along ``[1, 2, 3, 4]``
+(innermost loop, last axis), then is moved back to the position, where 5
+is stored (this is the nesting loop), and traverses ``[5, 6, 7, 8]``,
+and so on. Moving the pointer back to 5 is more expensive, than moving
+it along an axis, because the position of 5 has to be calculated,
+whereas moving from 5 to 6 is simply an addition to the address. Thus,
+while the matrix
+
+.. code:: python
+
+   b = array([[1, 5, 9],
+              [2, 6, 10], 
+              [3, 7, 11],
+              [4, 8, 12]], dtype=uint8)
+
+holds the same data as ``a``, the summation over the entries in ``b`` is
+slower, because the pointer has to be re-wound three times, as opposed
+to twice in ``a``. For small matrices the savings are not significant,
+but you would definitely notice the difference, if you had
+
+::
+
+   a = array(range(2000)).reshape((2, 1000))
+   b = array(range(2000)).reshape((1000, 2))
+
+The moral is that, in order to improve on the execution speed, whenever
+possible, you should try to make the last axis the longest. As a side
+note, ``numpy`` can re-arrange its loops, and puts the longest axis in
+the innermost loop. This is why the longest axis is sometimes referred
+to as the fast axis. In ``ulab``, the order of the axes is fixed.
+
+Reduce the number of artifacts
+------------------------------
+
+Before showing a real-life example, let us suppose that we want to
+interpolate uniformly sampled data, and the absolute magnitude is not
+really important, we only care about the ratios between neighbouring
+values. One way of achieving this is calling the ``interp`` function.
+However, we could just as well work with slices.
+
+.. code::
+
+    # code to be run in CPython
+    
+    a = array([0, 10, 2, 20, 4], dtype=np.uint8)
+    b = np.zeros(9, dtype=np.uint8)
+    
+    b[::2] = 2 * a
+    b[1::2] = a[:-1] + a[1:]
+    
+    b //= 2
+    b
+
+
+
+.. parsed-literal::
+
+    array([ 0,  5, 10,  6,  2, 11, 20, 12,  4], dtype=uint8)
+
+
+
+``b`` now has values from ``a`` at every even position, and interpolates
+the values on every odd position. If only the relative magnitudes are
+important, then we can even save the division by 2, and we end up with
+
+.. code::
+
+    # code to be run in CPython
+    
+    a = array([0, 10, 2, 20, 4], dtype=np.uint8)
+    b = np.zeros(9, dtype=np.uint8)
+    
+    b[::2] = 2 * a
+    b[1::2] = a[:-1] + a[1:]
+    
+    b
+
+
+
+.. parsed-literal::
+
+    array([ 0, 10, 20, 12,  4, 22, 40, 24,  8], dtype=uint8)
+
+
+
+Importantly, we managed to keep the results in the smaller ``dtype``,
+``uint8``. Now, while the two assignments above are terse and pythonic,
+the code is not the most efficient: the right hand sides are compound
+statements, generating intermediate results. To store them, RAM has to
+be allocated. This takes time, and leads to memory fragmentation. Better
+is to write out the assignments in 4 instructions:
+
+.. code::
+
+    # code to be run in CPython
+    
+    b = np.zeros(9, dtype=np.uint8)
+    
+    b[::2] = a
+    b[::2] += a
+    b[1::2] = a[:-1]
+    b[1::2] += a[1:]
+    
+    b
+
+
+
+.. parsed-literal::
+
+    array([ 0, 10, 20, 12,  4, 22, 40, 24,  8], dtype=uint8)
+
+
+
+The results are the same, but no extra RAM is allocated, except for the
+views ``a[:-1]``, and ``a[1:]``, but those had to be created even in the
+original implementation.
+
+Upscaling images
+~~~~~~~~~~~~~~~~
+
+And now the example: there are low-resolution thermal cameras out there.
+Low resolution might mean 8 by 8 pixels. Such a small number of pixels
+is just not reasonable to plot, no matter how small the display is. If
+you want to make the camera image a bit more pleasing, you can upscale
+(stretch) it in both dimensions. This can be done exactly as we
+up-scaled the linear array:
+
+.. code::
+
+    # code to be run in CPython
+    
+    b = np.zeros((15, 15), dtype=np.uint8)
+    
+    b[1::2,::2] = a[:-1,:]
+    b[1::2,::2] += a[1:, :]
+    b[1::2,::2] //= 2
+    b[::,1::2] = a[::,:-1:2]
+    b[::,1::2] += a[::,2::2]
+    b[::,1::2] //= 2
+Up-scaling by larger numbers can be done in a similar fashion, you
+simply have more assignments.
+
+There are cases, when one cannot do away with the intermediate results.
+Two prominent cases are the ``where`` function, and indexing by means of
+a Boolean array. E.g., in
+
+.. code::
+
+    # code to be run in CPython
+    
+    a = array([1, 2, 3, 4, 5])
+    b = a[a < 4]
+    b
+
+
+
+.. parsed-literal::
+
+    array([1, 2, 3])
+
+
+
+the expression ``a < 4`` produces the Boolean array,
+
+.. code::
+
+    # code to be run in CPython
+    
+    a < 4
+
+
+
+.. parsed-literal::
+
+    array([ True,  True,  True, False, False])
+
+
+
+If you repeatedly have such conditions in a loop, you might have to
+periodically call the garbage collector to remove the Boolean arrays that
+are used only once.
+
+.. code::
+
+    # code to be run in CPython
+    
diff --git a/tulip/shared/ulab/docs/manual/source/ulab-utils.rst b/tulip/shared/ulab/docs/manual/source/ulab-utils.rst
new file mode 100644
index 000000000..4305dfb90
--- /dev/null
+++ b/tulip/shared/ulab/docs/manual/source/ulab-utils.rst
@@ -0,0 +1,274 @@
+
+ulab utilities
+==============
+
+There might be cases, when the format of your data does not conform to
+``ulab``, i.e., there is no obvious way to map the data to any of the
+five supported ``dtype``\ s. A trivial example is an ADC or microphone
+signal with 32-bit resolution. For such cases, ``ulab`` defines the
+``utils`` module, which, at the moment, has four functions that are not
+``numpy`` compatible, but which should ease interfacing ``ndarray``\ s
+to peripheral devices.
+
+The ``utils`` module can be enabled by setting the
+``ULAB_HAS_UTILS_MODULE`` constant to 1 in
+`ulab.h <https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h>`__:
+
+.. code:: c
+
+   #ifndef ULAB_HAS_UTILS_MODULE
+   #define ULAB_HAS_UTILS_MODULE               (1)
+   #endif
+
+This still does not compile any functions into the firmware. You can add
+a function by setting the corresponding pre-processor constant to 1.
+E.g.,
+
+.. code:: c
+
+   #ifndef ULAB_UTILS_HAS_FROM_INT16_BUFFER
+   #define ULAB_UTILS_HAS_FROM_INT16_BUFFER    (1)
+   #endif
+
+from_int32_buffer, from_uint32_buffer
+-------------------------------------
+
+With the help of ``utils.from_int32_buffer``, and
+``utils.from_uint32_buffer``, it is possible to convert 32-bit integer
+buffers to ``ndarrays`` of float type. These functions have a syntax
+similar to ``numpy.frombuffer``; they support the ``count=-1``, and
+``offset=0`` keyword arguments. However, in addition, they also accept
+``out=None``, and ``byteswap=False``.
+
+Here is an example without keyword arguments
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import utils
+    
+    a = bytearray([1, 1, 0, 0, 0, 0, 0, 255])
+    print('a: ', a)
+    print()
+    print('unsigned integers: ', utils.from_uint32_buffer(a))
+    
+    b = bytearray([1, 1, 0, 0, 0, 0, 0, 255])
+    print('\nb:  ', b)
+    print()
+    print('signed integers: ', utils.from_int32_buffer(b))
+
+.. parsed-literal::
+
+    a:  bytearray(b'\x01\x01\x00\x00\x00\x00\x00\xff')
+    
+    unsigned integers:  array([257.0, 4278190080.000001], dtype=float64)
+    
+    b:   bytearray(b'\x01\x01\x00\x00\x00\x00\x00\xff')
+    
+    signed integers:  array([257.0, -16777216.0], dtype=float64)
+    
+    
+
+
+The meaning of ``count``, and ``offset`` is similar to that in
+``numpy.frombuffer``. ``count`` is the number of floats that will be
+converted, while ``offset`` would discard the first ``offset`` number of
+bytes from the buffer before the conversion.
+
+In the example above, repeated calls to either of the functions returns
+a new ``ndarray``. You can save RAM by supplying the ``out`` keyword
+argument with a pre-defined ``ndarray`` of sufficient size, in which
+case the results will be inserted into the ``ndarray``. If the ``dtype``
+of ``out`` is not ``float``, a ``TypeError`` exception will be raised.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import utils
+    
+    a = np.array([1, 2], dtype=np.float)
+    b = bytearray([1, 0, 1, 0, 0, 1, 0, 1])
+    print('b: ', b)
+    utils.from_uint32_buffer(b, out=a)
+    print('a: ', a)
+
+.. parsed-literal::
+
+    b:  bytearray(b'\x01\x00\x01\x00\x00\x01\x00\x01')
+    a:  array([65537.0, 16777472.0], dtype=float64)
+    
+    
+
+
+Finally, since there is no guarantee that the endianness of a particular
+peripheral device supplying the buffer is the same as that of the
+microcontroller, ``from_(u)int32_buffer`` allows a conversion via the
+``byteswap`` keyword argument.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import utils
+    
+    a = bytearray([1, 0, 0, 0, 0, 0, 0, 1])
+    print('a: ', a)
+    print('buffer without byteswapping: ', utils.from_uint32_buffer(a))
+    print('buffer with byteswapping: ', utils.from_uint32_buffer(a, byteswap=True))
+
+.. parsed-literal::
+
+    a:  bytearray(b'\x01\x00\x00\x00\x00\x00\x00\x01')
+    buffer without byteswapping:  array([1.0, 16777216.0], dtype=float64)
+    buffer with byteswapping:  array([16777216.0, 1.0], dtype=float64)
+    
+    
+
+
+from_int16_buffer, from_uint16_buffer
+-------------------------------------
+
+These two functions are identical to ``utils.from_int32_buffer``, and
+``utils.from_uint32_buffer``, with the exception that they convert
+16-bit integers to floating point ``ndarray``\ s.
+
+spectrogram
+-----------
+
+In addition to the Fourier transform and its inverse, ``ulab`` also
+sports a function called ``spectrogram``, which returns the absolute
+value of the Fourier transform, also known as the power spectrum. This
+could be used to find the dominant spectral component in a time series.
+The positional arguments are treated in the same way as in ``fft``, and
+``ifft``. This means that, if the firmware was compiled with complex
+support and ``ULAB_FFT_IS_NUMPY_COMPATIBLE`` is defined to be 1 in
+``ulab.h``, the input can also be a complex array.
+
+An easy way to find out if the FFT is ``numpy``-compatible is to check
+the number of values ``fft.fft`` returns, when called with a single real
+argument of length other than 2:
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    if len(np.fft.fft(np.zeros(4))) == 2:
+        print('FFT is NOT numpy compatible (real and imaginary parts are treated separately)')
+    else:
+        print('FFT is numpy compatible (complex inputs/outputs)')
+
+.. parsed-literal::
+
+    FFT is numpy compatible (complex inputs/outputs)
+    
+    
+
+
+Depending on the ``numpy``-compatibility of the FFT, the ``spectrogram``
+function takes one or two positional arguments, and three keyword
+arguments. If the FFT is ``numpy`` compatible, one positional argument
+is allowed, and it is a 1D real or complex ``ndarray``. If the FFT is
+not ``numpy``-compatible, if a single argument is supplied, it will be
+treated as the real part of the input, and if two positional arguments
+are supplied, they are treated as the real and imaginary parts of the
+signal.
+
+The keyword arguments are as follows:
+
+1. ``scratchpad = None``: must be a 1D, dense, floating point array,
+   twice as long as the input array; the ``scratchpad`` will be used as
+   a temporary internal buffer to perform the Fourier transform; the
+   ``scratchpad`` can repeatedly be re-used.
+2. ``out = None``: must be a 1D, not necessarily dense, floating point
+   array that will store the results
+3. ``log = False``: must be either ``True``, or ``False``; if ``True``,
+   the ``spectrogram`` returns the logarithm of the absolute values of
+   the Fourier transform.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import utils as utils
+    
+    x = np.linspace(0, 10, num=1024)
+    y = np.sin(x)
+    
+    a = utils.spectrogram(y)
+    
+    print('original vector:\n', y)
+    print('\nspectrum:\n', a)
+
+.. parsed-literal::
+
+    original vector:
+     array([0.0, 0.009775015390171337, 0.01954909674625918, ..., -0.5275140569487312, -0.5357931822978732, -0.5440211108893697], dtype=float64)
+    
+    spectrum:
+     array([187.8635087634578, 315.3112063607119, 347.8814873399375, ..., 84.45888934298905, 347.8814873399374, 315.3112063607118], dtype=float64)
+    
+    
+
+
+As such, ``spectrogram`` is really just a shorthand for
+``np.abs(np.fft.fft(signal))``, if the FFT is ``numpy``-compatible, or
+``np.sqrt(a*a + b*b)`` if the FFT returns the real (``a``) and imaginary
+(``b``) parts separately. However, ``spectrogram`` saves significant
+amounts of RAM: the expression ``a*a + b*b`` has to allocate memory for
+``a*a``, ``b*b``, and finally, their sum. Similarly, ``np.abs`` returns
+a new array. This issue is compounded even more, if ``np.log()`` is used
+on the absolute value.
+
+In contrast, ``spectrogram`` handles all calculations in the same
+internal arrays, and allows one to re-use previously reserved RAM. This
+can be especially useful in cases, when ``spectrogram`` is called
+repeatedly, as in the snippet below.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    from ulab import utils as utils
+    
+    n = 1024
+    t = np.linspace(0, 2 * np.pi, num=1024)
+    scratchpad = np.zeros(2 * n)
+    
+    for _ in range(10):
+        signal = np.sin(t)
+        utils.spectrogram(signal, out=signal, scratchpad=scratchpad, log=True)
+    
+    print('signal: ', signal)
+    
+    for _ in range(10):
+        signal = np.sin(t)
+        out = np.log(utils.spectrogram(signal))
+    
+    print('out: ', out)
+
+.. parsed-literal::
+
+    signal:  array([-27.38260169844543, 6.237834411021073, -0.4038327279002965, ..., -0.9795967096969854, -0.4038327279002969, 6.237834411021073], dtype=float64)
+    out:  array([-27.38260169844543, 6.237834411021073, -0.4038327279002965, ..., -0.9795967096969854, -0.4038327279002969, 6.237834411021073], dtype=float64)
+    
+    
+
+
+Note that ``scratchpad`` is reserved only once, and then is re-used in
+the first loop. By assigning ``signal`` to the output, we save
+additional RAM. This approach avoids the usual problem of memory
+fragmentation, which would happen in the second loop, where both
+``spectrogram``, and ``np.log`` must reserve RAM in each iteration.
+
+
diff --git a/tulip/shared/ulab/docs/numpy-fft.ipynb b/tulip/shared/ulab/docs/numpy-fft.ipynb
new file mode 100644
index 000000000..803c9239c
--- /dev/null
+++ b/tulip/shared/ulab/docs/numpy-fft.ipynb
@@ -0,0 +1,546 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-01T09:27:13.438054Z",
+     "start_time": "2020-05-01T09:27:13.191491Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T18:24:48.499467Z",
+     "start_time": "2022-01-07T18:24:48.488004Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-07-23T20:31:25.296014Z",
+     "start_time": "2020-07-23T20:31:25.265937Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    print('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# numpy.fft\n",
+    "\n",
+    "Functions related to Fourier transforms can be called by prepending them with `numpy.fft.`. The module defines the following two functions:\n",
+    "\n",
+    "1. [numpy.fft.fft](#fft)\n",
+    "1. [numpy.fft.ifft](#ifft)\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.fft.ifft.html\n",
+    "\n",
+    "## fft\n",
+    "\n",
+    "Since `ulab`'s `ndarray` does not support complex numbers, the invocation of the Fourier transform differs from that in `numpy`. In `numpy`, you can simply pass an array or iterable to the function, and it will be treated as a complex array:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 341,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-17T17:33:38.487729Z",
+     "start_time": "2019-10-17T17:33:38.473515Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([20.+0.j,  0.+0.j, -4.+4.j,  0.+0.j, -4.+0.j,  0.+0.j, -4.-4.j,\n",
+       "        0.+0.j])"
+      ]
+     },
+     "execution_count": 341,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fft.fft([1, 2, 3, 4, 1, 2, 3, 4])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**WARNING:** The array returned is also complex, i.e., the real and imaginary components are cast together. In `ulab`, the real and imaginary parts are treated separately: you have to pass two `ndarray`s to the function, although, the second argument is optional, in which case the imaginary part is assumed to be zero.\n",
+    "\n",
+    "**WARNING:** The function, as opposed to `numpy`, returns a 2-tuple, whose elements are two `ndarray`s, holding the real and imaginary parts of the transform separately. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-02-16T18:38:07.294862Z",
+     "start_time": "2020-02-16T18:38:07.233842Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "real part:\t array([5119.996, -5.004663, -5.004798, ..., -5.005482, -5.005643, -5.006577], dtype=float)\r\n",
+      "\r\n",
+      "imaginary part:\t array([0.0, 1631.333, 815.659, ..., -543.764, -815.6588, -1631.333], dtype=float)\r\n",
+      "\r\n",
+      "real part:\t array([5119.996, -5.004663, -5.004798, ..., -5.005482, -5.005643, -5.006577], dtype=float)\r\n",
+      "\r\n",
+      "imaginary part:\t array([0.0, 1631.333, 815.659, ..., -543.764, -815.6588, -1631.333], dtype=float)\r\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.linspace(0, 10, num=1024)\n",
+    "y = np.sin(x)\n",
+    "z = np.zeros(len(x))\n",
+    "\n",
+    "a, b = np.fft.fft(x)\n",
+    "print('real part:\\t', a)\n",
+    "print('\\nimaginary part:\\t', b)\n",
+    "\n",
+    "c, d = np.fft.fft(x, z)\n",
+    "print('\\nreal part:\\t', c)\n",
+    "print('\\nimaginary part:\\t', d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### ulab with complex support\n",
+    "\n",
+    "If the `ULAB_SUPPORTS_COMPLEX`, and `ULAB_FFT_IS_NUMPY_COMPATIBLE` pre-processor constants are set to 1 in [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h) as \n",
+    "\n",
+    "```c\n",
+    "// Adds support for complex ndarrays\n",
+    "#ifndef ULAB_SUPPORTS_COMPLEX\n",
+    "#define ULAB_SUPPORTS_COMPLEX               (1)\n",
+    "#endif\n",
+    "```\n",
+    "\n",
+    "```c\n",
+    "#ifndef ULAB_FFT_IS_NUMPY_COMPATIBLE\n",
+    "#define ULAB_FFT_IS_NUMPY_COMPATIBLE    (1)\n",
+    "#endif\n",
+    "```\n",
+    "then the FFT routine will behave in a `numpy`-compatible way: the single input array can either be real, in which case the imaginary part is assumed to be zero, or complex. The output is also complex. \n",
+    "\n",
+    "While `numpy`-compatibility might be a desired feature, it has one side effect, namely, the FFT routine consumes approx. 50% more RAM. The reason for this lies in the implementation details."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## ifft\n",
+    "\n",
+    "The above-mentioned rules apply to the inverse Fourier transform. The inverse is also normalised by `N`, the number of elements, as is customary in `numpy`. With the normalisation, we can ascertain that the inverse of the transform is equal to the original array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 459,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-19T13:08:17.647416Z",
+     "start_time": "2019-10-19T13:08:17.597456Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "original vector:\t array([0.0, 0.009775016, 0.0195491, ..., -0.5275068, -0.5357859, -0.5440139], dtype=float)\n",
+      "\n",
+      "real part of inverse:\t array([-2.980232e-08, 0.0097754, 0.0195494, ..., -0.5275064, -0.5357857, -0.5440133], dtype=float)\n",
+      "\n",
+      "imaginary part of inverse:\t array([-2.980232e-08, -1.451171e-07, 3.693752e-08, ..., 6.44871e-08, 9.34986e-08, 2.18336e-07], dtype=float)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.linspace(0, 10, num=1024)\n",
+    "y = np.sin(x)\n",
+    "\n",
+    "a, b = np.fft.fft(y)\n",
+    "\n",
+    "print('original vector:\\t', y)\n",
+    "\n",
+    "y, z = np.fft.ifft(a, b)\n",
+    "# the real part should be equal to y\n",
+    "print('\\nreal part of inverse:\\t', y)\n",
+    "# the imaginary part should be equal to zero\n",
+    "print('\\nimaginary part of inverse:\\t', z)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that unlike in `numpy`, the length of the array on which the Fourier transform is carried out must be a power of 2. If this is not the case, the function raises a `ValueError` exception."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### ulab with complex support\n",
+    "\n",
+    "The `fft.ifft` function can also be made `numpy`-compatible by setting the `ULAB_SUPPORTS_COMPLEX`, and `ULAB_FFT_IS_NUMPY_COMPATIBLE` pre-processor constants to 1."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Computation and storage costs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### RAM\n",
+    "\n",
+    "The FFT routine of `ulab` calculates the transform in place. This means that beyond reserving space for the two `ndarray`s that will be returned (the computation uses these two as intermediate storage space), only a handful of temporary variables, all floats or 32-bit integers, are required. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Speed of FFTs\n",
+    "\n",
+    "A comment on the speed: a 1024-point transform implemented in python would cost around 90 ms, and 13 ms in assembly, if the code runs on the pyboard, v.1.1. You can gain a factor of four by moving to the D series \n",
+    "https://github.com/peterhinch/micropython-fourier/blob/master/README.md#8-performance. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 494,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-19T13:25:40.540913Z",
+     "start_time": "2019-10-19T13:25:40.509598Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "execution time:  1985  us\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.linspace(0, 10, num=1024)\n",
+    "y = np.sin(x)\n",
+    "\n",
+    "@timeit\n",
+    "def np_fft(y):\n",
+    "    return np.fft.fft(y)\n",
+    "\n",
+    "a, b = np_fft(y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The C implementation runs in less than 2 ms on the pyboard (we have just measured that), and has been reported to run in under 0.8 ms on the D series board. That is an improvement of at least a factor of four. "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/numpy-functions.ipynb b/tulip/shared/ulab/docs/numpy-functions.ipynb
new file mode 100644
index 000000000..d13278cf5
--- /dev/null
+++ b/tulip/shared/ulab/docs/numpy-functions.ipynb
@@ -0,0 +1,3018 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-02-13T08:28:06.727371Z",
+     "start_time": "2021-02-13T08:28:04.925338Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-01T17:37:25.505687Z",
+     "start_time": "2022-02-01T17:37:25.493850Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-01T17:37:25.717714Z",
+     "start_time": "2022-02-01T17:37:25.532299Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/build-2/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Numpy functions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This section of the manual discusses those functions that were adapted from `numpy`. Functions with an asterisk accept complex arrays as arguments, if the firmware was compiled with complex support.\n",
+    "\n",
+    "1. [numpy.all*](#all)\n",
+    "1. [numpy.any*](#any)\n",
+    "1. [numpy.argmax](#argmax)\n",
+    "1. [numpy.argmin](#argmin)\n",
+    "1. [numpy.argsort](#argsort)\n",
+    "1. [numpy.asarray*](#asarray)\n",
+    "1. [numpy.bitwise_and](#bitwise_and)\n",
+    "1. [numpy.bitwise_or](#bitwise_and)\n",
+    "1. [numpy.bitwise_xor](#bitwise_and)\n",
+    "1. [numpy.clip](#clip)\n",
+    "1. [numpy.compress*](#compress)\n",
+    "1. [numpy.conjugate*](#conjugate)\n",
+    "1. [numpy.convolve*](#convolve)\n",
+    "1. [numpy.delete](#delete)\n",
+    "1. [numpy.diff](#diff)\n",
+    "1. [numpy.dot](#dot)\n",
+    "1. [numpy.equal](#equal)\n",
+    "1. [numpy.flip*](#flip)\n",
+    "1. [numpy.imag*](#imag)\n",
+    "1. [numpy.interp](#interp)\n",
+    "1. [numpy.isfinite](#isfinite)\n",
+    "1. [numpy.isinf](#isinf)\n",
+    "1. [numpy.left_shift](#left_shift)\n",
+    "1. [numpy.load](#load)\n",
+    "1. [numpy.loadtxt](#loadtxt)\n",
+    "1. [numpy.max](#max)\n",
+    "1. [numpy.maximum](#maximum)\n",
+    "1. [numpy.mean](#mean)\n",
+    "1. [numpy.median](#median)\n",
+    "1. [numpy.min](#min)\n",
+    "1. [numpy.minimum](#minimum)\n",
+    "1. [numpy.nonzero](#nonzero)\n",
+    "1. [numpy.not_equal](#equal)\n",
+    "1. [numpy.polyfit](#polyfit)\n",
+    "1. [numpy.polyval](#polyval)\n",
+    "1. [numpy.real*](#real)\n",
+    "1. [numpy.right_shift](#right_shift)\n",
+    "1. [numpy.roll](#roll)\n",
+    "1. [numpy.save](#save)\n",
+    "1. [numpy.savetxt](#savetxt)\n",
+    "1. [numpy.size](#size)\n",
+    "1. [numpy.sort](#sort)\n",
+    "1. [numpy.sort_complex*](#sort_complex)\n",
+    "1. [numpy.std](#std)\n",
+    "1. [numpy.sum](#sum)\n",
+    "1. [numpy.take*](#take)\n",
+    "1. [numpy.trace](#trace)\n",
+    "1. [numpy.trapz](#trapz)\n",
+    "1. [numpy.where](#where)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## all\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.all.html\n",
+    "\n",
+    "The function takes one positional, and one keyword argument, the `axis`, with a default value of `None`, and tests, whether *all* array elements along the given axis evaluate to `True`. If the keyword argument is `None`, the flattened array is inspected. \n",
+    "\n",
+    "Elements of an array evaluate to `True`, if they are not equal to zero, or the Boolean `False`. The return value is a Boolean `ndarray`.\n",
+    "\n",
+    "If the firmware was compiled with complex support, the function can accept complex arrays."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-02-08T16:54:57.117630Z",
+     "start_time": "2021-02-08T16:54:57.105337Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0]], dtype=float64)\n",
+      "\n",
+      "all of the flattened array:\n",
+      " False\n",
+      "\n",
+      "all of a along 0th axis:\n",
+      " array([False, True, True, True], dtype=bool)\n",
+      "\n",
+      "all of a along 1st axis:\n",
+      " array([False, True, True], dtype=bool)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(12)).reshape((3, 4))\n",
+    "\n",
+    "print('\\na:\\n', a)\n",
+    "\n",
+    "b = np.all(a)\n",
+    "print('\\nall of the flattened array:\\n', b)\n",
+    "\n",
+    "c = np.all(a, axis=0)\n",
+    "print('\\nall of a along 0th axis:\\n', c)\n",
+    "\n",
+    "d = np.all(a, axis=1)\n",
+    "print('\\nall of a along 1st axis:\\n', d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## any\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.any.html\n",
+    "\n",
+    "The function takes one positional, and one keyword argument, the `axis`, with a default value of `None`, and tests, whether *any* array element along the given axis evaluates to `True`. If the keyword argument is `None`, the flattened array is inspected. \n",
+    "\n",
+    "Elements of an array evaluate to `True`, if they are not equal to zero, or the Boolean `False`. The return value is a Boolean `ndarray`.\n",
+    "\n",
+    "If the firmware was compiled with complex support, the function can accept complex arrays."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-02-08T16:54:14.704132Z",
+     "start_time": "2021-02-08T16:54:14.693700Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0]], dtype=float64)\n",
+      "\n",
+      "any of the flattened array:\n",
+      " True\n",
+      "\n",
+      "any of a along 0th axis:\n",
+      " array([True, True, True, True], dtype=bool)\n",
+      "\n",
+      "any of a along 1st axis:\n",
+      " array([True, True, True], dtype=bool)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(12)).reshape((3, 4))\n",
+    "\n",
+    "print('\\na:\\n', a)\n",
+    "\n",
+    "b = np.any(a)\n",
+    "print('\\nany of the flattened array:\\n', b)\n",
+    "\n",
+    "c = np.any(a, axis=0)\n",
+    "print('\\nany of a along 0th axis:\\n', c)\n",
+    "\n",
+    "d = np.any(a, axis=1)\n",
+    "print('\\nany of a along 1st axis:\\n', d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## argmax\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html\n",
+    "\n",
+    "See [numpy.max](#max)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## argmin\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmin.html\n",
+    "\n",
+    "See [numpy.max](#max)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## argsort\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.argsort.html\n",
+    "\n",
+    "Similarly to [sort](#sort), `argsort` takes a positional, and a keyword argument, and returns an unsigned short index array of type `ndarray` with the same dimensions as the input, or, if `axis=None`, as a row vector with length equal to the number of elements in the input (i.e., the flattened array). The indices in the output sort the input in ascending order. The routine in `argsort` is the same as in `sort`, therefore, the comments on computational expenses (time and RAM) also apply. In particular, since no copy of the original data is required, virtually no RAM beyond the output array is used. \n",
+    "\n",
+    "Since the underlying container of the output array is of type `uint16_t`, neither of the output dimensions should be larger than 65535. If that happens to be the case, the function will bail out with a `ValueError`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:33:33.292717Z",
+     "start_time": "2021-01-13T16:33:33.280144Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([[1.0, 12.0, 3.0, 0.0],\n",
+      "       [5.0, 3.0, 4.0, 1.0],\n",
+      "       [9.0, 11.0, 1.0, 8.0],\n",
+      "       [7.0, 10.0, 0.0, 1.0]], dtype=float64)\n",
+      "\n",
+      "a sorted along vertical axis:\n",
+      " array([[0, 1, 3, 0],\n",
+      "       [1, 3, 2, 1],\n",
+      "       [3, 2, 0, 3],\n",
+      "       [2, 0, 1, 2]], dtype=uint16)\n",
+      "\n",
+      "a sorted along horizontal axis:\n",
+      " array([[3, 0, 2, 1],\n",
+      "       [3, 1, 2, 0],\n",
+      "       [2, 3, 0, 1],\n",
+      "       [2, 3, 0, 1]], dtype=uint16)\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/dev/shm/micropython.py\", line 12, in <module>\n",
+      "NotImplementedError: argsort is not implemented for flattened arrays\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.float)\n",
+    "print('\\na:\\n', a)\n",
+    "b = np.argsort(a, axis=0)\n",
+    "print('\\na sorted along vertical axis:\\n', b)\n",
+    "\n",
+    "c = np.argsort(a, axis=1)\n",
+    "print('\\na sorted along horizontal axis:\\n', c)\n",
+    "\n",
+    "c = np.argsort(a, axis=None)\n",
+    "print('\\nflattened a sorted:\\n', c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Since during the sorting, only the indices are shuffled, `argsort` does not modify the input array, as one can verify with the following example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:34:48.446211Z",
+     "start_time": "2021-01-13T16:34:48.424276Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([0, 5, 1, 3, 2, 4], dtype=uint8)\n",
+      "\n",
+      "sorting indices:\n",
+      " array([0, 2, 4, 3, 5, 1], dtype=uint16)\n",
+      "\n",
+      "the original array:\n",
+      " array([0, 5, 1, 3, 2, 4], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([0, 5, 1, 3, 2, 4], dtype=np.uint8)\n",
+    "print('\\na:\\n', a)\n",
+    "b = np.argsort(a, axis=0)\n",
+    "print('\\nsorting indices:\\n', b)\n",
+    "print('\\nthe original array:\\n', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## asarray\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.asarray.html\n",
+    "\n",
+    "The function takes a single positional argument, and an optional keyword argument, `dtype`, with a default value of `None`. \n",
+    "\n",
+    "If the positional argument is an `ndarray`, and its `dtype` is identical to the value of the keyword argument, or if the keyword argument is `None`, then the positional argument is simply returned. If the original `dtype`, and the value of the keyword argument are different, then a copy is returned, with appropriate `dtype` conversion. \n",
+    "\n",
+    "If the positional argument is an iterable, then the function is simply an alias for `array`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-14T20:05:22.017031Z",
+     "start_time": "2022-01-14T20:05:22.002463Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "b:array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "a == b: True\n",
+      "\n",
+      "c:array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int8)\n",
+      "a == c: False\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8)\n",
+    "b = np.asarray(a)\n",
+    "c = np.asarray(a, dtype=np.int8)\n",
+    "print('a:{}'.format(a))\n",
+    "print('b:{}'.format(b))\n",
+    "print('a == b: {}'.format(a is b))\n",
+    "\n",
+    "print('\\nc:{}'.format(c))\n",
+    "print('a == c: {}'.format(a is c))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## bitwise_and\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/routines.bitwise.html\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.bitwise_and.html\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.bitwise_or.html\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.bitwise_xor.html\n",
+    "\n",
+    "Each of `bitwise_and`, `bitwise_or`, and `bitwise_xor` takes two integer-type `ndarray`s as arguments, and returns the element-wise results of the `AND`, `OR`, and `XOR` operators. Broadcasting is supported. If the `dtype` of the input arrays is not an integer, an exception will be raised."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint8)\n",
+      "array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "\n",
+      "bitwise_and:\n",
+      " array([0, 0, 2, 0, 4, 4, 6, 0], dtype=uint8)\n",
+      "\n",
+      "bitwise_or:\n",
+      " array([1, 3, 3, 7, 5, 7, 7, 15], dtype=uint8)\n",
+      "\n",
+      "bitwise_xor:\n",
+      " array([1, 3, 1, 7, 1, 3, 1, 15], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(8), dtype=np.uint8)\n",
+    "b = a + 1\n",
+    "\n",
+    "print(a)\n",
+    "print(b)\n",
+    "print('\\nbitwise_and:\\n', np.bitwise_and(a, b))\n",
+    "print('\\nbitwise_or:\\n', np.bitwise_or(a, b))\n",
+    "print('\\nbitwise_xor:\\n', np.bitwise_xor(a, b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## clip\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.clip.html\n",
+    "\n",
+    "Clips an array, i.e., values that are outside of an interval are clipped to the interval edges. The function is equivalent to `maximum(a_min, minimum(a, a_max))`; broadcasting takes place exactly as in [minimum](#minimum). If the arrays are of different `dtype`, the output is upcast as in [Binary operators](#Binary-operators)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:22:14.147310Z",
+     "start_time": "2021-01-08T13:22:14.123961Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "clipped:\t array([3, 3, 3, 3, 4, 5, 6, 7, 7], dtype=uint8)\n",
+      "\n",
+      "a:\t\t array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "b:\t\t array([3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0], dtype=float64)\n",
+      "clipped:\t array([3.0, 3.0, 3.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8)\n",
+    "print('a:\\t\\t', a)\n",
+    "print('clipped:\\t', np.clip(a, 3, 7))\n",
+    "\n",
+    "b = 3 * np.ones(len(a), dtype=np.float)\n",
+    "print('\\na:\\t\\t', a)\n",
+    "print('b:\\t\\t', b)\n",
+    "print('clipped:\\t', np.clip(a, b, 7))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## compress\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.compress.html\n",
+    "\n",
+    "The function returns selected slices of an array along given axis. If the axis keyword is `None`, the flattened array is used.\n",
+    "\n",
+    "If the firmware was compiled with complex support, the function can accept complex arguments."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:51:44.994323Z",
+     "start_time": "2022-01-07T19:51:44.978185Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[0.0, 1.0, 2.0],\n",
+      "       [3.0, 4.0, 5.0]], dtype=float64)\n",
+      "\n",
+      "compress(a):\n",
+      " array([[3.0, 4.0, 5.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(6)).reshape((2, 3))\n",
+    "\n",
+    "print('a:\\n', a)\n",
+    "print('\\ncompress(a):\\n', np.compress([0, 1], a, axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## conjugate\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.conjugate.html\n",
+    "\n",
+    "If the firmware was compiled with complex support, the function calculates the complex conjugate of the input array. If the input array is of real `dtype`, then the output is simply a copy, preserving the `dtype`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:30:53.394539Z",
+     "start_time": "2022-01-07T19:30:53.374737Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([1, 2, 3, 4], dtype=uint8)\n",
+      "conjugate(a):\t array([1, 2, 3, 4], dtype=uint8)\n",
+      "\n",
+      "b:\t\t array([1.0+1.0j, 2.0-2.0j, 3.0+3.0j, 4.0-4.0j], dtype=complex)\n",
+      "conjugate(b):\t array([1.0-1.0j, 2.0+2.0j, 3.0-3.0j, 4.0+4.0j], dtype=complex)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4], dtype=np.uint8)\n",
+    "b = np.array([1+1j, 2-2j, 3+3j, 4-4j], dtype=np.complex)\n",
+    "\n",
+    "print('a:\\t\\t', a)\n",
+    "print('conjugate(a):\\t', np.conjugate(a))\n",
+    "print()\n",
+    "print('b:\\t\\t', b)\n",
+    "print('conjugate(b):\\t', np.conjugate(b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## convolve\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html\n",
+    "\n",
+    "Returns the discrete, linear convolution of two one-dimensional arrays.\n",
+    "\n",
+    "Only the ``full`` mode is supported, and the ``mode`` named parameter is not accepted. Note that all other modes can be had by slicing a ``full`` result.\n",
+    "\n",
+    "If the firmware was compiled with complex support, the function can accept complex arrays."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T15:57:39.028884Z",
+     "start_time": "2021-01-13T15:57:39.008749Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([1.0, 12.0, 123.0, 1230.0, 2300.0, 3000.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.array((1, 2, 3))\n",
+    "y = np.array((1, 10, 100, 1000))\n",
+    "\n",
+    "print(np.convolve(x, y))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## delete\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.delete.html\n",
+    "\n",
+    "The function returns a new array with sub-arrays along an axis deleted. It takes two positional arguments, the array, and the indices, which will be removed, as well as the `axis` keyword argument with a default value of `None`. If the `axis` is `None`, the array will be flattened first. \n",
+    "\n",
+    "The second positional argument can be a scalar, or any `micropython` iterable. Since `range` can also be passed in place of the indices, slicing can be emulated. If the indices are negative, the elements are counted from the end of the axis.\n",
+    "\n",
+    "Note that the function creates a copy of the indices first, because it is not guaranteed that the indices are ordered. Keep this in mind, when working with large arrays."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-12T17:03:29.099233Z",
+     "start_time": "2022-01-12T17:03:29.084117Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[0, 1, 2, 3, 4],\n",
+      "       [5, 6, 7, 8, 9],\n",
+      "       [10, 11, 12, 13, 14],\n",
+      "       [15, 16, 17, 18, 19],\n",
+      "       [20, 21, 22, 23, 24]], dtype=uint8)\n",
+      "\n",
+      "axis = 0\n",
+      " array([[0, 1, 2, 3, 4],\n",
+      "       [5, 6, 7, 8, 9],\n",
+      "       [15, 16, 17, 18, 19],\n",
+      "       [20, 21, 22, 23, 24]], dtype=uint8)\n",
+      "\n",
+      "axis = 1\n",
+      " array([[0, 1, 2, 4],\n",
+      "       [5, 6, 7, 9],\n",
+      "       [10, 11, 12, 14],\n",
+      "       [15, 16, 17, 19],\n",
+      "       [20, 21, 22, 24]], dtype=uint8)\n",
+      "\n",
+      "axis = None\n",
+      " array([3, 4, 5, ..., 21, 23, 24], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(25), dtype=np.uint8).reshape((5,5))\n",
+    "print('a:\\n', a)\n",
+    "print('\\naxis = 0\\n', np.delete(a, 2, axis=0))\n",
+    "print('\\naxis = 1\\n', np.delete(a, -2, axis=1))\n",
+    "print('\\naxis = None\\n', np.delete(a, [0, 1, 2, 22]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## diff\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.diff.html\n",
+    "\n",
+    "The `diff` function returns the numerical derivative of the forward scheme, or more accurately, the differences of an `ndarray` along a given axis. The order of derivative can be stipulated with the `n` keyword argument, which should be between 0 and 9. Default is 1. If higher order derivatives are required, they can be gotten by repeated calls to the function. The `axis` keyword argument should be -1 (last axis, in `ulab` equivalent to the second axis, and this also happens to be the default value), 0, or 1. \n",
+    "\n",
+    "Beyond the output array, the function requires only a couple of bytes of extra RAM for the differentiation stencil. (The stencil is an `int8` array, one byte longer than `n`. This also explains, why the highest order is 9: the coefficients of a ninth-order stencil all fit in signed bytes, while 10 would require `int16`.) Note that as usual in numerical differentiation (and also in `numpy`), the length of the respective axis will be reduced by `n` after the operation. If `n` is larger than, or equal to the length of the axis, an empty array will be returned.\n",
+    "\n",
+    "**WARNING**: the `diff` function does not implement the `prepend` and `append` keywords that can be found in `numpy`. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-14T16:06:27.468909Z",
+     "start_time": "2021-01-14T16:06:27.439067Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([0, 1, 2, 10, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "\n",
+      "first derivative:\n",
+      " array([1, 1, 8, 250, 1, 1, 1, 1], dtype=uint8)\n",
+      "\n",
+      "second derivative:\n",
+      " array([0, 249, 14, 249, 0, 0, 0], dtype=uint8)\n",
+      "\n",
+      "c:\n",
+      " array([[1.0, 2.0, 3.0, 4.0],\n",
+      "       [4.0, 3.0, 2.0, 1.0],\n",
+      "       [1.0, 4.0, 9.0, 16.0],\n",
+      "       [0.0, 0.0, 0.0, 0.0]], dtype=float64)\n",
+      "\n",
+      "first derivative, first axis:\n",
+      " array([[3.0, 1.0, -1.0, -3.0],\n",
+      "       [-3.0, 1.0, 7.0, 15.0],\n",
+      "       [-1.0, -4.0, -9.0, -16.0]], dtype=float64)\n",
+      "\n",
+      "first derivative, second axis:\n",
+      " array([[1.0, 1.0, 1.0],\n",
+      "       [-1.0, -1.0, -1.0],\n",
+      "       [3.0, 5.0, 7.0],\n",
+      "       [0.0, 0.0, 0.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8)\n",
+    "a[3] = 10\n",
+    "print('a:\\n', a)\n",
+    "\n",
+    "print('\\nfirst derivative:\\n', np.diff(a, n=1))\n",
+    "print('\\nsecond derivative:\\n', np.diff(a, n=2))\n",
+    "\n",
+    "c = np.array([[1, 2, 3, 4], [4, 3, 2, 1], [1, 4, 9, 16], [0, 0, 0, 0]])\n",
+    "print('\\nc:\\n', c)\n",
+    "print('\\nfirst derivative, first axis:\\n', np.diff(c, axis=0))\n",
+    "print('\\nfirst derivative, second axis:\\n', np.diff(c, axis=1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## dot\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html\n",
+    "\n",
+    "\n",
+    "**WARNING:** numpy applies upcasting rules for the multiplication of matrices, while `ulab` simply returns a float matrix. \n",
+    "\n",
+    "Once you can invert a matrix, you might want to know, whether the inversion is correct. You can simply take the original matrix and its inverse, and multiply them by calling the `dot` function, which takes the two matrices as its arguments. If the matrix dimensions do not match, the function raises a `ValueError`. The result of the multiplication is expected to be the unit matrix, which is demonstrated below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-02-13T08:32:09.139378Z",
+     "start_time": "2021-02-13T08:32:09.122083Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "m:\n",
+      " array([[1, 2, 3],\n",
+      "       [4, 5, 6],\n",
+      "       [7, 10, 9]], dtype=uint8)\n",
+      "\n",
+      "m^-1:\n",
+      " array([[-1.25, 1.0, -0.25],\n",
+      "       [0.4999999999999998, -1.0, 0.5],\n",
+      "       [0.4166666666666668, 0.3333333333333333, -0.25]], dtype=float64)\n",
+      "\n",
+      "m*m^-1:\n",
+      " array([[1.0, 0.0, 0.0],\n",
+      "       [4.440892098500626e-16, 1.0, 0.0],\n",
+      "       [8.881784197001252e-16, 0.0, 1.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "m = np.array([[1, 2, 3], [4, 5, 6], [7, 10, 9]], dtype=np.uint8)\n",
+    "n = np.linalg.inv(m)\n",
+    "print(\"m:\\n\", m)\n",
+    "print(\"\\nm^-1:\\n\", n)\n",
+    "# this should be the unit matrix\n",
+    "print(\"\\nm*m^-1:\\n\", np.dot(m, n))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that for matrix multiplication you don't necessarily need square matrices; it is enough if their dimensions are compatible (i.e., the left-hand-side matrix has as many columns as the right-hand-side matrix has rows):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-02-13T08:33:07.630825Z",
+     "start_time": "2021-02-13T08:33:07.608260Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[1, 2, 3, 4],\n",
+      "       [5, 6, 7, 8]], dtype=uint8)\n",
+      "array([[1, 2],\n",
+      "       [3, 4],\n",
+      "       [5, 6],\n",
+      "       [7, 8]], dtype=uint8)\n",
+      "array([[50.0, 60.0],\n",
+      "       [114.0, 140.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "m = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.uint8)\n",
+    "n = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.uint8)\n",
+    "print(m)\n",
+    "print(n)\n",
+    "print(np.dot(m, n))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## equal\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.equal.html\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.not_equal.html\n",
+    "\n",
+    "In `micropython`, equality of arrays or scalars can be established by utilising the `==`, `!=`, `<`, `>`, `<=`, or `>=` binary operators. In `circuitpython`, `==` and `!=` will produce unexpected results. In order to avoid this discrepancy, and to maintain compatibility with `numpy`, `ulab` implements the `equal` and `not_equal` operators that return the same results, irrespective of the `python` implementation.\n",
+    "\n",
+    "These two functions take two `ndarray`s, or scalars as their arguments. No keyword arguments are implemented."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T14:22:13.990898Z",
+     "start_time": "2021-01-08T14:22:13.941896Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "b:  array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=float64)\n",
+      "\n",
+      "a == b:  array([True, False, False, False, False, False, False, False, False], dtype=bool)\n",
+      "a != b:  array([False, True, True, True, True, True, True, True, True], dtype=bool)\n",
+      "a == 2:  array([False, False, True, False, False, False, False, False, False], dtype=bool)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9))\n",
+    "b = np.zeros(9)\n",
+    "\n",
+    "print('a: ', a)\n",
+    "print('b: ', b)\n",
+    "print('\\na == b: ', np.equal(a, b))\n",
+    "print('a != b: ', np.not_equal(a, b))\n",
+    "\n",
+    "# comparison with scalars\n",
+    "print('a == 2: ', np.equal(a, 2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## flip\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.flip.html\n",
+    "\n",
+    "The `flip` function takes one positional, an `ndarray`, and one keyword argument, `axis = None`, and reverses the order of elements along the given axis. If the keyword argument is `None`, the matrix' entries are flipped along all axes. `flip` returns a new copy of the array.\n",
+    "\n",
+    "If the firmware was compiled with complex support, the function can accept complex arrays."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:25:08.425583Z",
+     "start_time": "2021-01-13T16:25:08.407004Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: \t array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=float64)\n",
+      "a flipped:\t array([5.0, 4.0, 3.0, 2.0, 1.0], dtype=float64)\n",
+      "\n",
+      "a flipped horizontally\n",
+      " array([[3, 2, 1],\n",
+      "       [6, 5, 4],\n",
+      "       [9, 8, 7]], dtype=uint8)\n",
+      "\n",
+      "a flipped vertically\n",
+      " array([[7, 8, 9],\n",
+      "       [4, 5, 6],\n",
+      "       [1, 2, 3]], dtype=uint8)\n",
+      "\n",
+      "a flipped horizontally+vertically\n",
+      " array([9, 8, 7, 6, 5, 4, 3, 2, 1], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5])\n",
+    "print(\"a: \\t\", a)\n",
+    "print(\"a flipped:\\t\", np.flip(a))\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8)\n",
+    "print(\"\\na flipped horizontally\\n\", np.flip(a, axis=1))\n",
+    "print(\"\\na flipped vertically\\n\", np.flip(a, axis=0))\n",
+    "print(\"\\na flipped horizontally+vertically\\n\", np.flip(a))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## imag\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.imag.html\n",
+    "\n",
+    "The `imag` function returns the imaginary part of an array, or scalar. It cannot accept a generic iterable as its argument. The function is defined only, if the firmware was compiled with complex support."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:26:42.901258Z",
+     "start_time": "2022-01-07T19:26:42.880755Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([1, 2, 3], dtype=uint16)\n",
+      "imag(a):\t array([0, 0, 0], dtype=uint16)\n",
+      "\n",
+      "b:\t\t array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=complex)\n",
+      "imag(b):\t array([0.0, 1.0, -1.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3], dtype=np.uint16)\n",
+    "print(\"a:\\t\\t\", a)\n",
+    "print(\"imag(a):\\t\", np.imag(a))\n",
+    "\n",
+    "b = np.array([1, 2+1j, 3-1j], dtype=np.complex)\n",
+    "print(\"\\nb:\\t\\t\", b)\n",
+    "print(\"imag(b):\\t\", np.imag(b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## interp\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.interp.html\n",
+    "\n",
+    "The `interp` function returns the linearly interpolated values of a one-dimensional numerical array. It requires three positional arguments, `x`, at which the interpolated values are evaluated, `xp`, the array\n",
+    "of the independent data variable, and `fp`, the array of the dependent values of the data. `xp` must be a monotonically increasing sequence of numbers.\n",
+    "\n",
+    "Two keyword arguments, `left`, and `right` can also be supplied; these determine the return values, if `x < xp[0]`, and `x > xp[-1]`, respectively. If these arguments are not supplied, `left`, and `right` default to `fp[0]`, and `fp[-1]`, respectively."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:00:43.505722Z",
+     "start_time": "2021-01-13T16:00:43.489060Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([0.8, 1.8, 2.8, 3.8, 4.8], dtype=float64)\n",
+      "array([1.0, 1.8, 2.8, 4.6, 5.0], dtype=float64)\n",
+      "array([0.0, 1.8, 2.8, 4.6, 5.0], dtype=float64)\n",
+      "array([1.0, 1.8, 2.8, 4.6, 10.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.array([1, 2, 3, 4, 5]) - 0.2\n",
+    "xp = np.array([1, 2, 3, 4])\n",
+    "fp = np.array([1, 2, 3, 5])\n",
+    "\n",
+    "print(x)\n",
+    "print(np.interp(x, xp, fp))\n",
+    "print(np.interp(x, xp, fp, left=0.0))\n",
+    "print(np.interp(x, xp, fp, right=10.0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## isfinite\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.isfinite.html\n",
+    "\n",
+    "Returns a Boolean array of the same shape as the input, or `True`/`False` if the input is a scalar. In the return value, all elements are `True` at positions where the input value was finite. Integer types are automatically finite; therefore, if the input is of integer type, the output will be the `True` tensor."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-29T21:34:42.026689Z",
+     "start_time": "2021-01-29T21:34:42.010935Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "isfinite(0):  True\n",
+      "\n",
+      "====================\n",
+      "a:\n",
+      " array([1.0, 2.0, nan], dtype=float64)\n",
+      "\n",
+      "isfinite(a):\n",
+      " array([True, True, False], dtype=bool)\n",
+      "\n",
+      "====================\n",
+      "b:\n",
+      " array([1.0, 2.0, inf], dtype=float64)\n",
+      "\n",
+      "isfinite(b):\n",
+      " array([True, True, False], dtype=bool)\n",
+      "\n",
+      "====================\n",
+      "c:\n",
+      " array([1, 2, 3], dtype=uint16)\n",
+      "\n",
+      "isfinite(c):\n",
+      " array([True, True, True], dtype=bool)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print('isfinite(0): ', np.isfinite(0))\n",
+    "\n",
+    "a = np.array([1, 2, np.nan])\n",
+    "print('\\n' + '='*20)\n",
+    "print('a:\\n', a)\n",
+    "print('\\nisfinite(a):\\n', np.isfinite(a))\n",
+    "\n",
+    "b = np.array([1, 2, np.inf])\n",
+    "print('\\n' + '='*20)\n",
+    "print('b:\\n', b)\n",
+    "print('\\nisfinite(b):\\n', np.isfinite(b))\n",
+    "\n",
+    "c = np.array([1, 2, 3], dtype=np.uint16)\n",
+    "print('\\n' + '='*20)\n",
+    "print('c:\\n', c)\n",
+    "print('\\nisfinite(c):\\n', np.isfinite(c))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## isinf\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.isinf.html\n",
+    "\n",
+    "Similar to [isfinite](#isfinite), but the output is `True` at positions, where the input is infinite. Integer types return the `False` tensor."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-29T21:35:21.938514Z",
+     "start_time": "2021-01-29T21:35:21.923741Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "isinf(0):  False\n",
+      "\n",
+      "====================\n",
+      "a:\n",
+      " array([1.0, 2.0, nan], dtype=float64)\n",
+      "\n",
+      "isinf(a):\n",
+      " array([False, False, False], dtype=bool)\n",
+      "\n",
+      "====================\n",
+      "b:\n",
+      " array([1.0, 2.0, inf], dtype=float64)\n",
+      "\n",
+      "isinf(b):\n",
+      " array([False, False, True], dtype=bool)\n",
+      "\n",
+      "====================\n",
+      "c:\n",
+      " array([1, 2, 3], dtype=uint16)\n",
+      "\n",
+      "isinf(c):\n",
+      " array([False, False, False], dtype=bool)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print('isinf(0): ', np.isinf(0))\n",
+    "\n",
+    "a = np.array([1, 2, np.nan])\n",
+    "print('\\n' + '='*20)\n",
+    "print('a:\\n', a)\n",
+    "print('\\nisinf(a):\\n', np.isinf(a))\n",
+    "\n",
+    "b = np.array([1, 2, np.inf])\n",
+    "print('\\n' + '='*20)\n",
+    "print('b:\\n', b)\n",
+    "print('\\nisinf(b):\\n', np.isinf(b))\n",
+    "\n",
+    "c = np.array([1, 2, 3], dtype=np.uint16)\n",
+    "print('\\n' + '='*20)\n",
+    "print('c:\\n', c)\n",
+    "print('\\nisinf(c):\\n', np.isinf(c))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## left_shift\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.left_shift.html\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.right_shift.html\n",
+    "\n",
+    "`left_shift`, and `right_shift` both take two integer-type `ndarray`s, and bit-wise shift the elements of the first array by an amount given by the second array to the left, and right, respectively. Broadcasting is supported. If the `dtype` of the input arrays is not an integer, an exception will be raised."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([1, 1, 1, 1, 1, 1, 1], dtype=uint8)\n",
+      "b:  array([255, 255, 255, 255, 255, 255, 255], dtype=uint8)\n",
+      "c:  array([1, 2, 3, 4, 5, 6, 7], dtype=uint8)\n",
+      "\n",
+      "a left shifted by c:\n",
+      " array([2, 4, 8, 16, 32, 64, 128], dtype=uint8)\n",
+      "\n",
+      "b right shifted by c:\n",
+      " array([127, 63, 31, 15, 7, 3, 1], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.ones(7, dtype=np.uint8)\n",
+    "b = np.zeros(7, dtype=np.uint8) + 255\n",
+    "c = np.array(range(7), dtype=np.uint8) + 1\n",
+    "\n",
+    "print('a: ', a)\n",
+    "print('b: ', b)\n",
+    "print('c: ', c)\n",
+    "print('\\na left shifted by c:\\n', np.left_shift(a, c))\n",
+    "print('\\nb right shifted by c:\\n', np.right_shift(b, c))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## load\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.load.html\n",
+    "\n",
+    "The function reads data from a file in `numpy`'s [platform-independent format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format), and returns the generated array. If the endianness of the data in the file and the microcontroller differ, the bytes are automatically swapped."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-12T19:11:10.361592Z",
+     "start_time": "2022-01-12T19:11:10.342439Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[0.0, 1.0, 2.0, 3.0, 4.0],\n",
+      "       [5.0, 6.0, 7.0, 8.0, 9.0],\n",
+      "       [10.0, 11.0, 12.0, 13.0, 14.0],\n",
+      "       [15.0, 16.0, 17.0, 18.0, 19.0],\n",
+      "       [20.0, 21.0, 22.0, 23.0, 24.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.load('a.npy')\n",
+    "print(a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## loadtxt\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html\n",
+    "\n",
+    "The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the following keyword arguments:\n",
+    "\n",
+    "1. `comments='#'`\n",
+    "1. `dtype=float`\n",
+    "1. `delimiter=','`\n",
+    "1. `max_rows` (with a default of all rows) \n",
+    "1. `skiprows=0`\n",
+    "1. `usecols` (with a default of all columns)\n",
+    "\n",
+    "If `dtype` is supplied and is not `float`, the data entries will be converted to the appropriate integer type by rounding the values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-01T17:41:22.384706Z",
+     "start_time": "2022-02-01T17:41:22.362821Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "read all data\n",
+      "array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0],\n",
+      "       [12.0, 13.0, 14.0, 15.0],\n",
+      "       [16.0, 17.0, 18.0, 19.0],\n",
+      "       [20.0, 21.0, 22.0, 23.0],\n",
+      "       [24.0, 25.0, 26.0, 27.0],\n",
+      "       [28.00000000000001, 29.0, 30.0, 31.0],\n",
+      "       [32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64)\n",
+      "\n",
+      "read maximum 5 rows (first row is a comment line)\n",
+      "array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0],\n",
+      "       [12.0, 13.0, 14.0, 15.0]], dtype=float64)\n",
+      "\n",
+      "read maximum 5 rows, convert dtype (first row is a comment line)\n",
+      "array([[0, 1, 2, 3],\n",
+      "       [4, 5, 6, 7],\n",
+      "       [8, 9, 10, 11],\n",
+      "       [12, 13, 14, 15]], dtype=uint8)\n",
+      "\n",
+      "skip the first 3 rows, convert dtype (first row is a comment line)\n",
+      "array([[8, 9, 10, 11],\n",
+      "       [12, 13, 14, 15],\n",
+      "       [16, 17, 18, 19],\n",
+      "       [20, 21, 22, 23],\n",
+      "       [24, 25, 26, 27],\n",
+      "       [28, 29, 30, 31],\n",
+      "       [32, 33, 34, 35]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print('read all data')\n",
+    "print(np.loadtxt('loadtxt.dat'))\n",
+    "\n",
+    "print('\\nread maximum 5 rows (first row is a comment line)')\n",
+    "print(np.loadtxt('loadtxt.dat', max_rows=5))\n",
+    "\n",
+    "print('\\nread maximum 5 rows, convert dtype (first row is a comment line)')\n",
+    "print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))\n",
+    "\n",
+    "print('\\nskip the first 3 rows, convert dtype (first row is a comment line)')\n",
+    "print(np.loadtxt('loadtxt.dat', skiprows=3, dtype=np.uint8))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## mean\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.mean.html\n",
+    "\n",
+    "If the axis keyword is not specified, it assumes the default value of `None`, and returns the result of the computation for the flattened array. Otherwise, the calculation is along the given axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:15:39.921212Z",
+     "start_time": "2021-01-13T16:15:39.908217Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: \n",
+      " array([[1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0],\n",
+      "       [7.0, 8.0, 9.0]], dtype=float64)\n",
+      "mean, flat:  5.0\n",
+      "mean, horizontal:  array([2.0, 5.0, 8.0], dtype=float64)\n",
+      "mean, vertical:  array([4.0, 5.0, 6.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n",
+    "print('a: \\n', a)\n",
+    "print('mean, flat: ', np.mean(a))\n",
+    "print('mean, horizontal: ', np.mean(a, axis=1))\n",
+    "print('mean, vertical: ', np.mean(a, axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## max\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.max.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.min.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmin.html\n",
+    "\n",
+    "**WARNING:** Difference to `numpy`: the `out` keyword argument is not implemented.\n",
+    "\n",
+    "These functions follow the same pattern, and work with generic iterables, and `ndarray`s. `min`, and `max` return the minimum or maximum of a sequence. If the input array is two-dimensional, the `axis` keyword argument can be supplied, in which case the minimum/maximum along the given axis will be returned. If `axis=None` (this is also the default value), the minimum/maximum of the flattened array will be determined.\n",
+    "\n",
+    "`argmin/argmax` return the position (index) of the minimum/maximum in the sequence."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:08:56.986619Z",
+     "start_time": "2021-01-13T16:08:56.964492Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: array([1.0, 2.0, 0.0, 1.0, 10.0], dtype=float64)\n",
+      "min of a: 0.0\n",
+      "argmin of a: 2\n",
+      "\n",
+      "b:\n",
+      " array([[1.0, 2.0, 0.0],\n",
+      "       [1.0, 10.0, -1.0]], dtype=float64)\n",
+      "min of b (flattened): -1.0\n",
+      "min of b (axis=0): array([1.0, 2.0, -1.0], dtype=float64)\n",
+      "min of b (axis=1): array([0.0, -1.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 0, 1, 10])\n",
+    "print('a:', a)\n",
+    "print('min of a:', np.min(a))\n",
+    "print('argmin of a:', np.argmin(a))\n",
+    "\n",
+    "b = np.array([[1, 2, 0], [1, 10, -1]])\n",
+    "print('\\nb:\\n', b)\n",
+    "print('min of b (flattened):', np.min(b))\n",
+    "print('min of b (axis=0):', np.min(b, axis=0))\n",
+    "print('min of b (axis=1):', np.min(b, axis=1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## median\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.median.html\n",
+    "\n",
+    "The function computes the median along the specified axis, and returns the median of the array elements. If the `axis` keyword argument is `None`, the array is flattened first. The `dtype` of the results is always float."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:31:13.833800Z",
+     "start_time": "2021-01-13T16:31:13.809560Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[0, 1, 2, 3],\n",
+      "       [4, 5, 6, 7],\n",
+      "       [8, 9, 10, 11]], dtype=int8)\n",
+      "\n",
+      "median of the flattened array:  5.5\n",
+      "\n",
+      "median along the vertical axis:  array([4.0, 5.0, 6.0, 7.0], dtype=float64)\n",
+      "\n",
+      "median along the horizontal axis:  array([1.5, 5.5, 9.5], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(12), dtype=np.int8).reshape((3, 4))\n",
+    "print('a:\\n', a)\n",
+    "print('\\nmedian of the flattened array: ', np.median(a))\n",
+    "print('\\nmedian along the vertical axis: ', np.median(a, axis=0))\n",
+    "print('\\nmedian along the horizontal axis: ', np.median(a, axis=1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## min\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.min.html\n",
+    "\n",
+    "See [numpy.max](#max)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## minimum\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.minimum.html\n",
+    "\n",
+    "See [numpy.maximum](#maximum)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## maximum\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.maximum.html\n",
+    "\n",
+    "Returns the maximum of two arrays, or two scalars, or an array, and a scalar. If the arrays are of different `dtype`, the output is upcast as in [Binary operators](#Binary-operators). If both inputs are scalars, a scalar is returned. Only positional arguments are implemented."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:21:17.151280Z",
+     "start_time": "2021-01-08T13:21:17.123768Z"
+    },
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "minimum of a, and b:\n",
+      "array([1.0, 2.0, 3.0, 2.0, 1.0], dtype=float64)\n",
+      "\n",
+      "maximum of a, and b:\n",
+      "array([5.0, 4.0, 3.0, 4.0, 5.0], dtype=float64)\n",
+      "\n",
+      "maximum of 1, and 5.5:\n",
+      "5.5\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)\n",
+    "b = np.array([5, 4, 3, 2, 1], dtype=np.float)\n",
+    "print('minimum of a, and b:')\n",
+    "print(np.minimum(a, b))\n",
+    "\n",
+    "print('\\nmaximum of a, and b:')\n",
+    "print(np.maximum(a, b))\n",
+    "\n",
+    "print('\\nmaximum of 1, and 5.5:')\n",
+    "print(np.maximum(1, 5.5))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## nonzero\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.nonzero.html\n",
+    "\n",
+    "`nonzero` returns the indices of the elements of an array that are not zero. If the number of dimensions of the array is larger than one, a tuple of arrays is returned, one for each dimension, containing the indices of the non-zero elements in that dimension."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=float64)\n",
+      "(array([0, 1, 2, 3, 4, 6, 7, 8], dtype=uint16),)\n",
+      "\n",
+      "a:\n",
+      " array([[-5.0, -4.0, -3.0],\n",
+      "       [-2.0, -1.0, 0.0],\n",
+      "       [1.0, 2.0, 3.0]], dtype=float64)\n",
+      "(array([0, 0, 0, 1, 1, 2, 2, 2], dtype=uint16), array([0, 1, 2, 0, 1, 0, 1, 2], dtype=uint16))\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9)) - 5\n",
+    "print('a:\\n', a)\n",
+    "print(np.nonzero(a))\n",
+    "\n",
+    "a = a.reshape((3,3))\n",
+    "print('\\na:\\n', a)\n",
+    "print(np.nonzero(a))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## not_equal\n",
+    "\n",
+    "See [numpy.equal](#equal)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## polyfit\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.polyfit.html\n",
+    "\n",
+    "`polyfit` takes two, or three arguments. The last one is the degree of the polynomial that will be fitted, the last but one is an array or iterable with the `y` (dependent) values, and the first one, an array or iterable with the `x` (independent) values, can be dropped. If that is the case, `x` will be generated in the function as `range(len(y))`.\n",
+    "\n",
+    "If the lengths of `x`, and `y` are not the same, the function raises a `ValueError`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T18:23:39.238450Z",
+     "start_time": "2021-01-13T18:23:39.221063Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "independent values:\t array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0], dtype=float64)\n",
+      "dependent values:\t array([9.0, 4.0, 1.0, 0.0, 1.0, 4.0, 9.0], dtype=float64)\n",
+      "fitted values:\t\t array([1.0, -6.0, 9.000000000000004], dtype=float64)\n",
+      "\n",
+      "dependent values:\t array([9.0, 4.0, 1.0, 0.0, 1.0, 4.0, 9.0], dtype=float64)\n",
+      "fitted values:\t\t array([1.0, -6.0, 9.000000000000004], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.array([0, 1, 2, 3, 4, 5, 6])\n",
+    "y = np.array([9, 4, 1, 0, 1, 4, 9])\n",
+    "print('independent values:\\t', x)\n",
+    "print('dependent values:\\t', y)\n",
+    "print('fitted values:\\t\\t', np.polyfit(x, y, 2))\n",
+    "\n",
+    "# the same with missing x\n",
+    "print('\\ndependent values:\\t', y)\n",
+    "print('fitted values:\\t\\t', np.polyfit(y, 2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Execution time\n",
+    "\n",
+    "`polyfit` is based on the inversion of a matrix (there is more on the background in  https://en.wikipedia.org/wiki/Polynomial_regression), and it requires the intermediate storage of `2*N*(deg+1)` floats, where `N` is the number of entries in the input array, and `deg` is the fit's degree. The additional computation costs of the matrix inversion discussed in [linalg.inv](#inv) also apply. The example from above needs around 150 microseconds to return:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T18:31:40.919764Z",
+     "start_time": "2021-01-13T18:31:40.912817Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "execution time:  153  us\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "@timeit\n",
+    "def time_polyfit(x, y, n):\n",
+    "    return np.polyfit(x, y, n)\n",
+    "\n",
+    "x = np.array([0, 1, 2, 3, 4, 5, 6])\n",
+    "y = np.array([9, 4, 1, 0, 1, 4, 9])\n",
+    "\n",
+    "time_polyfit(x, y, 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## polyval\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.polyval.html\n",
+    "\n",
+    "`polyval` takes two arguments, both arrays or generic `micropython` iterables returning scalars."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T18:12:56.736643Z",
+     "start_time": "2021-01-13T18:12:56.668042Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "coefficients:  [1, 1, 1, 0]\n",
+      "independent values:  [0, 1, 2, 3, 4]\n",
+      "\n",
+      "values of p(x):  array([0.0, 3.0, 14.0, 39.0, 84.0], dtype=float64)\n",
+      "\n",
+      "ndarray (a):  array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=float64)\n",
+      "value of p(a):  array([0.0, 3.0, 14.0, 39.0, 84.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "p = [1, 1, 1, 0]\n",
+    "x = [0, 1, 2, 3, 4]\n",
+    "print('coefficients: ', p)\n",
+    "print('independent values: ', x)\n",
+    "print('\\nvalues of p(x): ', np.polyval(p, x))\n",
+    "\n",
+    "# the same works with one-dimensional ndarrays\n",
+    "a = np.array(x)\n",
+    "print('\\nndarray (a): ', a)\n",
+    "print('value of p(a): ', np.polyval(p, a))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## real\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.real.html\n",
+    "\n",
+    "The `real` function returns the real part of an array or scalar. It cannot accept a generic iterable as its argument. The function is only defined if the firmware was compiled with complex support."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:27:22.141930Z",
+     "start_time": "2022-01-07T19:27:22.122577Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([1, 2, 3], dtype=uint16)\n",
+      "real(a):\t array([1, 2, 3], dtype=uint16)\n",
+      "\n",
+      "b:\t\t array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=complex)\n",
+      "real(b):\t array([1.0, 2.0, 3.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3], dtype=np.uint16)\n",
+    "print(\"a:\\t\\t\", a)\n",
+    "print(\"real(a):\\t\", np.real(a))\n",
+    "\n",
+    "b = np.array([1, 2+1j, 3-1j], dtype=np.complex)\n",
+    "print(\"\\nb:\\t\\t\", b)\n",
+    "print(\"real(b):\\t\", np.real(b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## roll\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.roll.html\n",
+    "\n",
+    "The roll function shifts the content of a vector by the positions given as the second argument. If the `axis` keyword is supplied, the shift is applied to the given axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:18:30.387043Z",
+     "start_time": "2021-01-13T16:18:30.363374Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t\t array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "a rolled to the left:\t array([7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0], dtype=float64)\n",
+      "a rolled to the right:\t array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5, 6, 7, 8])\n",
+    "print(\"a:\\t\\t\\t\", a)\n",
+    "\n",
+    "a = np.roll(a, 2)\n",
+    "print(\"a rolled to the left:\\t\", a)\n",
+    "\n",
+    "# this should be the original vector\n",
+    "a = np.roll(a, -2)\n",
+    "print(\"a rolled to the right:\\t\", a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Rolling works with matrices, too. If the `axis` keyword is 0, the matrix is rolled along its vertical axis, otherwise, horizontally. \n",
+    "\n",
+    "Horizontal rolls are faster, because they require fewer steps, and larger memory chunks are copied, however, they also require more RAM: basically the whole row must be stored internally. Most expensive are the `None` keyword values, because with `axis = None`, the array is flattened first, hence the row's length is the size of the whole matrix.\n",
+    "\n",
+    "Vertical rolls require two internal copies of single columns. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:23:52.025977Z",
+     "start_time": "2021-01-13T16:23:52.001252Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0]], dtype=float64)\n",
+      "\n",
+      "a rolled up:\n",
+      " array([[4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0],\n",
+      "       [0.0, 1.0, 2.0, 3.0]], dtype=float64)\n",
+      "a:\n",
+      " array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0]], dtype=float64)\n",
+      "\n",
+      "a rolled to the left:\n",
+      " array([[1.0, 2.0, 3.0, 0.0],\n",
+      "       [5.0, 6.0, 7.0, 4.0],\n",
+      "       [9.0, 10.0, 11.0, 8.0]], dtype=float64)\n",
+      "a:\n",
+      " array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0]], dtype=float64)\n",
+      "\n",
+      "a rolled with None:\n",
+      " array([[11.0, 0.0, 1.0, 2.0],\n",
+      "       [3.0, 4.0, 5.0, 6.0],\n",
+      "       [7.0, 8.0, 9.0, 10.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(12)).reshape((3, 4))\n",
+    "print(\"a:\\n\", a)\n",
+    "a = np.roll(a, 2, axis=0)\n",
+    "print(\"\\na rolled up:\\n\", a)\n",
+    "\n",
+    "a = np.array(range(12)).reshape((3, 4))\n",
+    "print(\"a:\\n\", a)\n",
+    "a = np.roll(a, -1, axis=1)\n",
+    "print(\"\\na rolled to the left:\\n\", a)\n",
+    "\n",
+    "a = np.array(range(12)).reshape((3, 4))\n",
+    "print(\"a:\\n\", a)\n",
+    "a = np.roll(a, 1, axis=None)\n",
+    "print(\"\\na rolled with None:\\n\", a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## save\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.save.html\n",
+    "\n",
+    "With the help of this function, a numerical array can be saved in `numpy`'s [platform-independent format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format).\n",
+    "\n",
+    "The function takes two positional arguments, the name of the output file, and the array. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-15T08:51:08.827144Z",
+     "start_time": "2022-01-15T08:51:08.813813Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "a = np.array(range(25)).reshape((5, 5))\n",
+    "np.save('a.npy', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## savetxt\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.savetxt.html\n",
+    "\n",
+    "With the help of this function, a numerical array can be saved in a text file. The function takes two positional arguments, the name of the output file, and the array, and also implements the `comments='#'`,\n",
+    "`delimiter=' '`, `header=''`, and `footer=''` keyword arguments. The input is treated as being of type `float`, i.e., the output is always in floating point representation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-28T18:56:06.933706Z",
+     "start_time": "2022-01-28T18:56:06.872547Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.000000000000000 1.000000000000000 2.000000000000000 3.000000000000000\n",
+      "4.000000000000000 5.000000000000000 6.000000000000000 7.000000000000000\n",
+      "8.000000000000000 9.000000000000000 10.000000000000000 11.000000000000000\n",
+      "\n",
+      "!col1;col2;col3;col4\n",
+      "0.000000000000000;1.000000000000000;2.000000000000000;3.000000000000000\n",
+      "4.000000000000000;5.000000000000000;6.000000000000000;7.000000000000000\n",
+      "8.000000000000000;9.000000000000000;10.000000000000000;11.000000000000000\n",
+      "!saved data\n",
+      "\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(12), dtype=np.uint8).reshape((3, 4))\n",
+    "np.savetxt('savetxt.dat', a)\n",
+    "\n",
+    "with open('savetxt.dat', 'r') as fin:\n",
+    "    print(fin.read())\n",
+    "    \n",
+    "np.savetxt('savetxt.dat', a, \n",
+    "           comments='!', \n",
+    "           delimiter=';', \n",
+    "           header='col1;col2;col3;col4', \n",
+    "           footer='saved data')\n",
+    "\n",
+    "with open('savetxt.dat', 'r') as fin:\n",
+    "    print(fin.read())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## size\n",
+    "\n",
+    "The function takes a single positional argument, and an optional keyword argument, `axis`, with a default value of `None`, and returns the size of an array along that axis. If `axis` is `None`, the total length of the array (the product of the elements of its shape) is returned."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-15T08:50:57.254168Z",
+     "start_time": "2022-01-15T08:50:57.245772Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[1.0, 1.0, 1.0],\n",
+      "       [1.0, 1.0, 1.0]], dtype=float64)\n",
+      "size(a, axis=0):  2\n",
+      "size(a, axis=1):  3\n",
+      "size(a, axis=None):  6\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.ones((2, 3))\n",
+    "\n",
+    "print(a)\n",
+    "print('size(a, axis=0): ', np.size(a, axis=0))\n",
+    "print('size(a, axis=1): ', np.size(a, axis=1))\n",
+    "print('size(a, axis=None): ', np.size(a, axis=None))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## sort\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.sort.html\n",
+    "\n",
+    "The sort function takes an ndarray, and sorts its elements in ascending order along the specified axis using a heap sort algorithm. As opposed to the `.sort()` method discussed earlier, this function creates a copy of its input before sorting, and at the end, returns this copy. Sorting takes place in place, without auxiliary storage. The `axis` keyword argument takes on the possible values of -1 (the last axis, in `ulab` equivalent to the second axis, and this also happens to be the default value), 0, 1, or `None`. The first three cases are identical to those in [diff](#diff), while the last one flattens the array before sorting. \n",
+    "\n",
+    "If descending order is required, the result can simply be `flip`ped, see [flip](#flip).\n",
+    "\n",
+    "**WARNING:** `numpy` defines the `kind`, and `order` keyword arguments that are not implemented here. The function in `ulab` always uses heap sort, and since `ulab` does not have the concept of data fields, the `order` keyword argument would have no meaning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:32:07.748972Z",
+     "start_time": "2021-01-13T16:32:07.730498Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([[1.0, 12.0, 3.0, 0.0],\n",
+      "       [5.0, 3.0, 4.0, 1.0],\n",
+      "       [9.0, 11.0, 1.0, 8.0],\n",
+      "       [7.0, 10.0, 0.0, 1.0]], dtype=float64)\n",
+      "\n",
+      "a sorted along vertical axis:\n",
+      " array([[1.0, 3.0, 0.0, 0.0],\n",
+      "       [5.0, 10.0, 1.0, 1.0],\n",
+      "       [7.0, 11.0, 3.0, 1.0],\n",
+      "       [9.0, 12.0, 4.0, 8.0]], dtype=float64)\n",
+      "\n",
+      "a sorted along horizontal axis:\n",
+      " array([[0.0, 1.0, 3.0, 12.0],\n",
+      "       [1.0, 3.0, 4.0, 5.0],\n",
+      "       [1.0, 8.0, 9.0, 11.0],\n",
+      "       [0.0, 1.0, 7.0, 10.0]], dtype=float64)\n",
+      "\n",
+      "flattened a sorted:\n",
+      " array([0.0, 0.0, 1.0, ..., 10.0, 11.0, 12.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.float)\n",
+    "print('\\na:\\n', a)\n",
+    "b = np.sort(a, axis=0)\n",
+    "print('\\na sorted along vertical axis:\\n', b)\n",
+    "\n",
+    "c = np.sort(a, axis=1)\n",
+    "print('\\na sorted along horizontal axis:\\n', c)\n",
+    "\n",
+    "c = np.sort(a, axis=None)\n",
+    "print('\\nflattened a sorted:\\n', c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Heap sort requires $\\sim N\\log N$ operations, and notably, the worst case costs only 20% more time than the average. In order to get an order-of-magnitude estimate, we will take the sine of 1000 uniformly spaced numbers between 0, and two pi, and sort them:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "@timeit\n",
+    "def sort_time(array):\n",
+    "    return np.sort(array)\n",
+    "\n",
+    "b = np.sin(np.linspace(0, 6.28, num=1000))\n",
+    "print('b: ', b)\n",
+    "sort_time(b)\n",
+    "print('\\nb sorted:\\n', b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## sort_complex\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.sort_complex.html\n",
+    "\n",
+    "If the firmware was compiled with complex support, the function sorts the input array first according to its real part, and then the imaginary part. The input must be a one-dimensional array. The output is always of `dtype` complex, even if the input was a real integer array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:36:15.750029Z",
+     "start_time": "2022-01-07T19:36:15.732210Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t\t array([5, 4, 3, 2, 1], dtype=int16)\n",
+      "sort_complex(a):\t array([1.0+0.0j, 2.0+0.0j, 3.0+0.0j, 4.0+0.0j, 5.0+0.0j], dtype=complex)\n",
+      "\n",
+      "b:\t\t\t array([5.0+0.0j, 4.0+3.0j, 4.0-2.0j, 0.0+0.0j, 0.0+1.0j], dtype=complex)\n",
+      "sort_complex(b):\t array([0.0+0.0j, 0.0+1.0j, 4.0-2.0j, 4.0+3.0j, 5.0+0.0j], dtype=complex)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([5, 4, 3, 2, 1], dtype=np.int16)\n",
+    "print('a:\\t\\t\\t', a)\n",
+    "print('sort_complex(a):\\t', np.sort_complex(a))\n",
+    "print()\n",
+    "\n",
+    "b = np.array([5, 4+3j, 4-2j, 0, 1j], dtype=np.complex)\n",
+    "print('b:\\t\\t\\t', b)\n",
+    "print('sort_complex(b):\\t', np.sort_complex(b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## std\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html\n",
+    "\n",
+    "If the axis keyword is not specified, it assumes the default value of `None`, and returns the result of the computation for the flattened array. Otherwise, the calculation is along the given axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:14:54.051061Z",
+     "start_time": "2021-01-13T16:14:54.029924Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: \n",
+      " array([[1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0],\n",
+      "       [7.0, 8.0, 9.0]], dtype=float64)\n",
+      "sum, flat array:  2.581988897471611\n",
+      "std, vertical:  array([2.449489742783178, 2.449489742783178, 2.449489742783178], dtype=float64)\n",
+      "std, horizonal:  array([0.8164965809277261, 0.8164965809277261, 0.8164965809277261], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n",
+    "print('a: \\n', a)\n",
+    "print('sum, flat array: ', np.std(a))\n",
+    "print('std, vertical: ', np.std(a, axis=0))\n",
+    "print('std, horizonal: ', np.std(a, axis=1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## sum\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.sum.html\n",
+    "\n",
+    "If the axis keyword is not specified, it assumes the default value of `None`, and returns the result of the computation for the flattened array. Otherwise, the calculation is along the given axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:14:34.576723Z",
+     "start_time": "2021-01-13T16:14:34.556304Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: \n",
+      " array([[1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0],\n",
+      "       [7.0, 8.0, 9.0]], dtype=float64)\n",
+      "sum, flat array:  45.0\n",
+      "sum, horizontal:  array([6.0, 15.0, 24.0], dtype=float64)\n",
+      "std, vertical:  array([12.0, 15.0, 18.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n",
+    "print('a: \\n', a)\n",
+    "\n",
+    "print('sum, flat array: ', np.sum(a))\n",
+    "print('sum, horizontal: ', np.sum(a, axis=1))\n",
+    "print('std, vertical: ', np.sum(a, axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## take\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.take.html\n",
+    "\n",
+    "The `take` function takes elements from an array along an axis. The function accepts two positional arguments, the array, and the indices, which are either a `python` iterable, or a one-dimensional `ndarray`, as well as three keyword arguments, the `axis`, which can be `None`, or an integer, `out`, which can be `None`, or an `ndarray` with the proper dimensions, and `mode`, which can be one of the strings `raise`, `wrap`, or `clip`. This last argument determines how out-of-bounds indices will be treated. The default value is `raise`, which raises an exception. `wrap` takes the indices modulo the length of the `axis`, while `clip` pegs the values at 0, and the length of the `axis`. If `axis` is `None`, then `take` operates on the flattened array.\n",
+    "\n",
+    "The function can be regarded as a method of advanced slicing: as opposed to standard slicing, where the indices are distributed uniformly and in either increasing or decreasing order, `take` can take indices in an arbitrary order."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a: array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0]], dtype=float64)\n",
+      "\n",
+      "slices taken along first axis\n",
+      "array([[0.0, 1.0, 2.0, 3.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0],\n",
+      "       [8.0, 9.0, 10.0, 11.0],\n",
+      "       [4.0, 5.0, 6.0, 7.0]], dtype=float64)\n",
+      "\n",
+      "slices taken along second axis\n",
+      "array([[0.0, 2.0, 2.0, 1.0],\n",
+      "       [2.0, 3.0, 4.0, 5.0],\n",
+      "       [6.0, 7.0, 8.0, 9.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(12)).reshape((3, 4))\n",
+    "print('\\na:', a)\n",
+    "\n",
+    "print('\\nslices taken along first axis')\n",
+    "print(np.take(a, (0, 2, 2, 1), axis=0))\n",
+    "\n",
+    "print('\\nslices taken along second axis')\n",
+    "print(np.take(a, (0, 2, 2, 1), axis=1))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## trace\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.trace.html\n",
+    "\n",
+    "The `trace` function returns the sum of the diagonal elements of a square matrix. If the input argument is not a square matrix, an exception will be raised.\n",
+    "\n",
+    "The scalar so returned will inherit the type of the input array, i.e., integer arrays have integer trace, and floating point arrays a floating point trace."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-02-13T08:30:25.211965Z",
+     "start_time": "2021-02-13T08:30:25.195102Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([[25, 15, -5],\n",
+      "       [15, 18, 0],\n",
+      "       [-5, 0, 11]], dtype=int8)\n",
+      "\n",
+      "trace of a:  54\n",
+      "====================\n",
+      "b:  array([[25.0, 15.0, -5.0],\n",
+      "       [15.0, 18.0, 0.0],\n",
+      "       [-5.0, 0.0, 11.0]], dtype=float64)\n",
+      "\n",
+      "trace of b:  54.0\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[25, 15, -5], [15, 18,  0], [-5,  0, 11]], dtype=np.int8)\n",
+    "print('a: ', a)\n",
+    "print('\\ntrace of a: ', np.trace(a))\n",
+    "\n",
+    "b = np.array([[25, 15, -5], [15, 18,  0], [-5,  0, 11]], dtype=np.float)\n",
+    "\n",
+    "print('='*20 + '\\nb: ', b)\n",
+    "print('\\ntrace of b: ', np.trace(b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## trapz\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.trapz.html\n",
+    "\n",
+    "The function takes one or two one-dimensional `ndarray`s, and integrates the dependent values (`y`) using the trapezoidal rule. If the independent variable (`x`) is given, that is taken as the sample points corresponding to `y`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T16:03:42.566302Z",
+     "start_time": "2021-01-13T16:03:42.545630Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "x:  array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], dtype=float64)\n",
+      "y:  array([0.0, 1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 49.0, 64.0, 81.0], dtype=float64)\n",
+      "============================\n",
+      "integral of y:  244.5\n",
+      "integral of y at x:  244.5\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.linspace(0, 9, num=10)\n",
+    "y = x*x\n",
+    "\n",
+    "print('x: ',  x)\n",
+    "print('y: ',  y)\n",
+    "print('============================')\n",
+    "print('integral of y: ', np.trapz(y))\n",
+    "print('integral of y at x: ', np.trapz(y, x=x))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## where\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.where.html\n",
+    "\n",
+    "The function takes three positional arguments, `condition`, `x`, and `y`, and returns a new `ndarray`, whose values are taken from either `x`, or `y`, depending on the truthiness of `condition`. The three arguments are broadcast together, and the function raises a `ValueError` exception if broadcasting is not possible.\n",
+    "\n",
+    "The function is implemented for `ndarray`s only: other iterable types can be passed after casting them to an `ndarray` by calling the `array` constructor.\n",
+    "\n",
+    "If the `dtype`s of `x`, and `y` differ, the output is upcast as discussed earlier.  \n",
+    "\n",
+    "Note that the `condition` is expanded into a Boolean `ndarray`. This means that the storage required to hold the condition should be taken into account whenever the function is called."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following example returns an `ndarray` of length 4, with 1 at the positions where `condition` is smaller than 3, and with -1 otherwise."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-03-23T16:18:14.396840Z",
+     "start_time": "2021-03-23T16:18:14.385134Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([1, 1, -1, -1], dtype=int16)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "condition = np.array([1, 2, 3, 4], dtype=np.uint8)\n",
+    "print(np.where(condition < 3, 1, -1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next snippet shows how values from two arrays can be fed into the output:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-03-23T16:15:29.954224Z",
+     "start_time": "2021-03-23T16:15:29.937205Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([11, 22, 3, 4], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "condition = np.array([1, 2, 3, 4], dtype=np.uint8)\n",
+    "x = np.array([11, 22, 33, 44], dtype=np.uint8)\n",
+    "y = np.array([1, 2, 3, 4], dtype=np.uint8)\n",
+    "print(np.where(condition < 3, x, y))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.8.5 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "9e4ec6f642f986afcc9e252c165e44859a62defc5c697cae6f82c2943465ec10"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/numpy-linalg.ipynb b/tulip/shared/ulab/docs/numpy-linalg.ipynb
new file mode 100644
index 000000000..e57e63a31
--- /dev/null
+++ b/tulip/shared/ulab/docs/numpy-linalg.ipynb
@@ -0,0 +1,811 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "source": [
+    "%pylab inline"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:16:40.844266Z",
+     "start_time": "2021-01-13T06:16:39.992092Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Notebook magic"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ],
+   "outputs": [],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:16:40.857076Z",
+     "start_time": "2021-01-13T06:16:40.852721Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ],
+   "outputs": [],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:16:40.947944Z",
+     "start_time": "2021-01-13T06:16:40.865720Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## pyboard"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ],
+   "outputs": [],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ],
+   "outputs": [],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "__END_OF_DEFS__"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "# numpy.linalg\n",
+    "\n",
+    "Functions in the `linalg` module can be called by prefixing them with `numpy.linalg.`. The module defines the following six functions:\n",
+    "\n",
+    "1. [numpy.linalg.cholesky](#cholesky)\n",
+    "1. [numpy.linalg.det](#det)\n",
+    "1. [numpy.linalg.eig](#eig)\n",
+    "1. [numpy.linalg.inv](#inv)\n",
+    "1. [numpy.linalg.norm](#norm)\n",
+    "1. [numpy.linalg.qr](#qr)"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## cholesky\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.linalg.cholesky.html\n",
+    "\n",
+    "The function of the Cholesky decomposition takes a positive definite, symmetric square matrix as its single argument, and returns the *square root matrix* in the lower triangular form. If the input argument does not fulfill the positivity or symmetry condition, a `ValueError` is raised."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[25, 15, -5], [15, 18,  0], [-5,  0, 11]])\n",
+    "print('a: ', a)\n",
+    "print('\\n' + '='*20 + '\\nCholesky decomposition\\n', np.linalg.cholesky(a))"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "a:  array([[25.0, 15.0, -5.0],\n",
+      "\t [15.0, 18.0, 0.0],\n",
+      "\t [-5.0, 0.0, 11.0]], dtype=float)\n",
+      "\n",
+      "====================\n",
+      "Cholesky decomposition\n",
+      " array([[5.0, 0.0, 0.0],\n",
+      "\t [3.0, 3.0, 0.0],\n",
+      "\t [-1.0, 1.0, 3.0]], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-03-10T19:25:21.754166Z",
+     "start_time": "2020-03-10T19:25:21.740726Z"
+    },
+    "scrolled": true
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## det\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.det.html\n",
+    "\n",
+    "The `det` function takes a square matrix as its single argument, and calculates the determinant. The calculation is based on successive elimination of the matrix elements, and the return value is a float, even if the input array was of integer type."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 495,
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2], [3, 4]], dtype=np.uint8)\n",
+    "print(np.linalg.det(a))"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "-2.0\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-19T13:27:24.246995Z",
+     "start_time": "2019-10-19T13:27:24.228698Z"
+    },
+    "scrolled": true
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Benchmark\n",
+    "\n",
+    "Since the routine for calculating the determinant is pretty much the same as for finding the [inverse of a matrix](#inv), the execution times are similar:"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 557,
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "@timeit\n",
+    "def matrix_det(m):\n",
+    "    return np.linalg.inv(m)\n",
+    "\n",
+    "m = np.array([[1, 2, 3, 4, 5, 6, 7, 8], [0, 5, 6, 4, 5, 6, 4, 5], \n",
+    "              [0, 0, 9, 7, 8, 9, 7, 8], [0, 0, 0, 10, 11, 12, 11, 12], \n",
+    "             [0, 0, 0, 0, 4, 6, 7, 8], [0, 0, 0, 0, 0, 5, 6, 7], \n",
+    "             [0, 0, 0, 0, 0, 0, 7, 6], [0, 0, 0, 0, 0, 0, 0, 2]])\n",
+    "\n",
+    "matrix_det(m)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "execution time:  294  us\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-20T07:14:59.778987Z",
+     "start_time": "2019-10-20T07:14:59.740021Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## eig\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html\n",
+    "\n",
+    "The `eig` function calculates the eigenvalues and the eigenvectors of a real, symmetric square matrix. If the matrix is not symmetric, a `ValueError` will be raised. The function takes a single argument, and returns a tuple with the eigenvalues, and eigenvectors. With the help of the eigenvectors, amongst other things, you can implement sophisticated stabilisation routines for robots."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2, 1, 4], [2, 5, 3, 5], [1, 3, 6, 1], [4, 5, 1, 7]], dtype=np.uint8)\n",
+    "x, y = np.linalg.eig(a)\n",
+    "print('eigenvectors of a:\\n', y)\n",
+    "print('\\neigenvalues of a:\\n', x)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "eigenvectors of a:\n",
+      " array([[0.8151560042509081, -0.4499411232970823, -0.1644660242574522, 0.3256141906686505],\n",
+      "       [0.2211334179893007, 0.7846992598235538, 0.08372081379922657, 0.5730077734355189],\n",
+      "       [-0.1340114162071679, -0.3100776411558949, 0.8742786816656, 0.3486109343758527],\n",
+      "       [-0.5183258053659028, -0.292663481927148, -0.4489749870391468, 0.6664142156731531]], dtype=float)\n",
+      "\n",
+      "eigenvalues of a:\n",
+      " array([-1.165288365404889, 0.8029365530314914, 5.585625756072663, 13.77672605630074], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-03T20:25:26.952290Z",
+     "start_time": "2020-11-03T20:25:26.930184Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "The same matrix diagonalised with `numpy` yields:"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "source": [
+    "a = array([[1, 2, 1, 4], [2, 5, 3, 5], [1, 3, 6, 1], [4, 5, 1, 7]], dtype=np.uint8)\n",
+    "x, y = eig(a)\n",
+    "print('eigenvectors of a:\\n', y)\n",
+    "print('\\neigenvalues of a:\\n', x)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "eigenvectors of a:\n",
+      " [[ 0.32561419  0.815156    0.44994112 -0.16446602]\n",
+      " [ 0.57300777  0.22113342 -0.78469926  0.08372081]\n",
+      " [ 0.34861093 -0.13401142  0.31007764  0.87427868]\n",
+      " [ 0.66641421 -0.51832581  0.29266348 -0.44897499]]\n",
+      "\n",
+      "eigenvalues of a:\n",
+      " [13.77672606 -1.16528837  0.80293655  5.58562576]\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-03T20:13:27.236159Z",
+     "start_time": "2020-11-03T20:13:27.069967Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "When comparing results, we should keep two things in mind: \n",
+    "\n",
+    "1. the eigenvalues and eigenvectors are not necessarily sorted in the same way\n",
+    "2. an eigenvector can be multiplied by an arbitrary non-zero scalar, and it is still an eigenvector with the same eigenvalue. This is why all signs of the eigenvector belonging to 0.80 are flipped in `ulab` with respect to `numpy`. This difference, however, is of absolutely no consequence. "
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Computation expenses\n",
+    "\n",
+    "Since the function is based on [Givens rotations](https://en.wikipedia.org/wiki/Givens_rotation) and runs till convergence is achieved, or till the maximum number of allowed rotations is exhausted, there is no universal estimate for the time required to find the eigenvalues. However, an order of magnitude can, at least, be guessed based on the measurement below:"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 559,
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "@timeit\n",
+    "def matrix_eig(a):\n",
+    "    return np.linalg.eig(a)\n",
+    "\n",
+    "a = np.array([[1, 2, 1, 4], [2, 5, 3, 5], [1, 3, 6, 1], [4, 5, 1, 7]], dtype=np.uint8)\n",
+    "\n",
+    "matrix_eig(a)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "execution time:  111  us\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-20T07:18:52.520515Z",
+     "start_time": "2019-10-20T07:18:52.499653Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## inv\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.linalg.inv.html\n",
+    "\n",
+    "A square matrix, provided that it is not singular, can be inverted by calling the `inv` function that takes a single argument. The inversion is based on successive elimination of elements in the lower left triangle, and raises a `ValueError` exception, if the matrix turns out to be singular (i.e., one of the diagonal entries is zero)."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "m = np.array([[1, 2, 3, 4], [4, 5, 6, 4], [7, 8.6, 9, 4], [3, 4, 5, 6]])\n",
+    "\n",
+    "print(np.linalg.inv(m))"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "array([[-2.166666666666667, 1.500000000000001, -0.8333333333333337, 1.0],\n",
+      "       [1.666666666666667, -3.333333333333335, 1.666666666666668, -0.0],\n",
+      "       [0.1666666666666666, 2.166666666666668, -0.8333333333333337, -1.0],\n",
+      "       [-0.1666666666666667, -0.3333333333333333, 0.0, 0.5]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:17:13.053816Z",
+     "start_time": "2021-01-13T06:17:13.038403Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Computation expenses\n",
+    "\n",
+    "Note that inverting a matrix requires approximately twice as many floats (RAM) as there are entries in the original matrix, and approximately as many operations as there are entries. Here are a couple of numbers: "
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 552,
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "@timeit\n",
+    "def invert_matrix(m):\n",
+    "    return np.linalg.inv(m)\n",
+    "\n",
+    "m = np.array([[1, 2,], [4, 5]])\n",
+    "print('2 by 2 matrix:')\n",
+    "invert_matrix(m)\n",
+    "\n",
+    "m = np.array([[1, 2, 3, 4], [4, 5, 6, 4], [7, 8.6, 9, 4], [3, 4, 5, 6]])\n",
+    "print('\\n4 by 4 matrix:')\n",
+    "invert_matrix(m)\n",
+    "\n",
+    "m = np.array([[1, 2, 3, 4, 5, 6, 7, 8], [0, 5, 6, 4, 5, 6, 4, 5], \n",
+    "              [0, 0, 9, 7, 8, 9, 7, 8], [0, 0, 0, 10, 11, 12, 11, 12], \n",
+    "             [0, 0, 0, 0, 4, 6, 7, 8], [0, 0, 0, 0, 0, 5, 6, 7], \n",
+    "             [0, 0, 0, 0, 0, 0, 7, 6], [0, 0, 0, 0, 0, 0, 0, 2]])\n",
+    "print('\\n8 by 8 matrix:')\n",
+    "invert_matrix(m)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "2 by 2 matrix:\n",
+      "execution time:  65  us\n",
+      "\n",
+      "4 by 4 matrix:\n",
+      "execution time:  105  us\n",
+      "\n",
+      "8 by 8 matrix:\n",
+      "execution time:  299  us\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-20T07:10:39.190734Z",
+     "start_time": "2019-10-20T07:10:39.138872Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "The above-mentioned scaling is not obeyed strictly. The reason for the discrepancy is that the function call is still the same for all three cases: the input must be inspected, the output array must be created, and so on. "
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## norm\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html\n",
+    "\n",
+    "The function takes a vector or matrix without options, and returns its 2-norm, i.e., the square root of the sum of the squares of the elements."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5])\n",
+    "b = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n",
+    "\n",
+    "print('norm of a:', np.linalg.norm(a))\n",
+    "print('norm of b:', np.linalg.norm(b))"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "norm of a: 7.416198487095663\n",
+      "norm of b: 16.88194301613414\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-07-23T20:41:10.341349Z",
+     "start_time": "2020-07-23T20:41:10.327624Z"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## qr\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.linalg.qr.html\n",
+    "\n",
+    "\n",
+    "The function computes the QR decomposition of a matrix `m` of dimensions `(M, N)`, i.e., it returns two matrices, `q` and `r`, such that `m = qr`, where `q` is orthonormal, and `r` is upper triangular. In addition to the input matrix, which is the first positional argument, the function accepts the `mode` keyword argument with a default value of `reduced`. If `mode` is `reduced`, `q`, and `r` are returned in the reduced representation. Otherwise, the outputs will have dimensions `(M, M)`, and `(M, N)`, respectively."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "A = np.arange(6).reshape((3, 2))\n",
+    "print('A: \\n', A)\n",
+    "\n",
+    "print('complete decomposition')\n",
+    "q, r = np.linalg.qr(A, mode='complete')\n",
+    "print('q: \\n', q)\n",
+    "print()\n",
+    "print('r: \\n', r)\n",
+    "\n",
+    "print('\\n\\nreduced decomposition')\n",
+    "q, r = np.linalg.qr(A, mode='reduced')\n",
+    "print('q: \\n', q)\n",
+    "print()\n",
+    "print('r: \\n', r)\n"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "A: \n",
+      " array([[0, 1],\n",
+      "       [2, 3],\n",
+      "       [4, 5]], dtype=int16)\n",
+      "complete decomposition\n",
+      "q: \n",
+      " array([[0.0, -0.9128709291752768, 0.408248290463863],\n",
+      "       [-0.447213595499958, -0.3651483716701107, -0.8164965809277261],\n",
+      "       [-0.8944271909999159, 0.1825741858350553, 0.408248290463863]], dtype=float64)\n",
+      "\n",
+      "r: \n",
+      " array([[-4.47213595499958, -5.813776741499454],\n",
+      "       [0.0, -1.095445115010332],\n",
+      "       [0.0, 0.0]], dtype=float64)\n",
+      "\n",
+      "\n",
+      "reduced decomposition\n",
+      "q: \n",
+      " array([[0.0, -0.9128709291752768],\n",
+      "       [-0.447213595499958, -0.3651483716701107],\n",
+      "       [-0.8944271909999159, 0.1825741858350553]], dtype=float64)\n",
+      "\n",
+      "r: \n",
+      " array([[-4.47213595499958, -5.813776741499454],\n",
+      "       [0.0, -1.095445115010332]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3.8.5 64-bit ('base': conda)"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  },
+  "interpreter": {
+   "hash": "ce9a02f9f7db620716422019cafa4bc1786ca85daa298b819f6da075e7993842"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/tulip/shared/ulab/docs/numpy-random.ipynb b/tulip/shared/ulab/docs/numpy-random.ipynb
new file mode 100644
index 000000000..4c9aa2a41
--- /dev/null
+++ b/tulip/shared/ulab/docs/numpy-random.ipynb
@@ -0,0 +1,492 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-01T09:27:13.438054Z",
+     "start_time": "2020-05-01T09:27:13.191491Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T18:24:48.499467Z",
+     "start_time": "2022-01-07T18:24:48.488004Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-07-23T20:31:25.296014Z",
+     "start_time": "2020-07-23T20:31:25.265937Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/build-2/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# numpy.random\n",
+    "\n",
+    "Random numbers drawn from specific distributions can be generated by instantiating a `Generator` object, and calling its methods. The module defines the following three functions:\n",
+    "\n",
+    "1. [numpy.random.Generator.normal](#normal)\n",
+    "1. [numpy.random.Generator.random](#random)\n",
+    "1. [numpy.random.Generator.uniform](#uniform)\n",
+    "\n",
+    "\n",
+    "The `Generator` object, when instantiated, takes a single integer as its argument. This integer is the seed, which will be fed to the 32-bit or 64-bit routine. More details can be found under https://www.pcg-random.org/index.html. The generator is a standard `python` object that keeps track of its state.\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/random/index.html"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## normal\n",
+    "\n",
+    "A random set of numbers from the `normal` distribution can be generated by calling the generator's `normal` method. The method takes three optional arguments, `loc=0.0`, the centre of the distribution, `scale=1.0`, the width of the distribution, and `size=None`, a tuple containing the shape of the returned array. In case `size` is `None`, a single floating point number is returned.\n",
+    "\n",
+    "The `normal` method of the `Generator` object is based on the [Box-Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform).\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/random/generated/numpy.random.Generator.normal.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-19T13:08:17.647416Z",
+     "start_time": "2019-10-19T13:08:17.597456Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Gnerator() at 0x7fa9dae05340\n",
+      "-6.285246229407202\n",
+      "array([[24.95816273705659, 15.2670302229426, 14.81001577336041],\n",
+      "       [20.17589833056986, 23.14539083787544, 26.37772041367461],\n",
+      "       [41.94894234387275, 37.11027030608206, 25.65889562100477]], dtype=float64)\n",
+      "array([[21.52562779033434, 12.74685887865834, 24.08404670765186],\n",
+      "       [4.728112596365396, 7.667757906857082, 21.61576094228444],\n",
+      "       [2.432338873595267, 27.75945683572574, 5.730827584659245]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "rng = np.random.Generator(123456)\n",
+    "print(rng)\n",
+    "\n",
+    "# return single number from a distribution of scale 1, and location 0\n",
+    "print(rng.normal())\n",
+    "\n",
+    "print(rng.normal(loc=20.0, scale=10.0, size=(3,3)))\n",
+    "# same as above, with positional arguments\n",
+    "print(rng.normal(20.0, 10.0, (3,3)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## random\n",
+    "\n",
+    "A random set of numbers from the uniform distribution in the interval [0, 1] can be generated by calling the generator's `random` method. The method takes two optional arguments, `size=None`, a tuple containing the shape of the returned array, and `out`. In case `size` is `None`, a single floating point number is returned. \n",
+    "\n",
+    "`out` can be used, if a floating point array is available. An exception will be raised, if the array is not of `float` `dtype`, or if both `size` and `out` are supplied, and there is a conflict in their shapes.\n",
+    "\n",
+    "If `size` is `None`, a single floating point number will be returned.\n",
+    "\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/random/generated/numpy.random.Generator.random.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Gnerator() at 0x7f299de05340\n",
+      "6.384615058863119e-11\n",
+      "\n",
+      " array([[0.4348157846574171, 0.7906325931024071, 0.878697619856133],\n",
+      "       [0.8738606263361598, 0.4946080034142021, 0.7765890156101152],\n",
+      "       [0.1770783715717074, 0.02080447648492112, 0.1053837559005948]], dtype=float64)\n",
+      "\n",
+      "buffer array before:\n",
+      " array([[0.0, 1.0, 2.0],\n",
+      "       [3.0, 4.0, 5.0],\n",
+      "       [6.0, 7.0, 8.0]], dtype=float64)\n",
+      "\n",
+      "buffer array after:\n",
+      " array([[0.8508024287393201, 0.9848489829156055, 0.7598167589604003],\n",
+      "       [0.782995698302952, 0.2866337782847831, 0.7915884498022229],\n",
+      "       [0.4614071706315902, 0.4792657443088592, 0.1581582066230718]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "rng = np.random.Generator(123456)\n",
+    "print(rng)\n",
+    "\n",
+    "# returning new objects\n",
+    "print(rng.random())\n",
+    "print('\\n', rng.random(size=(3,3)))\n",
+    "\n",
+    "# supplying a buffer\n",
+    "a = np.array(range(9), dtype=np.float).reshape((3,3))\n",
+    "print('\\nbuffer array before:\\n', a)\n",
+    "rng.random(out=a)\n",
+    "print('\\nbuffer array after:\\n', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## uniform\n",
+    "\n",
+    "`uniform` is similar to `random`, except that the interval over which the numbers are distributed can be specified, while the `out` argument cannot. In addition to `size` specifying the shape of the output, `low=0.0`, and `high=1.0` are accepted arguments. With the indicated defaults, `uniform` is identical to `random`, which can be seen from the fact that the first 3-by-3 tensor below is the same as the one produced by `rng.random(size=(3,3))` above.\n",
+    "\n",
+    "\n",
+    "If `size` is `None`, a single floating point number will be returned.\n",
+    "\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/random/generated/numpy.random.Generator.uniform.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Gnerator() at 0x7f1891205340\n",
+      "6.384615058863119e-11\n",
+      "\n",
+      " array([[0.4348157846574171, 0.7906325931024071, 0.878697619856133],\n",
+      "       [0.8738606263361598, 0.4946080034142021, 0.7765890156101152],\n",
+      "       [0.1770783715717074, 0.02080447648492112, 0.1053837559005948]], dtype=float64)\n",
+      "\n",
+      " array([[18.5080242873932, 19.84848982915605, 17.598167589604],\n",
+      "       [17.82995698302952, 12.86633778284783, 17.91588449802223],\n",
+      "       [14.6140717063159, 14.79265744308859, 11.58158206623072]], dtype=float64)\n",
+      "\n",
+      " array([[14.3380400319162, 12.72487657409978, 15.77119643621117],\n",
+      "       [13.61835831436355, 18.96062889255558, 15.78847796795966],\n",
+      "       [12.59435855187034, 17.68262037443622, 14.77943040598734]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "rng = np.random.Generator(123456)\n",
+    "print(rng)\n",
+    "\n",
+    "print(rng.uniform())\n",
+    "# returning numbers between 0, and 1\n",
+    "print('\\n', rng.uniform(size=(3,3)))\n",
+    "\n",
+    "# returning numbers between 10, and 20\n",
+    "print('\\n', rng.uniform(low=10, high=20, size=(3,3)))\n",
+    "\n",
+    "# same as above, without the keywords\n",
+    "print('\\n', rng.uniform(10, 20, (3,3)))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/numpy-universal.ipynb b/tulip/shared/ulab/docs/numpy-universal.ipynb
new file mode 100644
index 000000000..1d5764b89
--- /dev/null
+++ b/tulip/shared/ulab/docs/numpy-universal.ipynb
@@ -0,0 +1,888 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T18:54:58.722373Z",
+     "start_time": "2021-01-13T18:54:57.178438Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:10:30.696795Z",
+     "start_time": "2022-01-07T19:10:30.690003Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:10:30.785887Z",
+     "start_time": "2022-01-07T19:10:30.710912Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/build-2/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Universal functions\n",
+    "\n",
+    "Standard mathematical functions can be calculated on any scalar, scalar-valued iterable (ranges, lists, tuples containing numbers), and on `ndarray`s without having to change the call signature. In all cases the functions return a new `ndarray` of typecode `float` (since these functions usually generate float values, anyway). The only exceptions to this rule are the `exp`, and `sqrt` functions, which, if `ULAB_SUPPORTS_COMPLEX` is set to 1 in [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h), can return complex arrays, depending on the argument. All functions execute faster with `ndarray` arguments than with iterables, because the values of the input vector can be extracted faster. \n",
+    "\n",
+    "At present, the following functions are supported (starred functions can operate on, or can return complex arrays):\n",
+    "\n",
+    "`acos`, `acosh`, `arctan2`, `around`, `asin`, `asinh`, `atan`, `atanh`, `ceil`, `cos`, `degrees`, `exp*`, `expm1`, `floor`, `log`, `log10`, `log2`, `radians`, `sin`, `sinc`, `sinh`, `sqrt*`, `tan`, `tanh`.\n",
+    "\n",
+    "These functions are applied element-wise to the arguments, thus, e.g., the exponential of a matrix cannot be calculated in this way, only the exponential of the matrix entries.\n",
+    "\n",
+    "In order to avoid repeated memory allocations, functions can take the `out=None` optional argument, which must be a floating point `ndarray` of the same size as the input `array`. If these conditions are not fulfilled, an exception will be raised. If `out=None`, a new array will be created upon each invocation of the function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T19:11:07.579601Z",
+     "start_time": "2021-01-13T19:11:07.554672Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t range(0, 9)\n",
+      "exp(a):\t array([1.0, 2.718281828459045, 7.38905609893065, 20.08553692318767, 54.59815003314424, 148.4131591025766, 403.4287934927351, 1096.633158428459, 2980.957987041728], dtype=float64)\n",
+      "\n",
+      "=============\n",
+      "b:\n",
+      " array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "exp(b):\n",
+      " array([1.0, 2.718281828459045, 7.38905609893065, 20.08553692318767, 54.59815003314424, 148.4131591025766, 403.4287934927351, 1096.633158428459, 2980.957987041728], dtype=float64)\n",
+      "\n",
+      "=============\n",
+      "c:\n",
+      " array([[0.0, 1.0, 2.0],\n",
+      "       [3.0, 4.0, 5.0],\n",
+      "       [6.0, 7.0, 8.0]], dtype=float64)\n",
+      "exp(c):\n",
+      " array([[1.0, 2.718281828459045, 7.38905609893065],\n",
+      "       [20.08553692318767, 54.59815003314424, 148.4131591025766],\n",
+      "       [403.4287934927351, 1096.633158428459, 2980.957987041728]], dtype=float64)\n",
+      "\n",
+      "d before invoking the function:\n",
+      " array([[0.0, 1.0, 2.0],\n",
+      "       [3.0, 4.0, 5.0],\n",
+      "       [6.0, 7.0, 8.0]], dtype=float64)\n",
+      "\n",
+      "d afteri nvoking the function:\n",
+      " array([[1.0, 2.718281828459045, 7.38905609893065],\n",
+      "       [20.08553692318767, 54.59815003314424, 148.4131591025766],\n",
+      "       [403.4287934927351, 1096.633158428459, 2980.957987041728]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = range(9)\n",
+    "b = np.array(a)\n",
+    "\n",
+    "# works with ranges, lists, tuples etc.\n",
+    "print('a:\\t', a)\n",
+    "print('exp(a):\\t', np.exp(a))\n",
+    "\n",
+    "# with 1D arrays\n",
+    "print('\\n=============\\nb:\\n', b)\n",
+    "print('exp(b):\\n', np.exp(b))\n",
+    "\n",
+    "# as well as with matrices\n",
+    "c = np.array(range(9)).reshape((3, 3))\n",
+    "print('\\n=============\\nc:\\n', c)\n",
+    "print('exp(c):\\n', np.exp(c))\n",
+    "\n",
+    "# using the `out` argument\n",
+    "d = np.array(range(9)).reshape((3, 3))\n",
+    "\n",
+    "print('\\nd before invoking the function:\\n', d)\n",
+    "np.exp(c, out=d)\n",
+    "print('\\nd afteri nvoking the function:\\n', d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Computation expenses\n",
+    "\n",
+    "The overhead for calculating with micropython iterables is quite significant: for the 1000 samples below, the difference is more than 800 microseconds, because internally the function has to create the `ndarray` for the output, has to fetch the iterable's items of unknown type, and then convert them to floats. All these steps are skipped for `ndarray`s, because these pieces of information are already known. \n",
+    "\n",
+    "Doing the same with `list` comprehension requires 30 times more time than with the `ndarray`, which would become even more, if we converted the resulting list to an `ndarray`. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:45.696282Z",
+     "start_time": "2020-05-07T07:35:45.629909Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "iterating over ndarray in ulab\r\n",
+      "execution time:  441  us\r\n",
+      "\r\n",
+      "iterating over list in ulab\r\n",
+      "execution time:  1266  us\r\n",
+      "\r\n",
+      "iterating over list in python\r\n",
+      "execution time:  11379  us\r\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "import math\n",
+    "\n",
+    "a = [0]*1000\n",
+    "b = np.array(a)\n",
+    "\n",
+    "@timeit\n",
+    "def timed_vector(iterable):\n",
+    "    return np.exp(iterable)\n",
+    "\n",
+    "@timeit\n",
+    "def timed_list(iterable):\n",
+    "    return [math.exp(i) for i in iterable]\n",
+    "\n",
+    "print('iterating over ndarray in ulab')\n",
+    "timed_vector(b)\n",
+    "\n",
+    "print('\\niterating over list in ulab')\n",
+    "timed_vector(a)\n",
+    "\n",
+    "print('\\niterating over list in python')\n",
+    "timed_list(a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## arctan2\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.arctan2.html\n",
+    "\n",
+    "The two-argument inverse tangent function is also part of the `vector` sub-module. The function implements broadcasting as discussed in the section on `ndarray`s. Scalars (`micropython` integers or floats) are also allowed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T19:15:08.215912Z",
+     "start_time": "2021-01-13T19:15:08.189806Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([1.0, 2.2, 33.33, 444.444], dtype=float64)\n",
+      "\n",
+      "arctan2(a, 1.0)\n",
+      " array([0.7853981633974483, 1.14416883366802, 1.5408023243361, 1.568546328341769], dtype=float64)\n",
+      "\n",
+      "arctan2(1.0, a)\n",
+      " array([0.7853981633974483, 0.426627493126876, 0.02999400245879636, 0.002249998453127392], dtype=float64)\n",
+      "\n",
+      "arctan2(a, a): \n",
+      " array([0.7853981633974483, 0.7853981633974483, 0.7853981633974483, 0.7853981633974483], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2.2, 33.33, 444.444])\n",
+    "print('a:\\n', a)\n",
+    "print('\\narctan2(a, 1.0)\\n', np.arctan2(a, 1.0))\n",
+    "print('\\narctan2(1.0, a)\\n', np.arctan2(1.0, a))\n",
+    "print('\\narctan2(a, a): \\n', np.arctan2(a, a))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## around\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.around.html\n",
+    "\n",
+    "`numpy`'s `around` function can also be found in the `vector` sub-module. The function implements the `decimals` keyword argument with default value `0`. The first argument must be an `ndarray`. If this is not the case, the function raises a `TypeError` exception. Note that `numpy` accepts general iterables. The `out` keyword argument known from `numpy` is not accepted. The function always returns an ndarray of type `mp_float_t`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T19:19:46.728823Z",
+     "start_time": "2021-01-13T19:19:46.703348Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([1.0, 2.2, 33.33, 444.444], dtype=float64)\n",
+      "\n",
+      "decimals = 0\t array([1.0, 2.0, 33.0, 444.0], dtype=float64)\n",
+      "\n",
+      "decimals = 1\t array([1.0, 2.2, 33.3, 444.4], dtype=float64)\n",
+      "\n",
+      "decimals = -1\t array([0.0, 0.0, 30.0, 440.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2.2, 33.33, 444.444])\n",
+    "print('a:\\t\\t', a)\n",
+    "print('\\ndecimals = 0\\t', np.around(a, decimals=0))\n",
+    "print('\\ndecimals = 1\\t', np.around(a, decimals=1))\n",
+    "print('\\ndecimals = -1\\t', np.around(a, decimals=-1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## exp\n",
+    "\n",
+    "If `ULAB_SUPPORTS_COMPLEX` is set to 1 in [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h), the exponential function can also take complex arrays as its argument, in which case the return value is also complex."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T18:41:51.865779Z",
+     "start_time": "2022-01-07T18:41:51.843897Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([1.0, 2.0, 3.0], dtype=float64)\n",
+      "exp(a):\t\t array([2.718281828459045, 7.38905609893065, 20.08553692318767], dtype=float64)\n",
+      "\n",
+      "b:\t\t array([1.0+1.0j, 2.0+2.0j, 3.0+3.0j], dtype=complex)\n",
+      "exp(b):\t\t array([1.468693939915885+2.287355287178842j, -3.074932320639359+6.71884969742825j, -19.88453084414699+2.834471132487004j], dtype=complex)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3])\n",
+    "print('a:\\t\\t', a)\n",
+    "print('exp(a):\\t\\t', np.exp(a))\n",
+    "print()\n",
+    "\n",
+    "b = np.array([1+1j, 2+2j, 3+3j], dtype=np.complex)\n",
+    "print('b:\\t\\t', b)\n",
+    "print('exp(b):\\t\\t', np.exp(b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## sqrt\n",
+    "\n",
+    "If `ULAB_SUPPORTS_COMPLEX` is set to 1 in [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h), the square root function can also take complex arrays as its argument, in which case the return value is also complex. If the input is real, but the results might be complex, the user is supposed to specify the output `dtype` in the function call. Otherwise, the square roots of negative numbers will result in `NaN`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T18:45:26.554520Z",
+     "start_time": "2022-01-07T18:45:26.543552Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([1.0, -1.0], dtype=float64)\n",
+      "sqrt(a):\t\t array([1.0, nan], dtype=float64)\n",
+      "sqrt(a):\t\t array([1.0+0.0j, 0.0+1.0j], dtype=complex)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, -1])\n",
+    "print('a:\\t\\t', a)\n",
+    "print('sqrt(a):\\t\\t', np.sqrt(a))\n",
+    "print('sqrt(a):\\t\\t', np.sqrt(a, dtype=np.complex))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Vectorising generic python functions\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.vectorize.html\n",
+    "\n",
+    "The examples above use factory functions. In fact, they are nothing but the vectorised versions of the standard mathematical functions. User-defined `python` functions can also be vectorised with the help of `vectorize`. This function takes a positional argument, namely, the `python` function that you want to vectorise, and a non-mandatory keyword argument, `otypes`, which determines the `dtype` of the output array. The `otypes` must be `None` (default), or any of the `dtypes` defined in `ulab`. With `None`, the output is automatically turned into a float array. \n",
+    "\n",
+    "The return value of `vectorize` is a `micropython` object that can be called as a standard function, but which now accepts either a scalar, an `ndarray`, or a generic `micropython` iterable as its sole argument. Note that the function that is to be vectorised must have a single argument."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T19:16:55.709617Z",
+     "start_time": "2021-01-13T19:16:55.688222Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "f on a scalar:       array([1936.0], dtype=float64)\n",
+      "f on an ndarray:     array([1.0, 4.0, 9.0, 16.0], dtype=float64)\n",
+      "f on a list:         array([4.0, 9.0, 16.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "def f(x):\n",
+    "    return x*x\n",
+    "\n",
+    "vf = np.vectorize(f)\n",
+    "\n",
+    "# calling with a scalar\n",
+    "print('{:20}'.format('f on a scalar: '), vf(44.0))\n",
+    "\n",
+    "# calling with an ndarray\n",
+    "a = np.array([1, 2, 3, 4])\n",
+    "print('{:20}'.format('f on an ndarray: '), vf(a))\n",
+    "\n",
+    "# calling with a list\n",
+    "print('{:20}'.format('f on a list: '), vf([2, 3, 4]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As mentioned, the `dtype` of the resulting `ndarray` can be specified via the `otypes` keyword. The value is bound to the function object that `vectorize` returns, therefore, if the same function is to be vectorised with different output types, then for each type a new function object must be created."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T19:19:36.090837Z",
+     "start_time": "2021-01-13T19:19:36.069088Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "output is uint8:     array([1, 4, 9, 16], dtype=uint8)\n",
+      "output is float:     array([1.0, 4.0, 9.0, 16.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "l = [1, 2, 3, 4]\n",
+    "def f(x):\n",
+    "    return x*x\n",
+    "\n",
+    "vf1 = np.vectorize(f, otypes=np.uint8)\n",
+    "vf2 = np.vectorize(f, otypes=np.float)\n",
+    "\n",
+    "print('{:20}'.format('output is uint8: '), vf1(l))\n",
+    "print('{:20}'.format('output is float: '), vf2(l))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `otypes` keyword argument cannot be used for type coercion: if the function evaluates to a float, but `otypes` would dictate an integer type, an exception will be raised:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-06T22:21:43.616220Z",
+     "start_time": "2020-05-06T22:21:43.601280Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "integer list:        array([1, 4, 9, 16], dtype=uint8)\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/dev/shm/micropython.py\", line 14, in <module>\n",
+      "TypeError: can't convert float to int\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "int_list = [1, 2, 3, 4]\n",
+    "float_list = [1.0, 2.0, 3.0, 4.0]\n",
+    "def f(x):\n",
+    "    return x*x\n",
+    "\n",
+    "vf = np.vectorize(f, otypes=np.uint8)\n",
+    "\n",
+    "print('{:20}'.format('integer list: '), vf(int_list))\n",
+    "# this will raise a TypeError exception\n",
+    "print(vf(float_list))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Benchmarks\n",
+    "\n",
+    "It should be pointed out that the `vectorize` function produces the pseudo-vectorised version of the `python` function that is fed into it, i.e., on the C level, the same `python` function is called, with the all-encompassing `mp_obj_t` type arguments, and all that happens is that the `for` loop in `[f(i) for i in iterable]` runs purely in C. Since type checking and type conversion in `f()` are expensive, the speed-up is not as spectacular as when iterating over an `ndarray` with a factory function: a gain of approximately 30% can be expected when a native `python` type (e.g., `list`) is returned by the function, and this becomes around 50% (a factor of 2) if conversion to an `ndarray` is also counted.\n",
+    "\n",
+    "The following code snippet calculates the squares of 1000 numbers with the vectorised function (which returns an `ndarray`), with `list` comprehension, and with `list` comprehension followed by conversion to an `ndarray`. For comparison, the execution time is also measured for the case when the squares are calculated entirely in `ulab`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:32:20.048553Z",
+     "start_time": "2020-05-07T07:32:19.951851Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "vectorised function\r\n",
+      "execution time:  7237  us\r\n",
+      "\r\n",
+      "list comprehension\r\n",
+      "execution time:  10248  us\r\n",
+      "\r\n",
+      "list comprehension + ndarray conversion\r\n",
+      "execution time:  12562  us\r\n",
+      "\r\n",
+      "squaring an ndarray entirely in ulab\r\n",
+      "execution time:  560  us\r\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "def f(x):\n",
+    "    return x*x\n",
+    "\n",
+    "vf = np.vectorize(f)\n",
+    "\n",
+    "@timeit\n",
+    "def timed_vectorised_square(iterable):\n",
+    "    return vf(iterable)\n",
+    "\n",
+    "@timeit\n",
+    "def timed_python_square(iterable):\n",
+    "    return [f(i) for i in iterable]\n",
+    "\n",
+    "@timeit\n",
+    "def timed_ndarray_square(iterable):\n",
+    "    return np.array([f(i) for i in iterable])\n",
+    "\n",
+    "@timeit\n",
+    "def timed_ulab_square(ndarray):\n",
+    "    return ndarray**2\n",
+    "\n",
+    "print('vectorised function')\n",
+    "squares = timed_vectorised_square(range(1000))\n",
+    "\n",
+    "print('\\nlist comprehension')\n",
+    "squares = timed_python_square(range(1000))\n",
+    "\n",
+    "print('\\nlist comprehension + ndarray conversion')\n",
+    "squares = timed_ndarray_square(range(1000))\n",
+    "\n",
+    "print('\\nsquaring an ndarray entirely in ulab')\n",
+    "a = np.array(range(1000))\n",
+    "squares = timed_ulab_square(a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "From the comparisons above, it is obvious that `python` functions should only be vectorised when the same effect cannot be achieved in `ulab` alone. However, although the time savings are not significant, there is still a good reason for caring about vectorised functions. Namely, user-defined `python` functions become universal, i.e., they can accept generic iterables as well as `ndarray`s as their arguments. A vectorised function is still a one-liner, resulting in transparent and elegant code.\n",
+    "\n",
+    "A final comment on this subject: the `f(x)` that we defined is a *generic* `python` function. This means that it is not required that it just crunches some numbers. It has to return a number object, but it can still access the hardware in the meantime. So, e.g., \n",
+    "\n",
+    "```python\n",
+    "\n",
+    "led = pyb.LED(2)\n",
+    "\n",
+    "def f(x):\n",
+    "    if x < 100:\n",
+    "        led.toggle()\n",
+    "    return x*x\n",
+    "```\n",
+    "\n",
+    "is perfectly valid code."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/scipy-integrate.ipynb b/tulip/shared/ulab/docs/scipy-integrate.ipynb
new file mode 100644
index 000000000..232202318
--- /dev/null
+++ b/tulip/shared/ulab/docs/scipy-integrate.ipynb
@@ -0,0 +1,510 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:11:12.111639Z",
+     "start_time": "2021-01-12T16:11:11.914041Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-29T20:50:20.813162Z",
+     "start_time": "2022-01-29T20:50:20.794562Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-29T20:50:21.613220Z",
+     "start_time": "2022-01-29T20:50:21.557819Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# scipy.integrate\n",
+    "\n",
+    "This module provides a simplified subset of CPython's `scipy.integrate` module. For the sake of resource usage, the algorithms were not ported from CPython's `scipy.integrate`, but were derived from a paper found at https://www.genivia.com/qthsh.html. There are four numerical integration algorithms:\n",
+    "\n",
+    "1. [scipy.integrate.quad](#quad)\n",
+    "2. [scipy.integrate.romberg](#romberg)\n",
+    "3. [scipy.integrate.simpson](#simpson)\n",
+    "4. [scipy.integrate.tanhsinh](#tanhsinh)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Introduction\n",
+    "\n",
+    "Numerical integration works best with float64 math enabled. If you require float64 math, be sure to set `MICROPY_OBJ_REPR_A` and `MICROPY_FLOAT_IMPL_DOUBLE`. This being said, the modules work equally well using float32, albeit with reduced precision. The required error tolerance can be specified for each of the function calls using the \"eps=\" option, defaulting to the compiled in `etolerance` value (1e-14 for fp64, 1e-8 for fp32).\n",
+    "\n",
+    "The submodule can be enabled by setting `ULAB_SCIPY_HAS_INTEGRATE_MODULE` in `code/ulab.h`. As for the individual integration algorithms, you can select which to include by setting one or more of `ULAB_INTEGRATE_HAS_QUAD`, `ULAB_INTEGRATE_HAS_ROMBERG`, `ULAB_INTEGRATE_HAS_SIMPSON`, and `ULAB_INTEGRATE_HAS_TANHSINH`.\n",
+    "\n",
+    "Also note that these algorithms do not support complex numbers, although it is certainly possible to implement complex integration in MicroPython on top of this module, e.g. as in https://stackoverflow.com/questions/5965583/use-scipy-integrate-quad-to-integrate-complex-numbers.  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## quad\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.quad.html \n",
+    "\n",
+    "In CPython `scipy.integrate`, `quad` is a wrapper implementing many algorithms based on the Fortran QUADPACK package. Gauss-Kronrod is just one of them, and it is useful for most general-purpose tasks. This particular function implements an Adaptive Gauss-Kronrod (G10,K21) quadrature algorithm. The Gauss–Kronrod quadrature formula is a variant of Gaussian quadrature, in which the evaluation points are chosen so that an accurate approximation can be computed by re-using the information produced by the computation of a less accurate approximation (https://en.wikipedia.org/wiki/Gauss%E2%80%93Kronrod_quadrature_formula). \n",
+    "\n",
+    "The function takes three to five arguments: \n",
+    "\n",
+    "* f, a callable,\n",
+    "* a and b, the lower and upper integration limit, \n",
+    "* order=, the order of integration (default 5),\n",
+    "* eps=, the error tolerance (default etolerance) \n",
+    "\n",
+    "The function returns the result and the error estimate as a tuple of floats.  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-19T20:24:10.529668Z",
+     "start_time": "2020-06-19T20:24:10.520389Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "UsageError: Cell magic `%%micropython` not found.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy\n",
+    "\n",
+    "f = lambda x: x**2 + 2*x + 1\n",
+    "result = scipy.integrate.quad(f, 0, 5, order=5, eps=1e-10)\n",
+    "print (f\"result = {result}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## romberg\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.romberg.html \n",
+    "\n",
+    "This function implements the Romberg quadrature algorithm.  Romberg's method is a Newton–Cotes formula – it evaluates the integrand at equally spaced points. The integrand must have continuous derivatives, though fairly good results may be obtained if only a few derivatives exist. If it is possible to evaluate the integrand at unequally spaced points, then other methods such as Gaussian quadrature and Clenshaw–Curtis quadrature are generally more accurate (https://en.wikipedia.org/wiki/Romberg%27s_method). \n",
+    "\n",
+    "Please note: This function is deprecated as of SciPy 1.12.0 and will be removed in SciPy 1.15.0. Please use `scipy.integrate.quad` instead. \n",
+    "\n",
+    "The function takes three to five arguments: \n",
+    "\n",
+    "* f, a callable,\n",
+    "* a and b, the lower and upper integration limit, \n",
+    "* steps=, the number of steps taken to calculate (default 100),\n",
+    "* eps=, the error tolerance (default etolerance) \n",
+    "\n",
+    "The function returns the result as a float.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "UsageError: Cell magic `%%micropython` not found.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy\n",
+    "\n",
+    "f = lambda x: x**2 + 2*x + 1\n",
+    "result = scipy.integrate.romberg(f, 0, 5)\n",
+    "print (f\"result = {result}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## simpson\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.simpson.html \n",
+    "\n",
+    "This function is different from CPython's `simpson` method in that it does not take an array of function values but determines the optimal spacing of samples itself. Adaptive Simpson's method, also called adaptive Simpson's rule, is a method of numerical integration proposed by G.F. Kuncir in 1962. It is probably the first recursive adaptive algorithm for numerical integration to appear in print, although more modern adaptive methods based on Gauss–Kronrod quadrature and Clenshaw–Curtis quadrature are now generally preferred (https://en.wikipedia.org/wiki/Adaptive_Simpson%27s_method). \n",
+    "\n",
+    "The function takes three to five arguments: \n",
+    "\n",
+    "* f, a callable,\n",
+    "* a and b, the lower and upper integration limit, \n",
+    "* steps=, the number of steps taken to calculate (default 100),\n",
+    "* eps=, the error tolerance (default etolerance) \n",
+    "\n",
+    "The function returns the result as a float."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "UsageError: Cell magic `%%micropython` not found.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy\n",
+    "\n",
+    "f = lambda x: x**2 + 2*x + 1\n",
+    "result = scipy.integrate.simpson(f, 0, 5)\n",
+    "print (f\"result = {result}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## tanhsinh\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.tanhsinh.html \n",
+    "\n",
+    "In CPython `scipy.integrate`, `tanhsinh` is written in Python (https://github.com/scipy/scipy/blob/main/scipy/integrate/_tanhsinh.py). It is used in cases where Newton-Cotes, Gauss-Kronrod, and other formulae do not work due to properties of the integrand or the integration limits. (In SciPy v1.14.1, it is not a public function but it has been marked as public in SciPy v1.15.0rc1). \n",
+    "\n",
+    "This particular function implements an optimized Tanh-Sinh, Sinh-Sinh and Exp-Sinh quadrature algorithm. It is especially applied where singularities or infinite derivatives exist at one or both endpoints. The method uses hyperbolic functions in a change of variables to transform an integral on the interval x ∈ (−1, 1) to an integral on the entire real line t ∈ (−∞, ∞), the two integrals having the same value. After this transformation, the integrand decays with a double exponential rate, and thus, this method is also known as the double exponential (DE) formula (https://en.wikipedia.org/wiki/Tanh-sinh_quadrature). \n",
+    "\n",
+    "As opposed to the three algorithms mentioned before, it also supports integrals with infinite limits like the Gaussian integral (https://en.wikipedia.org/wiki/Gaussian_integral), as shown below. \n",
+    "\n",
+    "The function takes three to five arguments: \n",
+    "\n",
+    "* f, a callable,\n",
+    "* a and b, the lower and upper integration limit, \n",
+    "* levels=, the number of loops taken to calculate (default 6),\n",
+    "* eps=, the error tolerance (default: etolerance)\n",
+    "\n",
+    "The function returns the result and the error estimate as a tuple of floats.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "UsageError: Cell magic `%%micropython` not found.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy, numpy as np\n",
+    "from math import *\n",
+    "f = lambda x: exp(- x**2)\n",
+    "result = scipy.integrate.tanhsinh(f, -np.inf, np.inf)\n",
+    "print (f\"result = {result}\")\n",
+    "exact = sqrt(pi)   # which is the exact value\n",
+    "print (f\"exact value = {exact}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/scipy-linalg.ipynb b/tulip/shared/ulab/docs/scipy-linalg.ipynb
new file mode 100644
index 000000000..6adaa11b6
--- /dev/null
+++ b/tulip/shared/ulab/docs/scipy-linalg.ipynb
@@ -0,0 +1,474 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T18:54:58.722373Z",
+     "start_time": "2021-01-13T18:54:57.178438Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-05-09T05:37:22.600510Z",
+     "start_time": "2021-05-09T05:37:22.595924Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-05-09T05:37:26.429136Z",
+     "start_time": "2021-05-09T05:37:26.403191Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# scipy.linalg\n",
+    "\n",
+    "`scipy`'s `linalg` module contains two functions, `solve_triangular` and `cho_solve`. The functions can be called by prefixing them with `scipy.linalg.`.\n",
+    "\n",
+    "1. [scipy.linalg.cho_solve](#cho_solve)\n",
+    "2. [scipy.linalg.solve_triangular](#solve_triangular)"
+   ]
+  },
+  {
+   "source": [
+    "## cho_solve\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.cho_solve.html\n",
+    "\n",
+    "Solve the linear equations \n",
+    "\n",
+    "\n",
+    "\\begin{equation}\n",
+    "\\mathbf{A}\\cdot\\mathbf{x} = \\mathbf{b}\n",
+    "\\end{equation}\n",
+    "\n",
+    "given the Cholesky factorization of $\\mathbf{A}$. As opposed to `scipy`, the function simply takes the Cholesky-factorised matrix, $\\mathbf{A}$, and $\\mathbf{b}$ as inputs."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "array([-0.01388888888888906, -0.6458333333333331, 2.677083333333333, -0.01041666666666667], dtype=float64)\n\n\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "A = np.array([[3, 0, 0, 0], [2, 1, 0, 0], [1, 0, 1, 0], [1, 2, 1, 8]])\n",
+    "b = np.array([4, 2, 4, 2])\n",
+    "\n",
+    "print(spy.linalg.cho_solve(A, b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## solve_triangular\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.solve_triangular.html \n",
+    "\n",
+    "Solve the linear equation \n",
+    "\n",
+    "\\begin{equation}\n",
+    "\\mathbf{a}\\cdot\\mathbf{x} = \\mathbf{b}\n",
+    "\\end{equation}\n",
+    "\n",
+    "with the assumption that $\\mathbf{a}$ is a triangular matrix. The two positional arguments are $\\mathbf{a}$, and $\\mathbf{b}$, and the optional keyword argument is `lower` with a default value of `False`. `lower` determines whether data are taken from the lower or upper triangle of $\\mathbf{a}$. \n",
+    "\n",
+    "Note that $\\mathbf{a}$ itself does not have to be a triangular matrix: if it is not, then the values are simply taken to be 0 in the upper or lower triangle, as dictated by `lower`. However, $\\mathbf{a}\\cdot\\mathbf{x}$ will yield $\\mathbf{b}$ only, when $\\mathbf{a}$ is triangular. You should keep this in mind, when trying to establish the validity of the solution by back substitution."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-05-09T05:56:57.449996Z",
+     "start_time": "2021-05-09T05:56:57.422515Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      "\n",
+      "array([[3.0, 0.0, 0.0, 0.0],\n",
+      "       [2.0, 1.0, 0.0, 0.0],\n",
+      "       [1.0, 0.0, 1.0, 0.0],\n",
+      "       [1.0, 2.0, 1.0, 8.0]], dtype=float64)\n",
+      "\n",
+      "b:  array([4.0, 2.0, 4.0, 2.0], dtype=float64)\n",
+      "====================\n",
+      "x:  array([1.333333333333333, -0.6666666666666665, 2.666666666666667, -0.08333333333333337], dtype=float64)\n",
+      "\n",
+      "dot(a, x):  array([4.0, 2.0, 4.0, 2.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "a = np.array([[3, 0, 0, 0], [2, 1, 0, 0], [1, 0, 1, 0], [1, 2, 1, 8]])\n",
+    "b = np.array([4, 2, 4, 2])\n",
+    "\n",
+    "print('a:\\n')\n",
+    "print(a)\n",
+    "print('\\nb: ', b)\n",
+    "\n",
+    "x = spy.linalg.solve_triangular(a, b, lower=True)\n",
+    "\n",
+    "print('='*20)\n",
+    "print('x: ', x)\n",
+    "print('\\ndot(a, x): ', np.dot(a, x))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We get the same solution, $\\mathbf{x}$, with the following matrix, but the dot product of $\\mathbf{a}$ and $\\mathbf{x}$ is no longer $\\mathbf{b}$:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-05-09T06:03:30.853054Z",
+     "start_time": "2021-05-09T06:03:30.841500Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      "\n",
+      "array([[3.0, 2.0, 1.0, 0.0],\n",
+      "       [2.0, 1.0, 0.0, 1.0],\n",
+      "       [1.0, 0.0, 1.0, 4.0],\n",
+      "       [1.0, 2.0, 1.0, 8.0]], dtype=float64)\n",
+      "\n",
+      "b:  array([4.0, 2.0, 4.0, 2.0], dtype=float64)\n",
+      "====================\n",
+      "x:  array([1.333333333333333, -0.6666666666666665, 2.666666666666667, -0.08333333333333337], dtype=float64)\n",
+      "\n",
+      "dot(a, x):  array([5.333333333333334, 1.916666666666666, 3.666666666666667, 2.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "a = np.array([[3, 2, 1, 0], [2, 1, 0, 1], [1, 0, 1, 4], [1, 2, 1, 8]])\n",
+    "b = np.array([4, 2, 4, 2])\n",
+    "\n",
+    "print('a:\\n')\n",
+    "print(a)\n",
+    "print('\\nb: ', b)\n",
+    "\n",
+    "x = spy.linalg.solve_triangular(a, b, lower=True)\n",
+    "\n",
+    "print('='*20)\n",
+    "print('x: ', x)\n",
+    "print('\\ndot(a, x): ', np.dot(a, x))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/tulip/shared/ulab/docs/scipy-optimize.ipynb b/tulip/shared/ulab/docs/scipy-optimize.ipynb
new file mode 100644
index 000000000..eea97b7e8
--- /dev/null
+++ b/tulip/shared/ulab/docs/scipy-optimize.ipynb
@@ -0,0 +1,515 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:50:51.417613Z",
+     "start_time": "2021-01-08T12:50:51.208257Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:50:52.581876Z",
+     "start_time": "2021-01-08T12:50:52.567901Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:50:53.516712Z",
+     "start_time": "2021-01-08T12:50:53.454984Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# scipy.optimize\n",
+    "\n",
+    "Functions in the `optimize` module can be called by prefixing them with `scipy.optimize.`. The module defines the following three functions:\n",
+    "\n",
+    "1. [scipy.optimize.bisect](#bisect)\n",
+    "1. [scipy.optimize.fmin](#fmin)\n",
+    "1. [scipy.optimize.newton](#newton)\n",
+    "\n",
+    "Note that routines that work with user-defined functions still have to call the underlying `python` code, and therefore, gains in speed are not as significant as with other vectorised operations. As a rule of thumb, a factor of two can be expected, when compared to an optimised `python` implementation."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## bisect \n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.bisect.html\n",
+    "\n",
+    "`bisect` finds the root of a function of one variable using a simple bisection routine. It takes three positional arguments, the function itself, and two starting points. The function must have opposite signs\n",
+    "at the starting points. Returned is the position of the root.\n",
+    "\n",
+    "Two keyword arguments, `xtol`, and `maxiter` can be supplied to control the accuracy, and the number of bisections, respectively."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:58:28.444300Z",
+     "start_time": "2021-01-08T12:58:28.421989Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.9999997615814209\n",
+      "only 8 bisections:  0.984375\n",
+      "with 0.1 accuracy:  0.9375\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy as spy\n",
+    "    \n",
+    "def f(x):\n",
+    "    return x*x - 1\n",
+    "\n",
+    "print(spy.optimize.bisect(f, 0, 4))\n",
+    "\n",
+    "print('only 8 bisections: ',  spy.optimize.bisect(f, 0, 4, maxiter=8))\n",
+    "\n",
+    "print('with 0.1 accuracy: ',  spy.optimize.bisect(f, 0, 4, xtol=0.1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Performance\n",
+    "\n",
+    "Since the `bisect` routine calls user-defined `python` functions, the speed gain is only about a factor of two, if compared to a purely `python` implementation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:08:24.750562Z",
+     "start_time": "2020-05-19T19:08:24.682959Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "bisect running in python\r\n",
+      "execution time:  1270  us\r\n",
+      "bisect running in C\r\n",
+      "execution time:  642  us\r\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "def f(x):\n",
+    "    return (x-1)*(x-1) - 2.0\n",
+    "\n",
+    "def bisect(f, a, b, xtol=2.4e-7, maxiter=100):\n",
+    "    if f(a) * f(b) > 0:\n",
+    "        raise ValueError\n",
+    "\n",
+    "    rtb = a if f(a) < 0.0 else b\n",
+    "    dx = b - a if f(a) < 0.0 else a - b\n",
+    "    for i in range(maxiter):\n",
+    "        dx *= 0.5\n",
+    "        x_mid = rtb + dx\n",
+    "        mid_value = f(x_mid)\n",
+    "        if mid_value < 0:\n",
+    "            rtb = x_mid\n",
+    "        if abs(dx) < xtol:\n",
+    "            break\n",
+    "\n",
+    "    return rtb\n",
+    "\n",
+    "@timeit\n",
+    "def bisect_scipy(f, a, b):\n",
+    "    return spy.optimize.bisect(f, a, b)\n",
+    "\n",
+    "@timeit\n",
+    "def bisect_timed(f, a, b):\n",
+    "    return bisect(f, a, b)\n",
+    "\n",
+    "print('bisect running in python')\n",
+    "bisect_timed(f, 3, 2)\n",
+    "\n",
+    "print('bisect running in C')\n",
+    "bisect_scipy(f, 3, 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## fmin\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin.html\n",
+    "\n",
+    "The `fmin` function finds the position of the minimum of a user-defined function by using the downhill simplex method. Requires two positional arguments, the function, and the initial value. Three keyword arguments, `xatol`, `fatol`, and `maxiter` stipulate conditions for stopping."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:00:26.729947Z",
+     "start_time": "2021-01-08T13:00:26.702748Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.9996093749999952\n",
+      "1.199999999999996\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "def f(x):\n",
+    "    return (x-1)**2 - 1\n",
+    "\n",
+    "print(spy.optimize.fmin(f, 3.0))\n",
+    "print(spy.optimize.fmin(f, 3.0, xatol=0.1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## newton\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.newton.html\n",
+    "\n",
+    "`newton` finds a zero of a real, user-defined function using the Newton-Raphson (or secant or Halley’s) method. The routine requires two positional arguments, the function, and the initial value. Three keyword\n",
+    "arguments can be supplied to control the iteration. These are the absolute and relative tolerances `tol`, and `rtol`, respectively, and the number of iterations before stopping, `maxiter`. The function returns a single scalar, the position of the root."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:56:35.139958Z",
+     "start_time": "2021-01-08T12:56:35.119712Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.260135727246117\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy as spy\n",
+    "    \n",
+    "def f(x):\n",
+    "    return x*x*x - 2.0\n",
+    "\n",
+    "print(spy.optimize.newton(f, 3., tol=0.001, rtol=0.01))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/scipy-signal.ipynb b/tulip/shared/ulab/docs/scipy-signal.ipynb
new file mode 100644
index 000000000..ec10d2e62
--- /dev/null
+++ b/tulip/shared/ulab/docs/scipy-signal.ipynb
@@ -0,0 +1,387 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:11:12.111639Z",
+     "start_time": "2021-01-12T16:11:11.914041Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-29T20:50:20.813162Z",
+     "start_time": "2022-01-29T20:50:20.794562Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-29T20:50:21.613220Z",
+     "start_time": "2022-01-29T20:50:21.557819Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):  # IPython magics for running notebook cells under MicroPython\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):  # %%micropython: `line` holds the -flags, `cell` the code to run\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    print('written cell to {}'.format(args.file)) # was printf(), an undefined name\n",
+    "            except OSError: # report I/O failures only, so that programming errors are not masked\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell) # pyb: global Pyboard connection opened in a separate cell\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):  # decorator factory: time f over n calls; NOTE: shadowed by the later timeit(f) in this cell\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# scipy.signal\n",
+    "\n",
+    "This module defines a single function:\n",
+    "\n",
+    "1. [scipy.signal.sosfilt](#sosfilt)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## sosfilt\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.sosfilt.html \n",
+    "\n",
+    "Filter data along one dimension using cascaded second-order sections.\n",
+    "\n",
+    "The function takes two positional arguments: `sos`, the filter segments of length 6, and the one-dimensional, uniformly sampled data set to be filtered. It returns the filtered data, or the filtered data and the final filter delays, if the `zi` keyword argument is supplied. The keyword argument must be a float `ndarray` of shape `(n_sections, 2)`. If `zi` is not passed to the function, the initial values are assumed to be 0."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-19T20:24:10.529668Z",
+     "start_time": "2020-06-19T20:24:10.520389Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "y:  array([0.0, 1.0, -4.0, 24.0, -104.0, 440.0, -1728.0, 6532.000000000001, -23848.0, 84864.0], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
+    "sos = [[1, 2, 3, 1, 5, 6], [1, 2, 3, 1, 5, 6]]\n",
+    "y = spy.signal.sosfilt(sos, x)\n",
+    "print('y: ', y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-19T20:27:39.508508Z",
+     "start_time": "2020-06-19T20:27:39.498256Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "y:  array([4.0, -16.0, 63.00000000000001, -227.0, 802.9999999999999, -2751.0, 9271.000000000001, -30775.0, 101067.0, -328991.0000000001], dtype=float)\n",
+      "\n",
+      "========================================\n",
+      "zf:  array([[37242.0, 74835.0],\n",
+      "\t [1026187.0, 1936542.0]], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
+    "sos = [[1, 2, 3, 1, 5, 6], [1, 2, 3, 1, 5, 6]]\n",
+    "# initial conditions of the filter\n",
+    "zi = np.array([[1, 2], [3, 4]])\n",
+    "\n",
+    "y, zf = spy.signal.sosfilt(sos, x, zi=zi)\n",
+    "print('y: ', y)\n",
+    "print('\\n' + '='*40 + '\\nzf: ', zf)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/scipy-special.ipynb b/tulip/shared/ulab/docs/scipy-special.ipynb
new file mode 100644
index 000000000..c3a0cf849
--- /dev/null
+++ b/tulip/shared/ulab/docs/scipy-special.ipynb
@@ -0,0 +1,344 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T18:54:58.722373Z",
+     "start_time": "2021-01-13T18:54:57.178438Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T18:57:41.555892Z",
+     "start_time": "2021-01-13T18:57:41.551121Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T18:57:42.313231Z",
+     "start_time": "2021-01-13T18:57:42.288402Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):  # IPython magics for running notebook cells under MicroPython\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):  # %%micropython: `line` holds the -flags, `cell` the code to run\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    print('written cell to {}'.format(args.file)) # was printf(), an undefined name\n",
+    "            except OSError: # report I/O failures only, so that programming errors are not masked\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell) # pyb: global Pyboard connection opened in a separate cell\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):  # decorator factory: time f over n calls; NOTE: shadowed by the later timeit(f) in this cell\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# scipy.special\n",
+    "\n",
+    "`scipy`'s `special` module defines several functions that behave like the standard mathematical functions of `numpy`, i.e., they can be called on any scalar, scalar-valued iterable (ranges, lists, tuples containing numbers), and on `ndarray`s without having to change the call signature. In all cases the functions return a new `ndarray` of typecode `float` (since these functions usually generate float values, anyway). \n",
+    "\n",
+    "At present, `ulab`'s `special` module contains the following functions:\n",
+    "\n",
+    "`erf`, `erfc`, `gamma`, and `gammaln`, and they can be called by prefixing them with `scipy.special.`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T19:06:54.640444Z",
+     "start_time": "2021-01-13T19:06:54.623467Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  range(0, 9)\n",
+      "array([0.0, 0.8427007929497149, 0.9953222650189527, 0.9999779095030014, 0.9999999845827421, 1.0, 1.0, 1.0, 1.0], dtype=float64)\n",
+      "\n",
+      "b:  array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "array([1.0, 0.1572992070502851, 0.004677734981047265, 2.209049699858544e-05, 1.541725790028002e-08, 1.537459794428035e-12, 2.151973671249892e-17, 4.183825607779414e-23, 1.122429717298293e-29], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "a = range(9)\n",
+    "b = np.array(a)\n",
+    "\n",
+    "print('a: ', a)\n",
+    "print(spy.special.erf(a))\n",
+    "\n",
+    "print('\\nb: ', b)\n",
+    "print(spy.special.erfc(b))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/templates/manual.tpl b/tulip/shared/ulab/docs/templates/manual.tpl
new file mode 100644
index 000000000..ba6b73e90
--- /dev/null
+++ b/tulip/shared/ulab/docs/templates/manual.tpl
@@ -0,0 +1,113 @@
+
+{%- extends 'display_priority.tpl' -%}
+
+
+{% block in_prompt %}
+{% endblock in_prompt %}
+
+{% block output_prompt %}
+{% endblock output_prompt %}
+{# input: render a code cell's source as an RST code block. For cells whose first line starts with %%micropython, that line is replaced by "# code to be run in micropython"; for all other cells "# code to be run in CPython" is prepended. #}
+{% block input scoped%}
+
+{%- if cell.source.split('\n')[0].startswith('%%micropython') -%}
+.. code::
+        
+{{ '\n'.join(['# code to be run in micropython'] + cell.source.strip().split('\n')[1:]) | indent}}
+
+{%- else -%}
+.. code::
+
+{{ '\n'.join(['# code to be run in CPython\n'] + cell.source.strip().split('\n')) | indent}}
+{%- endif -%}
+{% endblock input %}
+
+{% block error %}
+::
+
+{{ super() }}
+{% endblock error %}
+
+{% block traceback_line %}
+{{ line | indent | strip_ansi }}
+{% endblock traceback_line %}
+
+{% block execute_result %}
+{% block data_priority scoped %}
+{{ super() }}
+{% endblock %}
+{% endblock execute_result %}
+
+{% block stream %}
+.. parsed-literal::
+
+{{ output.text | indent }}
+{% endblock stream %}
+
+{% block data_svg %}
+.. image:: {{ output.metadata.filenames['image/svg+xml'] | urlencode }}
+{% endblock data_svg %}
+
+{% block data_png %}
+.. image:: {{ output.metadata.filenames['image/png'] | urlencode }}
+{%- set width=output | get_metadata('width', 'image/png') -%}
+{%- if width is not none %}
+   :width: {{ width }}px
+{%- endif %}
+{%- set height=output | get_metadata('height', 'image/png') -%}
+{%- if height is not none %}
+   :height: {{ height }}px
+{%- endif %}
+{% endblock data_png %}
+
+{% block data_jpg %}
+.. image:: {{ output.metadata.filenames['image/jpeg'] | urlencode }}
+{%- set width=output | get_metadata('width', 'image/jpeg') -%}
+{%- if width is not none %}
+   :width: {{ width }}px
+{%- endif %}
+{%- set height=output | get_metadata('height', 'image/jpeg') -%}
+{%- if height is not none %}
+   :height: {{ height }}px
+{%- endif %}
+{% endblock data_jpg %}
+
+{% block data_markdown %}
+{{ output.data['text/markdown'] | convert_pandoc("markdown", "rst") }}
+{% endblock data_markdown %}
+
+{% block data_latex %}
+.. math::
+
+{{ output.data['text/latex'] | strip_dollars | indent }}
+{% endblock data_latex %}
+
+{% block data_text scoped %}
+.. parsed-literal::
+
+{{ output.data['text/plain'] | indent }}
+{% endblock data_text %}
+
+{% block data_html scoped %}
+.. raw:: html
+
+{{ output.data['text/html'] | indent }}
+{% endblock data_html %}
+
+{% block markdowncell scoped %}
+{{ cell.source | convert_pandoc("markdown", "rst") }}
+{% endblock markdowncell %}
+
+{%- block rawcell scoped -%}
+{%- if cell.metadata.get('raw_mimetype', '').lower() in resources.get('raw_mimetypes', ['']) %}
+{{cell.source}}
+{% endif -%}
+{%- endblock rawcell -%}
+
+{% block headingcell scoped %}
+{{ ("#" * cell.level + cell.source) | replace('\n', ' ') | convert_pandoc("markdown", "rst") }}
+{% endblock headingcell %}
+
+{% block unknowncell scoped %}
+unknown type  {{cell.type}}
+{% endblock unknowncell %}
diff --git a/tulip/shared/ulab/docs/templates/rst.tpl b/tulip/shared/ulab/docs/templates/rst.tpl
new file mode 100644
index 000000000..479a69fcb
--- /dev/null
+++ b/tulip/shared/ulab/docs/templates/rst.tpl
@@ -0,0 +1,144 @@
+
+{%- extends 'display_priority.tpl' -%}
+
+
+{% block in_prompt %}
+{% endblock in_prompt %}
+
+{% block output_prompt %}
+{% endblock output_prompt %}
+{# input: render a code cell's source. Cells whose first line contains %%ccode or %%makefile get a link into the micropython-ulab repository plus a cpp/make code block holding everything after the first line; cells starting with ! become bash blocks; any other cell emits the language name (magics_language, else pygments_lexer, else the language_info name) followed by a plain code block. NOTE(review): the language name is output before the ".. code ::" line rather than as its argument - confirm this is intended. #}
+{% block input scoped%}
+
+{%- if '%%ccode' in cell.source.strip().split('\n')[0] -%}
+
+{{ 'https://github.com/v923z/micropython-ulab/tree/master/code/' + cell.source.strip().split('\n')[0].split()[-1] }}
+
+.. code:: cpp
+        
+{{ '\n'.join( cell.source.strip().split('\n')[1:] ) | indent }}
+
+{%- elif '%%makefile' in cell.source.strip().split('\n')[0] -%}
+
+{{ 'https://github.com/v923z/micropython-ulab/tree/master/code/' + cell.source.strip().split('\n')[0].split()[-1].split('/')[1] + '/micropython.mk' }}
+
+.. code:: make
+        
+{{ '\n'.join( cell.source.strip().split('\n')[1:] ) | indent }}
+
+{%- elif cell.source.strip().split('\n')[0].startswith('!') -%}
+
+.. code:: bash
+
+{{ cell.source | indent }}
+
+{%- else -%}
+{%- if 'magics_language' in cell.metadata  -%}
+    {{ cell.metadata.magics_language}}
+{%- elif 'pygments_lexer' in nb.metadata.get('language_info', {}) -%}
+    {{ nb.metadata.language_info.pygments_lexer }}
+{%- elif 'name' in nb.metadata.get('language_info', {}) -%}
+    {{ nb.metadata.language_info.name }}
+{%- endif -%}
+
+.. code ::
+        
+{{ cell.source | indent}}
+{%- endif -%}
+{% endblock input %}
+
+{% block error %}
+::
+
+{{ super() }}
+{% endblock error %}
+
+{% block traceback_line %}
+{{ line | indent | strip_ansi }}
+{% endblock traceback_line %}
+
+{% block execute_result %}
+{% block data_priority scoped %}
+{{ super() }}
+{% endblock %}
+{% endblock execute_result %}
+{# stream: emit a cell's stream output text as a parsed-literal block, except for cells whose first line contains %%ccode, which produce no stream output. #}
+{% block stream %}
+{%- if '%%ccode' in cell.source.strip().split('\n')[0] -%}
+{%- else -%}
+
+.. parsed-literal::
+
+{{ output.text | indent }}
+{%- endif -%}
+{% endblock stream %}
+
+{% block data_svg %}
+.. image:: {{ output.metadata.filenames['image/svg+xml'] | urlencode }}
+{% endblock data_svg %}
+
+{% block data_png %}
+.. image:: {{ output.metadata.filenames['image/png'] | urlencode }}
+{%- set width=output | get_metadata('width', 'image/png') -%}
+{%- if width is not none %}
+   :width: {{ width }}px
+{%- endif %}
+{%- set height=output | get_metadata('height', 'image/png') -%}
+{%- if height is not none %}
+   :height: {{ height }}px
+{%- endif %}
+{% endblock data_png %}
+
+{% block data_jpg %}
+.. image:: {{ output.metadata.filenames['image/jpeg'] | urlencode }}
+{%- set width=output | get_metadata('width', 'image/jpeg') -%}
+{%- if width is not none %}
+   :width: {{ width }}px
+{%- endif %}
+{%- set height=output | get_metadata('height', 'image/jpeg') -%}
+{%- if height is not none %}
+   :height: {{ height }}px
+{%- endif %}
+{% endblock data_jpg %}
+
+{% block data_markdown %}
+{{ output.data['text/markdown'] | convert_pandoc("markdown", "rst") }}
+{% endblock data_markdown %}
+
+{% block data_latex %}
+.. math::
+
+{{ output.data['text/latex'] | strip_dollars | indent }}
+{% endblock data_latex %}
+
+{% block data_text scoped %}
+
+.. parsed-literal::
+
+{{ output.data['text/plain'] | indent }}
+{% endblock data_text %}
+
+{% block data_html scoped %}
+.. raw:: html
+
+{{ output.data['text/html'] | indent }}
+{% endblock data_html %}
+
+{% block markdowncell scoped %}
+{{ cell.source | convert_pandoc("markdown", "rst") }}
+{% endblock markdowncell %}
+
+{%- block rawcell scoped -%}
+{%- if cell.metadata.get('raw_mimetype', '').lower() in resources.get('raw_mimetypes', ['']) %}
+{{cell.source}}
+{% endif -%}
+{%- endblock rawcell -%}
+
+{% block headingcell scoped %}
+{{ ("#" * cell.level + cell.source) | replace('\n', ' ') | convert_pandoc("markdown", "rst") }}
+
+{% endblock headingcell %}
+
+{% block unknowncell scoped %}
+unknown type  {{cell.type}}
+{% endblock unknowncell %}
diff --git a/tulip/shared/ulab/docs/ulab-approx.ipynb b/tulip/shared/ulab/docs/ulab-approx.ipynb
new file mode 100644
index 000000000..52dc205e4
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-approx.ipynb
@@ -0,0 +1,613 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:50:51.417613Z",
+     "start_time": "2021-01-08T12:50:51.208257Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:50:52.581876Z",
+     "start_time": "2021-01-08T12:50:52.567901Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:50:53.516712Z",
+     "start_time": "2021-01-08T12:50:53.454984Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Approximation methods\n",
+    "\n",
+    "`ulab` implements five functions that can be used for interpolating, root finding, and minimising arbitrary `python` functions in one dimension. Two of these functions, namely, `interp`, and `trapz` are defined in `numpy`, while the other three are parts of `scipy`'s `optimize` module. \n",
+    "\n",
+    "Note that routines that work with user-defined functions still have to call the underlying `python` code, and therefore, gains in speed are not as significant as with other vectorised operations. As a rule of thumb, a factor of two can be expected, when compared to an optimised `python` implementation."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## interp\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.interp.html\n",
+    "\n",
+    "The `interp` function returns the linearly interpolated values of a one-dimensional numerical array. It requires three positional arguments, `x`, at which the interpolated values are evaluated, `xp`, the array\n",
+    "of the independent data variable, and `fp`, the array of the dependent values of the data. `xp` must be a monotonically increasing sequence of numbers.\n",
+    "\n",
+    "Two keyword arguments, `left`, and `right` can also be supplied; these determine the return values, if `x < xp[0]`, and `x > xp[-1]`, respectively. If these arguments are not supplied, `left`, and `right` default to `fp[0]`, and `fp[-1]`, respectively."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:54:58.895801Z",
+     "start_time": "2021-01-08T12:54:58.869338Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([0.8, 1.8, 2.8, 3.8, 4.8], dtype=float64)\n",
+      "array([1.0, 1.8, 2.8, 4.6, 5.0], dtype=float64)\n",
+      "array([0.0, 1.8, 2.8, 4.6, 5.0], dtype=float64)\n",
+      "array([1.0, 1.8, 2.8, 4.6, 10.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.array([1, 2, 3, 4, 5]) - 0.2\n",
+    "xp = np.array([1, 2, 3, 4])\n",
+    "fp = np.array([1, 2, 3, 5])\n",
+    "\n",
+    "print(x)\n",
+    "print(np.interp(x, xp, fp))\n",
+    "print(np.interp(x, xp, fp, left=0.0))\n",
+    "print(np.interp(x, xp, fp, right=10.0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## newton\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.newton.html\n",
+    "\n",
+    "`newton` finds a zero of a real, user-defined function using the Newton-Raphson (or secant or Halley’s) method. The routine requires two positional arguments, the function, and the initial value. Three keyword\n",
+    "arguments can be supplied to control the iteration. These are the absolute and relative tolerances `tol`, and `rtol`, respectively, and the number of iterations before stopping, `maxiter`. The function returns a single scalar, the position of the root."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:56:35.139958Z",
+     "start_time": "2021-01-08T12:56:35.119712Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.260135727246117\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy as spy\n",
+    "    \n",
+    "def f(x):\n",
+    "    return x*x*x - 2.0\n",
+    "\n",
+    "print(spy.optimize.newton(f, 3., tol=0.001, rtol=0.01))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## bisect \n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.bisect.html\n",
+    "\n",
+    "`bisect` finds the root of a function of one variable using a simple bisection routine. It takes three positional arguments, the function itself, and two starting points. The function must have opposite signs\n",
+    "at the starting points. Returned is the position of the root.\n",
+    "\n",
+    "Two keyword arguments, `xtol`, and `maxiter` can be supplied to control the accuracy, and the number of bisections, respectively."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:58:28.444300Z",
+     "start_time": "2021-01-08T12:58:28.421989Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.9999997615814209\n",
+      "only 8 bisections:  0.984375\n",
+      "with 0.1 accuracy:  0.9375\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy as spy\n",
+    "    \n",
+    "def f(x):\n",
+    "    return x*x - 1\n",
+    "\n",
+    "print(spy.optimize.bisect(f, 0, 4))\n",
+    "\n",
+    "print('only 8 bisections: ',  spy.optimize.bisect(f, 0, 4, maxiter=8))\n",
+    "\n",
+    "print('with 0.1 accuracy: ',  spy.optimize.bisect(f, 0, 4, xtol=0.1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Performance\n",
+    "\n",
+    "Since the `bisect` routine calls user-defined `python` functions, the speed gain is only about a factor of two, if compared to a purely `python` implementation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:08:24.750562Z",
+     "start_time": "2020-05-19T19:08:24.682959Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "bisect running in python\r\n",
+      "execution time:  1270  us\r\n",
+      "bisect running in C\r\n",
+      "execution time:  642  us\r\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "def f(x):\n",
+    "    return (x-1)*(x-1) - 2.0\n",
+    "\n",
+    "def bisect(f, a, b, xtol=2.4e-7, maxiter=100):\n",
+    "    if f(a) * f(b) > 0:\n",
+    "        raise ValueError\n",
+    "\n",
+    "    rtb = a if f(a) < 0.0 else b\n",
+    "    dx = b - a if f(a) < 0.0 else a - b\n",
+    "    for i in range(maxiter):\n",
+    "        dx *= 0.5\n",
+    "        x_mid = rtb + dx\n",
+    "        mid_value = f(x_mid)\n",
+    "        if mid_value < 0:\n",
+    "            rtb = x_mid\n",
+    "        if abs(dx) < xtol:\n",
+    "            break\n",
+    "\n",
+    "    return rtb\n",
+    "\n",
+    "@timeit\n",
+    "def bisect_scipy(f, a, b):\n",
+    "    return spy.optimize.bisect(f, a, b)\n",
+    "\n",
+    "@timeit\n",
+    "def bisect_timed(f, a, b):\n",
+    "    return bisect(f, a, b)\n",
+    "\n",
+    "print('bisect running in python')\n",
+    "bisect_timed(f, 3, 2)\n",
+    "\n",
+    "print('bisect running in C')\n",
+    "bisect_scipy(f, 3, 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## fmin\n",
+    "\n",
+    "`scipy`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin.html\n",
+    "\n",
+    "The `fmin` function finds the position of the minimum of a user-defined function by using the downhill simplex method. Requires two positional arguments, the function, and the initial value. Three keyword arguments, `xatol`, `fatol`, and `maxiter` stipulate conditions for stopping."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:00:26.729947Z",
+     "start_time": "2021-01-08T13:00:26.702748Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.9996093749999952\n",
+      "1.199999999999996\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "def f(x):\n",
+    "    return (x-1)**2 - 1\n",
+    "\n",
+    "print(spy.optimize.fmin(f, 3.0))\n",
+    "print(spy.optimize.fmin(f, 3.0, xatol=0.1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## trapz\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.trapz.html\n",
+    "\n",
+    "The function takes one or two one-dimensional `ndarray`s, and integrates the dependent values (`y`) using the trapezoidal rule. If the independent variable (`x`) is given, that is taken as the sample points corresponding to `y`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:01:29.515166Z",
+     "start_time": "2021-01-08T13:01:29.494285Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "x:  array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], dtype=float64)\n",
+      "y:  array([0.0, 1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 49.0, 64.0, 81.0], dtype=float64)\n",
+      "============================\n",
+      "integral of y:  244.5\n",
+      "integral of y at x:  244.5\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.linspace(0, 9, num=10)\n",
+    "y = x*x\n",
+    "\n",
+    "print('x: ',  x)\n",
+    "print('y: ',  y)\n",
+    "print('============================')\n",
+    "print('integral of y: ', np.trapz(y))\n",
+    "print('integral of y at x: ', np.trapz(y, x=x))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/ulab-change-log.md b/tulip/shared/ulab/docs/ulab-change-log.md
new file mode 100644
index 000000000..c857f96d5
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-change-log.md
@@ -0,0 +1,1261 @@
+Sun, 24 Nov 2024
+
+version 6.6.1
+
+    fix compilation error, for complexes
+
+Wed, 9 Oct 2024
+
+version 6.6.0
+
+    add numpy.take
+
+Sat, 14 Sep 2024
+
+version 6.5.5
+
+    add scratchpad, out, log keyword arguments to spectrum
+
+Sat, 14 Sep 2024
+
+version 6.5.4
+
+    fix roll, when shift is 0
+
+Wed, 6 Mar 2024
+
+version 6.5.2
+
+    allow loadtxt to parse numbers, even if built-in complexes are not supported
+
+Tue, 9 Jan 2024
+
+version 6.5.0
+
+    add random module
+
+Mon, 25 Dec 2023
+
+version 6.4.3
+
+    fix the 'np.delete' error that occurs when passing an empty iterable object as the second positional argument (#653)
+
+Thu, 11 Dec 2023
+
+version 6.4.2
+
+    fix upcasting with two uint8 operands (#650)
+
+Thu, 10 Aug 2023
+
+version 6.4.1
+
+    fix BOOLEAN issue, which would cause the numpy.where function to behave abnormally on RP2040 (#643)
+
+Thu, 20 Jul 2023
+
+version 6.4.0
+
+    implement AND, OR, and XOR binary operators
+
+Sat, 1 Jul 2023
+
+version 6.3.5
+
+    allow function iterator in math functions with the out keyword
+
+Fri, 12 May 2023
+
+version 6.3.4
+
+    fix compile error when COMPLEX support not enabled
+
+version 6.3.3
+
+    Polyval handles non-array as second argument (#601)
+
+version 6.3.2
+
+    fix out of bound read
+
+version 6.3.1
+
+    fix integer overflows
+
+version 6.3.0
+
+    add bitwise operators
+
+Wed, 17 May 2023
+
+version 6.1.1
+
+    fix ndarray subscription, when value is NULL
+
+Tue, 16 May 2023
+
+version 6.1.0
+
+    add sinc function
+
+Fri, 12 May 2023
+
+version 6.0.13
+
+    add bitwise operators
+
+Sun, 7 May 2023
+
+version 6.0.12
+
+    ndarray_from_mp_obj correctly treats Boolean arguments
+
+Sat, 6 May 2023
+
+version 6.0.11
+
+    .reshape can now interpret unknown shape dimension
+
+Sat, 6 May 2023
+
+version 6.0.10
+
+    fix binary division
+
+Sun, 21 Jan 2023
+
+version 6.0.6
+
+    raise proper exception in arange
+
+Sun, 21 Jan 2023
+
+version 6.0.7
+
+    treat empty arrays in sort_complex correctly
+
+Sun, 21 Jan 2023
+
+version 6.0.5
+
+    fix ones()/zeros() method when the amount of memory to allocate overflows
+
+Sun, 15 Jan 2023
+
+version 6.0.4
+
+    fix dot function
+
+Sat, 14 Jan 2023
+
+version 6.0.3
+
+    fix how concatenate deals with scalar inputs
+
+Tue, 3 Jan 2023
+
+version 6.0.2
+
+    fix vectorize
+
+Sat, 5 Nov 2022
+
+version 6.0.1
+
+    fix fft.ifft
+
+Wed, 21 Sep 2022
+
+version 6.0.0
+
+    bring ulab in line with the latest version of micropython
+
+Thu, 4 Aug 2022
+
+version 5.1.1
+
+    fix how arctan2 treats scalars
+
+Mon, 25 July 2022
+
+version 5.1.0
+
+    add nonzero
+
+Mon, 16 May 2022
+
+version 5.0.7
+
+    fix in-place assignment from slices
+
+Thu, 14 Apr 2022
+
+version 5.0.6
+
+    use m_new0 conditionally
+
+Thu, 14 Apr 2022
+
+version 5.0.5
+
+    fix sorting on empty arrays
+
+Fri, 18 Feb 2022
+
+version 5.0.4
+
+    fix the handling of empty arrays in binary_op
+
+Thu, 10 Feb 2022
+
+version 5.0.3
+
+    fix complex slicing
+
+Tue, 8 Feb 2022
+
+version 5.0.2
+
+    fix np.diag
+
+Thu, 3 Feb 2022
+
+version 5.0.1
+
+    add optional ULAB_HASH string
+
+Tue, 1 Feb 2022
+
+version 5.0.0
+
+    move scipy.signal.spectrogram to utils.spectrogram
+
+Tue, 1 Feb 2022
+
+version 4.4.2
+
+    add skiprows keyword to loadtxt
+
+Sat, 29 Jan 2022
+
+version 4.4.1
+
+    add dtype keyword to loadtxt
+
+Thu, 27 Jan 2022
+
+version 4.4.0
+
+    implement numpy.savetxt, numpy.loadtxt
+
+Tue, 15 Jan 2022
+
+version 4.3.2
+
+    fix rp2 port compilation
+
+Wed, 19 Jan 2022
+
+version 4.3.1
+
+    fix signal.sosfilt
+
+Wed, 19 Jan 2022
+
+version 4.3.0
+
+    implement numpy.save, numpy.load
+
+Tue, 18 Jan 2022
+
+version 4.2.1
+
+    fix ndarray_copy_view for Boolean dtypes
+
+Fri, 14 Jan 2022
+
+version 4.2.0
+
+    add numpy.size, asarray
+
+Wed, 12 Jan 2022
+
+    version 4.2.0
+    
+    implement numpy.save, numpy.load
+
+Wed, 12 Jan 2022
+
+version 4.1.1
+
+    fix complex printout for long arrays
+
+Wed, 12 Jan 2022
+
+version 4.1.0
+
+    add numpy.delete
+
+Sat, 8 Jan 2022
+
+version 4.0.0
+
+    add complex support, .tolist() method, .imag, .real array properties, compress, conjugate, imag, real, sort_complex functions
+
+Fri, 3 Dec 2021
+
+version 3.3.8
+
+    fix any/all function
+
+Tue, 30 Nov 2021
+
+version 3.3.7
+
+    fix sum() for integer/Boolean types
+
+Sat, 20 Nov 2021
+
+version 3.3.6
+
+    fix .shape for arrays of zero length (#454)
+
+Sun, 07 Nov 2021
+
+version 3.3.5
+
+    fix cast in numpy/compare.c:compare_function()
+
+Sat, 07 Aug 2021
+
+version 3.3.4
+
+    change default keyword value in linalg.qr
+
+Fri, 23 Jul 2021
+
+version 3.3.3
+
+    fix compilation for one dimension
+
+Thu, 22 Jul 2021
+
+version 3.3.2
+
+    fix compilation error on SAMD devices
+
+Thu, 22 Jul 2021
+
+version 3.3.1
+
+    fix sum for 4D arrays
+
+Thu, 22 Jul 2021
+
+version 3.3.0
+
+    add QR decomposition
+
+Tue, 13 Jul 2021
+
+version 3.2.0
+
+    add flatiter/flat to ndarray methods
+
+Tue, 22 Jun 2021
+
+version 3.1.1
+
+    fix float comparison in scipy/linalg.c
+
+Sat, 19 Jun 2021
+
+version 3.1.0
+
+    ndarray.shape can now be assigned to
+
+Thu, 17 Jun 2021
+
+version 3.0.1
+
+    add the .T ndarray property
+
+Wed, 9 Jun 2021
+
+version 3.0.0
+
+    implement property getter/setter for micropython
+
+Thu, 3 Jun 2021
+
+version 2.9.0
+
+    add empty as alias for zeros
+
+Thu, 3 Jun 2021
+
+version 2.8.8
+
+    allow functions in approx to take iterables as argument
+
+Thu, 3 Jun 2021
+
+version 2.8.7
+
+    simplify vectorised function code
+
+Wed, 2 Jun 2021
+
+version 2.8.6
+
+    factor out array creation from iterables, so that generic iterables can be passed to numerical functions
+
+Tue, 1 Jun 2021
+
+version 2.8.5
+
+    fix upcasting rules for ndarray + scalar
+
+Mon, 31 May 2021
+
+version 2.8.4
+
+    initialise arange values via macro
+
+Mon, 24 May 2021
+
+version 2.8.3
+
+    fix nan return value
+
+Sat, 22 May 2021
+
+version 2.8.2
+
+    fix all/any/median for empty arrays
+
+Tue, 18 May 2021
+
+version 2.8.1
+
+    fix array initialisation/print with empty iterables
+
+Sun, 16 May 2021
+
+version 2.8.0
+
+    added cho_solve function in scipy.linalg module
+
+Thu, 13 May 2021
+
+version 2.7.1
+
+    fix garbage collection problem
+
+Wed, 5 May 2021
+
+version 2.7.0
+
+    added linalg module in scipy with solve_triangular function
+
+Mon, 26 Apr 2021
+
+version 2.6.2
+
+    fix optimize zero condition
+
+Sat, 23 Apr 2021
+
+version 2.6.1
+
+    fix implementation of math constants
+
+
+Mon, 22 Mar 2021
+
+version 2.6.0
+
+    add where function
+
+Mon, 8 Mar 2021
+
+version 2.5.1
+
+    fix linspace/logspace/arange for Boolean dtypes
+
+Wed, 03 Mar 2021
+
+version 2.5.0
+
+    added utils sub-module with from_intbuffer function
+
+Tue, 23 Feb 2021
+
+version 2.4.5
+
+    fix dot function
+
+Sun, 21 Feb 2021
+
+version 2.4.3
+
+    re-introduce ndarray_get_buffer, and buffer protocol
+
+Sun, 21 Feb 2021
+
+version 2.4.2
+
+    fix ndarray_is_dense, eye, ones, full, and zeros for Boolean type
+
+Sat, 13 Feb 2021
+
+version 2.4.1
+
+    fixed dot error
+
+Fri, 12 Feb 2021
+
+version 2.4.0
+
+    added byteswap method
+
+Sun, 14 Feb 2021
+
+version 2.3.7
+
+    fixed frombuffer implementation glitch
+
+Sat, 13 Feb 2021
+
+version 2.3.6
+
+    moved trace and dot to the top level
+
+Wed, 10 Feb 2021
+
+version 2.3.5
+
+    fixed invisible error in tools_reduce_axes, simplified the implementation of all/any
+
+Tue, 9 Feb 2021
+
+version 2.3.4
+
+    removed redundant exception from linalg.norm, fixed exception message in tools_reduce_axes
+
+Tue, 9 Feb 2021
+
+version 2.3.3
+
+    linalg.norm should now work with the axis keyword argument
+
+Mon, 8 Feb 2021
+
+version 2.3.2
+
+    improved the accuracy of linalg.norm, and extended it to generic iterables
+
+Mon, 8 Feb 2021
+
+version 2.3.1
+
+    partially fix https://github.com/v923z/micropython-ulab/issues/304, and len unary operator
+
+Mon, 8 Feb 2021
+
+version 2.3.0
+
+    added any and all functions
+
+Fri, 29 Jan 2021
+
+version 2.2.0
+
+    added isinf/infinite functions
+
+Fri, 29 Jan 2021
+
+version 2.1.5
+
+    fixed error, when calculating standard deviation of iterables
+
+Wed, 27 Jan 2021
+
+version 2.1.4
+
+    arrays can now be initialised from nested iterables
+
+Thu, 21 Jan 2021
+
+version 2.1.3
+
+    added ifndef/endif wrappers in ulab.h
+
+Fri, 15 Jan 2021
+
+version 2.1.2
+
+    fixed small error in frombuffer
+
+Thu, 14 Jan 2021
+
+version 2.1.1
+
+    fixed bad error in diff
+
+Thu, 26 Nov 2020
+
+version 2.1.0
+
+    implemented frombuffer
+
+Tue, 24 Nov 2020
+
+version 2.0.0
+
+    implemented numpy/scipy compatibility
+
+Tue, 24 Nov 2020
+
+version 1.6.0
+
+    added Boolean initialisation option
+
+Mon, 23 Nov 2020
+
+version 1.5.1
+
+    fixed nan definition
+
+version 1.5.0
+
+    added nan/inf class level constants
+
+version 1.4.10
+
+    fixed sosfilt
+
+version 1.4.9
+
+    added in-place sort
+
+version 1.4.8
+
+    fixed convolve
+
+version 1.4.7
+
+    fixed iteration loop in norm
+
+Fri, 20 Nov 2020
+
+version 1.4.6
+
+    fixed interp
+
+Thu, 19 Nov 2020
+
+version 1.4.5
+
+    eliminated fatal micropython error in ndarray_init_helper
+
+version 1.4.4
+
+    fixed min, max
+
+version 1.4.3
+
+    fixed full, zeros, ones
+
+version 1.4.2
+
+    fixed dtype
+
+Wed, 18 Nov 2020
+
+version 1.4.1
+
+    fixed std
+
+version 1.4.0
+
+    removed size from linalg
+
+version 1.3.8
+
+    fixed trapz
+
+Tue, 17 Nov 2020
+
+version 1.3.7
+
+    fixed in-place power, in-place divide, roll
+
+Mon, 16 Nov 2020
+
+version 1.3.6
+
+    fixed eye
+
+Mon, 16 Nov 2020
+
+version 1.3.5
+
+    fixed trace
+
+Mon, 16 Nov 2020
+
+version 1.3.4
+
+    fixed clip
+
+Mon, 16 Nov 2020
+
+version 1.3.3
+
+    added function pointer option to some binary operators
+
+Fri, 13 Nov 2020
+
+version 1.3.2
+
+    implemented function pointer option in vectorise
+
+Thu, 12 Nov 2020
+
+version 1.3.1
+
+    factored out some of the math functions in re-usable form
+
+Wed, 11 Nov 2020
+
+version 1.3.0
+
+    added dtype function/method/property
+
+Wed, 11 Nov 2020
+
+version 1.2.8
+
+    improved the accuracy of sum for float types
+
+Wed, 11 Nov 2020
+
+version 1.2.7
+
+    fixed transpose
+    improved the accuracy of trapz
+
+Tue, 10 Nov 2020
+
+version 1.2.6
+
+    fixed slicing
+
+Mon, 9 Nov 2020
+
+version 1.2.5
+
+    fixed array casting glitch in make_new_core
+
+Mon, 9 Nov 2020
+
+version 1.2.4
+
+    sum/mean/std can flatten the arrays now
+
+Tue, 3 Nov 2020
+
+version 1.2.1
+
+    fixed pointer issue in eig, and corrected the docs
+
+Tue, 3 Nov 2020
+
+version 1.2.0
+
+    added median function
+
+Tue, 3 Nov 2020
+
+version 1.1.4
+
+    fixed norm and shape
+
+Mon, 2 Nov 2020
+
+version 1.1.3
+
+    fixed small glitch in diagonal, and ndarray_make_new_core
+
+Sun, 1 Nov 2020
+
+version 1.1.1
+
+    fixed compilation error for 4D
+
+Sat, 31 Oct 2020
+
+version 1.1.0
+
+    added the diagonal function
+
+Fri, 30 Oct 2020
+
+version 1.0.0
+
+    added :
+        support for tensors of rank 4
+        proper broadcasting
+        views
+        .tobytes()
+        concatenate
+        cross
+        full
+        logspace
+        in-place operators
+
+Sat, 25 Oct 2020
+
+version 0.54.5
+
+    wrong type in slices raise TypeError exception
+
+Fri, 23 Oct 2020
+
+version 0.54.4
+
+    fixed indexing error in slices
+
+Mon, 17 Aug 2020
+
+version 0.54.3
+
+    fixed small error in linalg
+
+Mon, 03 Aug 2020
+
+version 0.54.2
+
+    argsort throws an error, if the array is longer than 65535
+
+Wed, 29 Jul 2020
+
+version 0.54.1
+
+    changed to size_t for the length of arrays
+
+Thu, 23 Jul 2020
+
+version 0.54.0
+
+    added norm to linalg
+
+Wed, 22 Jul 2020
+
+version 0.53.2
+
+    added circuitpython documentation stubs to the source files
+
+Wed, 22 Jul 2020
+
+version 0.53.1
+
+    fixed arange with negative steps
+
+Mon, 20 Jul 2020
+
+version 0.53.0
+
+    added arange to create.c
+
+Thu, 16 Jul 2020
+
+version 0.52.0
+
+    added trapz to approx
+
+Mon, 29 Jun 2020
+
+version 0.51.1
+
+    fixed argmin/argmax issue
+
+Fri, 19 Jun 2020
+
+version 0.51.0
+
+    add sosfilt to the filter sub-module
+
+Fri, 12 Jun 2020
+
+version 0.50.2
+
+    fixes compilation error in openmv
+
+Mon, 1 Jun 2020
+
+version 0.50.1
+
+    fixes error in numerical max/min
+
+Mon, 18 May 2020
+
+version 0.50.0
+
+    move interp to the approx sub-module
+
+Wed, 06 May 2020
+
+version 0.46.0
+
+    add curve_fit to the approx sub-module
+
+version 0.44.0
+
+    add approx sub-module with newton, fmin, and bisect functions
+
+Thu, 30 Apr 2020
+
+version 0.44.0
+
+    add approx sub-module with newton, fmin, and bisect functions
+
+Tue, 19 May 2020
+
+version 0.46.1
+
+    fixed bad error in binary_op
+
+Wed, 6 May 2020
+
+version 0.46
+
+    added vectorisation of python functions
+
+Sat, 2 May 2020
+
+version 0.45.0
+
+    add equal/not_equal to the compare module
+
+Tue, 21 Apr 2020
+
+version 0.42.0
+
+    add minimum/maximum/clip functions
+
+Mon, 20 Apr 2020
+
+version 0.41.6
+
+    argument handling improvement in polyfit
+
+Mon, 20 Apr 2020
+
+version 0.41.5
+
+    fix compilation errors due to https://github.com/micropython/micropython/commit/30840ebc9925bb8ef025dbc2d5982b1bfeb75f1b
+
+Sat, 18 Apr 2020
+
+version 0.41.4
+
+    fix compilation error on hardware ports
+
+Tue, 14 Apr 2020
+
+version 0.41.3
+
+    fix indexing error in dot function
+
+Thu, 9 Apr 2020
+
+version 0.41.2
+
+    fix transpose function
+
+Tue, 7 Apr 2020
+
+version 0.41.2
+
+    fix discrepancy in argmin/argmax behaviour
+
+Tue, 7 Apr 2020
+
+version 0.41.1
+
+    fix error in argsort
+
+Sat, 4 Apr 2020
+
+version 0.41.0
+
+    implemented == and != binary operators
+
+Fri, 3 Apr 2020
+
+version 0.40.0
+
+    added trace to linalg
+
+Thu, 2 Apr 2020
+
+version 0.39.0
+
+    added the ** operator, and operand swapping in binary operators
+
+Thu, 2 Apr 2020
+
+version 0.38.1
+
+    added fast option, when initialising from ndarray_properties
+
+Thu, 12 Mar 2020
+
+version 0.38.0
+
+    added initialisation from ndarray, and the around function
+
+Tue, 10 Mar 2020
+
+version 0.37.0
+
+    added Cholesky decomposition to linalg.c
+
+Thu, 27 Feb 2020
+
+version 0.36.0
+
+    moved zeros, ones, eye and linspace into separate module (they are still bound at the top level)
+
+Thu, 27 Feb 2020
+
+version 0.35.0
+
+    Move zeros, ones back into top level ulab module
+
+Tue, 18 Feb 2020
+
+version 0.34.0
+
+    split ulab into multiple modules
+
+Sun, 16 Feb 2020
+
+version 0.33.2
+
+    moved properties into ndarray_properties.h, implemented pointer arithmetic in fft.c to save some time
+
+Fri, 14 Feb 2020
+
+version 0.33.1
+
+    added the __name__ attribute to all sub-modules
+
+Thu, 13 Feb 2020
+
+version 0.33.0
+
+    sub-modules are now proper sub-modules of ulab
+
+Mon, 17 Feb 2020
+
+version 0.32.1
+
+    temporary fix for issue #40
+
+Tue, 11 Feb 2020
+
+version 0.32.0
+
+    added itemsize, size and shape attributes to ndarrays, and removed rawsize
+
+Mon, 10 Feb 2020
+
+version 0.31.0
+
+    removed asbytearray, and added buffer protocol to ndarrays, fixed bad error in filter.c
+
+Sun, 09 Feb 2020
+
+version 0.30.2
+
+    fixed slice_length in ndarray.c
+
+Sat, 08 Feb 2020
+
+version 0.30.1
+
+    fixed typecode error, added variable inspection, and replaced ternary operators in filter.c
+
+Fri, 07 Feb 2020
+
+version 0.30.0
+
+    ulab functions can arbitrarily be excluded from the firmware via the ulab.h configuration file
+
+Thu, 06 Feb 2020
+
+version 0.27.0
+
+    add convolve, the start of a 'filter' functionality group
+
+Wed, 29 Jan 2020
+
+version 0.26.7
+
+    fixed indexing error in linalg.dot
+
+Mon, 20 Jan 2020
+
+version 0.26.6
+
+    replaced MP_ROM_PTR(&mp_const_none_obj), so that module can be compiled for the nucleo board
+
+Tue, 7 Jan 2020
+
+version 0.26.5
+
+    fixed glitch in numerical.c, numerical.h
+
+Mon, 6 Jan 2020
+
+version 0.26.4
+
+    switched version constant to string
+
+Tue, 31 Dec 2019
+
+version 0.263
+
+    changed declaration of ulab_ndarray_type to extern
+
+Fri, 29 Nov 2019
+
+version 0.262
+
+    fixed error in macro in vectorise.h
+
+Thu, 28 Nov 2019
+
+version 0.261
+
+    fixed bad indexing error in linalg.dot
+
+Tue, 6 Nov 2019
+
+version 0.26
+
+    added in-place sorting (method of ndarray), and argsort
+
+Mon, 4 Nov 2019
+
+version 0.25
+
+    added first implementation of sort, and fixed section on compiling the module in the manual
+
+Thu, 31 Oct 2019
+
+version 0.24
+
+    added diff to numerical.c
+
+Tue, 29 Oct 2019
+
+version 0.23
+
+    major revamp of subscription method
+
+Sat, 19 Oct 2019
+
+version 0.21
+
+    fixed trivial bug in .rawsize()
+
+Sat, 19 Oct 2019
+
+version 0.22
+
+    fixed small error in linalg_det, and implemented linalg_eig.
+
+
+Thu, 17 Oct 2019
+
+version 0.21
+
+    implemented uniform interface for fft, and spectrum, and added ifft.
+
+Wed, 16 Oct 2019
+
+version 0.20
+
+    Added flip function to numerical.c, and moved the size function to linalg. In addition,
+    size is a function now, and not a method.
+
+Tue, 15 Oct 2019
+
+version 0.19
+
+    fixed roll in numerical.c: it can now accept the axis=None keyword argument, added determinant to linalg.c
+
+Mon, 14 Oct 2019
+
+version 0.18
+
+    fixed min/max function in numerical.c; it conforms to numpy behaviour
+
+Fri, 11 Oct 2019
+
+version 0.171
+
+    found and fixed small bug in roll function
+
+Fri, 11 Oct 2019
+
+version 0.17
+
+    universal function can now take arbitrary typecodes
+
+Fri, 11 Oct 2019
+
+version 0.161
+
+    fixed bad error in iterator, and make_new_ndarray
+
+Thu, 10 Oct 2019
+
+version 0.16
+
+    changed ndarray to array in ulab.c, so as to conform to numpy's notation
+    extended subscr method to include slices (partially works)
+
+Tue, 8 Oct 2019
+
+version 0.15
+
+    added inv, neg, pos, and abs unary operators to ndarray.c
+
+Mon, 7 Oct 2019
+
+version 0.14
+
+    made the internal binary_op function tighter, and added keyword arguments to linspace
+
+Sat, 4 Oct 2019
+
+version 0.13
+
+    added the <, <=, >, >= binary operators to ndarray
+
+Fri, 4 Oct 2019
+
+version 0.12
+
+    added .flatten to ndarray, ones, zeros, and eye to linalg
+
+Thu, 3 Oct 2019
+
+version 0.11
+
+    binary operators are now based on macros
diff --git a/tulip/shared/ulab/docs/ulab-compare.ipynb b/tulip/shared/ulab/docs/ulab-compare.ipynb
new file mode 100644
index 000000000..69fa762cb
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-compare.ipynb
@@ -0,0 +1,467 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:02:42.934528Z",
+     "start_time": "2021-01-08T13:02:42.720862Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:02:44.890094Z",
+     "start_time": "2021-01-08T13:02:44.878787Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:06:20.583308Z",
+     "start_time": "2021-01-08T13:06:20.525830Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Comparison of arrays"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## equal, not_equal\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.equal.html\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.not_equal.html\n",
+    "\n",
+    "In `micropython`, equality of arrays or scalars can be established by utilising the `==`, `!=`, `<`, `>`, `<=`, or `>=` binary operators. In `circuitpython`, `==` and `!=` will produce unexpected results. In order to avoid this discrepancy, and to maintain compatibility with `numpy`, `ulab` implements the `equal` and `not_equal` operators that return the same results, irrespective of the `python` implementation.\n",
+    "\n",
+    "These two functions take two `ndarray`s, or scalars as their arguments. No keyword arguments are implemented."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T14:22:13.990898Z",
+     "start_time": "2021-01-08T14:22:13.941896Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "b:  array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=float64)\n",
+      "\n",
+      "a == b:  array([True, False, False, False, False, False, False, False, False], dtype=bool)\n",
+      "a != b:  array([False, True, True, True, True, True, True, True, True], dtype=bool)\n",
+      "a == 2:  array([False, False, True, False, False, False, False, False, False], dtype=bool)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9))\n",
+    "b = np.zeros(9)\n",
+    "\n",
+    "print('a: ', a)\n",
+    "print('b: ', b)\n",
+    "print('\\na == b: ', np.equal(a, b))\n",
+    "print('a != b: ', np.not_equal(a, b))\n",
+    "\n",
+    "# comparison with scalars\n",
+    "print('a == 2: ', np.equal(a, 2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## minimum\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.minimum.html\n",
+    "\n",
+    "Returns the minimum of two arrays, or two scalars, or an array, and a scalar. If the arrays are of different `dtype`, the output is upcast as in [Binary operators](#Binary-operators). If both inputs are scalars, a scalar is returned. Only positional arguments are implemented.\n",
+    "\n",
+    "## maximum\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.maximum.html\n",
+    "\n",
+    "Returns the maximum of two arrays, or two scalars, or an array, and a scalar. If the arrays are of different `dtype`, the output is upcast as in [Binary operators](#Binary-operators). If both inputs are scalars, a scalar is returned. Only positional arguments are implemented."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:21:17.151280Z",
+     "start_time": "2021-01-08T13:21:17.123768Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "minimum of a, and b:\n",
+      "array([1.0, 2.0, 3.0, 2.0, 1.0], dtype=float64)\n",
+      "\n",
+      "maximum of a, and b:\n",
+      "array([5.0, 4.0, 3.0, 4.0, 5.0], dtype=float64)\n",
+      "\n",
+      "maximum of 1, and 5.5:\n",
+      "5.5\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)\n",
+    "b = np.array([5, 4, 3, 2, 1], dtype=np.float)\n",
+    "print('minimum of a, and b:')\n",
+    "print(np.minimum(a, b))\n",
+    "\n",
+    "print('\\nmaximum of a, and b:')\n",
+    "print(np.maximum(a, b))\n",
+    "\n",
+    "print('\\nmaximum of 1, and 5.5:')\n",
+    "print(np.maximum(1, 5.5))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## clip\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.clip.html\n",
+    "\n",
+    "Clips an array, i.e., values that are outside of an interval are clipped to the interval edges. The function is equivalent to `maximum(a_min, minimum(a, a_max))`. Broadcasting takes place exactly as in [minimum](#minimum). If the arrays are of different `dtype`, the output is upcast as in [Binary operators](#Binary-operators)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T13:22:14.147310Z",
+     "start_time": "2021-01-08T13:22:14.123961Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "clipped:\t array([3, 3, 3, 3, 4, 5, 6, 7, 7], dtype=uint8)\n",
+      "\n",
+      "a:\t\t array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "b:\t\t array([3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0], dtype=float64)\n",
+      "clipped:\t array([3.0, 3.0, 3.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8)\n",
+    "print('a:\\t\\t', a)\n",
+    "print('clipped:\\t', np.clip(a, 3, 7))\n",
+    "\n",
+    "b = 3 * np.ones(len(a), dtype=np.float)\n",
+    "print('\\na:\\t\\t', a)\n",
+    "print('b:\\t\\t', b)\n",
+    "print('clipped:\\t', np.clip(a, b, 7))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/ulab-convert.ipynb b/tulip/shared/ulab/docs/ulab-convert.ipynb
new file mode 100644
index 000000000..4ce30e5a3
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-convert.ipynb
@@ -0,0 +1,544 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-01T09:27:13.438054Z",
+     "start_time": "2020-05-01T09:27:13.191491Z"
+    }
+   },
+   "source": [
+    "# conf.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-09T06:27:15.118699Z",
+     "start_time": "2022-02-09T06:27:15.100980Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting manual/source/conf.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile manual/source/conf.py\n",
+    "# Configuration file for the Sphinx documentation builder.\n",
+    "#\n",
+    "# This file only contains a selection of the most common options. For a full\n",
+    "# list see the documentation:\n",
+    "# http://www.sphinx-doc.org/en/master/config\n",
+    "\n",
+    "# -- Path setup --------------------------------------------------------------\n",
+    "\n",
+    "# If extensions (or modules to document with autodoc) are in another directory,\n",
+    "# add these directories to sys.path here. If the directory is relative to the\n",
+    "# documentation root, use os.path.abspath to make it absolute, like shown here.\n",
+    "#\n",
+    "import os\n",
+    "# import sys\n",
+    "# sys.path.insert(0, os.path.abspath('.'))\n",
+    "\n",
+    "#import sphinx_rtd_theme\n",
+    "\n",
+    "from sphinx.transforms import SphinxTransform\n",
+    "from docutils import nodes\n",
+    "from sphinx import addnodes\n",
+    "\n",
+    "# -- Project information -----------------------------------------------------\n",
+    "\n",
+    "project = 'The ulab book'\n",
+    "copyright = '2019-2024, Zoltán Vörös and contributors'\n",
+    "author = 'Zoltán Vörös'\n",
+    "\n",
+    "# The full version, including alpha/beta/rc tags\n",
+    "release = '6.6.0'\n",
+    "\n",
+    "\n",
+    "# -- General configuration ---------------------------------------------------\n",
+    "\n",
+    "# Add any Sphinx extension module names here, as strings. They can be\n",
+    "# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n",
+    "# ones.\n",
+    "extensions = [\n",
+    "]\n",
+    "\n",
+    "# Add any paths that contain templates here, relative to this directory.\n",
+    "templates_path = ['_templates']\n",
+    "\n",
+    "# List of patterns, relative to source directory, that match files and\n",
+    "# directories to ignore when looking for source files.\n",
+    "# This pattern also affects html_static_path and html_extra_path.\n",
+    "exclude_patterns = []\n",
+    "\n",
+    "\n",
+    "# Add any paths that contain custom static files (such as style sheets) here,\n",
+    "# relative to this directory. They are copied after the builtin static files,\n",
+    "# so a file named \"default.css\" will overwrite the builtin \"default.css\".\n",
+    "html_static_path = ['_static']\n",
+    "\n",
+    "latex_maketitle = r'''\n",
+    "\\begin{titlepage}\n",
+    "\\begin{flushright}\n",
+    "\\Huge\\textbf{The $\\mu$lab book}\n",
+    "\\vskip 0.5em\n",
+    "\\LARGE\n",
+    "\\textbf{Release %s}\n",
+    "\\vskip 5em\n",
+    "\\huge\\textbf{Zoltán Vörös}\n",
+    "\\end{flushright}\n",
+    "\\begin{flushright}\n",
+    "\\LARGE\n",
+    "\\vskip 2em\n",
+    "with contributions by\n",
+    "\\vskip 2em\n",
+    "\\textbf{Roberto Colistete Jr.}\n",
+    "\\vskip 0.2em\n",
+    "\\textbf{Jeff Epler}\n",
+    "\\vskip 0.2em\n",
+    "\\textbf{Taku Fukada}\n",
+    "\\vskip 0.2em\n",
+    "\\textbf{Diego Elio Pettenò}\n",
+    "\\vskip 0.2em\n",
+    "\\textbf{Scott Shawcroft}\n",
+    "\\vskip 5em\n",
+    "\\today\n",
+    "\\end{flushright}\n",
+    "\\end{titlepage}\n",
+    "'''%release\n",
+    "\n",
+    "latex_elements = {\n",
+    "    'maketitle': latex_maketitle\n",
+    "}\n",
+    "\n",
+    "\n",
+    "master_doc = 'index'\n",
+    "\n",
+    "author=u'Zoltán Vörös'\n",
+    "copyright=author\n",
+    "language='en'\n",
+    "\n",
+    "latex_documents = [\n",
+    "(master_doc, 'the-ulab-book.tex', 'The $\\mu$lab book',\n",
+    "'Zoltán Vörös', 'manual'),\n",
+    "]\n",
+    "\n",
+    "# Read the docs theme\n",
+    "on_rtd = os.environ.get('READTHEDOCS', None) == 'True'\n",
+    "if not on_rtd:\n",
+    "    try:\n",
+    "        import sphinx_rtd_theme\n",
+    "        html_theme = 'sphinx_rtd_theme'\n",
+    "        html_theme_path = [sphinx_rtd_theme.get_html_theme_path(), '.']\n",
+    "    except ImportError:\n",
+    "        html_theme = 'default'\n",
+    "        html_theme_path = ['.']\n",
+    "else:\n",
+    "    html_theme_path = ['.']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-05-09T06:06:28.491158Z",
+     "start_time": "2021-05-09T06:06:28.477127Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting manual/source/index.rst\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile manual/source/index.rst\n",
+    "\n",
+    ".. ulab-manual documentation master file, created by\n",
+    "   sphinx-quickstart on Sat Oct 19 12:48:00 2019.\n",
+    "   You can adapt this file completely to your liking, but it should at least\n",
+    "   contain the root `toctree` directive.\n",
+    "\n",
+    "Welcome to the ulab book!\n",
+    "=======================================\n",
+    "\n",
+    ".. toctree::\n",
+    "   :maxdepth: 2\n",
+    "   :caption: Introduction\n",
+    "\n",
+    "   ulab-intro\n",
+    "\n",
+    ".. toctree::\n",
+    "   :maxdepth: 2\n",
+    "   :caption: User's guide:\n",
+    "\n",
+    "   ulab-ndarray\n",
+    "   numpy-functions\n",
+    "   numpy-universal\n",
+    "   numpy-fft\n",
+    "   numpy-linalg\n",
+    "   numpy-random\n",
+    "   scipy-linalg\n",
+    "   scipy-optimize\n",
+    "   scipy-signal\n",
+    "   scipy-special\n",
+    "   ulab-utils\n",
+    "   ulab-tricks\n",
+    "   ulab-programming\n",
+    "\n",
+    "Indices and tables\n",
+    "==================\n",
+    "\n",
+    "* :ref:`genindex`\n",
+    "* :ref:`modindex`\n",
+    "* :ref:`search`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Notebook conversion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-09T06:27:21.647179Z",
+     "start_time": "2022-02-09T06:27:20.019520Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import nbformat as nb\n",
+    "import nbformat.v4.nbbase as nb4\n",
+    "from nbconvert import RSTExporter\n",
+    "\n",
+    "from jinja2 import FileSystemLoader\n",
+    "rstexporter = RSTExporter(\n",
+    "    extra_loaders=[FileSystemLoader('./templates')],\n",
+    "    template_file = './templates/manual.tpl'\n",
+    ")\n",
+    "\n",
+    "def convert_notebook(fn):\n",
+    "    source = nb.read(fn+'.ipynb', nb.NO_CONVERT)\n",
+    "    notebook = nb4.new_notebook()\n",
+    "    notebook.cells = []\n",
+    "    append_cell = False\n",
+    "    for cell in source['cells']:\n",
+    "        if append_cell:\n",
+    "            notebook.cells.append(cell)\n",
+    "        else:\n",
+    "            if cell.cell_type == 'markdown':\n",
+    "                if cell.source == '__END_OF_DEFS__':\n",
+    "                    append_cell = True\n",
+    "                    \n",
+    "    (rst, resources) = rstexporter.from_notebook_node(notebook)\n",
+    "    with open('./manual/source/' + fn + '.rst', 'w') as fout:\n",
+    "        # it's a bit odd, but even an empty notebook is converted into a \"None\" string\n",
+    "        rst = rst.lstrip('None')\n",
+    "        fout.write(rst)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-09T06:27:42.024028Z",
+     "start_time": "2022-02-09T06:27:36.109093Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n",
+      "/home/v923z/anaconda3/lib/python3.11/site-packages/nbconvert/exporters/exporter.py:349: MissingIDFieldWarning: Code cell is missing an id field, this will become a hard error in future nbformat versions. You may want to use `normalize()` on your notebooks before validations (available since nbformat 5.1.4). Previous versions of nbformat are fixing this issue transparently, and will stop doing so in the future.\n",
+      "  _, nbc = validator.normalize(nbc)\n"
+     ]
+    }
+   ],
+   "source": [
+    "files = ['ulab-intro',\n",
+    "         'ulab-ndarray',\n",
+    "         'numpy-functions', \n",
+    "         'numpy-universal',\n",
+    "         'numpy-fft',\n",
+    "         'numpy-linalg',\n",
+    "        'numpy-random',\n",
+    "         'scipy-linalg',\n",
+    "         'scipy-optimize',\n",
+    "         'scipy-signal',\n",
+    "         'scipy-special',\n",
+    "         'ulab-utils',\n",
+    "         'ulab-tricks',\n",
+    "         'ulab-programming']\n",
+    "\n",
+    "for file in files:\n",
+    "    convert_notebook(file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Template"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-30T19:04:50.295563Z",
+     "start_time": "2020-10-30T19:04:50.227535Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting ./templates/manual.tpl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile ./templates/manual.tpl\n",
+    "\n",
+    "{%- extends 'display_priority.tpl' -%}\n",
+    "\n",
+    "\n",
+    "{% block in_prompt %}\n",
+    "{% endblock in_prompt %}\n",
+    "\n",
+    "{% block output_prompt %}\n",
+    "{% endblock output_prompt %}\n",
+    "\n",
+    "{% block input scoped%}\n",
+    "\n",
+    "{%- if cell.source.split('\\n')[0].startswith('%%micropython') -%}\n",
+    ".. code::\n",
+    "        \n",
+    "{{ '\\n'.join(['# code to be run in micropython'] + cell.source.strip().split('\\n')[1:]) | indent}}\n",
+    "\n",
+    "{%- else -%}\n",
+    ".. code::\n",
+    "\n",
+    "{{ '\\n'.join(['# code to be run in CPython\\n'] + cell.source.strip().split('\\n')) | indent}}\n",
+    "{%- endif -%}\n",
+    "{% endblock input %}\n",
+    "\n",
+    "{% block error %}\n",
+    "::\n",
+    "\n",
+    "{{ super() }}\n",
+    "{% endblock error %}\n",
+    "\n",
+    "{% block traceback_line %}\n",
+    "{{ line | indent | strip_ansi }}\n",
+    "{% endblock traceback_line %}\n",
+    "\n",
+    "{% block execute_result %}\n",
+    "{% block data_priority scoped %}\n",
+    "{{ super() }}\n",
+    "{% endblock %}\n",
+    "{% endblock execute_result %}\n",
+    "\n",
+    "{% block stream %}\n",
+    ".. parsed-literal::\n",
+    "\n",
+    "{{ output.text | indent }}\n",
+    "{% endblock stream %}\n",
+    "\n",
+    "{% block data_svg %}\n",
+    ".. image:: {{ output.metadata.filenames['image/svg+xml'] | urlencode }}\n",
+    "{% endblock data_svg %}\n",
+    "\n",
+    "{% block data_png %}\n",
+    ".. image:: {{ output.metadata.filenames['image/png'] | urlencode }}\n",
+    "{%- set width=output | get_metadata('width', 'image/png') -%}\n",
+    "{%- if width is not none %}\n",
+    "   :width: {{ width }}px\n",
+    "{%- endif %}\n",
+    "{%- set height=output | get_metadata('height', 'image/png') -%}\n",
+    "{%- if height is not none %}\n",
+    "   :height: {{ height }}px\n",
+    "{%- endif %}\n",
+    "{% endblock data_png %}\n",
+    "\n",
+    "{% block data_jpg %}\n",
+    ".. image:: {{ output.metadata.filenames['image/jpeg'] | urlencode }}\n",
+    "{%- set width=output | get_metadata('width', 'image/jpeg') -%}\n",
+    "{%- if width is not none %}\n",
+    "   :width: {{ width }}px\n",
+    "{%- endif %}\n",
+    "{%- set height=output | get_metadata('height', 'image/jpeg') -%}\n",
+    "{%- if height is not none %}\n",
+    "   :height: {{ height }}px\n",
+    "{%- endif %}\n",
+    "{% endblock data_jpg %}\n",
+    "\n",
+    "{% block data_markdown %}\n",
+    "{{ output.data['text/markdown'] | convert_pandoc(\"markdown\", \"rst\") }}\n",
+    "{% endblock data_markdown %}\n",
+    "\n",
+    "{% block data_latex %}\n",
+    ".. math::\n",
+    "\n",
+    "{{ output.data['text/latex'] | strip_dollars | indent }}\n",
+    "{% endblock data_latex %}\n",
+    "\n",
+    "{% block data_text scoped %}\n",
+    ".. parsed-literal::\n",
+    "\n",
+    "{{ output.data['text/plain'] | indent }}\n",
+    "{% endblock data_text %}\n",
+    "\n",
+    "{% block data_html scoped %}\n",
+    ".. raw:: html\n",
+    "\n",
+    "{{ output.data['text/html'] | indent }}\n",
+    "{% endblock data_html %}\n",
+    "\n",
+    "{% block markdowncell scoped %}\n",
+    "{{ cell.source | convert_pandoc(\"markdown\", \"rst\") }}\n",
+    "{% endblock markdowncell %}\n",
+    "\n",
+    "{%- block rawcell scoped -%}\n",
+    "{%- if cell.metadata.get('raw_mimetype', '').lower() in resources.get('raw_mimetypes', ['']) %}\n",
+    "{{cell.source}}\n",
+    "{% endif -%}\n",
+    "{%- endblock rawcell -%}\n",
+    "\n",
+    "{% block headingcell scoped %}\n",
+    "{{ (\"#\" * cell.level + cell.source) | replace('\\n', ' ') | convert_pandoc(\"markdown\", \"rst\") }}\n",
+    "{% endblock headingcell %}\n",
+    "\n",
+    "{% block unknowncell scoped %}\n",
+    "unknown type  {{cell.type}}\n",
+    "{% endblock unknowncell %}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.8.5 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "9e4ec6f642f986afcc9e252c165e44859a62defc5c697cae6f82c2943465ec10"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/ulab-intro.ipynb b/tulip/shared/ulab/docs/ulab-intro.ipynb
new file mode 100644
index 000000000..30266418b
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-intro.ipynb
@@ -0,0 +1,890 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:07:55.382930Z",
+     "start_time": "2021-01-08T12:07:46.895325Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T18:13:14.590799Z",
+     "start_time": "2022-01-07T18:13:14.585845Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T18:20:56.550047Z",
+     "start_time": "2022-01-07T18:20:56.527475Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    print('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Introduction"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Enter ulab\n",
+    "\n",
+    "`ulab` is a `numpy`-like module for `micropython` and its derivatives, meant to simplify and speed up common mathematical operations on arrays. `ulab` implements a small subset of `numpy` and `scipy`, as well as a number of functions manipulating byte arrays. The functions were chosen such that they might be useful in the context of a microcontroller. However, the project is a living one, and suggestions for new features are always welcome. \n",
+    "\n",
+    "This document discusses how you can use the library, starting from building your own firmware, through questions like what affects the firmware size, what are the trade-offs, and what are the most important differences to `numpy` and `scipy`, respectively. The document is organised as follows:\n",
+    "\n",
+    "The chapter after this one helps you with firmware customisation.\n",
+    "\n",
+    "The third chapter gives a very concise summary of the `ulab` functions and array methods. This chapter can be used as a quick reference.\n",
+    "\n",
+    "The chapters after that are an in-depth review of most functions. Here you can find usage examples, benchmarks, as well as a thorough discussion of such concepts as broadcasting, and views versus copies. \n",
+    "\n",
+    "The final chapter of this book can be regarded as the programming manual. The inner workings of `ulab` are dissected here, and you will also find hints as to how to implement your own `numpy`-compatible functions.\n",
+    "\n",
+    "\n",
+    "## Purpose\n",
+    "\n",
+    "Of course, the first question that one has to answer is, why on Earth one would need a fast math library on a microcontroller. After all, it is not expected that heavy number crunching is going to take place on bare metal. It is not meant to. On a PC, the main reason for writing fast code is the sheer amount of data that one wants to process. On a microcontroller, the data volume is probably small, but it might lead to catastrophic system failure, if these data are not processed in time, because the microcontroller is supposed to interact with the outside world in a timely fashion. In fact, this latter objective was the initiator of this project: I needed the Fourier transform of a signal coming from the ADC of the `pyboard`, and all available options were simply too slow. \n",
+    "\n",
+    "In addition to speed, another issue that one has to keep in mind when working with embedded systems is the amount of available RAM: I believe, everything here could be implemented in pure `python` with relatively little effort (in fact, there are a couple of `python`-only implementations of `numpy` functions out there), but the price we would have to pay for that is not only speed, but RAM, too. `python` code, if it is not frozen and compiled into the firmware, has to be compiled at runtime, which is not exactly a cheap process. On top of that, if numbers are stored in a list or tuple, which would be the high-level container, then they occupy 8 bytes, no matter, whether they are all smaller than 100, or larger than one hundred million. This is obviously a waste of resources in an environment, where resources are scarce. \n",
+    "\n",
+    "Finally, there is a reason for using `micropython` in the first place. Namely, that a microcontroller can be programmed in a very elegant, and *pythonic* way. But if it is so, why should we not extend this idea to other tasks and concepts that might come up in this context? If there was no other reason than this *elegance*, I would find that convincing enough.\n",
+    "\n",
+    "Based on the above-mentioned considerations, all functions in `ulab` are implemented in a way that \n",
+    "\n",
+    "1. conforms to `numpy` as much as possible\n",
+    "2. is as frugal with RAM as possible,\n",
+    "3. and yet, fast. Much faster than pure python. Think of speed-ups of 30-50!\n",
+    "\n",
+    "The main points of `ulab` are \n",
+    "\n",
+    "- compact, iterable and slicable containers of numerical data in one to four dimensions. These containers support all the relevant unary and binary operators (e.g., `len`, ==, +, *, etc.)\n",
+    "- vectorised computations on `micropython` iterables and numerical arrays (in `numpy`-speak, universal functions)\n",
+    "- computing statistical properties (mean, standard deviation etc.) on arrays\n",
+    "- basic linear algebra routines (matrix inversion, multiplication, reshaping, transposition, determinant, and eigenvalues, Cholesky decomposition and so on)\n",
+    "- polynomial fits to numerical data, and evaluation of polynomials\n",
+    "- fast Fourier transforms\n",
+    "- filtering of data (convolution and second-order filters)\n",
+    "- function minimisation, fitting, and numerical approximation routines\n",
+    "- interfacing between numerical data and peripheral hardware devices\n",
+    "\n",
+    "`ulab` implements close to a hundred functions and array methods. At the time of writing this manual (for version 4.0.0), the library adds approximately 120 kB of extra compiled code to the `micropython` (pyboard.v.1.17) firmware. However, if you are tight with flash space, you can easily shave tens of kB off the firmware. In fact, if only a small sub-set of functions are needed, you can get away with less than 10 kB of flash space. See the section on [customising ulab](#Customising-the-firmware).\n",
+    "\n",
+    "## Resources and legal matters\n",
+    "\n",
+    "The source code of the module can be found under https://github.com/v923z/micropython-ulab/tree/master/code, while the source of this user manual is under https://github.com/v923z/micropython-ulab/tree/master/docs.\n",
+    "\n",
+    "The MIT licence applies to all material. \n",
+    "\n",
+    "## Friendly request\n",
+    "\n",
+    "If you use `ulab`, and bump into a bug, or think that a particular function is missing, or its behaviour does not conform to `numpy`, please, raise a [ulab issue](https://github.com/v923z/micropython-ulab/issues) on github, so that the community can profit from your experiences. \n",
+    "\n",
+    "Even better, if you find the project to be useful, and think that it could be made better, faster, tighter, and shinier, please, consider contributing, and issue a pull request with the implementation of your improvements and new features. `ulab` can only become successful, if it offers what the community needs.\n",
+    "\n",
+    "These last comments apply to the documentation, too. If, in your opinion, the documentation is obscure, misleading, or not detailed enough, please, let us know, so that *we* can fix it.\n",
+    "\n",
+    "## Differences between micropython-ulab and circuitpython-ulab\n",
+    "\n",
+    "`ulab` was originally developed for `micropython`, but has since been integrated into a number of its flavours. Most of these are simply forks of `micropython` itself, with some additional functionality. One of the notable exceptions is `circuitpython`, which has slightly diverged at the core level, and this has some minor consequences. Some of these concern the C implementation details only, all of which have been sorted out with the generous and enthusiastic support of Jeff Epler from [Adafruit Industries](http://www.adafruit.com).\n",
+    "\n",
+    "There are, however, a couple of instances, where the two environments differ at the python level in how the class properties can be accessed. We will point out the differences and possible workarounds at the relevant places in this document."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Customising the firmware\n",
+    "\n",
+    "\n",
+    "As mentioned above, `ulab` has considerably grown since its conception, which also means that it might no longer fit on the microcontroller of your choice. There are, however, a couple of ways of customising the firmware, and thereby reducing its size. \n",
+    "\n",
+    "All `ulab` options are listed in a single header file, [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h), which contains pre-processor flags for each feature that can be fine-tuned. The first couple of lines of the file look like this\n",
+    "\n",
+    "```c\n",
+    "// The pre-processor constants in this file determine how ulab behaves:\n",
+    "//\n",
+    "// - how many dimensions ulab can handle\n",
+    "// - which functions are included in the compiled firmware\n",
+    "// - whether the python syntax is numpy-like, or modular\n",
+    "// - whether arrays can be sliced and iterated over\n",
+    "// - which binary/unary operators are supported\n",
+    "//\n",
+    "// A considerable amount of flash space can be saved by removing (setting\n",
+    "// the corresponding constants to 0) the unnecessary functions and features.\n",
+    "\n",
+    "// Values defined here can be overridden by your own config file as\n",
+    "// make -DULAB_CONFIG_FILE=\"my_ulab_config.h\"\n",
+    "#if defined(ULAB_CONFIG_FILE)\n",
+    "#include ULAB_CONFIG_FILE\n",
+    "#endif\n",
+    "\n",
+    "// Adds support for complex ndarrays\n",
+    "#ifndef ULAB_SUPPORTS_COMPLEX\n",
+    "#define ULAB_SUPPORTS_COMPLEX               (1)\n",
+    "#endif\n",
+    "\n",
+    "// Determines, whether scipy is defined in ulab. The sub-modules and functions\n",
+    "// of scipy have to be defined separately\n",
+    "#define ULAB_HAS_SCIPY                      (1)\n",
+    "\n",
+    "// The maximum number of dimensions the firmware should be able to support\n",
+    "// Possible values lie between 1, and 4, inclusive\n",
+    "#define ULAB_MAX_DIMS                       2\n",
+    "\n",
+    "// By setting this constant to 1, iteration over array dimensions will be implemented\n",
+    "// as a function (ndarray_rewind_array), instead of writing out the loops in macros\n",
+    "// This reduces firmware size at the expense of speed\n",
+    "#define ULAB_HAS_FUNCTION_ITERATOR          (0)\n",
+    "\n",
+    "// If NDARRAY_IS_ITERABLE is 1, the ndarray object defines its own iterator function\n",
+    "// This option saves approx. 250 bytes of flash space\n",
+    "#define NDARRAY_IS_ITERABLE                 (1)\n",
+    "\n",
+    "// Slicing can be switched off by setting this variable to 0\n",
+    "#define NDARRAY_IS_SLICEABLE                (1)\n",
+    "\n",
+    "// The default threshold for pretty printing. These variables can be overwritten\n",
+    "// at run-time via the set_printoptions() function\n",
+    "#define ULAB_HAS_PRINTOPTIONS               (1)\n",
+    "#define NDARRAY_PRINT_THRESHOLD             10\n",
+    "#define NDARRAY_PRINT_EDGEITEMS             3\n",
+    "\n",
+    "// determines, whether the dtype is an object, or simply a character\n",
+    "// the object implementation is numpythonic, but requires more space\n",
+    "#define ULAB_HAS_DTYPE_OBJECT               (0)\n",
+    "\n",
+    "// the ndarray binary operators\n",
+    "#define NDARRAY_HAS_BINARY_OPS              (1)\n",
+    "\n",
+    "// Firmware size can be reduced at the expense of speed by using function\n",
+    "// pointers in iterations. For each operator, the function pointer saves around\n",
+    "// 2 kB in the two-dimensional case, and around 4 kB in the four-dimensional case.\n",
+    "\n",
+    "#define NDARRAY_BINARY_USES_FUN_POINTER     (0)\n",
+    "\n",
+    "#define NDARRAY_HAS_BINARY_OP_ADD           (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_EQUAL         (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_LESS          (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_LESS_EQUAL    (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_MORE          (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_MORE_EQUAL    (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_MULTIPLY      (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_NOT_EQUAL     (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_POWER         (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_SUBTRACT      (1)\n",
+    "#define NDARRAY_HAS_BINARY_OP_TRUE_DIVIDE   (1)\n",
+    "...     \n",
+    "```\n",
+    "\n",
+    "The meaning of flags with names `_HAS_` should be obvious, so we will just explain the other options. \n",
+    "\n",
+    "To see how much you can gain by un-setting the functions that you do not need, here are some pointers. In four dimensions, including all functions adds around 120 kB to the `micropython` firmware. On the other hand, if you are interested in Fourier transforms only, and strip everything else, you get away with less than 5 kB extra. \n",
+    "\n",
+    "## Compatibility with numpy\n",
+    "\n",
+    "The functions implemented in `ulab` are organised in four sub-modules at the C level, namely, `numpy`, `scipy`, `utils`, and `user`. This modularity is elevated to `python`, meaning that in order to use functions that are part of `numpy`, you have to import `numpy` as\n",
+    "\n",
+    "```python\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "x = np.array([4, 5, 6])\n",
+    "p = np.array([1, 2, 3])\n",
+    "np.polyval(p, x)\n",
+    "```\n",
+    "\n",
+    "There are a couple of exceptions to this rule, namely `fft`, `linalg`, and `random`, which are sub-modules even in `numpy`, thus you have to write them out as \n",
+    "\n",
+    "```python\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "A = np.array([1, 2, 3, 4]).reshape((2, 2))\n",
+    "np.linalg.trace(A)\n",
+    "```\n",
+    "\n",
+    "Some of the functions in `ulab` are re-implementations of `scipy` functions, and they are to be imported as \n",
+    "\n",
+    "```python\n",
+    "from ulab import numpy as np\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "\n",
+    "x = np.array([1, 2, 3])\n",
+    "spy.special.erf(x)\n",
+    "```\n",
+    "\n",
+    "`numpy`-compatibility has an enormous benefit: namely, by `try`ing to `import`, we can guarantee that the same, unmodified code runs in `CPython`, as in `micropython`. The following snippet is platform-independent, thus, the `python` code can be tested and debugged on a computer before loading it onto the microcontroller.\n",
+    "\n",
+    "```python\n",
+    "\n",
+    "try:\n",
+    "    from ulab import numpy as np\n",
+    "    from ulab import scipy as spy\n",
+    "except ImportError:\n",
+    "    import numpy as np\n",
+    "    import scipy as spy\n",
+    "    \n",
+    "x = np.array([1, 2, 3])\n",
+    "spy.special.erf(x)    \n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The impact of dimensionality\n",
+    "\n",
+    "### Reducing the number of dimensions\n",
+    "\n",
+    "`ulab` supports tensors of rank four, but this is expensive in terms of flash: with all available functions and options, the library adds around 100 kB to the firmware. However, if such high dimensions are not required, significant reductions in size can be achieved by changing the value of \n",
+    "\n",
+    "```c\n",
+    "#define ULAB_MAX_DIMS                   2\n",
+    "```\n",
+    "\n",
+    "Two dimensions cost a bit more than half of four, while you can get away with around 20 kB of flash in one dimension, because all those functions that don't make sense (e.g., matrix inversion, eigenvalues etc.) are automatically stripped from the firmware.\n",
+    "\n",
+    "### Using the function iterator\n",
+    "\n",
+    "In higher dimensions, the firmware size increases, because each dimension (axis) adds another level of nested loops. An example of this is the macro of the binary operator in three dimensions\n",
+    "\n",
+    "```c\n",
+    "#define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR)\n",
+    "    type_out *array = (type_out *)results->array;\n",
+    "    size_t j = 0;\n",
+    "    do {\n",
+    "        size_t k = 0;\n",
+    "        do {\n",
+    "            size_t l = 0;\n",
+    "            do {\n",
+    "                *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\n",
+    "                (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\n",
+    "                (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\n",
+    "                l++;\n",
+    "            } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\n",
+    "            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\n",
+    "            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\n",
+    "            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS-1];\n",
+    "            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\n",
+    "            k++;\n",
+    "        } while(k < (results)->shape[ULAB_MAX_DIMS - 2]);\n",
+    "        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];\n",
+    "        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\n",
+    "        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * results->shape[ULAB_MAX_DIMS-2];\n",
+    "        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\n",
+    "        j++;\n",
+    "    } while(j < (results)->shape[ULAB_MAX_DIMS - 3]);\n",
+    "```\n",
+    "\n",
+    "In order to reduce firmware size, it *might* make sense in higher dimensions to make use of the function iterator by setting the \n",
+    "\n",
+    "```c\n",
+    "#define ULAB_HAS_FUNCTION_ITERATOR      (1)\n",
+    "```\n",
+    "\n",
+    "constant to 1. This allows the compiler to call the `ndarray_rewind_array` function, so that it doesn't have to unwrap the loops for `k`, and `j`. Instead of the macro above, we now have \n",
+    "\n",
+    "```c\n",
+    "#define BINARY_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides, OPERATOR)\n",
+    "    type_out *array = (type_out *)(results)->array;\n",
+    "    size_t *lcoords = ndarray_new_coords((results)->ndim);\n",
+    "    size_t *rcoords = ndarray_new_coords((results)->ndim);\n",
+    "    for(size_t i=0; i < (results)->len/(results)->shape[ULAB_MAX_DIMS -1]; i++) {\n",
+    "        size_t l = 0;\n",
+    "        do {\n",
+    "            *array++ = *((type_left *)(larray)) OPERATOR *((type_right *)(rarray));\n",
+    "            (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\n",
+    "            (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\n",
+    "            l++;\n",
+    "        } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\n",
+    "        ndarray_rewind_array((results)->ndim, larray, (results)->shape, lstrides, lcoords);\n",
+    "        ndarray_rewind_array((results)->ndim, rarray, (results)->shape, rstrides, rcoords);\n",
+    "    } while(0)\n",
+    "```\n",
+    "\n",
+    "Since the `ndarray_rewind_array` function is implemented only once, a lot of space can be saved. Obviously, function calls cost time, thus such trade-offs must be evaluated for each application. The gain also depends on which functions and features you include. Operators and functions that involve two arrays are expensive, because at the C level, the number of cases that must be handled scales with the square of the number of data types. As an example, the innocent-looking expression\n",
+    "\n",
+    "```python\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3])\n",
+    "b = np.array([4, 5, 6])\n",
+    "\n",
+    "c = a + b\n",
+    "```\n",
+    "requires 25 loops in C, because the `dtypes` of both `a`, and `b` can assume 5 different values, and the addition has to be resolved for all possible cases. Hint: each binary operator costs between 3 and 4 kB in two dimensions."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The ulab version string\n",
+    "\n",
+    "As is customary with `python` packages, information on the package version can be found by querying the `__version__` string. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T06:25:27.328061Z",
+     "start_time": "2021-01-12T06:25:27.308199Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "you are running ulab version 2.1.0-2D\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab\n",
+    "\n",
+    "print('you are running ulab version', ulab.__version__)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The first three numbers indicate the major, minor, and sub-minor versions of `ulab` (defined by the `ULAB_VERSION` constant in [ulab.c](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.c)). We usually change the minor version, whenever a new function is added to the code, and the sub-minor version will be incremented, if a bug fix is implemented. \n",
+    "\n",
+    "`2D` tells us that the particular firmware supports tensors of rank 2 (defined by `ULAB_MAX_DIMS` in [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h)). \n",
+    "\n",
+    "If you find a bug, please, include the version string in your report!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Should you need the numerical value of `ULAB_MAX_DIMS`, you can get it from the version string in the following way:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:00:00.616473Z",
+     "start_time": "2021-01-13T06:00:00.602787Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "version string:  2.1.0-2D\n",
+      "version dimensions:  2D\n",
+      "numerical value of dimensions:  2\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab\n",
+    "\n",
+    "version = ulab.__version__\n",
+    "version_dims = version.split('-')[1]\n",
+    "version_num = int(version_dims.replace('D', ''))\n",
+    "\n",
+    "print('version string: ', version)\n",
+    "print('version dimensions: ', version_dims)\n",
+    "print('numerical value of dimensions: ', version_num)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### ulab with complex arrays\n",
+    "\n",
+    "If the firmware supports complex arrays, `-c` is appended to the version string as can be seen below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T18:21:04.079894Z",
+     "start_time": "2022-01-07T18:21:04.058855Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "version string:  4.0.0-2D-c\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab\n",
+    "\n",
+    "version = ulab.__version__\n",
+    "\n",
+    "print('version string: ', version)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Finding out what your firmware supports\n",
+    "\n",
+    "`ulab` implements a number of array operators and functions, but this does not mean that all of these functions and methods are actually compiled into the firmware. You can fine-tune your firmware by setting/unsetting any of the `_HAS_` constants in [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h). \n",
+    "\n",
+    "### Functions included  in the firmware\n",
+    "\n",
+    "The version string will not tell you everything about your firmware, because the supported functions and sub-modules can still arbitrarily be included or excluded. One way of finding out what is compiled into the firmware is calling `dir` with `ulab` as its argument."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:47:37.963507Z",
+     "start_time": "2021-01-08T12:47:37.936641Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "===== constants, functions, and modules of numpy =====\n",
+      "\n",
+      " ['__class__', '__name__', 'bool', 'sort', 'sum', 'acos', 'acosh', 'arange', 'arctan2', 'argmax', 'argmin', 'argsort', 'around', 'array', 'asin', 'asinh', 'atan', 'atanh', 'ceil', 'clip', 'concatenate', 'convolve', 'cos', 'cosh', 'cross', 'degrees', 'diag', 'diff', 'e', 'equal', 'exp', 'expm1', 'eye', 'fft', 'flip', 'float', 'floor', 'frombuffer', 'full', 'get_printoptions', 'inf', 'int16', 'int8', 'interp', 'linalg', 'linspace', 'log', 'log10', 'log2', 'logspace', 'max', 'maximum', 'mean', 'median', 'min', 'minimum', 'nan', 'ndinfo', 'not_equal', 'ones', 'pi', 'polyfit', 'polyval', 'radians', 'roll', 'set_printoptions', 'sin', 'sinh', 'sqrt', 'std', 'tan', 'tanh', 'trapz', 'uint16', 'uint8', 'vectorize', 'zeros']\n",
+      "\n",
+      "functions included in the fft module:\n",
+      " ['__class__', '__name__', 'fft', 'ifft']\n",
+      "\n",
+      "functions included in the linalg module:\n",
+      " ['__class__', '__name__', 'cholesky', 'det', 'dot', 'eig', 'inv', 'norm', 'trace']\n",
+      "\n",
+      "\n",
+      "===== modules of scipy =====\n",
+      "\n",
+      " ['__class__', '__name__', 'optimize', 'signal', 'special']\n",
+      "\n",
+      "functions included in the optimize module:\n",
+      " ['__class__', '__name__', 'bisect', 'fmin', 'newton']\n",
+      "\n",
+      "functions included in the signal module:\n",
+      " ['__class__', '__name__', 'sosfilt', 'spectrogram']\n",
+      "\n",
+      "functions included in the special module:\n",
+      " ['__class__', '__name__', 'erf', 'erfc', 'gamma', 'gammaln']\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import scipy as spy\n",
+    "\n",
+    "\n",
+    "print('===== constants, functions, and modules of numpy =====\\n\\n', dir(np))\n",
+    "\n",
+    "# since fft and linalg are sub-modules, print them separately\n",
+    "print('\\nfunctions included in the fft module:\\n', dir(np.fft))\n",
+    "print('\\nfunctions included in the linalg module:\\n', dir(np.linalg))\n",
+    "\n",
+    "print('\\n\\n===== modules of scipy =====\\n\\n', dir(spy))\n",
+    "print('\\nfunctions included in the optimize module:\\n', dir(spy.optimize))\n",
+    "print('\\nfunctions included in the signal module:\\n', dir(spy.signal))\n",
+    "print('\\nfunctions included in the special module:\\n', dir(spy.special))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Methods included in the firmware\n",
+    "\n",
+    "The `dir` function applied to the module or its sub-modules gives information on what the module and sub-modules include, but is not enough to find out which methods the `ndarray` class supports. We can list the methods by calling `dir` with the `array` object itself:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:48:17.927709Z",
+     "start_time": "2021-01-08T12:48:17.903132Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['__class__', '__name__', 'copy', 'sort', '__bases__', '__dict__', 'dtype', 'flatten', 'itemsize', 'reshape', 'shape', 'size', 'strides', 'tobytes', 'transpose']\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print(dir(np.array))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Operators included in the firmware\n",
+    "\n",
+    "A list of operators cannot be generated as shown above. If you really need to find out, whether, e.g., the `**` operator is supported by the firmware, you have to `try` it:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-08T12:49:59.902054Z",
+     "start_time": "2021-01-08T12:49:59.875760Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "operator is not supported:  unsupported types for __pow__: 'ndarray', 'ndarray'\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3])\n",
+    "b = np.array([4, 5, 6])\n",
+    "\n",
+    "try:\n",
+    "    print(a ** b)\n",
+    "except Exception as e:\n",
+    "    print('operator is not supported: ', e)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The exception above would be raised, if the firmware was compiled with the \n",
+    "\n",
+    "```c\n",
+    "#define NDARRAY_HAS_BINARY_OP_POWER         (0)\n",
+    "```\n",
+    "\n",
+    "definition."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/ulab-ndarray.ipynb b/tulip/shared/ulab/docs/ulab-ndarray.ipynb
new file mode 100644
index 000000000..8d67ed9b5
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-ndarray.ipynb
@@ -0,0 +1,3804 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:20:20.064769Z",
+     "start_time": "2021-01-12T16:20:19.787429Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-09T06:10:18.391925Z",
+     "start_time": "2022-02-09T06:10:18.388146Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-09T06:10:19.000982Z",
+     "start_time": "2022-02-09T06:10:18.979322Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ndarray, the base class\n",
+    "\n",
+    "The `ndarray` is the underlying container of numerical data. It can be thought of as micropython's own `array` object, but has a great number of extra features starting with how it can be initialised, which operations can be done on it, and which functions can accept it as an argument. One important property of an `ndarray` is that it is also a proper `micropython` iterable.\n",
+    "\n",
+    "The `ndarray` consists of a short header, and a pointer that holds the data. The pointer always points to a contiguous segment in memory (`numpy` is more flexible in this regard), and the header tells the interpreter, how the data from this segment is to be read out, and what the bytes mean. Some operations, e.g., `reshape`, are fast, because they do not operate on the data, they work on the header, and therefore, only a couple of bytes are manipulated, even if there are a million data entries. A more detailed exposition of how operators are implemented can be found in the section titled [Programming ulab](#Programming_ula).\n",
+    "\n",
+    "Since the `ndarray` is a binary container, it is also compact, meaning that it takes only a couple of bytes of extra RAM in addition to what is required for storing the numbers themselves. `ndarray`s are also type-aware, i.e., one can save RAM by specifying a data type, and using the smallest reasonable one. Five such types are defined, namely `uint8`, `int8`, which occupy a single byte of memory per datum, `uint16`, and `int16`, which occupy two bytes per datum, and `float`, which occupies four or eight bytes per datum. The precision/size of the `float` type depends on the definition of `mp_float_t`. Some platforms, e.g., the PYBD, implement `double`s, but some, e.g., the pyboard v1.1, do not. You can find out, what type of float your particular platform implements by looking at the output of the [.itemsize](#.itemsize) class property, or looking at the exact `dtype`, when you print out an array.\n",
+    "\n",
+    "In addition to the five above-mentioned numerical types, it is also possible to define Boolean arrays, which can be used in the indexing of data. However, Boolean arrays are really nothing but arrays of type `uint8` with an extra flag. \n",
+    "\n",
+    "On the following pages, we will see how one can work with `ndarray`s. Those familiar with `numpy` should find that the nomenclature and naming conventions of `numpy` are adhered to as closely as possible. We will point out the few differences, where necessary.\n",
+    "\n",
+    "For the sake of comparison, in addition to the `ulab` code snippets, sometimes the equivalent `numpy` code is also presented. You can find out, where the snippet is supposed to run by looking at its first line, the header of the code block."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The ndinfo function\n",
+    "\n",
+    "A concise summary of a couple of the properties of an `ndarray` can be printed out by calling the `ndinfo` \n",
+    "function. In addition to finding out what the *shape* and *strides* of the array are, we also get the `itemsize`, as well as the type.  An interesting piece of information is the *data pointer*, which tells us, what the address of the data segment of the `ndarray` is. We will see the significance of this in the section [Slicing and indexing](#Slicing-and-indexing).\n",
+    "\n",
+    "Note that this function simply prints some information, but does not return anything. If you need to get a handle of the data contained in the printout, you should call the dedicated `shape`, `strides`, or `itemsize` functions directly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:24:08.710325Z",
+     "start_time": "2021-01-12T16:24:08.699287Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "class: ndarray\n",
+      "shape: (5,)\n",
+      "strides: (8,)\n",
+      "itemsize: 8\n",
+      "data pointer: 0x7f8f6fa2e240\n",
+      "type: float\n",
+      "\n",
+      "\n",
+      "class: ndarray\n",
+      "shape: (5, 5)\n",
+      "strides: (5, 1)\n",
+      "itemsize: 1\n",
+      "data pointer: 0x7f8f6fa2e2e0\n",
+      "type: uint8\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(5), dtype=np.float)\n",
+    "b = np.array(range(25), dtype=np.uint8).reshape((5, 5))\n",
+    "np.ndinfo(a)\n",
+    "print('\\n')\n",
+    "np.ndinfo(b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialising an array\n",
+    "\n",
+    "A new array can be created by passing either a standard micropython iterable, or another `ndarray` into the constructor."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Initialising by passing iterables\n",
+    "\n",
+    "If the iterable is one-dimensional, i.e., one whose elements are numbers, then a row vector will be created and returned. If the iterable is two-dimensional, i.e., one whose elements are again iterables, a matrix will be created. If the lengths of the iterables are not consistent, a `ValueError` will be raised. Iterables of different types can be mixed in the initialisation function. \n",
+    "\n",
+    "If the `dtype` keyword with the possible `uint8/int8/uint16/int16/float` values is supplied, the new `ndarray` will have that type, otherwise, it assumes `float` as default. In addition, if `ULAB_SUPPORTS_COMPLEX` is set to 1 in [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h), the `dtype` can also take on the value of `complex`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:24:21.952689Z",
+     "start_time": "2021-01-12T16:24:21.938231Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t [1, 2, 3, 4, 5, 6, 7, 8]\n",
+      "b:\t array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "\n",
+      "c:\t array([[0, 1, 2, 3, 4],\n",
+      "       [20, 21, 22, 23, 24],\n",
+      "       [44, 55, 66, 77, 88]], dtype=uint8)\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/dev/shm/micropython.py\", line 15, in <module>\n",
+      "ValueError: iterables are not of the same length\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = [1, 2, 3, 4, 5, 6, 7, 8]\n",
+    "b = np.array(a)\n",
+    "\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"b:\\t\", b)\n",
+    "\n",
+    "# a two-dimensional array with mixed-type initialisers\n",
+    "c = np.array([range(5), range(20, 25, 1), [44, 55, 66, 77, 88]], dtype=np.uint8)\n",
+    "print(\"\\nc:\\t\", c)\n",
+    "\n",
+    "# and now we throw an exception\n",
+    "d = np.array([range(5), range(10), [44, 55, 66, 77, 88]], dtype=np.uint8)\n",
+    "print(\"\\nd:\\t\", d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Initialising by passing arrays\n",
+    "\n",
+    "An `ndarray` can be initialised by supplying another array. This statement is almost trivial, since `ndarray`s are iterables themselves, though it should be pointed out that initialising through arrays is a bit faster. This statement is especially true, if the `dtype`s of the source and output arrays are the same, because then the contents can simply be copied without further ado. While type conversion is also possible, it will always be slower than straight copying."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:24:33.050654Z",
+     "start_time": "2021-01-12T16:24:33.039754Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t [1, 2, 3, 4, 5, 6, 7, 8]\n",
+      "\n",
+      "b:\t array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "\n",
+      "c:\t array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float64)\n",
+      "\n",
+      "d:\t array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = [1, 2, 3, 4, 5, 6, 7, 8]\n",
+    "b = np.array(a)\n",
+    "c = np.array(b)\n",
+    "d = np.array(b, dtype=np.uint8)\n",
+    "\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"\\nb:\\t\", b)\n",
+    "print(\"\\nc:\\t\", c)\n",
+    "print(\"\\nd:\\t\", d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that the default type of the `ndarray` is `float`. Hence, if the array is initialised from another array, type conversion will always take place, except, when the output type is specifically supplied. I.e., "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:24:39.722844Z",
+     "start_time": "2021-01-12T16:24:39.709963Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([0, 1, 2, 3, 4], dtype=uint8)\n",
+      "\n",
+      "b:\t array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(5), dtype=np.uint8)\n",
+    "b = np.array(a)\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"\\nb:\\t\", b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "will iterate over the elements in `a`, since in the assignment `b = np.array(a)`, no output type was given, therefore, `float` was assumed. On the other hand, "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:25:06.597051Z",
+     "start_time": "2021-01-12T16:25:06.585511Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([0, 1, 2, 3, 4], dtype=uint8)\n",
+      "\n",
+      "b:\t array([0, 1, 2, 3, 4], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(5), dtype=np.uint8)\n",
+    "b = np.array(a, dtype=np.uint8)\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"\\nb:\\t\", b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "will simply copy the content of `a` into `b` without any iteration, and will, therefore, be faster. Keep this in mind, whenever the output type, or performance is important."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Array initialisation functions\n",
+    "\n",
+    "There are eleven functions that can be used for initialising an array. Starred functions accept `complex` as the value of the `dtype`, if the firmware was compiled with complex support.\n",
+    "\n",
+    "1. [numpy.arange](#arange)\n",
+    "1. [numpy.concatenate](#concatenate)\n",
+    "1. [numpy.diag*](#diag)\n",
+    "1. [numpy.empty*](#empty)\n",
+    "1. [numpy.eye*](#eye)\n",
+    "1. [numpy.frombuffer](#frombuffer)\n",
+    "1. [numpy.full*](#full)\n",
+    "1. [numpy.linspace*](#linspace)\n",
+    "1. [numpy.logspace](#logspace)\n",
+    "1. [numpy.ones*](#ones)\n",
+    "1. [numpy.zeros*](#zeros)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## arange\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.arange.html\n",
+    "\n",
+    "The function returns a one-dimensional array with evenly spaced values. Takes 3 positional arguments (two are optional), and the `dtype` keyword argument. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:26:03.795728Z",
+     "start_time": "2021-01-12T16:26:03.782352Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int16)\n",
+      "array([2, 3, 4, 5, 6, 7, 8, 9], dtype=int16)\n",
+      "array([2, 5, 8], dtype=int16)\n",
+      "array([2.0, 5.0, 8.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print(np.arange(10))\n",
+    "print(np.arange(2, 10))\n",
+    "print(np.arange(2, 10, 3))\n",
+    "print(np.arange(2, 10, 3, dtype=np.float))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## concatenate\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.concatenate.html\n",
+    "\n",
+    "The function joins a sequence of arrays, if they are compatible in shape, i.e., if all shapes except the one along the joining axis are equal. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:26:37.145965Z",
+     "start_time": "2021-01-12T16:26:37.134350Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[0, 1, 2, 3, 4],\n",
+      "       [5, 6, 7, 8, 9],\n",
+      "       [10, 11, 12, 13, 14],\n",
+      "       [15, 16, 17, 18, 19],\n",
+      "       [20, 21, 22, 23, 24],\n",
+      "       [0, 1, 2, 3, 4],\n",
+      "       [5, 6, 7, 8, 9],\n",
+      "       [10, 11, 12, 13, 14]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(25), dtype=np.uint8).reshape((5, 5))\n",
+    "b = np.array(range(15), dtype=np.uint8).reshape((3, 5))\n",
+    "\n",
+    "c = np.concatenate((a, b), axis=0)\n",
+    "print(c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**WARNING**: `numpy` accepts arbitrary `dtype`s in the sequence of arrays, in `ulab` the `dtype`s must be identical. If you want to concatenate different types, you have to convert all arrays to the same type first. Here `b` is of `float` type, so it cannot directly be concatenated to `a`. However, if we cast the `dtype` of `b`, the concatenation works:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:26:56.120820Z",
+     "start_time": "2021-01-12T16:26:56.102365Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([[0, 1, 2, 3, 4],\n",
+      "       [5, 6, 7, 8, 9],\n",
+      "       [10, 11, 12, 13, 14],\n",
+      "       [15, 16, 17, 18, 19],\n",
+      "       [20, 21, 22, 23, 24]], dtype=uint8)\n",
+      "====================\n",
+      "d:  array([[1, 2, 3],\n",
+      "       [4, 5, 6],\n",
+      "       [7, 8, 9],\n",
+      "       [10, 11, 12],\n",
+      "       [13, 14, 15]], dtype=uint8)\n",
+      "====================\n",
+      "c:  array([[1, 2, 3, 0, 1, 2, 3, 4],\n",
+      "       [4, 5, 6, 5, 6, 7, 8, 9],\n",
+      "       [7, 8, 9, 10, 11, 12, 13, 14],\n",
+      "       [10, 11, 12, 15, 16, 17, 18, 19],\n",
+      "       [13, 14, 15, 20, 21, 22, 23, 24]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(25), dtype=np.uint8).reshape((5, 5))\n",
+    "b = np.array(range(15), dtype=np.float).reshape((5, 3))\n",
+    "d = np.array(b+1, dtype=np.uint8)\n",
+    "print('a: ', a)\n",
+    "print('='*20 + '\\nd: ', d)\n",
+    "c = np.concatenate((d, a), axis=1)\n",
+    "print('='*20 + '\\nc: ', c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## diag\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.diag.html\n",
+    "\n",
+    "Extract a diagonal, or construct a diagonal array.\n",
+    "\n",
+    "The function takes a positional argument, an `ndarray`, or any `micropython` iterable, and an optional keyword argument, a shift, with a default value of 0. If the first argument is a two-dimensional array (or a two-dimensional iterable, e.g., a list of lists), the function returns a one-dimensional array containing the diagonal entries. The diagonal can be shifted by an amount given in the second argument. If the shift is larger than the length of the corresponding axis, an empty array is returned.\n",
+    "\n",
+    "If the first argument is a one-dimensional array, the function returns a two-dimensional square tensor with its diagonal elements given by the first argument. Again, the diagonal can be shifted by an amount given by the keyword argument.\n",
+    "\n",
+    "The `diag` function can accept a complex array, if the firmware was compiled with complex support."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-09T06:24:38.290495Z",
+     "start_time": "2022-02-09T06:24:38.273075Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[1, 0, 0],\n",
+      "       [0, 2, 0],\n",
+      "       [0, 0, 3]], dtype=uint8)\n",
+      "\n",
+      "diagonal shifted by 2\n",
+      "array([[0, 0, 1, 0, 0],\n",
+      "       [0, 0, 0, 2, 0],\n",
+      "       [0, 0, 0, 0, 3],\n",
+      "       [0, 0, 0, 0, 0],\n",
+      "       [0, 0, 0, 0, 0]], dtype=uint8)\n",
+      "\n",
+      "diagonal shifted by -2\n",
+      "array([[0, 0, 0, 0, 0],\n",
+      "       [0, 0, 0, 0, 0],\n",
+      "       [1, 0, 0, 0, 0],\n",
+      "       [0, 2, 0, 0, 0],\n",
+      "       [0, 0, 3, 0, 0]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3], dtype=np.uint8)\n",
+    "print(np.diag(a))\n",
+    "\n",
+    "print('\\ndiagonal shifted by 2')\n",
+    "print(np.diag(a, k=2))\n",
+    "\n",
+    "print('\\ndiagonal shifted by -2')\n",
+    "print(np.diag(a, k=-2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-02-09T06:26:39.213828Z",
+     "start_time": "2022-02-09T06:26:39.199294Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[0, 1, 2, 3],\n",
+      "       [4, 5, 6, 7],\n",
+      "       [8, 9, 10, 11],\n",
+      "       [12, 13, 14, 15]], dtype=int16)\n",
+      "\n",
+      "diagonal of a:\n",
+      "array([0, 5, 10, 15], dtype=int16)\n",
+      "\n",
+      "diagonal of a:\n",
+      "array([0, 5, 10, 15], dtype=int16)\n",
+      "\n",
+      "diagonal of a, shifted by 2\n",
+      "array([2, 7], dtype=int16)\n",
+      "\n",
+      "diagonal of a, shifted by 5\n",
+      "array([], dtype=int16)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.arange(16).reshape((4, 4))\n",
+    "print(a)\n",
+    "print('\\ndiagonal of a:')\n",
+    "print(np.diag(a))\n",
+    "\n",
+    "print('\\ndiagonal of a:')\n",
+    "print(np.diag(a))\n",
+    "\n",
+    "print('\\ndiagonal of a, shifted by 2')\n",
+    "print(np.diag(a, k=2))\n",
+    "\n",
+    "print('\\ndiagonal of a, shifted by 5')\n",
+    "print(np.diag(a, k=5))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## empty\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.empty.html\n",
+    "\n",
+    "`empty` is simply an alias for `zeros`, i.e., as opposed to `numpy`, the entries of the tensor will be initialised to zero. \n",
+    "\n",
+    "The `empty` function can accept complex as the value of the dtype, if the firmware was compiled with complex support."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## eye\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.eye.html\n",
+    "\n",
+    "Another special array method is the `eye` function, whose call signature is \n",
+    "\n",
+    "```python\n",
+    "eye(N, M, k=0, dtype=float)\n",
+    "```\n",
+    "where `N` and `M` specify the dimensions of the matrix (if only `N` is supplied, then we get a square matrix, otherwise one with `N` rows, and `M` columns), and `k` is the shift of the ones (the main diagonal corresponds to `k=0`). Here are a couple of examples.\n",
+    "\n",
+    "The `eye` function can accept `complex` as the value of the `dtype`, if the firmware was compiled with complex support."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### With a single argument"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:27:08.533394Z",
+     "start_time": "2021-01-12T16:27:08.518940Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[1.0, 0.0, 0.0, 0.0, 0.0],\n",
+      "       [0.0, 1.0, 0.0, 0.0, 0.0],\n",
+      "       [0.0, 0.0, 1.0, 0.0, 0.0],\n",
+      "       [0.0, 0.0, 0.0, 1.0, 0.0],\n",
+      "       [0.0, 0.0, 0.0, 0.0, 1.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print(np.eye(5))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Specifying the dimensions of the matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:27:34.075468Z",
+     "start_time": "2021-01-12T16:27:34.064137Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[0, 0, 0, 0, 0, 0],\n",
+      "       [1, 0, 0, 0, 0, 0],\n",
+      "       [0, 1, 0, 0, 0, 0],\n",
+      "       [0, 0, 1, 0, 0, 0]], dtype=int16)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print(np.eye(4, M=6, k=-1, dtype=np.int16))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:27:42.492135Z",
+     "start_time": "2021-01-12T16:27:42.477684Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[1, 0, 0, 0, 0, 0],\n",
+      "       [0, 1, 0, 0, 0, 0],\n",
+      "       [0, 0, 1, 0, 0, 0],\n",
+      "       [0, 0, 0, 1, 0, 0]], dtype=int8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print(np.eye(4, M=6, dtype=np.int8))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## frombuffer\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.frombuffer.html\n",
+    "\n",
+    "The function interprets a contiguous buffer as a one-dimensional array, and thus can be used for piping buffered data directly into an array. This method of analysing, e.g., ADC data is much more efficient than passing the ADC buffer into the `array` constructor, because `frombuffer` simply creates the `ndarray` header and blindly copies the memory segment, without inspecting the underlying data. \n",
+    "\n",
+    "The function takes a single positional argument, the buffer, and three keyword arguments. These are the `dtype` with a default value of `float`, the `offset`, with a default of 0, and the `count`, with a default of -1, meaning that all data are taken in."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-15T07:01:35.320458Z",
+     "start_time": "2021-01-15T07:01:35.307407Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "buffer:  b'\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08'\n",
+      "a, all data read:  array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "b, all data with an offset:  array([3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "c, only 3 items with an offset:  array([3, 4, 5], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "buffer = b'\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08'\n",
+    "print('buffer: ', buffer)\n",
+    "\n",
+    "a = np.frombuffer(buffer, dtype=np.uint8)\n",
+    "print('a, all data read: ', a)\n",
+    "\n",
+    "b = np.frombuffer(buffer, dtype=np.uint8, offset=2)\n",
+    "print('b, all data with an offset: ', b)\n",
+    "\n",
+    "c = np.frombuffer(buffer, dtype=np.uint8, offset=2, count=3)\n",
+    "print('c, only 3 items with an offset: ', c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## full\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.full.html\n",
+    "\n",
+    "The function returns an array of arbitrary dimension, whose elements are all equal to the second positional argument. The first argument is a tuple describing the shape of the tensor. The `dtype` keyword argument with a default value of `float` can also be supplied.\n",
+    "\n",
+    "The `full` function can accept a complex scalar, or `complex` as the value of `dtype`, if the firmware was compiled with complex support."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:29:11.931011Z",
+     "start_time": "2021-01-12T16:29:11.915195Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[3.0, 3.0, 3.0, 3.0],\n",
+      "       [3.0, 3.0, 3.0, 3.0]], dtype=float64)\n",
+      "\n",
+      "====================\n",
+      "\n",
+      "array([[3, 3, 3, 3],\n",
+      "       [3, 3, 3, 3]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "# create an array with the default type\n",
+    "print(np.full((2, 4), 3))\n",
+    "\n",
+    "print('\\n' + '='*20 + '\\n')\n",
+    "# the array type is uint8 now\n",
+    "print(np.full((2, 4), 3, dtype=np.uint8))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## linspace\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.linspace.html\n",
+    "\n",
+    "This function returns an array, whose elements are uniformly spaced between the `start`, and `stop` points. The number of elements is determined by the `num` keyword argument, whose default value is 50. With the `endpoint` keyword argument (defaults to `True`) one can include `stop` in the sequence. In addition, the `dtype` keyword can be supplied to force type conversion of the output. The default is `float`. Note that, when `dtype` is of integer type, the sequence is not necessarily evenly spaced. This is not an error, rather a consequence of rounding. (This is also the `numpy` behaviour.)\n",
+    "\n",
+    "The `linspace` function can accept `complex` as the value of the `dtype`, if the firmware was compiled with complex support. The output `dtype` is automatically complex, if either of the endpoints is a complex scalar."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:29:45.897927Z",
+     "start_time": "2021-01-12T16:29:45.876325Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "default sequence:\t array([0.0, 0.2040816326530612, 0.4081632653061225, ..., 9.591836734693871, 9.795918367346932, 9.999999999999993], dtype=float64)\n",
+      "num=5:\t\t\t array([0.0, 2.5, 5.0, 7.5, 10.0], dtype=float64)\n",
+      "num=5:\t\t\t array([0.0, 2.0, 4.0, 6.0, 8.0], dtype=float64)\n",
+      "num=5:\t\t\t array([0, 0, 1, 2, 2, 3, 4], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "# generate a sequence with defaults\n",
+    "print('default sequence:\\t', np.linspace(0, 10))\n",
+    "\n",
+    "# num=5\n",
+    "print('num=5:\\t\\t\\t', np.linspace(0, 10, num=5))\n",
+    "\n",
+    "# num=5, endpoint=False\n",
+    "print('num=5:\\t\\t\\t', np.linspace(0, 10, num=5, endpoint=False))\n",
+    "\n",
+    "# num=5, endpoint=False, dtype=uint8\n",
+    "print('num=5:\\t\\t\\t', np.linspace(0, 5, num=7, endpoint=False, dtype=np.uint8))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## logspace\n",
+    "\n",
+    "The equivalent of `linspace` for logarithmically spaced data is `logspace`. This function produces a sequence of numbers, in which the quotient of consecutive numbers is constant. This is a geometric sequence.\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.logspace.html\n",
+    "\n",
+    "This function returns an array, whose elements are uniformly spaced on a logarithmic scale between `base**start`, and `base**stop`. The number of elements is determined by the `num` keyword argument, whose default value is 50. With the `endpoint` keyword argument (defaults to `True`) one can include the end point, `base**stop`, in the sequence. In addition, the `dtype` keyword can be supplied to force type conversion of the output. The default is `float`. Note that, exactly as in `linspace`, when `dtype` is of integer type, the sequence is not necessarily evenly spaced in log space.\n",
+    "\n",
+    "In addition to the keyword arguments found in `linspace`, `logspace` also accepts the `base` argument. The default value is 10. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:30:44.483893Z",
+     "start_time": "2021-01-12T16:30:44.466705Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "default sequence:\t array([1.0, 1.151395399326447, 1.325711365590109, ..., 754.3120063354646, 868.5113737513561, 1000.000000000004], dtype=float64)\n",
+      "num=5:\t\t\t array([10.0, 1778.279410038923, 316227.766016838, 56234132.5190349, 10000000000.0], dtype=float64)\n",
+      "num=5:\t\t\t array([10.0, 630.9573444801933, 39810.71705534974, 2511886.431509581, 158489319.2461114], dtype=float64)\n",
+      "num=5:\t\t\t array([2.0, 6.964404506368993, 24.25146506416637, 84.44850628946524, 294.066778879241], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "# generate a sequence with defaults\n",
+    "print('default sequence:\\t', np.logspace(0, 3))\n",
+    "\n",
+    "# num=5\n",
+    "print('num=5:\\t\\t\\t', np.logspace(1, 10, num=5))\n",
+    "\n",
+    "# num=5, endpoint=False\n",
+    "print('num=5:\\t\\t\\t', np.logspace(1, 10, num=5, endpoint=False))\n",
+    "\n",
+    "# num=5, endpoint=False\n",
+    "print('num=5:\\t\\t\\t', np.logspace(1, 10, num=5, endpoint=False, base=2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## ones, zeros\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.zeros.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.ones.html\n",
+    "\n",
+    "A couple of special arrays and matrices can easily be initialised by calling one of the `ones`, or `zeros` functions. `ones` and `zeros` follow the same pattern, and have the call signature\n",
+    "\n",
+    "```python\n",
+    "ones(shape, dtype=float)\n",
+    "zeros(shape, dtype=float)\n",
+    "```\n",
+    "where shape is either an integer, or a tuple specifying the shape.\n",
+    "\n",
+    "The `ones/zeros` functions can accept complex as the value of the dtype, if the firmware was compiled with complex support."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-12T16:32:05.422109Z",
+     "start_time": "2021-01-12T16:32:05.407921Z"
+    },
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([1, 1, 1, 1, 1, 1], dtype=uint8)\n",
+      "array([[0.0, 0.0, 0.0, 0.0],\n",
+      "       [0.0, 0.0, 0.0, 0.0],\n",
+      "       [0.0, 0.0, 0.0, 0.0],\n",
+      "       [0.0, 0.0, 0.0, 0.0],\n",
+      "       [0.0, 0.0, 0.0, 0.0],\n",
+      "       [0.0, 0.0, 0.0, 0.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "print(np.ones(6, dtype=np.uint8))\n",
+    "\n",
+    "print(np.zeros((6, 4)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    " When specifying the shape, make sure that the length of the tuple is not larger than the maximum dimension of your firmware."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:01:44.960353Z",
+     "start_time": "2021-01-13T06:01:44.944935Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "maximum number of dimensions:  2.1.0-2D\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/dev/shm/micropython.py\", line 7, in <module>\n",
+      "TypeError: too many dimensions\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "import ulab\n",
+    "\n",
+    "print('maximum number of dimensions: ', ulab.__version__)\n",
+    "\n",
+    "print(np.zeros((2, 2, 2)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Customising array printouts"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`ndarray`s are pretty-printed, i.e., if the number of entries along the last axis is larger than 10 (default value), then only the first and last three entries will be printed. Also note that, as opposed to `numpy`, the printout always contains the `dtype`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:02:20.162127Z",
+     "start_time": "2021-01-13T06:02:20.146219Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([0.0, 1.0, 2.0, ..., 197.0, 198.0, 199.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(200))\n",
+    "print(\"a:\\t\", a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## set_printoptions\n",
+    "\n",
+    "The default values can be overwritten by means of the `set_printoptions` function [numpy.set_printoptions](https://numpy.org/doc/1.18/reference/generated/numpy.set_printoptions.html), which accepts two keyword arguments, the `threshold`, and the `edgeitems`. The first of these arguments determines the length of the longest array that will be printed in full, while the second is the number of items that will be printed on the left and right hand side of the ellipsis, if the array is longer than `threshold`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:02:42.073823Z",
+     "start_time": "2021-01-13T06:02:42.057424Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a printed with defaults:\t array([0.0, 1.0, 2.0, ..., 17.0, 18.0, 19.0], dtype=float64)\n",
+      "\n",
+      "a printed in full:\t\t array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0], dtype=float64)\n",
+      "\n",
+      "a truncated with 2 edgeitems:\t array([0.0, 1.0, ..., 18.0, 19.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(20))\n",
+    "print(\"a printed with defaults:\\t\", a)\n",
+    "\n",
+    "np.set_printoptions(threshold=200)\n",
+    "print(\"\\na printed in full:\\t\\t\", a)\n",
+    "\n",
+    "np.set_printoptions(threshold=10, edgeitems=2)\n",
+    "print(\"\\na truncated with 2 edgeitems:\\t\", a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## get_printoptions\n",
+    "\n",
+    "The set value of the `threshold` and `edgeitems` can be retrieved by calling the `get_printoptions` function with no arguments. The function returns a *dictionary* with two keys."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:02:51.383653Z",
+     "start_time": "2021-01-13T06:02:51.372551Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'threshold': 100, 'edgeitems': 20}\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "np.set_printoptions(threshold=100, edgeitems=20)\n",
+    "print(np.get_printoptions())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Methods and properties of ndarrays\n",
+    "\n",
+    "Arrays have several *properties* that can be queried, and some methods that can be called. With the exception of the flatten and transpose operators, properties return an object that describes some feature of the array, while the methods return a new array-like object. The `imag`, and `real` properties are included in the firmware only, when it was compiled with complex support.\n",
+    "\n",
+    "1. [.byteswap](#.byteswap)\n",
+    "1. [.copy](#.copy)\n",
+    "1. [.dtype](#.dtype)\n",
+    "1. [.flat](#.flat)\n",
+    "1. [.flatten](#.flatten)\n",
+    "1. [.imag*](#.imag)\n",
+    "1. [.itemsize](#.itemsize)\n",
+    "1. [.real*](#.real)\n",
+    "1. [.reshape](#.reshape)\n",
+    "1. [.shape](#.shape)\n",
+    "1. [.size](#.size)\n",
+    "1. [.T](#.transpose)\n",
+    "1. [.tobytes](#.tobytes)\n",
+    "1. [.tolist](#.tolist)\n",
+    "1. [.transpose](#.transpose)\n",
+    "1. [.sort](#.sort)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .byteswap\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.char.chararray.byteswap.html\n",
+    "\n",
+    "The method takes a single keyword argument, `inplace`, with values `True` or `False`, and swaps the bytes in the array.  If `inplace = False`, a new `ndarray` is returned, otherwise the original values are overwritten.\n",
+    "\n",
+    "The `frombuffer` function is a convenient way of receiving data from peripheral devices that work with buffers. However, it is not guaranteed that the byte order (in other words, the _endianness_) of the peripheral device matches that of the microcontroller. The `.byteswap` method makes it possible to change the endianness of the incoming data stream.\n",
+    "\n",
+    "Obviously, byteswapping makes sense only for those cases, when a datum occupies more than one byte, i.e., for the `uint16`, `int16`, and `float` `dtype`s. When `dtype` is either `uint8`, or `int8`, the method simply returns a view or copy of self, depending upon the value of `inplace`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-02-15T16:06:20.409727Z",
+     "start_time": "2021-02-15T16:06:20.398057Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "buffer:  b'\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08'\n",
+      "a:  array([513, 1027, 1541, 2055], dtype=uint16)\n",
+      "b:  array([258, 772, 1286, 1800], dtype=uint16)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "buffer = b'\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08'\n",
+    "print('buffer: ', buffer)\n",
+    "\n",
+    "a = np.frombuffer(buffer, dtype=np.uint16)\n",
+    "print('a: ', a)\n",
+    "b = a.byteswap()\n",
+    "print('b: ', b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .copy\n",
+    "\n",
+    "The `.copy` method creates a new *deep copy* of an array, i.e., the entries of the source array are *copied* into the target array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:02:58.898485Z",
+     "start_time": "2021-01-13T06:02:58.878864Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([1, 2, 3, 4], dtype=int8)\n",
+      "====================\n",
+      "b:  array([1, 2, 3, 4], dtype=int8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4], dtype=np.int8)\n",
+    "b = a.copy()\n",
+    "print('a: ', a)\n",
+    "print('='*20)\n",
+    "print('b: ', b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .dtype\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.dtype.html\n",
+    "\n",
+    "The `.dtype` property is the `dtype` of an array. This can then be used for initialising another array with the matching type. `ulab` implements two versions of `dtype`; one that is `numpy`-like, i.e., one, which returns a `dtype` object, and one that is significantly cheaper in terms of flash space, but does not define a `dtype` object, and holds a single character (number) instead. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-02T17:16:12.818777Z",
+     "start_time": "2020-11-02T17:16:12.807147Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([1, 2, 3, 4], dtype=int8)\n",
+      "dtype of a:  dtype('int8')\n",
+      "\n",
+      "b:  array([5, 6, 7], dtype=int8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4], dtype=np.int8)\n",
+    "b = np.array([5, 6, 7], dtype=a.dtype)\n",
+    "print('a: ', a)\n",
+    "print('dtype of a: ', a.dtype)\n",
+    "print('\\nb: ', b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If the `ulab.h` header file sets the pre-processor constant `ULAB_HAS_DTYPE_OBJECT` to 0 as\n",
+    "\n",
+    "```c\n",
+    "#define ULAB_HAS_DTYPE_OBJECT               (0)\n",
+    "```\n",
+    "then the output of the previous snippet will be"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-02T20:36:23.099166Z",
+     "start_time": "2020-11-02T20:36:23.088586Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([1, 2, 3, 4], dtype=int8)\n",
+      "dtype of a:  98\n",
+      "\n",
+      "b:  array([5, 6, 7], dtype=int8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4], dtype=np.int8)\n",
+    "b = np.array([5, 6, 7], dtype=a.dtype)\n",
+    "print('a: ', a)\n",
+    "print('dtype of a: ', a.dtype)\n",
+    "print('\\nb: ', b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here 98 is nothing but the ASCII value of the character `b`, which is the type code for signed 8-bit integers. The object definition adds around 600 bytes to the firmware."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .flat\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.flat.html\n",
+    "\n",
+    "`.flat` returns the array's flat iterator. For one-dimensional objects the flat iterator is equivalent to the standard iterator, while for higher dimensional tensors, it amounts to first flattening the array, and then iterating over it. Note, however, that the flat iterator does not consume RAM beyond what is required for holding the position of the iterator itself, while flattening produces a new copy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n",
+      "2\n",
+      "3\n",
+      "4\n",
+      "a:\n",
+      " array([[1, 2, 3, 4],\n",
+      "       [5, 6, 7, 8]], dtype=int8)\n",
+      "array([1, 2, 3, 4], dtype=int8)\n",
+      "array([5, 6, 7, 8], dtype=int8)\n",
+      "1\n",
+      "2\n",
+      "3\n",
+      "4\n",
+      "5\n",
+      "6\n",
+      "7\n",
+      "8\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4], dtype=np.int8)\n",
+    "for _a in a:\n",
+    "    print(_a)\n",
+    "\n",
+    "a = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.int8)\n",
+    "print('a:\\n', a)\n",
+    "\n",
+    "for _a in a:\n",
+    "    print(_a)\n",
+    "\n",
+    "for _a in a.flat:\n",
+    "    print(_a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .flatten\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.flatten.html\n",
+    "\n",
+    "`.flatten` returns the flattened array. The array can be flattened in `C` style (i.e., moving along the last axis in the tensor), or in `fortran` style (i.e., moving along the first axis in the tensor)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:07:16.735771Z",
+     "start_time": "2021-01-13T06:07:16.723514Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: \t\t array([1, 2, 3, 4], dtype=int8)\n",
+      "a flattened: \t array([1, 2, 3, 4], dtype=int8)\n",
+      "\n",
+      "b: array([[1, 2, 3],\n",
+      "       [4, 5, 6]], dtype=int8)\n",
+      "b flattened (C): \t array([1, 2, 3, 4, 5, 6], dtype=int8)\n",
+      "b flattened (F): \t array([1, 4, 2, 5, 3, 6], dtype=int8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4], dtype=np.int8)\n",
+    "print(\"a: \\t\\t\", a)\n",
+    "print(\"a flattened: \\t\", a.flatten())\n",
+    "\n",
+    "b = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int8)\n",
+    "print(\"\\nb:\", b)\n",
+    "\n",
+    "print(\"b flattened (C): \\t\", b.flatten())\n",
+    "print(\"b flattened (F): \\t\", b.flatten(order='F'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .imag\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.imag.html\n",
+    "\n",
+    "The `.imag` property is defined only, if the firmware was compiled with complex support, and returns a copy with the imaginary part of an array. If the array is real, then the output is straight zeros with the `dtype` of the input. If the input is complex, the output `dtype` is always `float`, irrespective of the values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:07:26.171208Z",
+     "start_time": "2022-01-07T19:07:26.152633Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([1, 2, 3], dtype=uint16)\n",
+      "a.imag:\t array([0, 0, 0], dtype=uint16)\n",
+      "\n",
+      "b:\t array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=complex)\n",
+      "b.imag:\t array([0.0, 1.0, -1.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3], dtype=np.uint16)\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"a.imag:\\t\", a.imag)\n",
+    "\n",
+    "b = np.array([1, 2+1j, 3-1j], dtype=np.complex)\n",
+    "print(\"\\nb:\\t\", b)\n",
+    "print(\"b.imag:\\t\", b.imag)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .itemsize\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.itemsize.html\n",
+    "\n",
+    "The `.itemsize` property is an integer with the size of elements in the array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:07:49.080817Z",
+     "start_time": "2021-01-13T06:07:49.065749Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([1, 2, 3], dtype=int8)\n",
+      "itemsize of a: 1\n",
+      "\n",
+      "b:\n",
+      " array([[1.0, 2.0],\n",
+      "       [3.0, 4.0]], dtype=float64)\n",
+      "itemsize of b: 8\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3], dtype=np.int8)\n",
+    "print(\"a:\\n\", a)\n",
+    "print(\"itemsize of a:\", a.itemsize)\n",
+    "\n",
+    "b= np.array([[1, 2], [3, 4]], dtype=np.float)\n",
+    "print(\"\\nb:\\n\", b)\n",
+    "print(\"itemsize of b:\", b.itemsize)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .real\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.real.html\n",
+    "\n",
+    "The `.real` property is defined only, if the firmware was compiled with complex support, and returns a copy with the real part of an array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:10:01.870921Z",
+     "start_time": "2022-01-07T19:10:01.860071Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([1, 2, 3], dtype=uint16)\n",
+      "a.real:\t array([1, 2, 3], dtype=uint16)\n",
+      "\n",
+      "b:\t array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=complex)\n",
+      "b.real:\t array([1.0, 2.0, 3.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3], dtype=np.uint16)\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"a.real:\\t\", a.real)\n",
+    "\n",
+    "b = np.array([1, 2+1j, 3-1j], dtype=np.complex)\n",
+    "print(\"\\nb:\\t\", b)\n",
+    "print(\"b.real:\\t\", b.real)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .reshape\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html\n",
+    "\n",
+    "`reshape` re-writes the shape properties of an `ndarray`, but the array will not be modified in any other way. The function takes a single 2-tuple with two integers as its argument. The 2-tuple should specify the desired number of rows and columns. If the new shape is not consistent with the old, a `ValueError` exception will be raised."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:08:12.234490Z",
+     "start_time": "2021-01-13T06:08:12.217652Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a (4 by 4): array([[1, 2, 3, 4],\n",
+      "       [5, 6, 7, 8],\n",
+      "       [9, 10, 11, 12],\n",
+      "       [13, 14, 15, 16]], dtype=uint8)\n",
+      "a (2 by 8): array([[1, 2, 3, 4, 5, 6, 7, 8],\n",
+      "       [9, 10, 11, 12, 13, 14, 15, 16]], dtype=uint8)\n",
+      "a (1 by 16): array([[1, 2, 3, ..., 14, 15, 16]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]], dtype=np.uint8)\n",
+    "print('a (4 by 4):', a)\n",
+    "print('a (2 by 8):', a.reshape((2, 8)))\n",
+    "print('a (1 by 16):', a.reshape((1, 16)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Note that `ndarray.reshape()` can also be called by assigning to `ndarray.shape`. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .shape\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.shape.html\n",
+    "\n",
+    "The `.shape` property is a tuple whose elements are the length of the array along each axis. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:08:50.479850Z",
+     "start_time": "2021-01-13T06:08:50.464741Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([1, 2, 3, 4], dtype=int8)\n",
+      "shape of a: (4,)\n",
+      "\n",
+      "b:\n",
+      " array([[1, 2],\n",
+      "       [3, 4]], dtype=int8)\n",
+      "shape of b: (2, 2)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4], dtype=np.int8)\n",
+    "print(\"a:\\n\", a)\n",
+    "print(\"shape of a:\", a.shape)\n",
+    "\n",
+    "b= np.array([[1, 2], [3, 4]], dtype=np.int8)\n",
+    "print(\"\\nb:\\n\", b)\n",
+    "print(\"shape of b:\", b.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "By assigning a tuple to the `.shape` property, the array can be `reshape`d:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], dtype=float64)\n",
+      "\n",
+      "a:\n",
+      " array([[1.0, 2.0, 3.0],\n",
+      "       [4.0, 5.0, 6.0],\n",
+      "       [7.0, 8.0, 9.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
+    "print('a:\\n', a)\n",
+    "\n",
+    "a.shape = (3, 3)\n",
+    "print('\\na:\\n', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .size\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.size.html\n",
+    "\n",
+    "The `.size` property is an integer specifying the number of elements in the array. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-02-11T06:32:22.721112Z",
+     "start_time": "2020-02-11T06:32:22.713111Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([1, 2, 3], dtype=int8)\n",
+      "size of a: 3\n",
+      "\n",
+      "b:\n",
+      " array([[1, 2],\n",
+      "\t [3, 4]], dtype=int8)\n",
+      "size of b: 4\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3], dtype=np.int8)\n",
+    "print(\"a:\\n\", a)\n",
+    "print(\"size of a:\", a.size)\n",
+    "\n",
+    "b= np.array([[1, 2], [3, 4]], dtype=np.int8)\n",
+    "print(\"\\nb:\\n\", b)\n",
+    "print(\"size of b:\", b.size)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .T\n",
+    "\n",
+    "The `.T` property of the `ndarray` is equivalent to [.transpose](#.transpose)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .tobytes\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.tobytes.html\n",
+    "\n",
+    "The `.tobytes` method can be used for acquiring a handle of the underlying data pointer of an array, and it returns a new `bytearray` that can be fed into any method that can accept a `bytearray`, e.g., ADC data can be buffered into this `bytearray`, or the `bytearray` can be fed into a DAC. Since the `bytearray` is really nothing but the bare data container of the array, any manipulation on the `bytearray` automatically modifies the array itself.\n",
+    "\n",
+    "Note that the method raises a `ValueError` exception, if the array is not dense (i.e., it has already been sliced)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:09:57.262071Z",
+     "start_time": "2021-01-13T06:09:57.250519Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([0, 1, 2, 3, 4, 5, 6, 7], dtype=uint8)\n",
+      "b:  bytearray(b'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07')\n",
+      "====================\n",
+      "b:  bytearray(b'\\r\\x01\\x02\\x03\\x04\\x05\\x06\\x07')\n",
+      "a:  array([13, 1, 2, 3, 4, 5, 6, 7], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(8), dtype=np.uint8)\n",
+    "print('a: ', a)\n",
+    "b = a.tobytes()\n",
+    "print('b: ', b)\n",
+    "\n",
+    "# modify b\n",
+    "b[0] = 13\n",
+    "\n",
+    "print('='*20)\n",
+    "print('b: ', b)\n",
+    "print('a: ', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .tolist\n",
+    "\n",
+    "`numpy`: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.tolist.html\n",
+    "\n",
+    "The `.tolist` method can be used for converting the numerical array into a (nested) `python` list."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:01:28.671234Z",
+     "start_time": "2022-01-07T19:01:28.568786Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([0, 1, 2, 3], dtype=uint8)\n",
+      "b:  [0, 1, 2, 3]\n",
+      "====================\n",
+      "c:  array([[0, 1],\n",
+      "       [2, 3]], dtype=uint8)\n",
+      "d:  [[0, 1], [2, 3]]\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(4), dtype=np.uint8)\n",
+    "print('a: ', a)\n",
+    "b = a.tolist()\n",
+    "print('b: ', b)\n",
+    "\n",
+    "c = a.reshape((2, 2))\n",
+    "print('='*20)\n",
+    "print('c: ', c)\n",
+    "d = c.tolist()\n",
+    "print('d: ', d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .transpose\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.transpose.html\n",
+    "\n",
+    "Returns the transposed array. Only defined if the maximum number of dimensions is larger than 1."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 384,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-19T08:39:11.844987Z",
+     "start_time": "2019-10-19T08:39:11.828099Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[1, 2, 3],\n",
+      "\t [4, 5, 6],\n",
+      "\t [7, 8, 9],\n",
+      "\t [10, 11, 12]], dtype=uint8)\n",
+      "shape of a: (4, 3)\n",
+      "\n",
+      "transpose of a:\n",
+      " array([[1, 4, 7, 10],\n",
+      "\t [2, 5, 8, 11],\n",
+      "\t [3, 6, 9, 12]], dtype=uint8)\n",
+      "shape of a: (3, 4)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=np.uint8)\n",
+    "print('a:\\n', a)\n",
+    "print('shape of a:', a.shape)\n",
+    "a.transpose()\n",
+    "print('\\ntranspose of a:\\n', a)\n",
+    "print('shape of a:', a.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The transpose of the array can also be gotten through the `T` property:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[1, 2, 3],\n",
+      "       [4, 5, 6],\n",
+      "       [7, 8, 9]], dtype=uint8)\n",
+      "\n",
+      "transpose of a:\n",
+      " array([[1, 4, 7],\n",
+      "       [2, 5, 8],\n",
+      "       [3, 6, 9]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8)\n",
+    "print('a:\\n', a)\n",
+    "print('\\ntranspose of a:\\n', a.T)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## .sort\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.sort.html\n",
+    "\n",
+    "In-place sorting of an `ndarray`. For a more detailed exposition, see [sort](#sort)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:11:20.989109Z",
+     "start_time": "2021-01-13T06:11:20.972842Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([[1, 12, 3, 0],\n",
+      "       [5, 3, 4, 1],\n",
+      "       [9, 11, 1, 8],\n",
+      "       [7, 10, 0, 1]], dtype=uint8)\n",
+      "\n",
+      "a sorted along vertical axis:\n",
+      " array([[1, 3, 0, 0],\n",
+      "       [5, 10, 1, 1],\n",
+      "       [7, 11, 3, 1],\n",
+      "       [9, 12, 4, 8]], dtype=uint8)\n",
+      "\n",
+      "a sorted along horizontal axis:\n",
+      " array([[0, 1, 3, 12],\n",
+      "       [1, 3, 4, 5],\n",
+      "       [1, 8, 9, 11],\n",
+      "       [0, 1, 7, 10]], dtype=uint8)\n",
+      "\n",
+      "flattened a sorted:\n",
+      " array([0, 0, 1, ..., 10, 11, 12], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.uint8)\n",
+    "print('\\na:\\n', a)\n",
+    "a.sort(axis=0)\n",
+    "print('\\na sorted along vertical axis:\\n', a)\n",
+    "\n",
+    "a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.uint8)\n",
+    "a.sort(axis=1)\n",
+    "print('\\na sorted along horizontal axis:\\n', a)\n",
+    "\n",
+    "a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.uint8)\n",
+    "a.sort(axis=None)\n",
+    "print('\\nflattened a sorted:\\n', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Unary operators\n",
+    "\n",
+    "With the exception of `len`, which returns a single number, all unary operators manipulate the underlying data element-wise. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## len\n",
+    "\n",
+    "This operator takes a single argument, the array, and returns the length of the first axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:11:49.266192Z",
+     "start_time": "2021-01-13T06:11:49.255493Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([1, 2, 3, 4, 5], dtype=uint8)\n",
+      "length of a:  5\n",
+      "shape of a:  (5,)\n",
+      "\n",
+      "b:\t array([[0, 1, 2, 3, 4],\n",
+      "       [0, 1, 2, 3, 4],\n",
+      "       [0, 1, 2, 3, 4],\n",
+      "       [0, 1, 2, 3, 4]], dtype=uint8)\n",
+      "length of b:  2\n",
+      "shape of b:  (4, 5)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)\n",
+    "b = np.array([range(5), range(5), range(5), range(5)], dtype=np.uint8)\n",
+    "\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"length of a: \", len(a))\n",
+    "print(\"shape of a: \", a.shape)\n",
+    "print(\"\\nb:\\t\", b)\n",
+    "print(\"length of b: \", len(b))\n",
+    "print(\"shape of b: \", b.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The number returned by `len` is also the number of iterations, when the array supplies the elements for an iteration (see later)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## invert\n",
+    "\n",
+    "The function is defined for integer data types (`uint8`, `int8`, `uint16`, and `int16`) only, takes a single argument, and returns the element-by-element, bit-wise inverse of the array. If a `float` is supplied, the function raises a `ValueError` exception.\n",
+    "\n",
+    "With signed integers (`int8`, and `int16`), the results might be unexpected, as in the example below:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-11T13:16:16.754210Z",
+     "start_time": "2019-10-11T13:16:16.735618Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([0, -1, -100], dtype=int8)\n",
+      "inverse of a:\t array([-1, 0, 99], dtype=int8)\n",
+      "\n",
+      "a:\t\t array([0, 1, 254, 255], dtype=uint8)\n",
+      "inverse of a:\t array([255, 254, 1, 0], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([0, -1, -100], dtype=np.int8)\n",
+    "print(\"a:\\t\\t\", a)\n",
+    "print(\"inverse of a:\\t\", ~a)\n",
+    "\n",
+    "a = np.array([0, 1, 254, 255], dtype=np.uint8)\n",
+    "print(\"\\na:\\t\\t\", a)\n",
+    "print(\"inverse of a:\\t\", ~a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## abs\n",
+    "\n",
+    "This function takes a single argument, and returns the element-by-element absolute value of the array. When the data type is unsigned (`uint8`, or `uint16`), a copy of the array will be returned immediately, and no calculation takes place."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-11T13:05:43.926821Z",
+     "start_time": "2019-10-11T13:05:43.912629Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t\t  array([0, -1, -100], dtype=int8)\n",
+      "absolute value of a:\t  array([0, 1, 100], dtype=int8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([0, -1, -100], dtype=np.int8)\n",
+    "print(\"a:\\t\\t\\t \", a)\n",
+    "print(\"absolute value of a:\\t \", abs(a))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## neg\n",
+    "\n",
+    "This operator takes a single argument, and changes the sign of each element in the array. Unsigned values are wrapped. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-11T13:17:00.946009Z",
+     "start_time": "2019-10-11T13:17:00.927264Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([10, -1, 1], dtype=int8)\n",
+      "negative of a:\t array([-10, 1, -1], dtype=int8)\n",
+      "\n",
+      "b:\t\t array([0, 100, 200], dtype=uint8)\n",
+      "negative of b:\t array([0, 156, 56], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([10, -1, 1], dtype=np.int8)\n",
+    "print(\"a:\\t\\t\", a)\n",
+    "print(\"negative of a:\\t\", -a)\n",
+    "\n",
+    "b = np.array([0, 100, 200], dtype=np.uint8)\n",
+    "print(\"\\nb:\\t\\t\", b)\n",
+    "print(\"negative of b:\\t\", -b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pos\n",
+    "\n",
+    "This function takes a single argument, and simply returns a copy of the array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-11T13:09:15.965662Z",
+     "start_time": "2019-10-11T13:09:15.945461Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t array([10, -1, 1], dtype=int8)\n",
+      "positive of a:\t array([10, -1, 1], dtype=int8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([10, -1, 1], dtype=np.int8)\n",
+    "print(\"a:\\t\\t\", a)\n",
+    "print(\"positive of a:\\t\", +a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Binary operators\n",
+    "\n",
+    "`ulab` implements the `+`, `-`, `*`, `/`, `**`, `<`, `>`, `<=`, `>=`, `==`, `!=`, `+=`, `-=`, `*=`, `/=`, `**=`  binary operators, as well as the `AND`, `OR`, `XOR` bit-wise operators that work element-wise. Note that the bit-wise operators will raise an exception, if either of the operands is of `float` or `complex` type.\n",
+    "\n",
+    "Broadcasting is available, meaning that the two operands do not even have to have the same shape. If the lengths along the respective axes are equal, or one of them is 1, or the axis is missing, the element-wise operation can still be carried out. \n",
+    "A thorough explanation of broadcasting can be found under https://numpy.org/doc/stable/user/basics.broadcasting.html. \n",
+    "\n",
+    "**WARNING**: note that relational operators (`<`, `>`, `<=`, `>=`, `==`, `!=`) should have the `ndarray` on their left hand side, when compared to scalars. This means that the following works"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:12:30.802935Z",
+     "start_time": "2021-01-13T06:12:30.786069Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([False, False, True], dtype=bool)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3])\n",
+    "print(a > 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "while the equivalent statement, `2 < a`, will raise a `TypeError` exception:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:12:51.262197Z",
+     "start_time": "2021-01-13T06:12:51.244206Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/dev/shm/micropython.py\", line 5, in <module>\n",
+      "TypeError: unsupported types for __lt__: 'int', 'ndarray'\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3])\n",
+    "print(2 < a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**WARNING:** `circuitpython` users should use the `equal`, and `not_equal` operators instead of `==`, and `!=`. See the section on [array comparison](#Comparison-of-arrays) for details."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Upcasting\n",
+    "\n",
+    "Binary operations require special attention, because two arrays with different typecodes can be the operands of an operation, in which case it is not trivial to decide what the typecode of the result should be. Deciding on the result's typecode is called upcasting. Since the number of typecodes in `ulab` is significantly smaller than in `numpy`, we have to define new upcasting rules. Where possible, I followed `numpy`'s conventions. \n",
+    "\n",
+    "`ulab` observes the following upcasting rules:\n",
+    "\n",
+    "1. Operations on two `ndarray`s of the same `dtype` preserve their `dtype`, even when the results overflow.\n",
+    "\n",
+    "2. if either of the operands is a float, the result is automatically a float\n",
+    "\n",
+    "3. When one of the operands is a scalar, it will internally be turned into a single-element `ndarray` with the *smallest* possible `dtype`. Thus, e.g., if the scalar is 123, it will be converted into an array of `dtype` `uint8`, while -1000 will be converted into `int16`. An `mp_obj_float`, will always be promoted to `dtype` `float`. Similarly, if `ulab` supports complex arrays, the result of a binary operation involving a `complex` array is always complex. Other `micropython` types (e.g., lists, tuples, etc.) raise a `TypeError` exception. \n",
+    "\n",
+    "4. \n",
+    "    \n",
+    "| left hand side | right hand side | ulab result | numpy result |\n",
+    "|----------------|-----------------|-------------|--------------|\n",
+    "|`uint8`         |`int8`           |`int16`      |`int16`       |\n",
+    "|`uint8`         |`int16`          |`int16`      |`int16`       |\n",
+    "|`uint8`         |`uint16`         |`uint16`     |`uint16`      |\n",
+    "|`int8`          |`int16`          |`int16`      |`int16`       | \n",
+    "|`int8`          |`uint16`         |`uint16`     |`int32`       |\n",
+    "|`uint16`        |`int16`          |`float`      |`int32`       |\n",
+    "    \n",
+    "Note that the last two operations are promoted to `int32` in `numpy`.\n",
+    "    \n",
+    "**WARNING:** Due to the lower number of available data types, the upcasting rules of `ulab` are slightly different to those of `numpy`. Watch out for this, when porting code!\n",
+    "\n",
+    "Upcasting can be seen in action in the following snippet:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:13:23.026904Z",
+     "start_time": "2021-01-13T06:13:23.009315Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([1, 2, 3, 4], dtype=uint8)\n",
+      "b:\t array([1, 2, 3, 4], dtype=int8)\n",
+      "a+b:\t array([2, 4, 6, 8], dtype=int16)\n",
+      "\n",
+      "a:\t array([1, 2, 3, 4], dtype=uint8)\n",
+      "c:\t array([1.0, 2.0, 3.0, 4.0], dtype=float64)\n",
+      "a*c:\t array([1.0, 4.0, 9.0, 16.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4], dtype=np.uint8)\n",
+    "b = np.array([1, 2, 3, 4], dtype=np.int8)\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"b:\\t\", b)\n",
+    "print(\"a+b:\\t\", a+b)\n",
+    "\n",
+    "c = np.array([1, 2, 3, 4], dtype=np.float)\n",
+    "print(\"\\na:\\t\", a)\n",
+    "print(\"c:\\t\", c)\n",
+    "print(\"a*c:\\t\", a*c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Benchmarks\n",
+    "\n",
+    "The following snippet compares the performance of binary operations to a possible implementation in python. For the time measurement, we will take the following snippet from the micropython manual:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T06:39:52.225256Z",
+     "start_time": "2020-05-07T06:39:52.194691Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 490,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-19T13:23:45.432395Z",
+     "start_time": "2019-10-19T13:23:45.344021Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "python add:\n",
+      "execution time:  10051  us\n",
+      "\n",
+      "python multiply:\n",
+      "execution time:  14175  us\n",
+      "\n",
+      "ulab add:\n",
+      "execution time:  222  us\n",
+      "\n",
+      "ulab multiply:\n",
+      "execution time:  213  us\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "@timeit\n",
+    "def py_add(a, b):\n",
+    "    return [a[i]+b[i] for i in range(1000)]\n",
+    "\n",
+    "@timeit\n",
+    "def py_multiply(a, b):\n",
+    "    return [a[i]*b[i] for i in range(1000)]\n",
+    "\n",
+    "@timeit\n",
+    "def ulab_add(a, b):\n",
+    "    return a + b\n",
+    "\n",
+    "@timeit\n",
+    "def ulab_multiply(a, b):\n",
+    "    return a * b\n",
+    "\n",
+    "a = [0.0]*1000\n",
+    "b = range(1000)\n",
+    "\n",
+    "print('python add:')\n",
+    "py_add(a, b)\n",
+    "\n",
+    "print('\\npython multiply:')\n",
+    "py_multiply(a, b)\n",
+    "\n",
+    "a = np.linspace(0, 10, num=1000)\n",
+    "b = np.ones(1000)\n",
+    "\n",
+    "print('\\nulab add:')\n",
+    "ulab_add(a, b)\n",
+    "\n",
+    "print('\\nulab multiply:')\n",
+    "ulab_multiply(a, b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The python implementation above is not perfect, and certainly, there is much room for improvement. However, the factor of 50 difference in execution time is very spectacular. This is nothing but a consequence of the fact that the `ulab` functions run `C` code, with very little python overhead. The factor of 50 appears to be quite universal: the FFT routine obeys similar scaling (see [Speed of FFTs](#Speed-of-FFTs)), and this number came up with font rendering, too: [fast font rendering on graphical displays](https://forum.micropython.org/viewtopic.php?f=15&t=5815&p=33362&hilit=ufont#p33383)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Comparison operators\n",
+    "\n",
+    "The smaller than, greater than, smaller or equal, and greater or equal operators return a vector of Booleans indicating the positions (`True`), where the condition is satisfied. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-17T15:08:38.673585Z",
+     "start_time": "2020-10-17T15:08:38.659225Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([True, True, True, True, False, False, False, False], dtype=bool)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype=np.uint8)\n",
+    "print(a < 5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**WARNING**: at the moment, due to `micropython`'s implementation details, the `ndarray` must be on the left hand side of the relational operators.\n",
+    "\n",
+    "That is, while `a < 5` and `5 > a` have the same meaning, the following code will not work:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/dev/shm/micropython.py\", line 5, in <module>\n",
+      "TypeError: unsupported types for __gt__: 'int', 'ndarray'\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype=np.uint8)\n",
+    "print(5 > a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Iterating over arrays\n",
+    "\n",
+    "`ndarray`s are iterable, which means that their elements can also be accessed as can the elements of a list, tuple, etc. If the array is one-dimensional, the iterator returns scalars, otherwise a new reduced-dimensional *view* is created and returned."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-01-13T06:14:11.756254Z",
+     "start_time": "2021-01-13T06:14:11.742246Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([1, 2, 3, 4, 5], dtype=uint8)\n",
+      "element 0 in a: 1\n",
+      "element 1 in a: 2\n",
+      "element 2 in a: 3\n",
+      "element 3 in a: 4\n",
+      "element 4 in a: 5\n",
+      "\n",
+      "b:\t array([[0, 1, 2, 3, 4],\n",
+      "       [10, 11, 12, 13, 14],\n",
+      "       [20, 21, 22, 23, 24],\n",
+      "       [30, 31, 32, 33, 34]], dtype=uint8)\n",
+      "element 0 in b: array([0, 1, 2, 3, 4], dtype=uint8)\n",
+      "element 1 in b: array([10, 11, 12, 13, 14], dtype=uint8)\n",
+      "element 2 in b: array([20, 21, 22, 23, 24], dtype=uint8)\n",
+      "element 3 in b: array([30, 31, 32, 33, 34], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)\n",
+    "b = np.array([range(5), range(10, 15, 1), range(20, 25, 1), range(30, 35, 1)], dtype=np.uint8)\n",
+    "\n",
+    "print(\"a:\\t\", a)\n",
+    "\n",
+    "for i, _a in enumerate(a):\n",
+    "    print(\"element %d in a:\"%i, _a)\n",
+    "    \n",
+    "print(\"\\nb:\\t\", b)\n",
+    "\n",
+    "for i, _b in enumerate(b):\n",
+    "    print(\"element %d in b:\"%i, _b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Slicing and indexing\n",
+    "\n",
+    "\n",
+    "## Views vs. copies\n",
+    "\n",
+    "`numpy` has a very important concept called *views*, which is a powerful extension of `python`'s own notion of slicing. Slices are special python objects of the form\n",
+    "\n",
+    "```python\n",
+    "slice = start:end:step\n",
+    "```\n",
+    "\n",
+    "where `start`, `end`, and `step` are (not necessarily non-negative) integers. Not all of these three numbers must be specified in an index, in fact, all three of them can be missing. The interpreter takes care of filling in the missing values. (Note that a slice cannot actually be assigned to a variable in this way; the colon notation is valid only where an index is expected.) For a good explanation on how slices work in python, you can read the stackoverflow question https://stackoverflow.com/questions/509211/understanding-slice-notation.\n",
+    "\n",
+    "In order to see what slicing does, let us take the string `string = '0123456789'`! We can extract every second character by creating the slice `::2`, which is equivalent to `0:len(string):2`, i.e., it increments the character pointer by 2, starting from 0, and traverses the string up to the very end."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T05:26:17.758735Z",
+     "start_time": "2020-10-12T05:26:17.748487Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'02468'"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "string = '0123456789'\n",
+    "string[::2]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, we can do the same with numerical arrays."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T05:25:49.352435Z",
+     "start_time": "2020-10-12T05:25:49.339452Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)\n",
+      "a[::2]:\t array([0, 2, 4, 6, 8], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(10), dtype=np.uint8)\n",
+    "print('a:\\t', a)\n",
+    "\n",
+    "print('a[::2]:\\t', a[::2])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This looks similar to `string` above, but there is a very important difference that is not so obvious. Namely, `string[::2]` produces a partial copy of `string`, while `a[::2]` only produces a *view* of `a`. What this means is that `a`, and `a[::2]` share their data, and the only difference between the two is, how the data are read out. In other words, internally, `a[::2]` has the same data pointer as `a`. We can easily convince ourselves that this is indeed the case by calling the [ndinfo](#The_ndinfo_function) function: the *data pointer* entry is the same in the two printouts."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-16T18:43:07.480791Z",
+     "start_time": "2020-10-16T18:43:07.471473Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8) \n",
+      "\n",
+      "class: ndarray\n",
+      "shape: (10,)\n",
+      "strides: (1,)\n",
+      "itemsize: 1\n",
+      "data pointer: 0x7ff6c6193220\n",
+      "type: uint8\n",
+      "\n",
+      "====================\n",
+      "a[::2]:  array([0, 2, 4, 6, 8], dtype=uint8) \n",
+      "\n",
+      "class: ndarray\n",
+      "shape: (5,)\n",
+      "strides: (2,)\n",
+      "itemsize: 1\n",
+      "data pointer: 0x7ff6c6193220\n",
+      "type: uint8\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(10), dtype=np.uint8)\n",
+    "print('a: ', a, '\\n')\n",
+    "np.ndinfo(a)\n",
+    "print('\\n' + '='*20)\n",
+    "print('a[::2]: ', a[::2], '\\n')\n",
+    "np.ndinfo(a[::2])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you are still a bit confused about the meaning of *views*, the section [Slicing and assigning to slices](#Slicing-and-assigning-to-slices) should clarify the issue."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Indexing\n",
+    "\n",
+    "The simplest form of indexing is specifying a single integer between the square brackets as in "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T18:31:45.485584Z",
+     "start_time": "2020-10-12T18:31:45.464551Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)\n",
+      "the first, and last element of a:\n",
+      " 0 9\n",
+      "the second, and last but one element of a:\n",
+      " 1 8\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(10), dtype=np.uint8)\n",
+    "print(\"a: \", a)\n",
+    "print(\"the first, and last element of a:\\n\", a[0], a[-1])\n",
+    "print(\"the second, and last but one element of a:\\n\", a[1], a[-2])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Indexing can be applied to higher-dimensional tensors, too. When the length of the indexing sequences is smaller than the number of dimensions, a new *view* is returned, otherwise, we get a single number."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T18:26:12.783883Z",
+     "start_time": "2020-10-12T18:26:12.770180Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[0, 1, 2],\n",
+      "\t[3, 4, 5],\n",
+      "\t[6, 7, 8]], dtype=uint8)\n",
+      "a[0]:\n",
+      " array([[0, 1, 2]], dtype=uint8)\n",
+      "a[1,1]:  4\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8).reshape((3, 3))\n",
+    "print(\"a:\\n\", a)\n",
+    "print(\"a[0]:\\n\", a[0])\n",
+    "print(\"a[1,1]: \", a[1,1])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Indices can also be a list of Booleans. By using a Boolean list, we can select those elements of an array that satisfy a specific condition. At the moment, such indexing is defined for row vectors only; when the rank of the tensor is higher than 1, the function raises a `NotImplementedError` exception, though this will be rectified in a future version of `ulab`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T17:34:34.105614Z",
+     "start_time": "2020-10-12T17:34:34.094017Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float)\n",
+      "a[a < 5]:\t array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.float)\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"a[a < 5]:\\t\", a[a < 5])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Indexing with Boolean arrays can take more complicated expressions. This is a very concise way of comparing two vectors, e.g.:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T18:03:38.846377Z",
+     "start_time": "2020-10-12T18:03:38.826689Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "\n",
+      "a**2:\t array([0, 1, 4, 9, 16, 25, 36, 49, 64], dtype=uint16)\n",
+      "\n",
+      "b:\t array([4, 4, 4, 3, 3, 3, 13, 13, 13], dtype=uint8)\n",
+      "\n",
+      "100*sin(b):\t array([-75.68024953079282, -75.68024953079282, -75.68024953079282, 14.11200080598672, 14.11200080598672, 14.11200080598672, 42.01670368266409, 42.01670368266409, 42.01670368266409], dtype=float)\n",
+      "\n",
+      "a[a*a > np.sin(b)*100.0]:\t array([0, 1, 2, 4, 5, 7, 8], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8)\n",
+    "b = np.array([4, 4, 4, 3, 3, 3, 13, 13, 13], dtype=np.uint8)\n",
+    "print(\"a:\\t\", a)\n",
+    "print(\"\\na**2:\\t\", a*a)\n",
+    "print(\"\\nb:\\t\", b)\n",
+    "print(\"\\n100*sin(b):\\t\", np.sin(b)*100.0)\n",
+    "print(\"\\na[a*a > np.sin(b)*100.0]:\\t\", a[a*a > np.sin(b)*100.0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Boolean indices can also be used in assignments, if the array is one-dimensional. The following example replaces the data in an array, wherever some condition is fulfilled."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-13T16:14:21.055356Z",
+     "start_time": "2020-10-13T16:14:21.035329Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([0, 1, 2], dtype=uint8)\n",
+      "array([123, 123, 123, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8)\n",
+    "b = np.array(range(9)) + 12\n",
+    "\n",
+    "print(a[b < 15])\n",
+    "\n",
+    "a[b < 15] = 123\n",
+    "print(a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "On the right hand side of the assignment we can even have another array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-13T16:14:10.054210Z",
+     "start_time": "2020-10-13T16:14:10.039523Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([0, 1, 2], dtype=uint8) array([12.0, 13.0, 14.0], dtype=float)\n",
+      "array([12, 13, 14, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8)\n",
+    "b = np.array(range(9)) + 12\n",
+    "\n",
+    "print(a[b < 15], b[b < 15])\n",
+    "\n",
+    "a[b < 15] = b[b < 15]\n",
+    "print(a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Slicing and assigning to slices\n",
+    "\n",
+    "You can also generate sub-arrays by specifying slices as the index of an array. Slices are special python objects of the form `start:end:step`, as the following examples demonstrate:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T17:38:15.975404Z",
+     "start_time": "2020-10-12T17:38:15.955070Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[1, 2, 3],\n",
+      "\t[4, 5, 6],\n",
+      "\t[7, 8, 9]], dtype=uint8)\n",
+      "\n",
+      "a[0]:\n",
+      " array([[1, 2, 3]], dtype=uint8)\n",
+      "\n",
+      "a[0,:2]:\n",
+      " array([[1, 2]], dtype=uint8)\n",
+      "\n",
+      "a[:,0]:\n",
+      " array([[1],\n",
+      "\t[4],\n",
+      "\t[7]], dtype=uint8)\n",
+      "\n",
+      "a[-1]:\n",
+      " array([[7, 8, 9]], dtype=uint8)\n",
+      "\n",
+      "a[-1:-3:-1]:\n",
+      " array([[7, 8, 9],\n",
+      "\t[4, 5, 6]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8)\n",
+    "print('a:\\n', a)\n",
+    "\n",
+    "# the first row\n",
+    "print('\\na[0]:\\n', a[0])\n",
+    "\n",
+    "# the first two elements of the first row\n",
+    "print('\\na[0,:2]:\\n', a[0,:2])\n",
+    "\n",
+    "# the zeroth element in each row (also known as the zeroth column)\n",
+    "print('\\na[:,0]:\\n', a[:,0])\n",
+    "\n",
+    "# the last row\n",
+    "print('\\na[-1]:\\n', a[-1])\n",
+    "\n",
+    "# the last two rows backwards\n",
+    "print('\\na[-1:-3:-1]:\\n', a[-1:-3:-1])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Assignment to slices can be done for the whole slice, per row, and per column. A couple of examples should make these statements clearer:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T17:40:24.031254Z",
+     "start_time": "2020-10-12T17:40:24.011816Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[0, 0, 0],\n",
+      "\t[0, 0, 0],\n",
+      "\t[0, 0, 0]], dtype=uint8)\n",
+      "\n",
+      "a[0] = 1\n",
+      " array([[1, 1, 1],\n",
+      "\t[0, 0, 0],\n",
+      "\t[0, 0, 0]], dtype=uint8)\n",
+      "\n",
+      "a[:,0]:\n",
+      " array([[0, 0, 3],\n",
+      "\t[0, 0, 3],\n",
+      "\t[0, 0, 3]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.zeros((3, 3), dtype=np.uint8)\n",
+    "print('a:\\n', a)\n",
+    "\n",
+    "# assigning to the whole row\n",
+    "a[0] = 1\n",
+    "print('\\na[0] = 1\\n', a)\n",
+    "\n",
+    "a = np.zeros((3, 3), dtype=np.uint8)\n",
+    "\n",
+    "# assigning to a column\n",
+    "a[:,2] = 3.0\n",
+    "print('\\na[:,0]:\\n', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now, you should notice that we re-set the array `a` after the first assignment. Do you care to see what happens, if we do not do that? Well, here are the results:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-12T17:44:09.180623Z",
+     "start_time": "2020-10-12T17:44:09.161578Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  array([[1, 1, 3],\n",
+      "\t[0, 0, 3],\n",
+      "\t[0, 0, 3]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.zeros((3, 3), dtype=np.uint8)\n",
+    "b = a[:,:]\n",
+    "# assign 1 to the first row\n",
+    "b[0] = 1\n",
+    "\n",
+    "# assigning to the last column\n",
+    "b[:,2] = 3\n",
+    "print('a: ', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that both assignments involved `b`, and not `a`, yet, when we print out `a`, its entries are updated. This proves our earlier statement about the behaviour of *views*: in the statement `b = a[:,:]` we simply created a *view* of `a`, and not a *deep* copy of it, meaning that whenever we modify `b`, we actually modify `a`, because the underlying data container is shared between the two objects. Having a single data container for two seemingly different objects provides an extremely powerful way of manipulating sub-sets of numerical data."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want to work on a *copy* of your data, you can use the `.copy` method of the `ndarray`. The following snippet should drive the point home:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-17T13:06:20.223171Z",
+     "start_time": "2020-10-17T13:06:20.206422Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "class: ndarray\n",
+      "shape: (3, 3)\n",
+      "strides: (3, 1)\n",
+      "itemsize: 1\n",
+      "data pointer: 0x7ff737ea3220\n",
+      "type: uint8\n",
+      "\n",
+      "class: ndarray\n",
+      "shape: (3, 3)\n",
+      "strides: (3, 1)\n",
+      "itemsize: 1\n",
+      "data pointer: 0x7ff737ea3340\n",
+      "type: uint8\n",
+      "\n",
+      "a:  array([[0, 0, 0],\n",
+      "\t[0, 0, 0],\n",
+      "\t[0, 0, 0]], dtype=uint8)\n",
+      "====================\n",
+      "b:  array([[1, 1, 1],\n",
+      "\t[0, 0, 0],\n",
+      "\t[0, 0, 0]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.zeros((3, 3), dtype=np.uint8)\n",
+    "b = a.copy()\n",
+    "\n",
+    "# get the address of the underlying data pointer\n",
+    "\n",
+    "np.ndinfo(a)\n",
+    "print()\n",
+    "np.ndinfo(b)\n",
+    "\n",
+    "# assign 1 to the first row of b, and do not touch a\n",
+    "b[0] = 1\n",
+    "\n",
+    "print()\n",
+    "print('a: ', a)\n",
+    "print('='*20)\n",
+    "print('b: ', b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `.copy` method can also be applied to views: below, `a[0]` is a *view* of `a`, out of which we create a *deep copy* called `b`. This is a row vector now. We can then do whatever we want to with `b`, and that leaves `a` unchanged."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-17T13:00:06.217232Z",
+     "start_time": "2020-10-17T13:00:06.207417Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "b:  array([0, 0, 0], dtype=uint8)\n",
+      "====================\n",
+      "a:  array([[0, 0, 0],\n",
+      "\t[0, 0, 0],\n",
+      "\t[0, 0, 0]], dtype=uint8)\n",
+      "====================\n",
+      "b:  array([1, 0, 0], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.zeros((3, 3), dtype=np.uint8)\n",
+    "b = a[0].copy()\n",
+    "print('b: ', b)\n",
+    "print('='*20)\n",
+    "# assign 1 to the first entry of b, and do not touch a\n",
+    "b[0] = 1\n",
+    "print('a: ', a)\n",
+    "print('='*20)\n",
+    "print('b: ', b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The fact that the underlying data of a view is the same as that of the original array has another important consequence, namely, that the creation of a view is cheap, both in terms of RAM and execution time. A view is really nothing but a short header with a data array that already exists, and is filled up. Hence, creating the view requires only the creation of its header. This operation is fast, and uses virtually no RAM."
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "ce9a02f9f7db620716422019cafa4bc1786ca85daa298b819f6da075e7993842"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/ulab-numerical.ipynb b/tulip/shared/ulab/docs/ulab-numerical.ipynb
new file mode 100644
index 000000000..be549545d
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-numerical.ipynb
@@ -0,0 +1,1160 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-03T19:50:50.150235Z",
+     "start_time": "2020-11-03T19:50:48.888079Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:16:29.118001Z",
+     "start_time": "2022-01-07T19:16:29.114692Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-07T19:16:37.453883Z",
+     "start_time": "2022-01-07T19:16:37.422478Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Numerical\n",
+    "\n",
+    "The functions in this section can be used for calculating statistical properties, or manipulating the arrangement of array elements."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## min, argmin, max, argmax\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.min.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmin.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.max.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.argmax.html\n",
+    "\n",
+    "**WARNING:** Difference to `numpy`: the `out` keyword argument is not implemented.\n",
+    "\n",
+    "These functions follow the same pattern, and work with generic iterables, and `ndarray`s. `min`, and `max` return the minimum or maximum of a sequence. If the input array is two-dimensional, the `axis` keyword argument can be supplied, in which case the minimum/maximum along the given axis will be returned. If `axis=None` (this is also the default value), the minimum/maximum of the flattened array will be determined.\n",
+    "\n",
+    "`argmin/argmax` return the position (index) of the minimum/maximum in the sequence."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-17T21:26:22.507996Z",
+     "start_time": "2020-10-17T21:26:22.492543Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([1.0, 2.0, 3.0], dtype=float)\n",
+      "array([], dtype=float)\n",
+      "[] 0\n",
+      "array([1.0, 2.0, 3.0], dtype=float)\n",
+      "array([], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "\n",
+    "a = np.array([1, 2, 3])\n",
+    "print(a)\n",
+    "print(a[-1:-1:-3])\n",
+    "try:\n",
+    "    sa = list(a[-1:-1:-3])\n",
+    "    la = len(sa)\n",
+    "except IndexError as e:\n",
+    "    sa = str(e)\n",
+    "    la = -1\n",
+    "    \n",
+    "print(sa, la)\n",
+    "\n",
+    "a[-1:-1:-3] = np.ones(0)\n",
+    "print(a)\n",
+    "\n",
+    "b = np.ones(0) + 1\n",
+    "print(b)\n",
+    "# print('b', b.shape())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-17T21:54:49.123748Z",
+     "start_time": "2020-10-17T21:54:49.093819Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0, 1, -3array([], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "a = np.array([1, 2, 3])\n",
+    "print(a[0:1:-3])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-17T21:57:01.482277Z",
+     "start_time": "2020-10-17T21:57:01.477362Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0]"
+      ]
+     },
+     "execution_count": 127,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "l = list(range(13))\n",
+    "\n",
+    "l[0:10:113]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-17T20:59:58.285134Z",
+     "start_time": "2020-10-17T20:59:58.263605Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0,)"
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = np.array([1, 2, 3])\n",
+    "np.ones(0, dtype=uint8) / np.zeros(0, dtype=uint16)\n",
+    "np.ones(0).shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 375,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-18T13:08:28.113525Z",
+     "start_time": "2019-10-18T13:08:28.093518Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: array([1.0, 2.0, 0.0, 1.0, 10.0], dtype=float)\n",
+      "min of a: 0.0\n",
+      "argmin of a: 2\n",
+      "\n",
+      "b:\n",
+      " array([[1.0, 2.0, 0.0],\n",
+      "\t [1.0, 10.0, -1.0]], dtype=float)\n",
+      "min of b (flattened): -1.0\n",
+      "min of b (axis=0): array([1.0, 2.0, -1.0], dtype=float)\n",
+      "min of b (axis=1): array([0.0, -1.0], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array([1, 2, 0, 1, 10])\n",
+    "print('a:', a)\n",
+    "print('min of a:', numerical.min(a))\n",
+    "print('argmin of a:', numerical.argmin(a))\n",
+    "\n",
+    "b = np.array([[1, 2, 0], [1, 10, -1]])\n",
+    "print('\\nb:\\n', b)\n",
+    "print('min of b (flattened):', numerical.min(b))\n",
+    "print('min of b (axis=0):', numerical.min(b, axis=0))\n",
+    "print('min of b (axis=1):', numerical.min(b, axis=1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## sum, std, mean\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.sum.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.mean.html\n",
+    "\n",
+    "These three functions follow the same pattern: if the axis keyword is not specified, it assumes the default value of `None`, and returns the result of the computation for the flattened array. Otherwise, the calculation is along the given axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 527,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-20T06:51:58.845076Z",
+     "start_time": "2019-10-20T06:51:58.798730Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: \n",
+      " array([[1.0, 2.0, 3.0],\n",
+      "\t [4.0, 5.0, 6.0],\n",
+      "\t [7.0, 8.0, 9.0]], dtype=float)\n",
+      "sum, flat array:  45.0\n",
+      "mean, horizontal:  array([2.0, 5.0, 8.0], dtype=float)\n",
+      "std, vertical:  array([2.44949, 2.44949, 2.44949], dtype=float)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n",
+    "print('a: \\n', a)\n",
+    "\n",
+    "print('sum, flat array: ', numerical.sum(a))\n",
+    "\n",
+    "print('mean, horizontal: ', numerical.mean(a, axis=1))\n",
+    "\n",
+    "print('std, vertical: ', numerical.std(a, axis=0))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## roll\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.roll.html\n",
+    "\n",
+    "The roll function shifts the content of a vector by the positions given as the second argument. If the `axis` keyword is supplied, the shift is applied to the given axis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 229,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-11T19:39:47.459395Z",
+     "start_time": "2019-10-11T19:39:47.443691Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\t\t\t array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float)\n",
+      "a rolled to the left:\t array([3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0], dtype=float)\n",
+      "a rolled to the right:\t array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5, 6, 7, 8])\n",
+    "print(\"a:\\t\\t\\t\", a)\n",
+    "\n",
+    "numerical.roll(a, 2)\n",
+    "print(\"a rolled to the left:\\t\", a)\n",
+    "\n",
+    "# this should be the original vector\n",
+    "numerical.roll(a, -2)\n",
+    "print(\"a rolled to the right:\\t\", a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Rolling works with matrices, too. If the `axis` keyword is 0, the matrix is rolled along its vertical axis, otherwise, horizontally. \n",
+    "\n",
+    "Horizontal rolls are faster, because they require fewer steps, and larger memory chunks are copied, however, they also require more RAM: basically the whole row must be stored internally. Most expensive are the `None` keyword values, because with `axis = None`, the array is flattened first, hence the row's length is the size of the whole matrix.\n",
+    "\n",
+    "Vertical rolls require two internal copies of single columns. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 268,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-15T17:46:20.051069Z",
+     "start_time": "2019-10-15T17:46:20.033205Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[1.0, 2.0, 3.0, 4.0],\n",
+      "\t [5.0, 6.0, 7.0, 8.0]], dtype=float)\n",
+      "\n",
+      "a rolled to the left:\n",
+      " array([[3.0, 4.0, 5.0, 6.0],\n",
+      "\t [7.0, 8.0, 1.0, 2.0]], dtype=float)\n",
+      "\n",
+      "a rolled up:\n",
+      " array([[6.0, 3.0, 4.0, 5.0],\n",
+      "\t [2.0, 7.0, 8.0, 1.0]], dtype=float)\n",
+      "\n",
+      "a rolled with None:\n",
+      " array([[3.0, 4.0, 5.0, 2.0],\n",
+      "\t [7.0, 8.0, 1.0, 6.0]], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])\n",
+    "print(\"a:\\n\", a)\n",
+    "\n",
+    "numerical.roll(a, 2)\n",
+    "print(\"\\na rolled to the left:\\n\", a)\n",
+    "\n",
+    "numerical.roll(a, -1, axis=1)\n",
+    "print(\"\\na rolled up:\\n\", a)\n",
+    "\n",
+    "numerical.roll(a, 1, axis=None)\n",
+    "print(\"\\na rolled with None:\\n\", a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Simple running weighted average\n",
+    "\n",
+    "As a demonstration of the conciseness of `ulab/numpy` operations, we will calculate an exponentially weighted running average of a measurement vector in just a couple of lines. I chose this particular example, because I think that this can indeed be used in real-life applications."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 230,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-11T20:03:00.713235Z",
+     "start_time": "2019-10-11T20:03:00.696932Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([0.01165623031556606, 0.03168492019176483, 0.08612854033708572, 0.234121635556221, 0.6364086270332336], dtype=float)\n",
+      "0.2545634508132935\n",
+      "array([0.0, 0.0, 0.0, 0.0, 2.0], dtype=float)\n",
+      "0.3482121050357819\n",
+      "array([0.0, 0.0, 0.0, 2.0, 2.0], dtype=float)\n",
+      "0.3826635211706161\n",
+      "array([0.0, 0.0, 2.0, 2.0, 2.0], dtype=float)\n",
+      "0.3953374892473221\n",
+      "array([0.0, 2.0, 2.0, 2.0, 2.0], dtype=float)\n",
+      "0.3999999813735485\n",
+      "array([2.0, 2.0, 2.0, 2.0, 2.0], dtype=float)\n",
+      "0.3999999813735485\n",
+      "array([2.0, 2.0, 2.0, 2.0, 2.0], dtype=float)\n",
+      "0.3999999813735485\n",
+      "array([2.0, 2.0, 2.0, 2.0, 2.0], dtype=float)\n",
+      "0.3999999813735485\n",
+      "array([2.0, 2.0, 2.0, 2.0, 2.0], dtype=float)\n",
+      "0.3999999813735485\n",
+      "array([2.0, 2.0, 2.0, 2.0, 2.0], dtype=float)\n",
+      "0.3999999813735485\n",
+      "array([2.0, 2.0, 2.0, 2.0, 2.0], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "from ulab import vector\n",
+    "\n",
+    "def dummy_adc():\n",
+    "    # dummy adc function, so that the results are reproducible\n",
+    "    return 2\n",
+    "    \n",
+    "n = 10\n",
+    "# These are the normalised weights; the last entry is the most dominant\n",
+    "weight = vector.exp([1, 2, 3, 4, 5])\n",
+    "weight = weight/numerical.sum(weight)\n",
+    "\n",
+    "print(weight)\n",
+    "# initial array of samples\n",
+    "samples = np.array([0]*n)\n",
+    "\n",
+    "for i in range(n):\n",
+    "    # a new datum is inserted on the right hand side. This simply overwrites whatever was in the last slot\n",
+    "    samples[-1] = dummy_adc()\n",
+    "    print(numerical.mean(samples[-5:]*weight))\n",
+    "    print(samples[-5:])\n",
+    "    # the data are shifted by one position to the left\n",
+    "    numerical.roll(samples, 1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## flip\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.flip.html\n",
+    "\n",
+    "The `flip` function takes one positional argument, an `ndarray`, and one keyword argument, `axis = None`, and reverses the order of elements along the given axis. If the keyword argument is `None`, the matrix's entries are flipped along all axes. `flip` returns a new copy of the array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 275,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-16T06:35:52.163725Z",
+     "start_time": "2019-10-16T06:35:52.149231Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a: \t array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=float)\n",
+      "a flipped:\t array([5.0, 4.0, 3.0, 2.0, 1.0], dtype=float)\n",
+      "\n",
+      "a flipped horizontally\n",
+      " array([[3, 2, 1],\n",
+      "\t [6, 5, 4],\n",
+      "\t [9, 8, 7]], dtype=uint8)\n",
+      "\n",
+      "a flipped vertically\n",
+      " array([[7, 8, 9],\n",
+      "\t [4, 5, 6],\n",
+      "\t [1, 2, 3]], dtype=uint8)\n",
+      "\n",
+      "a flipped horizontally+vertically\n",
+      " array([[9, 8, 7],\n",
+      "\t [6, 5, 4],\n",
+      "\t [3, 2, 1]], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5])\n",
+    "print(\"a: \\t\", a)\n",
+    "print(\"a flipped:\\t\", np.flip(a))\n",
+    "\n",
+    "a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.uint8)\n",
+    "print(\"\\na flipped horizontally\\n\", numerical.flip(a, axis=1))\n",
+    "print(\"\\na flipped vertically\\n\", numerical.flip(a, axis=0))\n",
+    "print(\"\\na flipped horizontally+vertically\\n\", numerical.flip(a))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## diff\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.diff.html\n",
+    "\n",
+    "The `diff` function returns the numerical derivative of the forward scheme, or more accurately, the differences of an `ndarray` along a given axis. The order of derivative can be stipulated with the `n` keyword argument, which should be between 0 and 9. Default is 1. If higher order derivatives are required, they can be gotten by repeated calls to the function. The `axis` keyword argument should be -1 (last axis, in `ulab` equivalent to the second axis, and this also happens to be the default value), 0, or 1. \n",
+    "\n",
+    "Beyond the output array, the function requires only a couple of bytes of extra RAM for the differentiation stencil. (The stencil is an `int8` array, one byte longer than `n`. This also explains why the highest order is 9: the coefficients of a ninth-order stencil all fit in signed bytes, while 10 would require `int16`.) Note that as usual in numerical differentiation (and also in `numpy`), the length of the respective axis will be reduced by `n` after the operation. If `n` is larger than, or equal to the length of the axis, an empty array will be returned.\n",
+    "\n",
+    "**WARNING**: the `diff` function does not implement the `prepend` and `append` keywords that can be found in `numpy`. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 169,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-31T11:51:02.854338Z",
+     "start_time": "2019-10-31T11:51:02.838000Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=uint8)\n",
+      "\n",
+      "first derivative:\n",
+      " array([1, 1, 1, 1, 1, 1, 1, 1], dtype=uint8)\n",
+      "\n",
+      "second derivative:\n",
+      " array([0, 0, 0, 0, 0, 0, 0], dtype=uint8)\n",
+      "\n",
+      "c:\n",
+      " array([[1.0, 2.0, 3.0, 4.0],\n",
+      "\t [4.0, 3.0, 2.0, 1.0],\n",
+      "\t [1.0, 4.0, 9.0, 16.0],\n",
+      "\t [0.0, 0.0, 0.0, 0.0]], dtype=float)\n",
+      "\n",
+      "first derivative, first axis:\n",
+      " array([[3.0, 1.0, -1.0, -3.0],\n",
+      "\t [-3.0, 1.0, 7.0, 15.0],\n",
+      "\t [-1.0, -4.0, -9.0, -16.0]], dtype=float)\n",
+      "\n",
+      "first derivative, second axis:\n",
+      " array([[1.0, 1.0, 1.0],\n",
+      "\t [-1.0, -1.0, -1.0],\n",
+      "\t [3.0, 5.0, 7.0],\n",
+      "\t [0.0, 0.0, 0.0]], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array(range(9), dtype=np.uint8)\n",
+    "print('a:\\n', a)\n",
+    "\n",
+    "print('\\nfirst derivative:\\n', numerical.diff(a, n=1))\n",
+    "print('\\nsecond derivative:\\n', numerical.diff(a, n=2))\n",
+    "\n",
+    "c = np.array([[1, 2, 3, 4], [4, 3, 2, 1], [1, 4, 9, 16], [0, 0, 0, 0]])\n",
+    "print('\\nc:\\n', c)\n",
+    "print('\\nfirst derivative, first axis:\\n', numerical.diff(c, axis=0))\n",
+    "print('\\nfirst derivative, second axis:\\n', numerical.diff(c, axis=1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## median\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.median.html\n",
+    "\n",
+    "The function computes the median along the specified axis, and returns the median of the array elements. If the `axis` keyword argument is `None`, the array is flattened first. The `dtype` of the results is always float."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-03T19:54:38.047790Z",
+     "start_time": "2020-11-03T19:54:38.029264Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:\n",
+      " array([[0, 1, 2, 3],\n",
+      "       [4, 5, 6, 7],\n",
+      "       [8, 9, 10, 11]], dtype=int8)\n",
+      "\n",
+      "median of the flattened array:  5.5\n",
+      "\n",
+      "median along the vertical axis:  array([4.0, 5.0, 6.0, 7.0], dtype=float)\n",
+      "\n",
+      "median along the horizontal axis:  array([1.5, 5.5, 9.5], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "\n",
+    "a = np.array(range(12), dtype=np.int8).reshape((3, 4))\n",
+    "print('a:\\n', a)\n",
+    "print('\\nmedian of the flattened array: ', np.median(a))\n",
+    "print('\\nmedian along the vertical axis: ', np.median(a, axis=0))\n",
+    "print('\\nmedian along the horizontal axis: ', np.median(a, axis=1))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## sort\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.sort.html\n",
+    "\n",
+    "The `sort` function takes an ndarray, and sorts its elements in ascending order along the specified axis using a heap sort algorithm. As opposed to the `.sort()` method discussed earlier, this function creates a copy of its input before sorting, and at the end, returns this copy. The copy itself is sorted in place, without auxiliary storage. The `axis` keyword argument takes on the possible values of -1 (the last axis, in `ulab` equivalent to the second axis, and this also happens to be the default value), 0, 1, or `None`. The first three cases are identical to those in [diff](#diff), while the last one flattens the array before sorting. \n",
+    "\n",
+    "If descending order is required, the result can simply be `flip`ped, see [flip](#flip).\n",
+    "\n",
+    "**WARNING:** `numpy` defines the `kind`, and `order` keyword arguments that are not implemented here. The function in `ulab` always uses heap sort, and since `ulab` does not have the concept of data fields, the `order` keyword argument would have no meaning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-11-05T16:06:27.536193Z",
+     "start_time": "2019-11-05T16:06:27.521792Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([[1.0, 12.0, 3.0, 0.0],\n",
+      "\t [5.0, 3.0, 4.0, 1.0],\n",
+      "\t [9.0, 11.0, 1.0, 8.0],\n",
+      "\t [7.0, 10.0, 0.0, 1.0]], dtype=float)\n",
+      "\n",
+      "a sorted along vertical axis:\n",
+      " array([[1.0, 3.0, 0.0, 0.0],\n",
+      "\t [5.0, 10.0, 1.0, 1.0],\n",
+      "\t [7.0, 11.0, 3.0, 1.0],\n",
+      "\t [9.0, 12.0, 4.0, 8.0]], dtype=float)\n",
+      "\n",
+      "a sorted along horizontal axis:\n",
+      " array([[0.0, 1.0, 3.0, 12.0],\n",
+      "\t [1.0, 3.0, 4.0, 5.0],\n",
+      "\t [1.0, 8.0, 9.0, 11.0],\n",
+      "\t [0.0, 1.0, 7.0, 10.0]], dtype=float)\n",
+      "\n",
+      "flattened a sorted:\n",
+      " array([0.0, 0.0, 1.0, ..., 10.0, 11.0, 12.0], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.float)\n",
+    "print('\\na:\\n', a)\n",
+    "b = numerical.sort(a, axis=0)\n",
+    "print('\\na sorted along vertical axis:\\n', b)\n",
+    "\n",
+    "c = numerical.sort(a, axis=1)\n",
+    "print('\\na sorted along horizontal axis:\\n', c)\n",
+    "\n",
+    "c = numerical.sort(a, axis=None)\n",
+    "print('\\nflattened a sorted:\\n', c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Heap sort requires $\\sim N\\log N$ operations, and notably, the worst case costs only 20% more time than the average. In order to get an order-of-magnitude estimate, we will take the sine of 1000 uniformly spaced numbers between 0 and two pi, and sort them:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import vector\n",
+    "from ulab import numerical\n",
+    "\n",
+    "@timeit\n",
+    "def sort_time(array):\n",
+    "    return numerical.sort(array)\n",
+    "\n",
+    "b = vector.sin(np.linspace(0, 6.28, num=1000))\n",
+    "print('b: ', b)\n",
+    "sort_time(b)\n",
+    "print('\\nb sorted:\\n', b)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## argsort\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.argsort.html\n",
+    "\n",
+    "Similarly to [sort](#sort), `argsort` takes a positional and a keyword argument, and returns an unsigned short index array of type `ndarray` with the same dimensions as the input, or, if `axis=None`, a row vector with length equal to the number of elements in the input (i.e., the flattened array). The indices in the output sort the input in ascending order. The routine in `argsort` is the same as in `sort`, therefore, the comments on computational expenses (time and RAM) also apply. In particular, since no copy of the original data is required, virtually no RAM beyond the output array is used. \n",
+    "\n",
+    "Since the underlying container of the output array is of type `uint16_t`, neither of the output dimensions should be larger than 65535. If that happens to be the case, the function will bail out with a `ValueError`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-11-06T06:28:45.719578Z",
+     "start_time": "2019-11-06T06:28:45.704072Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([[1.0, 12.0, 3.0, 0.0],\n",
+      "\t [5.0, 3.0, 4.0, 1.0],\n",
+      "\t [9.0, 11.0, 1.0, 8.0],\n",
+      "\t [7.0, 10.0, 0.0, 1.0]], dtype=float)\n",
+      "\n",
+      "a sorted along vertical axis:\n",
+      " array([[0, 1, 3, 0],\n",
+      "\t [1, 3, 2, 1],\n",
+      "\t [3, 2, 0, 3],\n",
+      "\t [2, 0, 1, 2]], dtype=uint16)\n",
+      "\n",
+      "a sorted along horizontal axis:\n",
+      " array([[3, 0, 2, 1],\n",
+      "\t [3, 1, 2, 0],\n",
+      "\t [2, 3, 0, 1],\n",
+      "\t [2, 3, 0, 1]], dtype=uint16)\n",
+      "\n",
+      "flattened a sorted:\n",
+      " array([3, 14, 0, ..., 13, 9, 1], dtype=uint16)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array([[1, 12, 3, 0], [5, 3, 4, 1], [9, 11, 1, 8], [7, 10, 0, 1]], dtype=np.float)\n",
+    "print('\\na:\\n', a)\n",
+    "b = numerical.argsort(a, axis=0)\n",
+    "print('\\na sorted along vertical axis:\\n', b)\n",
+    "\n",
+    "c = numerical.argsort(a, axis=1)\n",
+    "print('\\na sorted along horizontal axis:\\n', c)\n",
+    "\n",
+    "c = numerical.argsort(a, axis=None)\n",
+    "print('\\nflattened a sorted:\\n', c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Since only the indices are shuffled during sorting, `argsort` does not modify the input array, as one can verify with the following example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-11-06T16:04:31.653444Z",
+     "start_time": "2019-11-06T16:04:31.634995Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "a:\n",
+      " array([0, 5, 1, 3, 2, 4], dtype=uint8)\n",
+      "\n",
+      "sorting indices:\n",
+      " array([0, 2, 4, 3, 5, 1], dtype=uint16)\n",
+      "\n",
+      "the original array:\n",
+      " array([0, 5, 1, 3, 2, 4], dtype=uint8)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import numerical\n",
+    "\n",
+    "a = np.array([0, 5, 1, 3, 2, 4], dtype=np.uint8)\n",
+    "print('\\na:\\n', a)\n",
+    "b = numerical.argsort(a, axis=1)\n",
+    "print('\\nsorting indices:\\n', b)\n",
+    "print('\\nthe original array:\\n', a)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/ulab-poly.ipynb b/tulip/shared/ulab/docs/ulab-poly.ipynb
new file mode 100644
index 000000000..9cd7223e4
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-poly.ipynb
@@ -0,0 +1,454 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-01T09:27:13.438054Z",
+     "start_time": "2020-05-01T09:27:13.191491Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-08-03T18:32:45.342280Z",
+     "start_time": "2020-08-03T18:32:45.338442Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-07-23T20:31:25.296014Z",
+     "start_time": "2020-07-23T20:31:25.265937Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Polynomials\n",
+    "\n",
+    "Functions in the polynomial sub-module can be invoked by importing the module first."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## polyval\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.polyval.html\n",
+    "\n",
+    "`polyval` takes two arguments, both arrays or other iterables."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 187,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-11-01T12:53:22.448303Z",
+     "start_time": "2019-11-01T12:53:22.435176Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "coefficients:  [1, 1, 1, 0]\n",
+      "independent values:  [0, 1, 2, 3, 4]\n",
+      "\n",
+      "values of p(x):  array([0.0, 3.0, 14.0, 39.0, 84.0], dtype=float)\n",
+      "\n",
+      "ndarray (a):  array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)\n",
+      "value of p(a):  array([0.0, 3.0, 14.0, 39.0, 84.0], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import poly\n",
+    "\n",
+    "p = [1, 1, 1, 0]\n",
+    "x = [0, 1, 2, 3, 4]\n",
+    "print('coefficients: ', p)\n",
+    "print('independent values: ', x)\n",
+    "print('\\nvalues of p(x): ', poly.polyval(p, x))\n",
+    "\n",
+    "# the same works with one-dimensional ndarrays\n",
+    "a = np.array(x)\n",
+    "print('\\nndarray (a): ', a)\n",
+    "print('value of p(a): ', poly.polyval(p, a))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## polyfit\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.polyfit.html\n",
+    "\n",
+    "`polyfit` takes two or three arguments. The last one is the degree of the polynomial that will be fitted, the last but one is an array or iterable with the `y` (dependent) values, and the first one, an array or iterable with the `x` (independent) values, can be dropped. If that is the case, `x` will be generated in the function, assuming uniform sampling. \n",
+    "\n",
+    "If the length of `x`, and `y` are not the same, the function raises a `ValueError`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 189,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-11-01T12:54:08.326802Z",
+     "start_time": "2019-11-01T12:54:08.311182Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "independent values:\t array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0], dtype=float)\n",
+      "dependent values:\t array([9.0, 4.0, 1.0, 0.0, 1.0, 4.0, 9.0], dtype=float)\n",
+      "fitted values:\t\t array([1.0, -6.0, 9.000000000000004], dtype=float)\n",
+      "\n",
+      "dependent values:\t array([9.0, 4.0, 1.0, 0.0, 1.0, 4.0, 9.0], dtype=float)\n",
+      "fitted values:\t\t array([1.0, -6.0, 9.000000000000004], dtype=float)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import poly\n",
+    "\n",
+    "x = np.array([0, 1, 2, 3, 4, 5, 6])\n",
+    "y = np.array([9, 4, 1, 0, 1, 4, 9])\n",
+    "print('independent values:\\t', x)\n",
+    "print('dependent values:\\t', y)\n",
+    "print('fitted values:\\t\\t', poly.polyfit(x, y, 2))\n",
+    "\n",
+    "# the same with missing x\n",
+    "print('\\ndependent values:\\t', y)\n",
+    "print('fitted values:\\t\\t', poly.polyfit(y, 2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Execution time\n",
+    "\n",
+    "`polyfit` is based on the inversion of a matrix (there is more on the background at https://en.wikipedia.org/wiki/Polynomial_regression), and it requires the intermediate storage of `2*N*(deg+1)` floats, where `N` is the number of entries in the input array, and `deg` is the fit's degree. The additional computation costs of the matrix inversion discussed in [inv](#inv) also apply. The example from above needs around 150 microseconds to return:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 560,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-10-20T07:24:39.002243Z",
+     "start_time": "2019-10-20T07:24:38.978687Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "execution time:  153  us\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import ulab as np\n",
+    "from ulab import poly\n",
+    "\n",
+    "@timeit\n",
+    "def time_polyfit(x, y, n):\n",
+    "    return poly.polyfit(x, y, n)\n",
+    "\n",
+    "x = np.array([0, 1, 2, 3, 4, 5, 6])\n",
+    "y = np.array([9, 4, 1, 0, 1, 4, 9])\n",
+    "\n",
+    "time_polyfit(x, y, 2)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/docs/ulab-programming.ipynb b/tulip/shared/ulab/docs/ulab-programming.ipynb
new file mode 100644
index 000000000..9685991e1
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-programming.ipynb
@@ -0,0 +1,798 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-10-25T21:25:53.804315Z",
+     "start_time": "2020-10-25T21:25:43.765649Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Programming ulab\n",
+    "\n",
+    "Earlier we have seen, how `ulab`'s functions and methods can be accessed in `micropython`. This last section of the book explains, how these functions are implemented. By the end of this chapter, not only would you be able to extend `ulab`, and write your own `numpy`-compatible functions, but through a deeper understanding of the inner workings of the functions, you would also be able to see what the trade-offs are at the `python` level.\n",
+    "\n",
+    "\n",
+    "## Code organisation\n",
+    "\n",
+    "As mentioned earlier, the `python` functions are organised into sub-modules at the C level. The C sub-modules can be found in `./ulab/code/`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The `ndarray` object"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### General comments\n",
+    "\n",
+    "`ndarrays` are efficient containers of numerical data of the same type (i.e., signed/unsigned chars, signed/unsigned integers or `mp_float_t`s, which, depending on the platform, are either C `float`s, or C `double`s). Beyond storing the actual data in the void pointer `*array`, the type definition has seven additional members (on top of the `base` type). Namely, the `dtype`, which tells us, how the bytes are to be interpreted. Moreover, the `itemsize`, which stores the size of a single entry in the array, `boolean`, an unsigned integer, which determines, whether the array is to be treated as a set of Booleans, or as numerical data, `ndim`, the number of dimensions (`uint8_t`), `len`, the length of the array (the number of entries), the shape (`*size_t`), the strides (`*int32_t`). The length is simply the product of the numbers in `shape`.\n",
+    "\n",
+    "The type definition is as follows:\n",
+    "\n",
+    "```c\n",
+    "typedef struct _ndarray_obj_t {\n",
+    "    mp_obj_base_t base;\n",
+    "    uint8_t dtype;\n",
+    "    uint8_t itemsize;\n",
+    "    uint8_t boolean;\n",
+    "    uint8_t ndim;\n",
+    "    size_t len;\n",
+    "    size_t shape[ULAB_MAX_DIMS];\n",
+    "    int32_t strides[ULAB_MAX_DIMS];\n",
+    "    void *array;\n",
+    "} ndarray_obj_t;\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Memory layout\n",
+    "\n",
+    "The values of an `ndarray` are stored in a contiguous segment in the RAM. The `ndarray` can be dense, meaning that all numbers in the linear memory segment belong to a linear combination of coordinates, and it can also be sparse, i.e., some elements of the linear storage space will be skipped, when the elements of the tensor are traversed. \n",
+    "\n",
+    "In the RAM, the position of the item $M(n_1, n_2, ..., n_{k-1}, n_k)$ in a dense tensor of rank $k$ is given by the linear combination \n",
+    "\n",
+    "\\begin{equation}\n",
+    "P(n_1, n_2, ..., n_{k-1}, n_k) = n_1 s_1 + n_2 s_2 + ... + n_{k-1}s_{k-1} + n_ks_k = \\sum_{i=1}^{k}n_is_i\n",
+    "\\end{equation}\n",
+    "where $s_i$ are the strides of the tensor, defined as \n",
+    "\n",
+    "\\begin{equation}\n",
+    "s_i = \\prod_{j=i+1}^k l_j\n",
+    "\\end{equation}\n",
+    "\n",
+    "where $l_j$ is the length of the tensor along the $j$th axis. When the tensor is sparse (e.g., when the tensor is sliced), the strides along a particular axis will be multiplied by a non-zero integer. If this integer is different to $\\pm 1$, the linear combination above cannot access all elements in the RAM, i.e., some numbers will be skipped. Note that $|s_1| > |s_2| > ... > |s_{k-1}| > |s_k|$, even if the tensor is sparse. The statement is trivial for dense tensors, and it follows from the definition of $s_i$. For sparse tensors, a slice cannot have a step larger than the shape along that axis. But for dense tensors, $s_i/s_{i+1} = l_{i+1}$. \n",
+    "\n",
+    "When creating a *view*, we simply re-calculate the `strides`, and re-set the `*array` pointer.\n",
+    "\n",
+    "## Iterating over elements of a tensor\n",
+    "\n",
+    "The `shape` and `strides` members of the array tell us how we have to move our pointer, when we want to read out the numbers. For technical reasons that will become clear later, the numbers in `shape` and in `strides` are aligned to the right, and begin on the right hand side, i.e., if the number of possible dimensions is `ULAB_MAX_DIMS`, then `shape[ULAB_MAX_DIMS-1]` is the length of the last axis, `shape[ULAB_MAX_DIMS-2]` is the length of the last but one axis, and so on. If the number of actual dimensions, `ndim < ULAB_MAX_DIMS`, the first `ULAB_MAX_DIMS - ndim` entries in `shape` and `strides` will be equal to zero, but they could, in fact, be assigned any value, because these will never be accessed in an operation.\n",
+    "\n",
+    "With this definition of the strides, the linear combination in $P(n_1, n_2, ..., n_{k-1}, n_k)$ is a one-to-one mapping from the space of tensor coordinates, $(n_1, n_2, ..., n_{k-1}, n_k)$, to the coordinate in the linear array, $n_1s_1 + n_2s_2 + ... + n_{k-1}s_{k-1} + n_ks_k$, i.e., no two distinct sets of coordinates will result in the same position in the linear array. \n",
+    "\n",
+    "Since the `strides` are given in terms of bytes, when we iterate over an array, the void data pointer is usually cast to `uint8_t`, and the values are converted using the proper data type stored in `ndarray->dtype`. However, there might be cases, when it makes perfect sense to cast `*array` to a different type, in which case the `strides` have to be re-scaled by the value of `ndarray->itemsize`.\n",
+    "\n",
+    "### Iterating using the unwrapped loops\n",
+    "\n",
+    "The following macro definition is taken from [vector.h](https://github.com/v923z/micropython-ulab/blob/master/code/numpy/vector/vector.h), and demonstrates, how we can iterate over a single array in four dimensions. \n",
+    "\n",
+    "```c\n",
+    "#define ITERATE_VECTOR(type, array, source, sarray) do {\n",
+    "    size_t i=0;\n",
+    "    do {\n",
+    "        size_t j = 0;\n",
+    "        do {\n",
+    "            size_t k = 0;\n",
+    "            do {\n",
+    "                size_t l = 0;\n",
+    "                do {\n",
+    "                    *(array)++ = f(*((type *)(sarray)));\n",
+    "                    (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\n",
+    "                    l++;\n",
+    "                } while(l < (source)->shape[ULAB_MAX_DIMS-1]);\n",
+    "                (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1];\n",
+    "                (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\n",
+    "                k++;\n",
+    "            } while(k < (source)->shape[ULAB_MAX_DIMS-2]);\n",
+    "            (sarray) -= (source)->strides[ULAB_MAX_DIMS - 2] * (source)->shape[ULAB_MAX_DIMS-2];\n",
+    "            (sarray) += (source)->strides[ULAB_MAX_DIMS - 3];\n",
+    "            j++;\n",
+    "        } while(j < (source)->shape[ULAB_MAX_DIMS-3]);\n",
+    "        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 3] * (source)->shape[ULAB_MAX_DIMS-3];\n",
+    "        (sarray) += (source)->strides[ULAB_MAX_DIMS - 4];\n",
+    "        i++;\n",
+    "    } while(i < (source)->shape[ULAB_MAX_DIMS-4]);\n",
+    "} while(0)\n",
+    "```\n",
+    "\n",
+    "We start with the innermost loop, the one iterating over `l`. `array` is already of type `mp_float_t`, while the source array, `sarray`, has been cast to `uint8_t` in the calling function. The numbers contained in `sarray` have to be read out in the proper type dictated by `ndarray->dtype`. This is what happens in the statement `*((type *)(sarray))`, and this number is then fed into the function `f`. Vectorised mathematical functions produce *dense* arrays, and for this reason, we can simply advance the `array` pointer. \n",
+    "\n",
+    "The advancing of the `sarray` pointer is a bit more involved: first, in the innermost loop, we simply move forward by the amount given by the last stride, which is `(source)->strides[ULAB_MAX_DIMS - 1]`, because the `shape` and the `strides` are aligned to the right. We move the pointer as many times as given by `(source)->shape[ULAB_MAX_DIMS-1]`, which is the length of the very last axis. Hence the structure of the loop\n",
+    "\n",
+    "```c\n",
+    "    size_t l = 0;\n",
+    "    do {\n",
+    "        ...\n",
+    "        l++;\n",
+    "    } while(l < (source)->shape[ULAB_MAX_DIMS-1]);\n",
+    "\n",
+    "```\n",
+    "Once we have exhausted the last axis, we have to re-wind the pointer, and advance it by an amount given by the last but one stride. Keep in mind that in the innermost loop we moved our pointer `(source)->shape[ULAB_MAX_DIMS-1]` times by `(source)->strides[ULAB_MAX_DIMS - 1]`, i.e., we re-wind it by moving it backwards by `(source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1]`. In the next step, we move forward by `(source)->strides[ULAB_MAX_DIMS - 2]`, which is the last but one stride. \n",
+    "\n",
+    "\n",
+    "```c\n",
+    "    (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1];\n",
+    "    (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\n",
+    "\n",
+    "```\n",
+    "\n",
+    "This pattern must be repeated for each axis of the array, and this is how we arrive at the four nested loops listed above."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Re-winding arrays by means of a function\n",
+    "\n",
+    "\n",
+    "In addition to un-wrapping the iteration loops by means of macros, there is another way of traversing all elements of a tensor: we note that, since $|s_1| > |s_2| > ... > |s_{k-1}| > |s_k|$, $P(n_1, n_2, ..., n_{k-1}, n_k)$ changes most slowly in the last coordinate. Hence, if we start from the very beginning, ($n_i = 0$ for all $i$), and walk along the linear RAM segment, we increment the value of $n_k$ as long as $n_k < l_k$. Once $n_k = l_k$, we have to reset $n_k$ to 0, and increment $n_{k-1}$ by one. After each such round, $n_{k-1}$ will be incremented by one, as long as $n_{k-1} < l_{k-1}$. Once $n_{k-1} = l_{k-1}$, we reset both $n_k$, and $n_{k-1}$ to 0, and increment $n_{k-2}$ by one. \n",
+    "\n",
+    "Rewinding the arrays in this way is implemented in the function `ndarray_rewind_array` in [ndarray.c](https://github.com/v923z/micropython-ulab/blob/master/code/ndarray.c). \n",
+    "\n",
+    "```c\n",
+    "void ndarray_rewind_array(uint8_t ndim, uint8_t *array, size_t *shape, int32_t *strides, size_t *coords) {\n",
+    "    // resets the data pointer of a single array, whenever an axis is full\n",
+    "    // since we always iterate over the very last axis, we have to keep track of\n",
+    "    // the last ndim-2 axes only\n",
+    "    array -= shape[ULAB_MAX_DIMS - 1] * strides[ULAB_MAX_DIMS - 1];\n",
+    "    array += strides[ULAB_MAX_DIMS - 2];\n",
+    "    for(uint8_t i=1; i < ndim-1; i++) {\n",
+    "        coords[ULAB_MAX_DIMS - 1 - i] += 1;\n",
+    "        if(coords[ULAB_MAX_DIMS - 1 - i] == shape[ULAB_MAX_DIMS - 1 - i]) { // we are at a dimension boundary\n",
+    "            array -= shape[ULAB_MAX_DIMS - 1 - i] * strides[ULAB_MAX_DIMS - 1 - i];\n",
+    "            array += strides[ULAB_MAX_DIMS - 2 - i];\n",
+    "            coords[ULAB_MAX_DIMS - 1 - i] = 0;\n",
+    "            coords[ULAB_MAX_DIMS - 2 - i] += 1;\n",
+    "        } else { // coordinates can change only, if the last coordinate changes\n",
+    "            return;\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "and the function would be called as in the snippet below. Note that the innermost loop is factored out, so that we can save the `if(...)` statement for the last axis.\n",
+    "\n",
+    "```c\n",
+    "    size_t *coords = ndarray_new_coords(results->ndim);\n",
+    "    for(size_t i=0; i < results->len/results->shape[ULAB_MAX_DIMS -1]; i++) {\n",
+    "        size_t l = 0;\n",
+    "        do {\n",
+    "            ...\n",
+    "            l++;\n",
+    "        } while(l < results->shape[ULAB_MAX_DIMS - 1]);\n",
+    "        ndarray_rewind_array(results->ndim, array, results->shape, strides, coords);\n",
+    "    } while(0)\n",
+    "\n",
+    "```\n",
+    "\n",
+    "The advantage of this method is that the implementation is independent of the number of dimensions: the iteration requires more or less the same flash space for 2 dimensions as for 22. However, the price we have to pay for this convenience is the extra function call."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Iterating over two ndarrays simultaneously: broadcasting\n",
+    "\n",
+    "Whenever we invoke a binary operator, call a function with two arguments of `ndarray` type, or assign something to an `ndarray`, we have to iterate over two views at the same time. The task is trivial, if the two `ndarray`s in question have the same shape (but not necessarily the same set of strides), because in this case, we can still iterate in the same loop. All that happens is that we move two data pointers in sync.\n",
+    "\n",
+    "The problem becomes a bit more involved, when the shapes of the two `ndarray`s are not identical. For such cases, `numpy` defines so-called broadcasting, which boils down to two rules. \n",
+    "\n",
+    "1. The shape of the tensor with lower rank has to be prepended with axes of size 1 until the two ranks become equal.\n",
+    "2. Along all axes the two tensors should have the same size, or one of the sizes must be 1. \n",
+    "\n",
+    "If, after applying the first rule the second is not satisfied, the two `ndarray`s cannot be broadcast together. \n",
+    "\n",
+    "Now, let us suppose that we have two compatible `ndarray`s, i.e., after applying the first rule, the second is satisfied. How do we iterate over the elements in the tensors? \n",
+    "\n",
+    "We should recall, what exactly we do, when iterating over a single array: normally, we move the data pointer by the last stride, except, when we arrive at a dimension boundary (when the last axis is exhausted). At that point, we move the pointer by an amount dictated by the strides. And this is the key: *dictated by the strides*. Now, if we have two arrays that are originally not compatible, we define new strides for them, and use these in the iteration. With that, we are back to the case, where we had two compatible arrays. \n",
+    "\n",
+    "Now, let us look at the second broadcasting rule: if the two arrays have the same size, we take both `ndarray`s' strides along that axis. If, on the other hand, one of the `ndarray`s is of length 1 along one of its axes, we set the corresponding strides to 0. This will ensure that the data pointer is not moved, when we iterate over both `ndarray`s at the same time. \n",
+    "\n",
+    "Thus, in order to implement broadcasting, we first have to check, whether the two above-mentioned rules can be satisfied, and if so, we have to find the two new sets of strides. \n",
+    "\n",
+    "The `ndarray_can_broadcast` function from [ndarray.c](https://github.com/v923z/micropython-ulab/blob/master/code/ndarray.c) takes two `ndarray`s, and returns `true`, if the two arrays can be broadcast together. At the same time, it also calculates new strides for the two arrays, so that they can be iterated over at the same time. \n",
+    "\n",
+    "```c\n",
+    "bool ndarray_can_broadcast(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t *ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {\n",
+    "    // returns True or False, depending on, whether the two arrays can be broadcast together\n",
+    "    // numpy's broadcasting rules are as follows:\n",
+    "    //\n",
+    "    // 1. the two shapes are either equal\n",
+    "    // 2. one of the shapes is 1\n",
+    "    memset(lstrides, 0, sizeof(size_t)*ULAB_MAX_DIMS);\n",
+    "    memset(rstrides, 0, sizeof(size_t)*ULAB_MAX_DIMS);\n",
+    "    lstrides[ULAB_MAX_DIMS - 1] = lhs->strides[ULAB_MAX_DIMS - 1];\n",
+    "    rstrides[ULAB_MAX_DIMS - 1] = rhs->strides[ULAB_MAX_DIMS - 1];\n",
+    "    for(uint8_t i=ULAB_MAX_DIMS; i > 0; i--) {\n",
+    "        if((lhs->shape[i-1] == rhs->shape[i-1]) || (lhs->shape[i-1] == 0) || (lhs->shape[i-1] == 1) ||\n",
+    "        (rhs->shape[i-1] == 0) || (rhs->shape[i-1] == 1)) {\n",
+    "            shape[i-1] = MAX(lhs->shape[i-1], rhs->shape[i-1]);\n",
+    "            if(shape[i-1] > 0) (*ndim)++;\n",
+    "            if(lhs->shape[i-1] < 2) {\n",
+    "                lstrides[i-1] = 0;\n",
+    "            } else {\n",
+    "                lstrides[i-1] = lhs->strides[i-1];\n",
+    "            }\n",
+    "            if(rhs->shape[i-1] < 2) {\n",
+    "                rstrides[i-1] = 0;\n",
+    "            } else {\n",
+    "                rstrides[i-1] = rhs->strides[i-1];\n",
+    "            }\n",
+    "        } else {\n",
+    "            return false;\n",
+    "        }\n",
+    "    }\n",
+    "    return true;\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "A good example of how the function would be called can be found in [vector.c](https://github.com/v923z/micropython-ulab/blob/master/code/numpy/vector/vector.c), in the `vector_arctan2` function:\n",
+    "\n",
+    "```c\n",
+    "mp_obj_t vector_arctan2(mp_obj_t y, mp_obj_t x) {\n",
+    "    ...\n",
+    "    uint8_t ndim = 0;\n",
+    "    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);\n",
+    "    int32_t *xstrides = m_new(int32_t, ULAB_MAX_DIMS);\n",
+    "    int32_t *ystrides = m_new(int32_t, ULAB_MAX_DIMS);\n",
+    "    if(!ndarray_can_broadcast(ndarray_x, ndarray_y, &ndim, shape, xstrides, ystrides)) {\n",
+    "        mp_raise_ValueError(translate(\"operands could not be broadcast together\"));\n",
+    "        m_del(size_t, shape, ULAB_MAX_DIMS);\n",
+    "        m_del(int32_t, xstrides, ULAB_MAX_DIMS);\n",
+    "        m_del(int32_t, ystrides, ULAB_MAX_DIMS);\n",
+    "    }\n",
+    "\n",
+    "    uint8_t *xarray = (uint8_t *)ndarray_x->array;\n",
+    "    uint8_t *yarray = (uint8_t *)ndarray_y->array;\n",
+    "    \n",
+    "    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);\n",
+    "    mp_float_t *rarray = (mp_float_t *)results->array;\n",
+    "    ...\n",
+    "```\n",
+    "\n",
+    "After the new strides have been calculated, the iteration loop is identical to what we discussed in the previous section."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Contracting an `ndarray`\n",
+    "\n",
+    "\n",
+    "There are many operations that reduce the number of dimensions of an `ndarray` by 1, i.e., that remove an axis from the tensor. The drill is the same as before, with the exception that first we have to remove the `strides` and `shape` entries that correspond to the axis along which we intend to contract. The `numerical_reduce_axes` function from [numerical.c](https://github.com/v923z/micropython-ulab/blob/master/code/numerical/numerical.c) does that. \n",
+    "\n",
+    "\n",
+    "```c\n",
+    "static void numerical_reduce_axes(ndarray_obj_t *ndarray, int8_t axis, size_t *shape, int32_t *strides) {\n",
+    "    // removes the values corresponding to a single axis from the shape and strides array\n",
+    "    uint8_t index = ULAB_MAX_DIMS - ndarray->ndim + axis;\n",
+    "    if((ndarray->ndim == 1) && (axis == 0)) {\n",
+    "        index = 0;\n",
+    "        shape[ULAB_MAX_DIMS - 1] = 0;\n",
+    "        return;\n",
+    "    }\n",
+    "    for(uint8_t i = ULAB_MAX_DIMS - 1; i > 0; i--) {\n",
+    "        if(i > index) {\n",
+    "            shape[i] = ndarray->shape[i];\n",
+    "            strides[i] = ndarray->strides[i];\n",
+    "        } else {\n",
+    "            shape[i] = ndarray->shape[i-1];\n",
+    "            strides[i] = ndarray->strides[i-1];\n",
+    "        }\n",
+    "    }\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "Once the reduced `strides` and `shape` are known, we place the axis in question in the innermost loop, and wrap it with the loops, whose coordinates are in the `strides`, and `shape` arrays. The `RUN_STD` macro from [numerical.h](https://github.com/v923z/micropython-ulab/blob/master/code/numpy/numerical/numerical.h) is a good example. The macro is expanded in the `numerical_sum_mean_std_ndarray` function. \n",
+    "\n",
+    "\n",
+    "```c\n",
+    "static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t axis, uint8_t optype, size_t ddof) {\n",
+    "    uint8_t *array = (uint8_t *)ndarray->array;\n",
+    "    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);\n",
+    "    memset(shape, 0, sizeof(size_t)*ULAB_MAX_DIMS);\n",
+    "    int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);\n",
+    "    memset(strides, 0, sizeof(uint32_t)*ULAB_MAX_DIMS);\n",
+    "\n",
+    "    int8_t ax = mp_obj_get_int(axis);\n",
+    "    if(ax < 0) ax += ndarray->ndim;\n",
+    "    if((ax < 0) || (ax > ndarray->ndim - 1)) {\n",
+    "        mp_raise_ValueError(translate(\"index out of range\"));\n",
+    "    }\n",
+    "    numerical_reduce_axes(ndarray, ax, shape, strides);\n",
+    "    uint8_t index = ULAB_MAX_DIMS - ndarray->ndim + ax;\n",
+    "    ndarray_obj_t *results = NULL;\n",
+    "    uint8_t *rarray = NULL;\n",
+    "    ...\n",
+    "\n",
+    "```\n",
+    "Here is the macro for the three-dimensional case: \n",
+    "\n",
+    "```c\n",
+    "#define RUN_STD(ndarray, type, array, results, r, shape, strides, index, div) do {\n",
+    "    size_t k = 0;\n",
+    "    do {\n",
+    "        size_t l = 0;\n",
+    "        do {\n",
+    "            RUN_STD1((ndarray), type, (array), (results), (r), (index), (div));\n",
+    "            (array) -= (ndarray)->strides[(index)] * (ndarray)->shape[(index)];\n",
+    "            (array) += (strides)[ULAB_MAX_DIMS - 1];\n",
+    "            l++;\n",
+    "        } while(l < (shape)[ULAB_MAX_DIMS - 1]);\n",
+    "        (array) -= (strides)[ULAB_MAX_DIMS - 2] * (shape)[ULAB_MAX_DIMS-2];\n",
+    "        (array) += (strides)[ULAB_MAX_DIMS - 3];\n",
+    "        k++;\n",
+    "    } while(k < (shape)[ULAB_MAX_DIMS - 2]);\n",
+    "} while(0)\n",
+    "```\n",
+    "In `RUN_STD`, we simply move our pointers; the calculation itself happens in the `RUN_STD1` macro below. (Note that this is the implementation of the numerically stable Welford algorithm.)\n",
+    "\n",
+    "```c\n",
+    "#define RUN_STD1(ndarray, type, array, results, r, index, div)\n",
+    "({\n",
+    "    mp_float_t M, m, S = 0.0, s = 0.0;\n",
+    "    M = m = *(mp_float_t *)((type *)(array));\n",
+    "    for(size_t i=1; i < (ndarray)->shape[(index)]; i++) {\n",
+    "        (array) += (ndarray)->strides[(index)];\n",
+    "        mp_float_t value = *(mp_float_t *)((type *)(array));\n",
+    "        m = M + (value - M) / (mp_float_t)i;\n",
+    "        s = S + (value - M) * (value - m);\n",
+    "        M = m;\n",
+    "        S = s;\n",
+    "    }\n",
+    "    (array) += (ndarray)->strides[(index)];\n",
+    "    *(r)++ = MICROPY_FLOAT_C_FUN(sqrt)((ndarray)->shape[(index)] * s / (div));\n",
+    "})\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Upcasting\n",
+    "\n",
+    "When in an operation the `dtype`s of two arrays are different, the result's `dtype` will be decided by the following upcasting rules:\n",
+    "\n",
+    "1. Operations with two `ndarray`s of the same `dtype` preserve their `dtype`, even when the results overflow.\n",
+    "\n",
+    "2. if either of the operands is a float, the result automatically becomes a float\n",
+    "\n",
+    "3. otherwise\n",
+    "\n",
+    "    - `uint8` + `int8` => `int16`, \n",
+    "    - `uint8` + `int16` => `int16`\n",
+    "    - `uint8` + `uint16` => `uint16`\n",
+    "    \n",
+    "    - `int8` + `int16` => `int16`\n",
+    "    - `int8` + `uint16` => `uint16` (in numpy, the result is a `int32`)\n",
+    "\n",
+    "    - `uint16` + `int16` => `float` (in numpy, the result is a `int32`)\n",
+    "    \n",
+    "4. When one operand of a binary operation is a generic scalar `micropython` variable, i.e., `mp_obj_int`, or `mp_obj_float`, it will be converted to a linear array of length 1, and with the smallest `dtype` that can accommodate the variable in question. After that the broadcasting rules apply, as described in the section [Iterating over two ndarrays simultaneously: broadcasting](#Iterating_over_two_ndarrays_simultaneously:_broadcasting)\n",
+    "\n",
+    "Upcasting is resolved in place, wherever it is required. Notable examples can be found in [ndarray_operators.c](https://github.com/v923z/micropython-ulab/blob/master/code/ndarray_operators.c)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Slicing and indexing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "An `ndarray` can be indexed with three types of objects: integer scalars, slices, and another `ndarray`, whose elements are either integer scalars, or Booleans. Since slice and integer indices can be thought of as modifications of the `strides`, these indices return a view of the `ndarray`. This statement does not hold for `ndarray` indices, and therefore, they return a copy of the array."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Extending ulab\n",
+    "\n",
+    "The `user` module is disabled by default, as can be seen from the last couple of lines of [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h)\n",
+    "\n",
+    "```c\n",
+    "// user-defined module\n",
+    "#ifndef ULAB_USER_MODULE\n",
+    "#define ULAB_USER_MODULE                (0)\n",
+    "#endif\n",
+    "```\n",
+    "\n",
+    "The module contains a very simple function, `user_dummy`, and this function is bound to the module itself. In other words, even if the module is enabled, one has to `import`:\n",
+    "\n",
+    "```python\n",
+    "\n",
+    "import ulab\n",
+    "from ulab import user\n",
+    "\n",
+    "user.dummy_function(2.5)\n",
+    "```\n",
+    "which should just return 5.0. Even if `numpy`-compatibility is required (i.e., if most functions are bound at the top level to `ulab` directly), having to `import` the module has a great advantage. Namely, only the [user.h](https://github.com/v923z/micropython-ulab/blob/master/code/user/user.h) and [user.c](https://github.com/v923z/micropython-ulab/blob/master/code/user/user.c) files have to be modified, thus it should be relatively straightforward to update your local copy from [github](https://github.com/v923z/micropython-ulab/blob/master/). \n",
+    "\n",
+    "Now, let us see, how we can add a more meaningful function. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Creating a new ndarray\n",
+    "\n",
+    "In the [General comments](#General_comments) section we have seen the type definition of an `ndarray`. This structure can be generated by means of a couple of functions listed in [ndarray.c](https://github.com/v923z/micropython-ulab/blob/master/code/ndarray.c). \n",
+    "\n",
+    "\n",
+    "### ndarray_new_ndarray\n",
+    "\n",
+    "The `ndarray_new_ndarray` function is called by all other array-generating functions. It takes the number of dimensions, `ndim`, a `uint8_t`, the `shape`, a pointer to `size_t`, the `strides`, a pointer to `int32_t`, and `dtype`, another `uint8_t` as its arguments, and returns a new array with all entries initialised to 0. \n",
+    "\n",
+    "Assuming that `ULAB_MAX_DIMS > 2`, a new dense array of dimension 3, of `shape` (3, 4, 5), of `strides` (1000, 200, 10), and `dtype` `uint16_t` can be generated by the following instructions\n",
+    "\n",
+    "```c\n",
+    "size_t *shape = m_new(size_t, ULAB_MAX_DIMS);\n",
+    "shape[ULAB_MAX_DIMS - 1] = 5;\n",
+    "shape[ULAB_MAX_DIMS - 2] = 4;\n",
+    "shape[ULAB_MAX_DIMS - 3] = 3;\n",
+    "\n",
+    "int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);\n",
+    "strides[ULAB_MAX_DIMS - 1] = 10;\n",
+    "strides[ULAB_MAX_DIMS - 2] = 200;\n",
+    "strides[ULAB_MAX_DIMS - 3] = 1000;\n",
+    "\n",
+    "ndarray_obj_t *new_ndarray = ndarray_new_ndarray(3, shape, strides, NDARRAY_UINT16);\n",
+    "```\n",
+    "\n",
+    "### ndarray_new_dense_ndarray\n",
+    "\n",
+    "The function simply calculates the `strides` from the `shape`, and calls `ndarray_new_ndarray`. Assuming that `ULAB_MAX_DIMS > 2`, a new dense array of dimension 3, of `shape` (3, 4, 5), and `dtype` `mp_float_t` can be generated by the following instructions\n",
+    "\n",
+    "```c\n",
+    "size_t *shape = m_new(size_t, ULAB_MAX_DIMS);\n",
+    "shape[ULAB_MAX_DIMS - 1] = 5;\n",
+    "shape[ULAB_MAX_DIMS - 2] = 4;\n",
+    "shape[ULAB_MAX_DIMS - 3] = 3;\n",
+    "\n",
+    "ndarray_obj_t *new_ndarray = ndarray_new_dense_ndarray(3, shape, NDARRAY_FLOAT);\n",
+    "```\n",
+    "\n",
+    "### ndarray_new_linear_array\n",
+    "\n",
+    "Since the dimensions of a linear array are known (1), the `ndarray_new_linear_array` takes the `length`, a `size_t`, and the `dtype`, an `uint8_t`. Internally, `ndarray_new_linear_array` generates the `shape` array, and calls `ndarray_new_dense_ndarray` with `ndim = 1`.\n",
+    "\n",
+    "A linear array of length 100, and `dtype` `uint8` could be created by the function call\n",
+    "\n",
+    "```c\n",
+    "ndarray_obj_t *new_ndarray = ndarray_new_linear_array(100, NDARRAY_UINT8)\n",
+    "```\n",
+    "\n",
+    "### ndarray_new_ndarray_from_tuple\n",
+    "\n",
+    "This function takes a `tuple`, which should hold the lengths of the axes (in other words, the `shape`), and the `dtype`, and calls internally `ndarray_new_dense_ndarray`. A new `ndarray` can be generated by calling \n",
+    "\n",
+    "```c\n",
+    "ndarray_obj_t *new_ndarray = ndarray_new_ndarray_from_tuple(shape, NDARRAY_FLOAT);\n",
+    "```\n",
+    "where `shape` is a tuple.\n",
+    "\n",
+    "\n",
+    "### ndarray_new_view\n",
+    "\n",
+    "This function creates a *view*, and takes the source, an `ndarray`, the number of dimensions, an `uint8_t`, the `shape`, a pointer to `size_t`, the `strides`, a pointer to `int32_t`, and the offset, an `int32_t` as arguments. The offset is the number of bytes by which the void `array` pointer is shifted. E.g., the `python` statement\n",
+    "\n",
+    "```python\n",
+    "a = np.array([0, 1, 2, 3, 4, 5], dtype=uint8)\n",
+    "b = a[1::2]\n",
+    "```\n",
+    "\n",
+    "produces the array\n",
+    "\n",
+    "```python\n",
+    "array([1, 3, 5], dtype=uint8)\n",
+    "```\n",
+    "which holds its data at position `x0 + 1`, if `a`'s pointer is at `x0`. In this particular case, the offset is 1. \n",
+    "\n",
+    "The array `b` from the example above could be generated as \n",
+    "\n",
+    "```c\n",
+    "size_t *shape = m_new(size_t, ULAB_MAX_DIMS);\n",
+    "shape[ULAB_MAX_DIMS - 1] = 3;\n",
+    "\n",
+    "int32_t *strides = m_new(int32_t, ULAB_MAX_DIMS);\n",
+    "strides[ULAB_MAX_DIMS - 1] = 2;\n",
+    "\n",
+    "int32_t offset = 1;\n",
+    "uint8_t ndim = 1;\n",
+    "\n",
+    "ndarray_obj_t *new_ndarray = ndarray_new_view(ndarray_a, ndim, shape, strides, offset);\n",
+    "```\n",
+    "\n",
+    "### ndarray_copy_array\n",
+    "\n",
+    "The `ndarray_copy_array` function can be used for copying the contents of an array. Note that the target array has to be created beforehand. E.g., a one-to-one copy can be gotten by \n",
+    "\n",
+    "```c\n",
+    "ndarray_obj_t *new_ndarray = ndarray_new_ndarray(source->ndim, source->shape, source->strides, source->dtype);\n",
+    "ndarray_copy_array(source, new_ndarray);\n",
+    "\n",
+    "```\n",
+    "Note that the function cannot be used for forcing type conversion, i.e., the input and output types must be identical, because the function simply calls the `memcpy` function. On the other hand, the input and output `strides` do not necessarily have to be equal.\n",
+    "\n",
+    "### ndarray_copy_view\n",
+    "\n",
+    "The `ndarray_obj_t *new_ndarray = ...` instruction can be saved by calling the `ndarray_copy_view` function with the single `source` argument. \n",
+    "\n",
+    "\n",
+    "## Accessing data in the ndarray\n",
+    "\n",
+    "Having seen how arrays can be generated and copied, it is time to look at how the data in an `ndarray` can be accessed and modified. \n",
+    "\n",
+    "For starters, let us suppose that the object in question comes from the user (i.e., via the `micropython` interface). First, we have to acquire a pointer to the `ndarray` by calling \n",
+    "\n",
+    "```c\n",
+    "ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(object_in);\n",
+    "```\n",
+    "\n",
+    "If it is not clear, whether the object is an `ndarray` (e.g., if we want to write a function that can take `ndarray`s, and other iterables as its argument), we find this out by evaluating \n",
+    "\n",
+    "```c\n",
+    "mp_obj_is_type(object_in, &ulab_ndarray_type)\n",
+    "```\n",
+    "which should return `true`. Once the pointer is at our disposal, we can get a pointer to the underlying numerical array as discussed earlier, i.e., \n",
+    "\n",
+    "```c\n",
+    "uint8_t *array = (uint8_t *)ndarray->array;\n",
+    "```\n",
+    "\n",
+    "If you need to find out the `dtype` of the array, you can get it by accessing the `dtype` member of the `ndarray`, i.e., \n",
+    "\n",
+    "```c\n",
+    "ndarray->dtype\n",
+    "```\n",
+    "should be equal to `B`, `b`, `H`, `h`, `f`, or `d`. The size of a single item is stored in the `itemsize` member. This number should be equal to 1, if the `dtype` is `B`, or `b`, 2, if the `dtype` is `H`, or `h`, 4, if the `dtype` is `f`, and 8 for `d`. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Boilerplate\n",
+    "\n",
+    "In the next section, we will construct a function that generates the element-wise square of a dense array, otherwise, raises a `TypeError` exception. Dense arrays can easily be iterated over, since we do not have to care about the `shape` and the `strides`. If the array is sparse, the section [Iterating over elements of a tensor](#Iterating-over-elements-of-a-tensor) should contain hints as to how the iteration can be implemented.\n",
+    "\n",
+    "The function is listed under [user.c](https://github.com/v923z/micropython-ulab/tree/master/code/user/). The `user` module is bound to `ulab` in [ulab.c](https://github.com/v923z/micropython-ulab/tree/master/code/ulab.c) in the lines \n",
+    "\n",
+    "```c\n",
+    "    #if ULAB_USER_MODULE\n",
+    "        { MP_ROM_QSTR(MP_QSTR_user), MP_ROM_PTR(&ulab_user_module) },\n",
+    "    #endif\n",
+    "```\n",
+    "which assumes that at the very end of [ulab.h](https://github.com/v923z/micropython-ulab/tree/master/code/ulab.h) the \n",
+    "\n",
+    "```c\n",
+    "// user-defined module\n",
+    "#ifndef ULAB_USER_MODULE\n",
+    "#define ULAB_USER_MODULE                (1)\n",
+    "#endif\n",
+    "```\n",
+    "constant has been set to 1. After compilation, you can call a particular `user` function in `python` by importing the module first, i.e., \n",
+    "\n",
+    "```python\n",
+    "from ulab import numpy as np\n",
+    "from ulab import user\n",
+    "\n",
+    "user.some_function(...)\n",
+    "```\n",
+    "\n",
+    "This separation of user-defined functions from the rest of the code ensures that the integrity of the main module and all its functions is always preserved. Even in case of a catastrophic failure, you can exclude the `user` module, and start over.\n",
+    "\n",
+    "And now the function:\n",
+    "\n",
+    "\n",
+    "```c\n",
+    "static mp_obj_t user_square(mp_obj_t arg) {\n",
+    "    // the function takes a single dense ndarray, and calculates the \n",
+    "    // element-wise square of its entries\n",
+    "    \n",
+    "    // raise a TypeError exception, if the input is not an ndarray\n",
+    "    if(!mp_obj_is_type(arg, &ulab_ndarray_type)) {\n",
+    "        mp_raise_TypeError(translate(\"input must be an ndarray\"));\n",
+    "    }\n",
+    "    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(arg);\n",
+    "    \n",
+    "    // make sure that the input is a dense array\n",
+    "    if(!ndarray_is_dense(ndarray)) {\n",
+    "        mp_raise_TypeError(translate(\"input must be a dense ndarray\"));\n",
+    "    }\n",
+    "    \n",
+    "    // if the input is a dense array, create `results` with the same number of \n",
+    "    // dimensions, shape, and dtype\n",
+    "    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, ndarray->shape, ndarray->dtype);\n",
+    "    \n",
+    "    // since in a dense array the iteration over the elements is trivial, we \n",
+    "    // can cast the data arrays ndarray->array and results->array to the actual type\n",
+    "    if(ndarray->dtype == NDARRAY_UINT8) {\n",
+    "        uint8_t *array = (uint8_t *)ndarray->array;\n",
+    "        uint8_t *rarray = (uint8_t *)results->array;\n",
+    "        for(size_t i=0; i < ndarray->len; i++, array++) {\n",
+    "            *rarray++ = (*array) * (*array);\n",
+    "        }\n",
+    "    } else if(ndarray->dtype == NDARRAY_INT8) {\n",
+    "        int8_t *array = (int8_t *)ndarray->array;\n",
+    "        int8_t *rarray = (int8_t *)results->array;\n",
+    "        for(size_t i=0; i < ndarray->len; i++, array++) {\n",
+    "            *rarray++ = (*array) * (*array);\n",
+    "        }\n",
+    "    } else if(ndarray->dtype == NDARRAY_UINT16) {\n",
+    "        uint16_t *array = (uint16_t *)ndarray->array;\n",
+    "        uint16_t *rarray = (uint16_t *)results->array;\n",
+    "        for(size_t i=0; i < ndarray->len; i++, array++) {\n",
+    "            *rarray++ = (*array) * (*array);\n",
+    "        }\n",
+    "    } else if(ndarray->dtype == NDARRAY_INT16) {\n",
+    "        int16_t *array = (int16_t *)ndarray->array;\n",
+    "        int16_t *rarray = (int16_t *)results->array;\n",
+    "        for(size_t i=0; i < ndarray->len; i++, array++) {\n",
+    "            *rarray++ = (*array) * (*array);\n",
+    "        }\n",
+    "    } else { // if we end up here, the dtype is NDARRAY_FLOAT\n",
+    "        mp_float_t *array = (mp_float_t *)ndarray->array;\n",
+    "        mp_float_t *rarray = (mp_float_t *)results->array;\n",
+    "        for(size_t i=0; i < ndarray->len; i++, array++) {\n",
+    "            *rarray++ = (*array) * (*array);\n",
+    "        }        \n",
+    "    }\n",
+    "    // at the end, return a micropython object\n",
+    "    return MP_OBJ_FROM_PTR(results);\n",
+    "}\n",
+    "\n",
+    "```\n",
+    "\n",
+    "To summarise, the steps for *implementing* a function are\n",
+    "\n",
+    "1. If necessary, inspect the type of the input object, which is always a `mp_obj_t` object\n",
+    "2. If the input is an `ndarray_obj_t`, acquire a pointer to it by calling `ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(arg);`\n",
+    "3. Create a new array, or modify the existing one; get a pointer to the data by calling `uint8_t *array = (uint8_t *)ndarray->array;`, or something equivalent\n",
+    "4. Once the new data have been calculated, return a `micropython` object by calling `MP_OBJ_FROM_PTR(...)`.\n",
+    "\n",
+    "The listing above contains the implementation of the function, but as such, it cannot be called from `python`: \n",
+    "it still has to be bound to the name space. This we do by first defining a function object in \n",
+    "\n",
+    "```c\n",
+    "MP_DEFINE_CONST_FUN_OBJ_1(user_square_obj, user_square);\n",
+    "\n",
+    "```\n",
+    "\n",
+    "`micropython` defines a number of `MP_DEFINE_CONST_FUN_OBJ_N` macros in [obj.h](https://github.com/micropython/micropython/blob/master/py/obj.h). `N` is always the number of arguments the function takes. We had a function definition `static mp_obj_t user_square(mp_obj_t arg)`, i.e., we dealt with a single argument. \n",
+    "\n",
+    "Finally, we have to bind this function object in the globals table of the `user` module: \n",
+    "\n",
+    "```c\n",
+    "static const mp_rom_map_elem_t ulab_user_globals_table[] = {\n",
+    "    { MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_user) },\n",
+    "    { MP_OBJ_NEW_QSTR(MP_QSTR_square), (mp_obj_t)&user_square_obj },\n",
+    "};\n",
+    "```\n",
+    "\n",
+    "Thus, the three steps required for the definition of a user-defined function are \n",
+    "\n",
+    "1. The low-level implementation of the function itself\n",
+    "2. The definition of a function object by calling MP_DEFINE_CONST_FUN_OBJ_N()\n",
+    "3. Binding this function object to the namespace in the `ulab_user_globals_table[]`"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/tulip/shared/ulab/docs/ulab-tricks.ipynb b/tulip/shared/ulab/docs/ulab-tricks.ipynb
new file mode 100644
index 000000000..ec67c8c83
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-tricks.ipynb
@@ -0,0 +1,582 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-01T09:27:13.438054Z",
+     "start_time": "2020-05-01T09:27:13.191491Z"
+    }
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-08-03T18:32:45.342280Z",
+     "start_time": "2020-08-03T18:32:45.338442Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-07-23T20:31:25.296014Z",
+     "start_time": "2020-07-23T20:31:25.265937Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../../micropython/ports/unix/micropython\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    printf('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n, (delta2-delta1)/n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tricks\n",
+    "\n",
+    "This section of the book discusses a couple of tricks that can be exploited to either speed up computations, or save on RAM. However, there is probably no silver bullet, and you have to evaluate your code in terms of execution speed (if the execution is time critical), or RAM used. You should also keep in mind that, if a particular code snippet is optimised on some hardware, there is no guarantee that on another piece of hardware, you will get similar improvements. Hardware implementations are vastly different. Some microcontrollers do not even have an FPU, so you should not be surprised that you get significantly different benchmarks. Just to underline this statement, you can study the [collection of benchmarks](https://github.com/thiagofe/ulab_samples)."
+   ]
+  },
+  {
+   "source": [
+    "## Use an `ndarray`, if you can\n",
+    "\n",
+    "Many functions in `ulab` are implemented in a universal fashion, meaning that both generic `micropython` iterables, and `ndarray`s can be passed as an argument. E.g., both \n",
+    "\n",
+    "```python\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "np.sum([1, 2, 3, 4, 5])\n",
+    "```\n",
+    "and\n",
+    "\n",
+    "```python\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array([1, 2, 3, 4, 5])\n",
+    "np.sum(a)\n",
+    "```\n",
+    "\n",
+    "will return the `micropython` variable 15 as the result. Still, `np.sum(a)` is evaluated significantly faster, because in `np.sum([1, 2, 3, 4, 5])`, the interpreter has to fetch 5 `micropython` variables, convert them to `float`, and sum the values, while the C type of `a` is known, thus the interpreter can invoke a single `for` loop for the evaluation of the `sum`. In the `for` loop, there are no function calls, the iteration simply walks through the pointer holding the values of `a`, and adds the values to an accumulator. If the array `a` is already available, then you can gain a factor of 3 in speed by calling `sum` on the array, instead of using the list. Compared to the python implementation of the same functionality, the speed-up is around 40 (again, this might depend on the hardware).\n",
+    "\n",
+    "On the other hand, if the array is not available, then there is not much point in converting the list to an `ndarray` and passing that to the function. In fact, you should expect a slow-down: the constructor has to iterate over the list elements, and has to convert them to a numerical type. On top of that, it also has to reserve RAM for the `ndarray`."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "source": [
+    "## Use a reasonable `dtype`\n",
+    "\n",
+    "Just as in `numpy`, the default `dtype` is `float`. But this does not mean that that is the most suitable one in all scenarios. If data are streamed from an 8-bit ADC, and you only want to know the maximum, or the sum, then it is quite reasonable to use `uint8` for the `dtype`. Storing the same data in a `float` array would cost 4 or 8 times as much RAM, with absolutely no gain. Do not rely on the default value of the constructor's keyword argument, and choose one that fits!"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "source": [
+    "## Beware the axis!\n",
+    "\n",
+    "Whenever `ulab` iterates over multi-dimensional arrays, the outermost loop is the first axis, then the second axis, and so on. E.g., when the `sum` of \n",
+    "\n",
+    "```python\n",
+    "a = array([[1, 2, 3, 4],\n",
+    "           [5, 6, 7, 8], \n",
+    "           [9, 10, 11, 12]], dtype=uint8)\n",
+    "```\n",
+    "\n",
+    "is being calculated, first the data pointer walks along `[1, 2, 3, 4]` (innermost loop, last axis), then is moved back to the position where 5 is stored (this is the nesting loop), and traverses `[5, 6, 7, 8]`, and so on. Moving the pointer back to 5 is more expensive than moving it along an axis, because the position of 5 has to be calculated, whereas moving from 5 to 6 is simply an addition to the address. Thus, while the matrix\n",
+    "\n",
+    "```python\n",
+    "b = array([[1, 5, 9],\n",
+    "           [2, 6, 10], \n",
+    "           [3, 7, 11],\n",
+    "           [4, 8, 12]], dtype=uint8)\n",
+    "```\n",
+    "\n",
+    "holds the same data as `a`, the summation over the entries in `b` is slower, because the pointer has to be re-wound three times, as opposed to twice in `a`. For small matrices the savings are not significant, but you would definitely notice the difference, if you had \n",
+    "\n",
+    "```\n",
+    "a = array(range(2000)).reshape((2, 1000))\n",
+    "b = array(range(2000)).reshape((1000, 2))\n",
+    "```\n",
+    "\n",
+    "The moral is that, in order to improve on the execution speed, whenever possible, you should try to make the last axis the longest. As a side note, `numpy` can re-arrange its loops, and puts the longest axis in the innermost loop. This is why the longest axis is sometimes referred to as the fast axis. In `ulab`, the order of the axes is fixed. "
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "source": [
+    "## Reduce the number of artifacts\n",
+    "\n",
+    "Before showing a real-life example, let us suppose that we want to interpolate uniformly sampled data, and the absolute magnitude is not really important, we only care about the ratios between neighbouring values. One way of achieving this is calling the `interp` function. However, we could just as well work with slices."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([ 0,  5, 10,  6,  2, 11, 20, 12,  4], dtype=uint8)"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 18
+    }
+   ],
+   "source": [
+    "a = array([0, 10, 2, 20, 4], dtype=np.uint8)\n",
+    "b = np.zeros(9, dtype=np.uint8)\n",
+    "\n",
+    "b[::2] = 2 * a\n",
+    "b[1::2] = a[:-1] + a[1:]\n",
+    "\n",
+    "b //= 2\n",
+    "b"
+   ]
+  },
+  {
+   "source": [
+    "`b` now has values from `a` at every even position, and interpolates the values on every odd position. If only the relative magnitudes are important, then we can even save the division by 2, and we end up with "
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "source": [
+    "a = array([0, 10, 2, 20, 4], dtype=np.uint8)\n",
+    "b = np.zeros(9, dtype=np.uint8)\n",
+    "\n",
+    "b[::2] = 2 * a\n",
+    "b[1::2] = a[:-1] + a[1:]\n",
+    "\n",
+    "b"
+   ],
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": 16,
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([ 0, 10, 20, 12,  4, 22, 40, 24,  8], dtype=uint8)"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 16
+    }
+   ]
+  },
+  {
+   "source": [
+    "Importantly, we managed to keep the results in the smaller `dtype`, `uint8`. Now, while the two assignments above are terse and pythonic, the code is not the most efficient: the right hand sides are compound statements, generating intermediate results. To store them, RAM has to be allocated. This takes time, and leads to memory fragmentation. Better is to write out the assignments in 4 instructions:"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([ 0, 10, 20, 12,  4, 22, 40, 24,  8], dtype=uint8)"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 15
+    }
+   ],
+   "source": [
+    "b = np.zeros(9, dtype=np.uint8)\n",
+    "\n",
+    "b[::2] = a\n",
+    "b[::2] += a\n",
+    "b[1::2] = a[:-1]\n",
+    "b[1::2] += a[1:]\n",
+    "\n",
+    "b"
+   ]
+  },
+  {
+   "source": [
+    "The results are the same, but no extra RAM is allocated, except for the views `a[:-1]`, and `a[1:]`, but those had to be created even in the original implementation."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "source": [
+    "### Upscaling images\n",
+    "\n",
+    "And now the example: there are low-resolution thermal cameras out there. Low resolution might mean 8 by 8 pixels. Such a small number of pixels is just not reasonable to plot, no matter how small the display is. If you want to make the camera image a bit more pleasing, you can upscale (stretch) it in both dimensions. This can be done exactly as we up-scaled the linear array:"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "b = np.zeros((15, 15), dtype=np.uint8)\n",
+    "\n",
+    "b[1::2,::2] = a[:-1,:]\n",
+    "b[1::2,::2] += a[1:, :]\n",
+    "b[1::2,::2] //= 2\n",
+    "b[::,1::2] = a[::,:-1:2]\n",
+    "b[::,1::2] += a[::,2::2]\n",
+    "b[::,1::2] //= 2"
+   ]
+  },
+  {
+   "source": [
+    "Up-scaling by larger numbers can be done in a similar fashion, you simply have more assignments."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "source": [
+    "There are cases, when one cannot do away with the intermediate results. Two prominent cases are the `where` function, and indexing by means of a Boolean array. E.g., in"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([1, 2, 3])"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 20
+    }
+   ],
+   "source": [
+    "a = array([1, 2, 3, 4, 5])\n",
+    "b = a[a < 4]\n",
+    "b"
+   ]
+  },
+  {
+   "source": [
+    "the expression `a < 4` produces the Boolean array, "
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "source": [
+    "a < 4"
+   ],
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": 22,
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "array([ True,  True,  True, False, False])"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 22
+    }
+   ]
+  },
+  {
+   "source": [
+    "If you repeatedly have such conditions in a loop, you might have to periodically call the garbage collector to remove the Boolean arrays that are used only once."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file
diff --git a/tulip/shared/ulab/docs/ulab-utils.ipynb b/tulip/shared/ulab/docs/ulab-utils.ipynb
new file mode 100644
index 000000000..8dce72c08
--- /dev/null
+++ b/tulip/shared/ulab/docs/ulab-utils.ipynb
@@ -0,0 +1,620 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-03-04T18:21:22.822563Z",
+     "start_time": "2021-03-04T18:21:18.656643Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Notebook magic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-29T16:53:11.972661Z",
+     "start_time": "2022-01-29T16:53:11.965952Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
+    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
+    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
+    "import subprocess\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-29T16:59:24.652277Z",
+     "start_time": "2022-01-29T16:59:24.639828Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "@magics_class\n",
+    "class PyboardMagic(Magics):\n",
+    "    @cell_magic\n",
+    "    @magic_arguments()\n",
+    "    @argument('-skip')\n",
+    "    @argument('-unix')\n",
+    "    @argument('-pyboard')\n",
+    "    @argument('-file')\n",
+    "    @argument('-data')\n",
+    "    @argument('-time')\n",
+    "    @argument('-memory')\n",
+    "    def micropython(self, line='', cell=None):\n",
+    "        args = parse_argstring(self.micropython, line)\n",
+    "        if args.skip: # doesn't care about the cell's content\n",
+    "            print('skipped execution')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
+    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
+    "                fout.write(cell)\n",
+    "            proc = subprocess.Popen([\"../micropython/ports/unix/build-2/micropython-2\", \"/dev/shm/micropython.py\"], \n",
+    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
+    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
+    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
+    "            return None\n",
+    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
+    "            spaces = \"    \"\n",
+    "            try:\n",
+    "                with open(args.file, 'w') as fout:\n",
+    "                    fout.write(cell.replace('\\t', spaces))\n",
+    "                    print('written cell to {}'.format(args.file))\n",
+    "            except:\n",
+    "                print('Failed to write to disc!')\n",
+    "            return None # do not parse the rest\n",
+    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
+    "            message = pyb.exec(cell)\n",
+    "            if len(message) == 0:\n",
+    "                print('pyboard >>>')\n",
+    "            else:\n",
+    "                print(message.decode('utf-8'))\n",
+    "                # register new variable in user namespace\n",
+    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
+    "        \n",
+    "        if args.time: # measures the time of executions\n",
+    "            pyb.exec('import utime')\n",
+    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
+    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
+    "            print(message.decode('utf-8'))\n",
+    "        \n",
+    "        if args.memory: # prints out memory information \n",
+    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
+    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(\">>> \", message.decode('utf-8'))\n",
+    "            message = pyb.exec('print(mem_info())')\n",
+    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
+    "\n",
+    "        if args.pyboard:\n",
+    "            message = pyb.exec(cell)\n",
+    "            print(message.decode('utf-8'))\n",
+    "\n",
+    "ip = get_ipython()\n",
+    "ip.register_magics(PyboardMagic)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## pyboard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:35.126401Z",
+     "start_time": "2020-05-07T07:35:35.105824Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pyboard\n",
+    "pyb = pyboard.Pyboard('/dev/ttyACM0')\n",
+    "pyb.enter_raw_repl()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-19T19:11:18.145548Z",
+     "start_time": "2020-05-19T19:11:18.137468Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pyb.exit_raw_repl()\n",
+    "pyb.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-05-07T07:35:38.725924Z",
+     "start_time": "2020-05-07T07:35:38.645488Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -pyboard 1\n",
+    "\n",
+    "import utime\n",
+    "import ulab as np\n",
+    "\n",
+    "def timeit(n=1000):\n",
+    "    def wrapper(f, *args, **kwargs):\n",
+    "        func_name = str(f).split(' ')[1]\n",
+    "        def new_func(*args, **kwargs):\n",
+    "            run_times = np.zeros(n, dtype=np.uint16)\n",
+    "            for i in range(n):\n",
+    "                t = utime.ticks_us()\n",
+    "                result = f(*args, **kwargs)\n",
+    "                run_times[i] = utime.ticks_diff(utime.ticks_us(), t)\n",
+    "            print('{}() execution times based on {} cycles'.format(func_name, n))\n",
+    "            print('\\tbest: %d us'%np.min(run_times))\n",
+    "            print('\\tworst: %d us'%np.max(run_times))\n",
+    "            print('\\taverage: %d us'%np.mean(run_times))\n",
+    "            print('\\tdeviation: +/-%.3f us'%np.std(run_times))            \n",
+    "            return result\n",
+    "        return new_func\n",
+    "    return wrapper\n",
+    "\n",
+    "def timeit(f, *args, **kwargs):\n",
+    "    func_name = str(f).split(' ')[1]\n",
+    "    def new_func(*args, **kwargs):\n",
+    "        t = utime.ticks_us()\n",
+    "        result = f(*args, **kwargs)\n",
+    "        print('execution time: ', utime.ticks_diff(utime.ticks_us(), t), ' us')\n",
+    "        return result\n",
+    "    return new_func"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__END_OF_DEFS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ulab utilities\n",
+    "\n",
+    "\n",
+    "There might be cases, when the format of your data does not conform to `ulab`, i.e., there is no obvious way to map the data to any of the five supported `dtype`s. A trivial example is an ADC or microphone signal with 32-bit resolution. For such cases, `ulab` defines the `utils` module, which, at the moment, has four functions that are not `numpy` compatible, but which should ease interfacing `ndarray`s to peripheral devices. \n",
+    "\n",
+    "The `utils` module can be enabled by setting the `ULAB_HAS_UTILS_MODULE` constant to 1 in  [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h):\n",
+    "\n",
+    "```c\n",
+    "#ifndef ULAB_HAS_UTILS_MODULE\n",
+    "#define ULAB_HAS_UTILS_MODULE               (1)\n",
+    "#endif\n",
+    "```\n",
+    "\n",
+    "This still does not compile any functions into the firmware. You can add a function by setting the corresponding pre-processor constant to 1. E.g., \n",
+    "\n",
+    "```c\n",
+    "#ifndef ULAB_UTILS_HAS_FROM_INT16_BUFFER\n",
+    "#define ULAB_UTILS_HAS_FROM_INT16_BUFFER    (1)\n",
+    "#endif\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## from_int32_buffer, from_uint32_buffer\n",
+    "\n",
+    "With the help of `utils.from_int32_buffer`, and `utils.from_uint32_buffer`, it is possible to convert 32-bit integer buffers to `ndarrays` of float type. These functions have a syntax similar to `numpy.frombuffer`; they support the `count=-1`, and `offset=0` keyword arguments.  However, in addition, they also accept `out=None`, and `byteswap=False`. \n",
+    "\n",
+    "Here is an example without keyword arguments"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-03-05T06:53:26.256516Z",
+     "start_time": "2021-03-05T06:53:26.007070Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  bytearray(b'\\x01\\x01\\x00\\x00\\x00\\x00\\x00\\xff')\n",
+      "\n",
+      "unsigned integers:  array([257.0, 4278190080.000001], dtype=float64)\n",
+      "\n",
+      "b:   bytearray(b'\\x01\\x01\\x00\\x00\\x00\\x00\\x00\\xff')\n",
+      "\n",
+      "signed integers:  array([257.0, -16777216.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import utils\n",
+    "\n",
+    "a = bytearray([1, 1, 0, 0, 0, 0, 0, 255])\n",
+    "print('a: ', a)\n",
+    "print()\n",
+    "print('unsigned integers: ', utils.from_uint32_buffer(a))\n",
+    "\n",
+    "b = bytearray([1, 1, 0, 0, 0, 0, 0, 255])\n",
+    "print('\\nb:  ', b)\n",
+    "print()\n",
+    "print('signed integers: ', utils.from_int32_buffer(b))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The meaning of `count`, and `offset` is similar to that in `numpy.frombuffer`. `count` is the number of floats that will be converted, while `offset` would discard the first `offset` number of bytes from the buffer before the conversion.\n",
+    "\n",
+    "In the example above, repeated calls to either of the functions return a new `ndarray`.  You can save RAM by supplying the `out` keyword argument with a pre-defined `ndarray` of sufficient size, in which case the results will be inserted into the `ndarray`. If the `dtype` of `out` is not `float`, a `TypeError` exception will be raised."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-03-05T06:53:41.551440Z",
+     "start_time": "2021-03-05T06:53:41.534163Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "b:  bytearray(b'\\x01\\x00\\x01\\x00\\x00\\x01\\x00\\x01')\n",
+      "a:  array([65537.0, 16777472.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import utils\n",
+    "\n",
+    "a = np.array([1, 2], dtype=np.float)\n",
+    "b = bytearray([1, 0, 1, 0, 0, 1, 0, 1])\n",
+    "print('b: ', b)\n",
+    "utils.from_uint32_buffer(b, out=a)\n",
+    "print('a: ', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, since there is no guarantee that the endianness of a particular peripheral device supplying the buffer is the same as that of the microcontroller, `from_(u)intbuffer` allows a conversion via the `byteswap` keyword argument."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-03-05T06:53:52.242950Z",
+     "start_time": "2021-03-05T06:53:52.229160Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "a:  bytearray(b'\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x01')\n",
+      "buffer without byteswapping:  array([1.0, 16777216.0], dtype=float64)\n",
+      "buffer with byteswapping:  array([16777216.0, 1.0], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import utils\n",
+    "\n",
+    "a = bytearray([1, 0, 0, 0, 0, 0, 0, 1])\n",
+    "print('a: ', a)\n",
+    "print('buffer without byteswapping: ', utils.from_uint32_buffer(a))\n",
+    "print('buffer with byteswapping: ', utils.from_uint32_buffer(a, byteswap=True))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## from_int16_buffer, from_uint16_buffer\n",
+    "\n",
+    "These two functions are identical to `utils.from_int32_buffer`, and `utils.from_uint32_buffer`, with the exception that they convert 16-bit integers to floating point `ndarray`s. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## spectrogram\n",
+    "\n",
+    "In addition to the Fourier transform and its inverse, `ulab` also sports a function called `spectrogram`, which returns the absolute value of the Fourier transform, also known as the power spectrum. This could be used to find the dominant spectral component in a time series. The positional arguments are treated in the same way as in `fft`, and `ifft`. This means that, if the firmware was compiled with complex support and  `ULAB_FFT_IS_NUMPY_COMPATIBLE` is defined to be 1 in `ulab.h`, the input can also be a complex array. \n",
+    "\n",
+    "An easy way to find out if the FFT is `numpy`-compatible is to check the number of values `fft.fft` returns, when called with a single real argument of length other than 2: "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "FFT is numpy compatible (complex inputs/outputs)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "if len(np.fft.fft(np.zeros(4))) == 2:\n",
+    "    print('FFT is NOT numpy compatible (real and imaginary parts are treated separately)')\n",
+    "else:\n",
+    "    print('FFT is numpy compatible (complex inputs/outputs)')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Depending on the `numpy`-compatibility of the FFT, the `spectrogram` function takes one or two positional arguments, and three keyword arguments. If the FFT is `numpy` compatible, one positional argument is allowed, and it is a 1D real or complex `ndarray`. If the FFT is not `numpy`-compatible, if a single argument is supplied, it will be treated as the real part of the input, and if two positional arguments are supplied, they are treated as the real and imaginary parts of the signal.\n",
+    "\n",
+    "The keyword arguments are as follows:\n",
+    "\n",
+    "1. `scratchpad = None`: must be a 1D, dense, floating point array, twice as long as the input array; the `scratchpad` will be used as a temporary internal buffer to perform the Fourier transform; the `scratchpad` can repeatedly be re-used.\n",
+    "1. `out = None`: must be a 1D, not necessarily dense, floating point array that will store the results\n",
+    "1. `log = False`: must be either `True`, or `False`; if `True`, the `spectrogram` returns the logarithm of the absolute values of the Fourier transform."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-29T16:59:56.400603Z",
+     "start_time": "2022-01-29T16:59:56.374748Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "original vector:\n",
+      " array([0.0, 0.009775015390171337, 0.01954909674625918, ..., -0.5275140569487312, -0.5357931822978732, -0.5440211108893697], dtype=float64)\n",
+      "\n",
+      "spectrum:\n",
+      " array([187.8635087634578, 315.3112063607119, 347.8814873399375, ..., 84.45888934298905, 347.8814873399374, 315.3112063607118], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import utils as utils\n",
+    "\n",
+    "x = np.linspace(0, 10, num=1024)\n",
+    "y = np.sin(x)\n",
+    "\n",
+    "a = utils.spectrogram(y)\n",
+    "\n",
+    "print('original vector:\\n', y)\n",
+    "print('\\nspectrum:\\n', a)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As such, `spectrogram` is really just a shorthand for `np.abs(np.fft.fft(signal))`, if the FFT is `numpy`-compatible, or `np.sqrt(a*a + b*b)` if the FFT returns the real (`a`) and imaginary (`b`) parts separately. However, `spectrogram` saves significant amounts of RAM: the expression `a*a + b*b` has to allocate memory for `a*a`, `b*b`, and finally, their sum. Similarly, `np.abs` returns a new array. This issue is compounded even more, if `np.log()` is used on the absolute value. \n",
+    "\n",
+    "In contrast, `spectrogram` handles all calculations in the same internal arrays, and allows one to re-use previously reserved RAM. This can be especially useful in cases, when `spectrogram` is called repeatedly, as in the snippet below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-29T16:59:48.485610Z",
+     "start_time": "2022-01-29T16:59:48.462593Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "signal:  array([-27.38260169844543, 6.237834411021073, -0.4038327279002965, ..., -0.9795967096969854, -0.4038327279002969, 6.237834411021073], dtype=float64)\n",
+      "out:  array([-27.38260169844543, 6.237834411021073, -0.4038327279002965, ..., -0.9795967096969854, -0.4038327279002969, 6.237834411021073], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "from ulab import utils as utils\n",
+    "\n",
+    "n = 1024\n",
+    "t = np.linspace(0, 2 * np.pi, num=1024)\n",
+    "scratchpad = np.zeros(2 * n)\n",
+    "\n",
+    "for _ in range(10):\n",
+    "    signal = np.sin(t)\n",
+    "    utils.spectrogram(signal, out=signal, scratchpad=scratchpad, log=True)\n",
+    "\n",
+    "print('signal: ', signal)\n",
+    "\n",
+    "for _ in range(10):\n",
+    "    signal = np.sin(t)\n",
+    "    out = np.log(utils.spectrogram(signal))\n",
+    "\n",
+    "print('out: ', out)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that `scratchpad` is reserved only once, and then is re-used in the first loop. By assigning `signal` to the output, we save additional RAM. This approach avoids the usual problem of memory fragmentation, which would happen in the second loop, where both `spectrogram`, and `np.log` must reserve RAM in each iteration."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "382.797px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tulip/shared/ulab/requirements.txt b/tulip/shared/ulab/requirements.txt
new file mode 100644
index 000000000..44a988dcd
--- /dev/null
+++ b/tulip/shared/ulab/requirements.txt
@@ -0,0 +1 @@
+sphinx-autoapi
diff --git a/tulip/shared/ulab/requirements_cp_dev.txt b/tulip/shared/ulab/requirements_cp_dev.txt
new file mode 100644
index 000000000..8a4cdba3e
--- /dev/null
+++ b/tulip/shared/ulab/requirements_cp_dev.txt
@@ -0,0 +1,18 @@
+# For docs
+mypy
+black
+isort
+astroid
+setuptools
+setuptools_scm
+
+Sphinx>=4.0.0
+sphinx-autoapi
+sphinx-rtd-theme
+sphinxcontrib-svg2pdfconverter
+readthedocs-sphinx-search
+myst-parser
+
+# For stubs and annotations
+adafruit-circuitpython-typing
+build
diff --git a/tulip/shared/ulab/run-tests b/tulip/shared/ulab/run-tests
new file mode 100755
index 000000000..880b13f04
--- /dev/null
+++ b/tulip/shared/ulab/run-tests
@@ -0,0 +1,570 @@
+#! /usr/bin/env python3
+
+import os
+import subprocess
+import sys
+import platform
+import argparse
+import re
+import threading
+import multiprocessing
+from multiprocessing.pool import ThreadPool
+from glob import glob
+
+# Choose the interpreter under test: the MICROPY_MICROPYTHON environment
+# variable takes precedence, otherwise a platform-specific default path is used.
+if os.name == 'nt':
+    MICROPYTHON = os.getenv('MICROPY_MICROPYTHON', 'micropython/ports/windows/micropython.exe')
+else:
+    MICROPYTHON = os.getenv('MICROPY_MICROPYTHON', 'micropython/ports/unix/micropython')
+
+# mpy-cross is only needed if --via-mpy command-line arg is passed
+# (overridable through the MICROPY_MPYCROSS environment variable)
+MPYCROSS = os.getenv('MICROPY_MPYCROSS', '../mpy-cross/mpy-cross')
+
+# Set PYTHONIOENCODING so that CPython will use utf-8 on systems which set another encoding in the locale
+os.environ['PYTHONIOENCODING'] = 'utf-8'
+
+def rm_f(fname):
+    if os.path.exists(fname):
+        os.remove(fname)
+
+
+# unescape wanted regex chars and escape unwanted ones
+def convert_regex_escapes(line):
+    cs = []
+    escape = False
+    for c in str(line, 'utf8'):
+        if escape:
+            escape = False
+            cs.append(c)
+        elif c == '\\':
+            escape = True
+        elif c in ('(', ')', '[', ']', '{', '}', '.', '*', '+', '^', '$'):
+            cs.append('\\' + c)
+        else:
+            cs.append(c)
+    # accept carriage-return(s) before final newline
+    if cs[-1] == '\n':
+        cs[-1] = '\r*\n'
+    return bytes(''.join(cs), 'utf8')
+
+
+def run_micropython(pyb, args, test_file, is_special=False):
+    """Run a single test file and return its output as bytes.
+
+    When ``pyb`` is None the test is run on the PC by launching the
+    MICROPYTHON executable; otherwise it is executed through
+    ``pyb.execfile`` after entering the raw REPL.
+
+    Parameters:
+        pyb: pyboard connection object, or None to run on the PC.
+        args: options object; the standard PC branch reads ``emit``,
+            ``heapsize`` and ``via_mpy`` from it.  For special tests the name
+            is rebound to the command list that is executed.
+        test_file: path of the test script.
+        is_special: force the special-test handling (extra command-line
+            options read from the first line of the file, and output matched
+            against ``test_file + '.exp'``).
+
+    Returns:
+        The captured output with CRLF line endings converted to LF.
+        ``b'SKIP\\n'`` is returned when a ``repl_`` test cannot import ``pty``,
+        and ``b'CRASH'`` when the run fails.  For special tests, output lines
+        that match the expected-output patterns are replaced by the expected
+        lines.
+    """
+    special_tests = (
+        'micropython/meminfo.py', 'basics/bytes_compare3.py',
+        'basics/builtin_help.py', 'thread/thread_exc2.py',
+    )
+    had_crash = False
+    if pyb is None:
+        # run on PC
+        if test_file.startswith(('cmdline/', 'feature_check/')) or test_file in special_tests:
+            # special handling for tests of the unix cmdline program
+            is_special = True
+
+        if is_special:
+            # check for any cmdline options needed for this test
+            # (from here on, args is the command list, not the options object)
+            args = [MICROPYTHON]
+            with open(test_file, 'rb') as f:
+                line = f.readline()
+                if line.startswith(b'# cmdline:'):
+                    # subprocess.check_output on Windows only accepts strings, not bytes
+                    args += [str(c, 'utf-8') for c in line[10:].strip().split()]
+
+            # run the test, possibly with redirected input
+            try:
+                if 'repl_' in test_file:
+                    # Need to use a PTY to test command line editing
+                    try:
+                        import pty
+                    except ImportError:
+                        # in case pty module is not available, like on Windows
+                        return b'SKIP\n'
+                    import select
+
+                    # collect whatever can be read from the pty; with
+                    # required=True keep polling until some data has arrived
+                    def get(required=False):
+                        rv = b''
+                        while True:
+                            ready = select.select([emulator], [], [], 0.02)
+                            if ready[0] == [emulator]:
+                                rv += os.read(emulator, 1024)
+                            else:
+                                if not required or rv:
+                                    return rv
+
+                    # write one chunk of input to the pty and return the response
+                    def send_get(what):
+                        os.write(emulator, what)
+                        return get()
+
+                    with open(test_file, 'rb') as f:
+                        # instead of: output_mupy = subprocess.check_output(args, stdin=f)
+                        # openpty returns two read/write file descriptors.  The first one is
+                        # used by the program which provides the virtual
+                        # terminal service, and the second one is used by the
+                        # subprogram which requires a tty to work.
+                        emulator, subterminal = pty.openpty()
+                        p = subprocess.Popen(args, stdin=subterminal, stdout=subterminal,
+                                             stderr=subprocess.STDOUT, bufsize=0)
+                        banner = get(True)
+                        output_mupy = banner + b''.join(send_get(line) for line in f)
+                        send_get(b'\x04') # exit the REPL, so coverage info is saved
+                        p.kill()
+                        os.close(emulator)
+                        os.close(subterminal)
+                else:
+                    output_mupy = subprocess.check_output(args + [test_file], stderr=subprocess.STDOUT)
+            except subprocess.CalledProcessError:
+                return b'CRASH'
+
+        else:
+            # a standard test run on PC
+
+            # create system command
+            cmdlist = [MICROPYTHON, '-X', 'emit=' + args.emit]
+            if args.heapsize is not None:
+                cmdlist.extend(['-X', 'heapsize=' + args.heapsize])
+
+            # if running via .mpy, first compile the .py file
+            if args.via_mpy:
+                subprocess.check_output([MPYCROSS, '-mcache-lookup-bc', '-o', 'mpytest.mpy', test_file])
+                cmdlist.extend(['-m', 'mpytest'])
+            else:
+                cmdlist.append(test_file)
+
+            # run the actual test
+            # (the child gets only these two environment variables)
+            e = {"MICROPYPATH": os.getcwd() + ":", "LANG": "en_US.UTF-8"}
+            p = subprocess.Popen(cmdlist, env=e, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+            output_mupy = b''
+            # drain stdout until the process has exited, then read what is left
+            while p.poll() is None:
+                output_mupy += p.stdout.read()
+            output_mupy += p.stdout.read()
+            if p.returncode != 0:
+                output_mupy = b'CRASH'
+
+            # clean up if we had an intermediate .mpy file
+            if args.via_mpy:
+                rm_f('mpytest.mpy')
+
+    else:
+        # run on pyboard
+        import pyboard
+        pyb.enter_raw_repl()
+        try:
+            output_mupy = pyb.execfile(test_file)
+        except pyboard.PyboardError:
+            had_crash = True
+            output_mupy = b'CRASH'
+
+    # canonical form for all ports/platforms is to use \n for end-of-line
+    output_mupy = output_mupy.replace(b'\r\n', b'\n')
+
+    # don't try to convert the output if we should skip this test
+    if had_crash or output_mupy in (b'SKIP\n', b'CRASH'):
+        return output_mupy
+
+    if is_special or test_file in special_tests:
+        # convert parts of the output that are not stable across runs
+        with open(test_file + '.exp', 'rb') as f:
+            # each entry is (raw line,) for the 8x '#' marker,
+            # or (raw line, compiled regex) for every other line
+            lines_exp = []
+            for line in f.readlines():
+                if line == b'########\n':
+                    line = (line,)
+                else:
+                    line = (line, re.compile(convert_regex_escapes(line)))
+                lines_exp.append(line)
+        lines_mupy = [line + b'\n' for line in output_mupy.split(b'\n')]
+        if output_mupy.endswith(b'\n'):
+            lines_mupy = lines_mupy[:-1] # remove erroneous last empty line
+        # i_mupy indexes the actual output while i walks the expected lines
+        i_mupy = 0
+        for i in range(len(lines_exp)):
+            if lines_exp[i][0] == b'########\n':
+                # 8x #'s means match 0 or more whole lines
+                line_exp = lines_exp[i + 1]
+                skip = 0
+                while i_mupy + skip < len(lines_mupy) and not line_exp[1].match(lines_mupy[i_mupy + skip]):
+                    skip += 1
+                if i_mupy + skip >= len(lines_mupy):
+                    # the line following the marker was never found in the output
+                    lines_mupy[i_mupy] = b'######## FAIL\n'
+                    break
+                # collapse the skipped output lines into a single marker line
+                del lines_mupy[i_mupy:i_mupy + skip]
+                lines_mupy.insert(i_mupy, b'########\n')
+                i_mupy += 1
+            else:
+                # a regex
+                if lines_exp[i][1].match(lines_mupy[i_mupy]):
+                    # matching output is replaced by the expected raw line
+                    lines_mupy[i_mupy] = lines_exp[i][0]
+                else:
+                    #print("don't match: %r %s" % (lines_exp[i][1], lines_mupy[i_mupy])) # DEBUG
+                    pass
+                i_mupy += 1
+            if i_mupy >= len(lines_mupy):
+                break
+        output_mupy = b''.join(lines_mupy)
+
+    return output_mupy
+
+
+def run_feature_check(pyb, args, base_path, test_file):
+    return run_micropython(pyb, args, base_path + "/feature_check/" + test_file, is_special=True)
+
+class ThreadSafeCounter:
+    def __init__(self, start=0):
+        self._value = start
+        self._lock = threading.Lock()
+
+    def add(self, to_add):
+        with self._lock: self._value += to_add
+
+    def append(self, arg):
+        self.add([arg])
+
+    @property
+    def value(self):
+        return self._value
+
+def run_tests(pyb, tests, args, base_path=".", num_threads=1):
+    """Run every file in `tests` (on `num_threads` worker threads when > 1) and print a summary.
+
+    Returns True when no test failed, or after listing the tests when args.list_tests is set.
+    """
+    test_count = ThreadSafeCounter()
+    testcase_count = ThreadSafeCounter()
+    passed_count = ThreadSafeCounter()
+    failed_tests = ThreadSafeCounter([])
+    skipped_tests = ThreadSafeCounter([])
+
+    skip_tests = set()
+    skip_native = False
+    skip_int_big = False
+    skip_set_type = False
+    skip_async = False
+    skip_const = False
+    skip_revops = False
+    skip_endian = False
+    has_complex = True
+    has_coverage = False
+
+    upy_float_precision = 32
+
+    # Some tests shouldn't be run under Travis CI
+    if os.getenv('TRAVIS') == 'true':
+        skip_tests.add('basics/memoryerror.py')
+        skip_tests.add('thread/thread_gc1.py') # has reliability issues
+        skip_tests.add('thread/thread_lock4.py') # has reliability issues
+        skip_tests.add('thread/stress_heap.py') # has reliability issues
+        skip_tests.add('thread/stress_recurse.py') # has reliability issues
+
+    if upy_float_precision == 0:
+        skip_tests.add('extmod/ujson_dumps_float.py')
+        skip_tests.add('extmod/ujson_loads_float.py')
+        skip_tests.add('misc/rge_sm.py')
+    if upy_float_precision < 32:
+        skip_tests.add('float/float2int_intbig.py') # requires fp32, there's float2int_fp30_intbig.py instead
+        skip_tests.add('float/string_format.py') # requires fp32, there's string_format_fp30.py instead
+        skip_tests.add('float/bytes_construct.py') # requires fp32
+        skip_tests.add('float/bytearray_construct.py') # requires fp32
+    if upy_float_precision < 64:
+        skip_tests.add('float/float_divmod.py') # tested by float/float_divmod_relaxed.py instead
+        skip_tests.add('float/float2int_doubleprec_intbig.py')
+        skip_tests.add('float/float_parse_doubleprec.py')
+
+    if not has_complex:
+        skip_tests.add('float/complex1.py')
+        skip_tests.add('float/complex1_intbig.py')
+        skip_tests.add('float/int_big_float.py')
+        skip_tests.add('float/true_value.py')
+        skip_tests.add('float/types.py')
+
+    if not has_coverage:
+        skip_tests.add('cmdline/cmd_parsetree.py')
+
+    # Some tests shouldn't be run on a PC
+    if args.target == 'unix':
+        # unix build does not have the GIL so can't run thread mutation tests
+        for t in tests:
+            if t.startswith('thread/mutate_'):
+                skip_tests.add(t)
+
+    # Some tests shouldn't be run on pyboard
+    if args.target != 'unix':
+        skip_tests.add('basics/exception_chain.py') # warning is not printed
+        skip_tests.add('micropython/meminfo.py') # output is very different to PC output
+        skip_tests.add('extmod/machine_mem.py') # raw memory access not supported
+
+        if args.target == 'wipy':
+            skip_tests.add('misc/print_exception.py')       # requires error reporting full
+            skip_tests.update({'extmod/uctypes_%s.py' % t for t in 'bytearray le native_le ptr_le ptr_native_le sizeof sizeof_native array_assign_le array_assign_native_le'.split()}) # requires uctypes
+            skip_tests.add('extmod/zlibd_decompress.py')    # requires zlib
+            skip_tests.add('extmod/uheapq1.py')             # uheapq not supported by WiPy
+            skip_tests.add('extmod/urandom_basic.py')       # requires urandom
+            skip_tests.add('extmod/urandom_extra.py')       # requires urandom
+        elif args.target == 'esp8266':
+            skip_tests.add('misc/rge_sm.py')                # too large
+        elif args.target == 'minimal':
+            skip_tests.add('basics/class_inplace_op.py')    # all special methods not supported
+            skip_tests.add('basics/subclass_native_init.py')# native subclassing corner cases not support
+            skip_tests.add('misc/rge_sm.py')                # too large
+            skip_tests.add('micropython/opt_level.py')      # don't assume line numbers are stored
+
+    # Some tests use unsupported features on Windows
+    if os.name == 'nt':
+        skip_tests.add('import/import_file.py') # works but CPython prints forward slashes
+
+    # Some tests are known to fail with native emitter
+    # Remove them from the below when they work
+    if args.emit == 'native':
+        skip_tests.update({'basics/%s.py' % t for t in 'gen_yield_from gen_yield_from_close gen_yield_from_ducktype gen_yield_from_exc gen_yield_from_executing gen_yield_from_iter gen_yield_from_send gen_yield_from_stopped gen_yield_from_throw gen_yield_from_throw2 gen_yield_from_throw3 generator1 generator2 generator_args generator_close generator_closure generator_exc generator_pend_throw generator_return generator_send'.split()}) # require yield
+        skip_tests.update({'basics/%s.py' % t for t in 'bytes_gen class_store_class globals_del string_join gen_stack_overflow'.split()}) # require yield
+        skip_tests.update({'basics/async_%s.py' % t for t in 'def await await2 for for2 with with2 coroutine'.split()}) # require yield
+        skip_tests.update({'basics/%s.py' % t for t in 'try_reraise try_reraise2'.split()}) # require raise_varargs
+        skip_tests.update({'basics/%s.py' % t for t in 'with_break with_continue with_return'.split()}) # require complete with support
+        skip_tests.add('basics/array_construct2.py') # requires generators
+        skip_tests.add('basics/bool1.py') # seems to randomly fail
+        skip_tests.add('basics/builtin_hash_gen.py') # requires yield
+        skip_tests.add('basics/class_bind_self.py') # requires yield
+        skip_tests.add('basics/del_deref.py') # requires checking for unbound local
+        skip_tests.add('basics/del_local.py') # requires checking for unbound local
+        skip_tests.add('basics/exception_chain.py') # raise from is not supported
+        skip_tests.add('basics/for_range.py') # requires yield_value
+        skip_tests.add('basics/try_finally_loops.py') # requires proper try finally code
+        skip_tests.add('basics/try_finally_return.py') # requires proper try finally code
+        skip_tests.add('basics/try_finally_return2.py') # requires proper try finally code
+        skip_tests.add('basics/unboundlocal.py') # requires checking for unbound local
+        skip_tests.add('import/gen_context.py') # requires yield_value
+        skip_tests.add('misc/features.py') # requires raise_varargs
+        skip_tests.add('misc/rge_sm.py') # requires yield
+        skip_tests.add('misc/print_exception.py') # because native doesn't have proper traceback info
+        skip_tests.add('misc/sys_exc_info.py') # sys.exc_info() is not supported for native
+        skip_tests.add('micropython/emg_exc.py') # because native doesn't have proper traceback info
+        skip_tests.add('micropython/heapalloc_traceback.py') # because native doesn't have proper traceback info
+        skip_tests.add('micropython/heapalloc_iter.py') # requires generators
+        skip_tests.add('micropython/schedule.py') # native code doesn't check pending events
+        skip_tests.add('stress/gc_trace.py') # requires yield
+        skip_tests.add('stress/recursive_gen.py') # requires yield
+        skip_tests.add('extmod/vfs_userfs.py') # because native doesn't properly handle globals across different modules
+        skip_tests.add('../extmod/ulab/tests/argminmax.py') # requires yield
+
+    def run_one_test(test_file):
+        test_file = test_file.replace('\\', '/')
+
+        if args.filters:
+            # Default verdict is the opposite of the first action
+            verdict = "include" if args.filters[0][0] == "exclude" else "exclude"
+            for action, pat in args.filters:
+                if pat.search(test_file):
+                    verdict = action
+            if verdict == "exclude":
+                return
+
+        test_basename = os.path.basename(test_file)
+        test_name = os.path.splitext(test_basename)[0]
+        is_native = test_name.startswith("native_") or test_name.startswith("viper_")
+        is_endian = test_name.endswith("_endian")
+        is_int_big = test_name.startswith("int_big") or test_name.endswith("_intbig")
+        is_set_type = test_name.startswith("set_") or test_name.startswith("frozenset")
+        is_async = test_name.startswith("async_")
+        is_const = test_name.startswith("const")
+
+        skip_it = test_file in skip_tests
+        skip_it |= skip_native and is_native
+        skip_it |= skip_endian and is_endian
+        skip_it |= skip_int_big and is_int_big
+        skip_it |= skip_set_type and is_set_type
+        skip_it |= skip_async and is_async
+        skip_it |= skip_const and is_const
+        skip_it |= skip_revops and test_name.startswith("class_reverse_op")
+
+        if args.list_tests:
+            if not skip_it:
+                print(test_file)
+            return
+
+        if skip_it:
+            print("skip ", test_file)
+            skipped_tests.append(test_name)
+            return
+
+        # get expected output
+        test_file_expected = test_file + '.exp'
+        if os.path.isfile(test_file_expected):
+            # expected output given by a file, so read that in
+            with open(test_file_expected, 'rb') as f:
+                output_expected = f.read()
+        else:
+            if not args.write_exp:
+                output_expected = b"NOEXP\n"
+            else:
+                # no .exp file yet: run the MICROPYTHON binary and record its output as the expectation
+                e = {"PYTHONPATH": os.getcwd(),
+                     "PATH": os.environ["PATH"],
+                     "LANG": "en_US.UTF-8"}
+                p = subprocess.Popen([MICROPYTHON, test_file], env=e, stdout=subprocess.PIPE)
+                output_expected = b''
+                while p.poll() is None:
+                    output_expected += p.stdout.read()
+                output_expected += p.stdout.read()
+                with open(test_file_expected, 'wb') as f:
+                    f.write(output_expected)
+
+        # canonical form for all host platforms is to use \n for end-of-line
+        output_expected = output_expected.replace(b'\r\n', b'\n')
+
+        if args.write_exp:
+            return
+
+        # run MicroPython
+        output_mupy = run_micropython(pyb, args, test_file)
+
+        if output_mupy == b'SKIP\n':
+            print("skip ", test_file)
+            skipped_tests.append(test_name)
+            return
+
+        if output_expected == b'NOEXP\n':
+            print("noexp", test_file)
+            failed_tests.append(test_name)
+            return
+
+        testcase_count.add(len(output_expected.splitlines()))
+
+        filename_expected = test_basename + ".exp"
+        filename_mupy = test_basename + ".out"
+
+        if output_expected == output_mupy:
+            print("pass ", test_file)
+            passed_count.add(1)
+            rm_f(filename_expected)
+            rm_f(filename_mupy)
+        else:
+            with open(filename_expected, "wb") as f:
+                f.write(output_expected)
+            with open(filename_mupy, "wb") as f:
+                f.write(output_mupy)
+            print("### Expected")
+            print(output_expected)
+            print("### Actual")
+            print(output_mupy)
+            print("FAIL ", test_file)
+            failed_tests.append(test_name)
+
+        test_count.add(1)
+
+    if num_threads > 1:
+        pool = ThreadPool(num_threads)
+        pool.map(run_one_test, tests)
+    else:
+        for test in tests:
+            run_one_test(test)
+
+    if args.list_tests:  # names were printed by run_one_test above; there is no summary to report
+        return True
+
+    print("{} tests performed ({} individual testcases)".format(test_count.value, testcase_count.value))
+    print("{} tests passed".format(passed_count.value))
+
+    if len(skipped_tests.value) > 0:
+        print("{} tests skipped: {}".format(len(skipped_tests.value), ' '.join(sorted(skipped_tests.value))))
+    if len(failed_tests.value) > 0:
+        print("{} tests failed: {}".format(len(failed_tests.value), ' '.join(sorted(failed_tests.value))))
+        return False
+
+    # all tests succeeded
+    return True
+
+
+class append_filter(argparse.Action):
+    """argparse action that collects ("include" | "exclude", compiled regex) pairs in command-line order."""
+
+    def __init__(self, option_strings, dest, **kwargs):
+        super().__init__(option_strings, dest, default=[], **kwargs)
+
+    def __call__(self, parser, args, value, option):
+        # Work on a copy: the namespace starts out holding the action's own
+        # default list, and appending to it would leak filters between parses.
+        filters = list(getattr(args, self.dest, None) or [])
+        verdict = "exclude" if option.startswith(("-e", "--e")) else "include"
+        filters.append((verdict, re.compile(value)))
+        setattr(args, self.dest, filters)
+
+
+def main():
+    """Parse the command line, collect the test files, run them; exit with status 1 on failure."""
+    cmd_parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description='Run and manage tests for MicroPython.',
+        epilog='''\
+Options -i and -e can be multiple and processed in the order given. Regex
+"search" (vs "match") operation is used. An action (include/exclude) of
+the last matching regex is used:
+  run-tests -i async - exclude all, then include tests containg "async" anywhere
+  run-tests -e '/big.+int' - include all, then exclude by regex
+  run-tests -e async -i async_foo - include all, exclude async, yet still include async_foo
+''')
+    cmd_parser.add_argument('--target', default='unix', help='the target platform')
+    cmd_parser.add_argument('--device', default='/dev/ttyACM0', help='the serial device or the IP address of the pyboard')
+    cmd_parser.add_argument('-b', '--baudrate', default=115200, help='the baud rate of the serial device')
+    cmd_parser.add_argument('-u', '--user', default='micro', help='the telnet login username')
+    cmd_parser.add_argument('-p', '--password', default='python', help='the telnet login password')
+    cmd_parser.add_argument('-d', '--test-dirs', nargs='*', help='input test directories (if no files given)')
+    cmd_parser.add_argument('-e', '--exclude', action=append_filter, metavar='REGEX', dest='filters', help='exclude test by regex on path/name.py')
+    cmd_parser.add_argument('-i', '--include', action=append_filter, metavar='REGEX', dest='filters', help='include test by regex on path/name.py')
+    cmd_parser.add_argument('--write-exp', action='store_true', help='save .exp files to run tests w/o CPython')
+    cmd_parser.add_argument('--list-tests', action='store_true', help='list tests instead of running them')
+    cmd_parser.add_argument('--emit', default='bytecode', help='MicroPython emitter to use (bytecode or native)')
+    cmd_parser.add_argument('--heapsize', help='heapsize to use (use default if not specified)')
+    cmd_parser.add_argument('--via-mpy', action='store_true', help='compile .py files to .mpy first')
+    cmd_parser.add_argument('--keep-path', action='store_true', help='do not clear MICROPYPATH when running tests')
+    cmd_parser.add_argument('-j', '--jobs', default=1, metavar='N', type=int, help='Number of tests to run simultaneously')
+    cmd_parser.add_argument('--auto-jobs', action='store_const', dest='jobs', const=multiprocessing.cpu_count(), help='Set the -j values to the CPU (thread) count')
+    cmd_parser.add_argument('files', nargs='*', help='input test files')
+    args = cmd_parser.parse_args()
+
+    EXTERNAL_TARGETS = ('pyboard', 'wipy', 'esp8266', 'esp32', 'minimal')
+    # pyb stays None for the unix target and whenever tests are only listed
+    pyb = None
+    if args.target != 'unix' and not args.list_tests:
+        if args.target not in EXTERNAL_TARGETS:
+            raise ValueError('target must be either %s or unix' % ", ".join(EXTERNAL_TARGETS))
+        import pyboard
+        pyb = pyboard.Pyboard(args.device, args.baudrate, args.user, args.password)
+        pyb.enter_raw_repl()
+
+    if args.files:
+        # tests explicitly given
+        tests = args.files
+    else:
+        if args.test_dirs is not None:
+            # run tests from these directories
+            test_dirs = args.test_dirs
+        elif args.target == 'pyboard':
+            # run pyboard tests
+            test_dirs = ('basics', 'micropython', 'float', 'misc', 'stress', 'extmod', 'pyb', 'pybnative', 'inlineasm')
+        elif args.target in ('esp8266', 'esp32', 'minimal'):
+            test_dirs = ('basics', 'micropython', 'float', 'misc', 'extmod')
+        elif args.target == 'wipy':
+            # run WiPy tests
+            test_dirs = ('basics', 'micropython', 'misc', 'extmod', 'wipy')
+        else:
+            # run PC tests
+            test_dirs = (
+                'basics', 'micropython', 'float', 'import', 'io', 'misc',
+                'stress', 'unicode', 'extmod', '../extmod/ulab/tests', 'unix', 'cmdline',
+            )
+        tests = sorted(path for test_dir in test_dirs for path in glob('{}/*.py'.format(test_dir)))
+
+    if not args.keep_path:
+        # clear search path to make sure tests use only builtin modules
+        os.environ['MICROPYPATH'] = ''
+
+    # Even if we run completely different tests in a different directory,
+    # we need to access feature_check's from the same directory as the
+    # run-tests script itself.
+    base_path = os.path.dirname(sys.argv[0]) or "."
+    try:
+        res = run_tests(pyb, tests, args, base_path, args.jobs)
+    finally:
+        if pyb:
+            pyb.close()
+
+    if not res:
+        sys.exit(1)
+
+if __name__ == "__main__":  # run the suite only when executed as a script, not on import
+    main()
diff --git a/tulip/shared/ulab/snippets/json_to_ndarray.py b/tulip/shared/ulab/snippets/json_to_ndarray.py
new file mode 100644
index 000000000..eaeb7f830
--- /dev/null
+++ b/tulip/shared/ulab/snippets/json_to_ndarray.py
@@ -0,0 +1,85 @@
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Zoltán Vörös
+
+import sys
+
+use_ulab = False
+
+try:
+    from ubinascii import a2b_base64 as b64decode
+    from ubinascii import unhexlify
+    import ujson as json
+    from ulab import numpy as np
+    use_ulab = True
+except:
+    from base64 import b64decode
+    import json
+    import numpy as np
+    from numpy.lib.format import descr_to_dtype
+
+def ulab_descr_to_dtype(descriptor):
+    """Map a numpy dtype descriptor such as '<u2' to the matching ulab dtype.
+
+    The first character (the byte-order mark) is dropped before the lookup.
+    Raises TypeError for an unknown descriptor, or for a float width that
+    does not match np.float.
+    """
+    descriptor = descriptor[1:]
+
+    integer_types = {'u1': np.uint8, 'i1': np.int8, 'u2': np.uint16, 'i2': np.int16}
+    if descriptor in integer_types:
+        return integer_types[descriptor]
+
+    # np.float is compared with the 'd' / 'f' typecodes to check the float width
+    if descriptor == 'f8':
+        if np.float != ord('d'):
+            raise TypeError('doubles are not supported')
+        return np.float
+    if descriptor == 'f4':
+        if np.float != ord('f'):
+            raise TypeError('single-precision floats are not supported')
+        return np.float
+    raise TypeError('descriptor could not be decoded')
+
+
+def json_to_ndarray(json_string, b64=True):
+    """
+    Turn a json string into an ndarray
+    The string must be the representation of a dictionary with the three keys
+
+    - dtype: a valid numpy dtype string (one of |u1, |i1, <u2, <i2, <f4, <f8, <c8, <c16, >u2, >i2, >f4, >f8, >c8, >c16)
+    - array: the hexified, or base64-encoded raw data array
+    - shape: the shape of the array (a list or tuple of integers)
+
+    Usage:
+        str = '{"dtype": "<f8", "array": "AAAAAAAAAAAAAAAAAADwPwAAAAAAAABAAAAAAAAACEAAAAAAAAAQQAAAAAAAABRAAAAAAAAAGEAAAAAAAAAcQAAAAAAAACBA\n", "shape": [3, 3]}'
+        json_to_ndarray(str, b64=True)
+    """
+    obj = json.loads(json_string)
+    if not isinstance(obj, dict):
+        raise TypeError('input argument must be a dictionary')
+    if set(obj.keys()) != {'array', 'dtype', 'shape'}:
+        raise ValueError('input must have the keys "array", "dtype", "shape"')
+
+    descriptor = obj['dtype']
+    dtype = ulab_descr_to_dtype(descriptor) if use_ulab else descr_to_dtype(descriptor)
+    if b64:
+        data = b64decode(obj['array'])
+    else:
+        try:
+            decode_hex = unhexlify  # only bound when the ulab imports succeeded
+        except NameError:
+            from binascii import unhexlify as decode_hex
+        data = decode_hex(obj['array'])
+
+    ndarray = np.frombuffer(data, dtype=dtype).reshape(tuple(obj['shape']))
+    # numpy dtypes already encode the byte order; with ulab the data is swapped
+    # by hand when the descriptor's order mark ('<' or '>') differs from the host.
+    if use_ulab and dtype in (np.uint16, np.int16, np.float):
+        stored = {'<': 'little', '>': 'big'}.get(descriptor[0], sys.byteorder)
+        if stored != sys.byteorder:
+            ndarray = ndarray.byteswap()  # returns a swapped copy
+    return ndarray
diff --git a/tulip/shared/ulab/snippets/ndarray_to_json.py b/tulip/shared/ulab/snippets/ndarray_to_json.py
new file mode 100644
index 000000000..908c637e6
--- /dev/null
+++ b/tulip/shared/ulab/snippets/ndarray_to_json.py
@@ -0,0 +1,74 @@
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Zoltán Vörös
+
+import sys
+
+use_ulab = False
+
+try:
+    from ubinascii import b2a_base64 as b64encode
+    from ubinascii import hexlify
+    import ujson as json
+    from ulab import numpy as np
+    use_ulab = True
+except:
+    from base64 import b64encode
+    import json
+    import numpy as np
+    from numpy.lib.format import dtype_to_descr
+
+def ulab_dtype_to_descr(dtype):
+    """Return the numpy descriptor string (e.g. '<u2') for a ulab dtype typecode.
+
+    One-byte types get the '|' prefix; wider types are prefixed with the
+    host byte order read from sys.byteorder.
+    """
+    desc = '<' if sys.byteorder == 'little' else '>'
+    if dtype == ord('B'):
+        desc = '|u1'
+    elif dtype == ord('b'):
+        desc = '|i1'
+    elif dtype == ord('H'):
+        desc += 'u2'
+    elif dtype == ord('h'):
+        desc += 'i2'
+    elif dtype == ord('d'):
+        desc += 'f8'
+    elif dtype == ord('f'):
+        desc += 'f4'
+    elif dtype == ord('c'):
+        # a complex is a pair of floats: 4-byte floats give c8, otherwise c16
+        desc += 'c8' if np.array([1], dtype=np.float).itemsize == 4 else 'c16'
+    return desc
+
+def ndarray_to_json(obj, b64=True):
+    """
+    Turn an ndarray into a json string, using either base64 encoding or hexify
+    Returns a serialised dictionary with three keys:
+
+    - dtype: a valid numpy dtype string (one of |u1, |i1, <u2, <i2, <f4, <f8, <c8, <c16, >u2, >i2, >f4, >f8, >c8, >c16)
+    - array: the hexified, or base64-encoded raw data array
+    - shape: the shape of the array (a list or tuple of integers)
+
+    Usage:
+        ndarray = np.array([1, 2, 3], dtype=np.uint8)
+        ndarray_to_json(ndarray, b64=True)
+    """
+    if not isinstance(obj, np.ndarray):
+        raise TypeError('input argument must be an ndarray')
+
+    to_descr = ulab_dtype_to_descr if use_ulab else dtype_to_descr
+    raw = obj.tobytes()
+    if b64:
+        data = b64encode(raw)
+    else:
+        try:
+            encode_hex = hexlify  # only bound when the ulab imports succeeded
+        except NameError:
+            from binascii import hexlify as encode_hex
+        data = encode_hex(raw)
+    # CPython's json refuses bytes; both encodings are plain ASCII, so decode to str first
+    return json.dumps({'array': data.decode(), 'dtype': to_descr(obj.dtype), 'shape': obj.shape})
diff --git a/tulip/shared/ulab/snippets/numpy/__init__.py b/tulip/shared/ulab/snippets/numpy/__init__.py
new file mode 100644
index 000000000..84779d4d0
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/__init__.py
@@ -0,0 +1,5 @@
+
+from . import core
+from .core import *
+from . import lib
+from .lib import *
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/numpy/core/__init__.py b/tulip/shared/ulab/snippets/numpy/core/__init__.py
new file mode 100644
index 000000000..3a64f5a8e
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/core/__init__.py
@@ -0,0 +1,5 @@
+
+from .multiarray import *
+from .numeric import *
+from .fromnumeric import *
+from .shape_base import *
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/numpy/core/fromnumeric.py b/tulip/shared/ulab/snippets/numpy/core/fromnumeric.py
new file mode 100644
index 000000000..0a078764b
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/core/fromnumeric.py
@@ -0,0 +1,83 @@
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Phil Jepsen
+
+from .overrides import set_module
+from .multiarray import asarray
+from ulab import numpy as np
+from ... import numpy
+
+def prod(arr):  # product of all items in arr; 1 when arr is empty
+    product = 1
+    for factor in arr:
+        product = product * factor
+    return product
+
+def size(a, axis=None):
+    """
+    Count the elements of `a`, in total or along one axis.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data. Anything lacking the `size` / `shape` attribute is
+        converted with `asarray` before it is measured.
+    axis : int, optional
+        Axis whose length is returned. When omitted, the total number
+        of elements is returned instead.
+
+    Returns
+    -------
+    element_count : int
+        Number of elements along the requested axis.
+
+    See Also
+    --------
+    shape : dimensions of array
+    ndarray.shape : dimensions of array
+    ndarray.size : number of elements in array
+
+    Examples
+    --------
+    >>> a = np.array([[1,2,3],[4,5,6]])
+    >>> np.size(a)
+    6
+    >>> np.size(a,1)
+    3
+    >>> np.size(a,0)
+    2
+    """
+    try:
+        return a.size if axis is None else a.shape[axis]
+    except AttributeError:
+        converted = asarray(a)
+        return converted.size if axis is None else converted.shape[axis]
+
+def nonzero(a):
+    """Return the indices of the non-zero elements of a 1-D or 2-D array.
+
+    The result is a tuple with one index array per axis: `(rows,)` for 1-D
+    input and `(rows, columns)` for 2-D input, scanned row by row.
+    Indices are stored as uint16; an int8 result would wrap around for
+    positions above 127.
+    """
+    if not isinstance(a, np.ndarray):
+        a = asarray(a)
+    shape = a.shape
+
+    if len(shape) == 1:
+        rows = [i for i in range(shape[0]) if a[i] != 0]
+        return (np.array(rows, dtype=np.uint16),)
+
+    # Collect into Python lists first; one array per axis is built at the end.
+    rows = []
+    cols = []
+    for i in range(shape[0]):
+        for j in range(shape[1]):
+            if a[i, j] != 0:
+                rows.append(i)
+                cols.append(j)
+
+    return (np.array(rows, dtype=np.uint16), np.array(cols, dtype=np.uint16))
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/numpy/core/multiarray.py b/tulip/shared/ulab/snippets/numpy/core/multiarray.py
new file mode 100644
index 000000000..0cf0606a7
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/core/multiarray.py
@@ -0,0 +1,27 @@
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Phil Jepsen
+
+from ulab import numpy as np
+
+def asarray(a, dtype=None):
+    """Return `a` as an ndarray; an existing ndarray is handed back unchanged.
+
+    Lists and tuples are converted element-wise, any other value becomes a
+    one-item array. Raises ValueError when the conversion fails.
+    """
+    if isinstance(a, np.ndarray):
+        return a
+    items = a if isinstance(a, (list, tuple)) else [a]
+    try:
+        return np.array(items) if dtype is None else np.array(items, dtype=dtype)
+    except Exception as e:
+        if "can't convert complex to float" in e.args or "'complex' object isn't iterable" in e.args:
+            try:
+                return np.array([a], dtype=np.complex).flatten()
+            except Exception:
+                pass  # fall through to the ValueError below
+        # (a,) stops the % operator from unpacking a tuple input
+        raise ValueError('Could not cast %s to array' % (a,))
diff --git a/tulip/shared/ulab/snippets/numpy/core/numeric.py b/tulip/shared/ulab/snippets/numpy/core/numeric.py
new file mode 100644
index 000000000..71e18b690
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/core/numeric.py
@@ -0,0 +1,65 @@
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Phil Jepsen
+
+from ulab import numpy as np
+from .multiarray import (asarray)
+
+def zeros_like(a, dtype=None, order='K', subok=True, shape=None):
+    """
+    Return an array of zeros with the same shape and type as a given array.
+
+    Parameters
+    ----------
+    a : array_like
+        The shape and data-type of `a` define these same attributes of
+        the returned array.
+    dtype : data-type, optional
+        Overrides the data type of the result.
+    order : {'C', 'F', 'A', or 'K'}, optional
+        Accepted for numpy compatibility only; this implementation ignores it.
+    subok : bool, optional.
+        Accepted for numpy compatibility only; this implementation ignores it.
+    shape : int or sequence of ints, optional.
+        Overrides the shape of the result.
+
+    Returns
+    -------
+    out : ndarray
+        Array of zeros with the same shape and type as `a`, unless
+        overridden by `dtype` or `shape`.
+
+    See Also
+    --------
+    empty_like : Return an empty array with shape and type of input.
+    ones_like : Return an array of ones with shape and type of input.
+    full_like : Return a new array with shape of input filled with value.
+    zeros : Return a new array setting values to zero.
+
+    Examples
+    --------
+    >>> x = np.arange(6)
+    >>> x = x.reshape((2, 3))
+    >>> x
+    array([[0, 1, 2],
+           [3, 4, 5]])
+    >>> np.zeros_like(x)
+    array([[0, 0, 0],
+           [0, 0, 0]])
+    >>> y = np.arange(3, dtype=float)
+    >>> y
+    array([0., 1., 2.])
+    >>> np.zeros_like(y)
+    array([0.,  0.,  0.])
+    """
+
+    if not isinstance(a, np.ndarray):
+        a = asarray(a)
+    if dtype is None:
+        dtype = a.dtype
+    if shape is None:
+        shape = a.shape
+    return np.full(shape, 0, dtype=dtype)
+
diff --git a/tulip/shared/ulab/snippets/numpy/core/overrides.py b/tulip/shared/ulab/snippets/numpy/core/overrides.py
new file mode 100644
index 000000000..25ebe3419
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/core/overrides.py
@@ -0,0 +1,22 @@
+
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Phil Jepsen
+
+import sys
+
+def set_module(module):
+    """Decorator for overriding __module__ on a function or class.
+    Usage: put @set_module('numpy') above a def; afterwards its __module__
+    is 'numpy', unless the runtime forbids the assignment (then unchanged).
+    """
+    def decorator(func):
+        if module is not None:
+            try:
+                func.__module__ = module
+            except (AttributeError, TypeError):
+                pass  # NOTE(review): presumably MicroPython functions reject attribute writes - confirm
+        return func
+    return decorator
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/numpy/core/shape_base.py b/tulip/shared/ulab/snippets/numpy/core/shape_base.py
new file mode 100644
index 000000000..2ac0ebafb
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/core/shape_base.py
@@ -0,0 +1,22 @@
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Phil Jepsen
+
+from ulab import numpy as np
+from .multiarray import asarray
+
+def atleast_1d(*arys):
+    """Convert every argument to an array with asarray.
+
+    Returns the array itself for a single argument, otherwise a list holding
+    one array per argument (an empty list when called without arguments).
+    """
+    # asarray either returns an ndarray or raises, and it already wraps scalars
+    # in a one-item array, so no reshape is needed afterwards.
+    res = [asarray(ary) for ary in arys]
+
+    if len(res) == 1:
+        return res[0]
+    return res
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/numpy/lib/__init__.py b/tulip/shared/ulab/snippets/numpy/lib/__init__.py
new file mode 100644
index 000000000..698d29fb5
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/lib/__init__.py
@@ -0,0 +1,5 @@
+
+from .function_base import *
+from .polynomial import *
+from .type_check import *
+from .block import *
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/numpy/lib/block.py b/tulip/shared/ulab/snippets/numpy/lib/block.py
new file mode 100644
index 000000000..eacacc1fb
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/lib/block.py
@@ -0,0 +1,17 @@
+from ulab.numpy import zeros
+
+def block(S):
+    """Assemble S, a list of rows of 2-D matrices, into a single matrix."""
+    width = sum(len(m[0]) for m in S[0])       # columns: taken from the first block row
+    height = sum(len(row[0]) for row in S)     # rows: taken from the first matrix of each block row
+    M = zeros((height, width))
+    top = 0
+    for row in S:
+        rows_here = len(row[0])
+        left = 0
+        for matrix in row:
+            cols_here = len(matrix[0])
+            M[top:top + rows_here, left:left + cols_here] = matrix
+            left += cols_here
+        top += rows_here
+    return M
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/numpy/lib/function_base.py b/tulip/shared/ulab/snippets/numpy/lib/function_base.py
new file mode 100644
index 000000000..66797fd6d
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/lib/function_base.py
@@ -0,0 +1,20 @@
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Phil Jepsen
+
+from ulab import numpy as np
+from ..core.multiarray import (asarray)
+from ..core.overrides import set_module
+
+@set_module('numpy')
+def append(arr, values, axis=None):  # join values onto the end of arr
+    arr, values = asarray(arr), asarray(values)
+    if axis is not None:
+        return np.concatenate((arr, values), axis=axis)
+    # no axis given: both operands are flattened and joined end to end
+    if len(arr.shape) != 1:
+        arr = arr.flatten()
+    values = values.flatten()
+    return np.concatenate((arr, values), axis=len(arr.shape) - 1)
diff --git a/tulip/shared/ulab/snippets/numpy/lib/polynomial.py b/tulip/shared/ulab/snippets/numpy/lib/polynomial.py
new file mode 100644
index 000000000..3fd9f4b44
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/lib/polynomial.py
@@ -0,0 +1,42 @@
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Phil Jepsen
+
+from ..core import (atleast_1d, asarray)
+from ..core.overrides import set_module
+from ulab import numpy as np
+
+@set_module('numpy')
+def poly(seq_of_zeros):
+    """Return the coefficients of the monic polynomial with the given roots.
+
+    Parameters
+    ----------
+    seq_of_zeros : array_like
+        Either a 1-D sequence of roots, or a non-empty square 2-D matrix, in
+        which case the eigenvalues of the matrix are used as the roots.
+
+    Returns
+    -------
+    ndarray or float
+        Coefficients from the highest power down, built by repeatedly
+        convolving with ``[1, -root]``. An empty root sequence yields the
+        scalar ``1.0``. A complex result is reduced to its real part when the
+        sorted roots equal their sorted conjugates.
+
+    Raises
+    ------
+    ValueError
+        If the input is neither 1-D nor a non-empty square 2-D array.
+    """
+    seq_of_zeros = atleast_1d(seq_of_zeros)
+    sh = seq_of_zeros.shape
+
+    if len(sh) == 2 and sh[0] == sh[1] and sh[0] != 0:
+        # ulab's linalg.eig returns an (eigenvalues, eigenvectors) tuple;
+        # only the eigenvalues are the roots of the characteristic polynomial.
+        seq_of_zeros = np.linalg.eig(seq_of_zeros)[0]
+    elif len(sh) != 1:
+        raise ValueError("input must be 1d or non-empty square 2d array.")
+
+    if len(seq_of_zeros) == 0:
+        return 1.0
+    dt = seq_of_zeros.dtype
+    a = np.ones((1,), dtype=dt)
+
+    for k in range(len(seq_of_zeros)):
+        a = np.convolve(a, np.array([1, -seq_of_zeros[k]], dtype=dt))
+
+    if a.dtype == np.complex:
+        # if complex roots are all complex conjugates, the roots are real.
+        roots = asarray(seq_of_zeros, complex)
+        p = np.sort_complex(roots)
+        c = np.real(p) - np.imag(p) * 1j
+        q = np.sort_complex(c)
+        if np.all(p == q):
+            a = a.real.copy()
+    return a
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/numpy/lib/type_check.py b/tulip/shared/ulab/snippets/numpy/lib/type_check.py
new file mode 100644
index 000000000..2f28095b9
--- /dev/null
+++ b/tulip/shared/ulab/snippets/numpy/lib/type_check.py
@@ -0,0 +1,70 @@
+
+from ulab import numpy as np
+from ..core.multiarray import (asarray)
+from ..core.overrides import set_module
+
+# This file is part of the micropython-ulab project, https://github.com/v923z/micropython-ulab
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2022 Phil Jepsen
+
+def _isreal(a):
+    """Return a list with one bool per element of `a`: True when it is real.
+
+    Integers (including bools) and floats are real; a complex value is real
+    only when its imaginary part is exactly zero; anything else is False.
+    """
+    result = []
+    for x in a:
+        if isinstance(x, (int, float)):
+            result.append(True)
+        elif isinstance(x, complex):
+            result.append(x.imag == 0)
+        else:
+            result.append(False)
+    return result
+
+@set_module('numpy')
+def isreal(x):
+    """
+    Report, element by element, whether the input is real.
+
+    A complex element whose imaginary part is zero counts as real.
+
+    Parameters
+    ----------
+    x : array_like
+        Input, converted with `asarray` and then iterated once.
+
+    Returns
+    -------
+    out : list of bool, or bool
+        One flag per iterated element. An input with exactly one element
+        returns that single bool instead of a list; an empty input returns
+        an empty list.
+
+    Notes
+    -----
+    Elements that are not int, float or complex are reported as False.
+    NOTE(review): iterating a multi-dimensional array presumably yields rows
+    rather than scalars, which would all be reported as False - confirm
+    before relying on this for 2-D input.
+
+    Examples
+    --------
+    >>> a = np.array([1+1j, 1+0j, 4.5, 3, 2, 2j], dtype=complex)
+    >>> np.isreal(a)
+    [False, True, True, True, True, False]
+    """
+    x = asarray(x)
+    result = _isreal(x)
+    return result[0] if len(result) == 1 else result
diff --git a/tulip/shared/ulab/snippets/rclass.py b/tulip/shared/ulab/snippets/rclass.py
new file mode 100644
index 000000000..cb95021a1
--- /dev/null
+++ b/tulip/shared/ulab/snippets/rclass.py
@@ -0,0 +1,75 @@
+from typing import List, Tuple, Union  # upip.install("pycopy-typing")
+from ulab import numpy as np
+
+_DType = int
+_RClassKeyType = Union[slice, int, float, list, tuple, np.ndarray]
+
+# this is a stripped down version of RClass (used by np.r_[...etc])
+# it doesn't include support for string arguments as the first index element
+class RClass:
+    """Minimal ``np.r_`` stand-in: ``r_[a, b, ...]`` concatenates its items along axis 0."""
+
+    def __getitem__(self, key: Union[_RClassKeyType, Tuple[_RClassKeyType, ...]]):
+        """Convert every index item to an array and concatenate the results.
+
+        Accepted items are ndarrays (used as is), slices (expanded with
+        `np.arange`, or with `np.linspace` when the step is complex, in which
+        case ``int(abs(step))`` is the number of points), plain numbers
+        (wrapped in a one-element array) and anything else `np.array` accepts.
+        A TypeError raised during the conversion is re-raised as an Exception
+        naming the offending item.
+        """
+        items = key if isinstance(key, tuple) else (key,)
+
+        pieces = []
+        array_dtypes = []
+        scalar_dtypes = []
+
+        for entry in items:
+            is_scalar = False
+
+            try:
+                if isinstance(entry, np.ndarray):
+                    piece = entry
+                elif isinstance(entry, slice):
+                    first = 0 if entry.start is None else entry.start
+                    stride = 1 if entry.step is None else entry.step
+                    if isinstance(stride, complex):
+                        piece = np.linspace(first, entry.stop, num=int(abs(stride)))
+                    else:
+                        piece = np.arange(first, entry.stop, stride)
+                elif isinstance(entry, (int, float, bool)):
+                    piece = np.array([entry])
+                    is_scalar = True
+                    # NOTE(review): dtype is called here but read as an attribute
+                    # further down - confirm which form this ulab build provides.
+                    scalar_dtypes.append(piece.dtype())
+                else:
+                    piece = np.array(entry)
+            except TypeError:
+                raise Exception("index object %s of type %s is not supported by r_[]" % (
+                    str(entry), type(entry)))
+
+            pieces.append(piece)
+            if not is_scalar and isinstance(piece, np.ndarray):
+                array_dtypes.append(piece.dtype())
+
+        # The smallest collected dtype value becomes the common dtype.
+        final_dtype = min(array_dtypes + scalar_dtypes)
+        for pos in range(len(pieces)):
+            if pieces[pos].dtype != final_dtype:
+                pieces[pos] = np.array(pieces[pos], dtype=final_dtype)
+
+        return np.concatenate(tuple(pieces), axis=0)
+
+    def __len__(self):
+        """Always report a length of 0."""
+        return 0
+
+r_ = RClass()
diff --git a/tulip/shared/ulab/snippets/scipy/__init__.py b/tulip/shared/ulab/snippets/scipy/__init__.py
new file mode 100644
index 000000000..f2de8d89f
--- /dev/null
+++ b/tulip/shared/ulab/snippets/scipy/__init__.py
@@ -0,0 +1,3 @@
+
+from . import signal
+from .signal import *
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/scipy/signal/__init__.py b/tulip/shared/ulab/snippets/scipy/signal/__init__.py
new file mode 100644
index 000000000..776f79d40
--- /dev/null
+++ b/tulip/shared/ulab/snippets/scipy/signal/__init__.py
@@ -0,0 +1,2 @@
+
+from .filter_design import *
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/scipy/signal/filter_design.py b/tulip/shared/ulab/snippets/scipy/signal/filter_design.py
new file mode 100644
index 000000000..840b4a44a
--- /dev/null
+++ b/tulip/shared/ulab/snippets/scipy/signal/filter_design.py
@@ -0,0 +1,1479 @@
+"""Filter design."""
+import math
+from ulab import numpy
+from ulab import numpy as np
+from ulab import scipy as spy
+
+from ...numpy import (atleast_1d, poly, asarray, prod, size, append, nonzero, zeros_like, isreal)
+
+def butter(N, Wn, btype='low', analog=False, output='ba', fs=None):
+    """
+    Butterworth digital and analog filter design.
+
+    Design an Nth-order digital or analog Butterworth filter and return
+    the filter coefficients. This is a thin wrapper that forwards every
+    argument to `iirfilter` with ``ftype='butter'``.
+
+    Parameters
+    ----------
+    N : int
+        The order of the filter.
+    Wn : array_like
+        The critical frequency or frequencies. For lowpass and highpass
+        filters, Wn is a scalar; for bandpass and bandstop filters,
+        Wn is a length-2 sequence.
+
+        For a Butterworth filter, this is the point at which the gain
+        drops to 1/sqrt(2) that of the passband (the "-3 dB point").
+
+        For digital filters, `Wn` are in the same units as `fs`.  By default,
+        `fs` is 2 half-cycles/sample, so these are normalized from 0 to 1,
+        where 1 is the Nyquist frequency. (`Wn` is thus in
+        half-cycles / sample.)
+
+        For analog filters, `Wn` is an angular frequency (e.g. rad/s).
+    btype : {'lowpass', 'highpass', 'bandpass', 'bandstop'}, optional
+        The type of filter.  Default is 'low'.
+    analog : bool, optional
+        When True, return an analog filter, otherwise a digital filter is
+        returned.
+    output : {'ba', 'zpk', 'sos'}, optional
+        Type of output:  numerator/denominator ('ba'), pole-zero ('zpk'), or
+        second-order sections ('sos'). Default is 'ba' for backwards
+        compatibility, but 'sos' should be used for general-purpose filtering.
+    fs : float, optional
+        The sampling frequency of the digital system.
+
+        .. versionadded:: 1.2.0
+
+    Returns
+    -------
+    b, a : ndarray, ndarray
+        Numerator (`b`) and denominator (`a`) polynomials of the IIR filter.
+        Only returned if ``output='ba'``.
+    z, p, k : ndarray, ndarray, float
+        Zeros, poles, and system gain of the IIR filter transfer
+        function.  Only returned if ``output='zpk'``.
+    sos : ndarray
+        Second-order sections representation of the IIR filter.
+        Only returned if ``output=='sos'``.
+
+    See Also
+    --------
+    buttord, buttap
+
+    Notes
+    -----
+    The Butterworth filter has maximally flat frequency response in the
+    passband.
+
+    The ``'sos'`` output parameter was added in 0.16.0.
+
+    If the transfer function form ``[b, a]`` is requested, numerical
+    problems can occur since the conversion between roots and
+    the polynomial coefficients is a numerically sensitive operation,
+    even for N >= 4. It is recommended to work with the SOS
+    representation.
+
+    Examples
+    --------
+    Design an analog filter and plot its frequency response, showing the
+    critical points:
+
+    >>> from scipy import signal
+    >>> import matplotlib.pyplot as plt
+
+    >>> b, a = signal.butter(4, 100, 'low', analog=True)
+    >>> w, h = signal.freqs(b, a)
+    >>> plt.semilogx(w, 20 * np.log10(abs(h)))
+    >>> plt.title('Butterworth filter frequency response')
+    >>> plt.xlabel('Frequency [radians / second]')
+    >>> plt.ylabel('Amplitude [dB]')
+    >>> plt.margins(0, 0.1)
+    >>> plt.grid(which='both', axis='both')
+    >>> plt.axvline(100, color='green') # cutoff frequency
+    >>> plt.show()
+
+    Generate a signal made up of 10 Hz and 20 Hz, sampled at 1 kHz
+
+    >>> t = np.linspace(0, 1, 1000, False)  # 1 second
+    >>> sig = np.sin(2*np.pi*10*t) + np.sin(2*np.pi*20*t)
+    >>> fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
+    >>> ax1.plot(t, sig)
+    >>> ax1.set_title('10 Hz and 20 Hz sinusoids')
+    >>> ax1.axis([0, 1, -2, 2])
+
+    Design a digital high-pass filter at 15 Hz to remove the 10 Hz tone, and
+    apply it to the signal. (It's recommended to use second-order sections
+    format when filtering, to avoid numerical error with transfer function
+    (``ba``) format):
+
+    >>> sos = signal.butter(10, 15, 'hp', fs=1000, output='sos')
+    >>> filtered = signal.sosfilt(sos, sig)
+    >>> ax2.plot(t, filtered)
+    >>> ax2.set_title('After 15 Hz high-pass filter')
+    >>> ax2.axis([0, 1, -2, 2])
+    >>> ax2.set_xlabel('Time [seconds]')
+    >>> plt.tight_layout()
+    >>> plt.show()
+    """
+    return iirfilter(N, Wn, btype=btype, analog=analog,
+                     output=output, ftype='butter', fs=fs)
+
+def iirfilter(N, Wn, rp=None, rs=None, btype='band', analog=False,
+              ftype='butter', output='ba', fs=None):
+    """
+    Design an Nth-order IIR filter from its order and critical frequencies.
+
+    The analog lowpass prototype is obtained first, then transformed to the
+    requested band type. For digital designs the critical frequencies are
+    pre-warped beforehand and the result is passed through `bilinear_zpk`.
+
+    Only the Butterworth prototype (`buttap`) is implemented here: any other
+    `ftype` that is present in `filter_dict` raises NotImplementedError.
+
+    Parameters
+    ----------
+    N : int
+        The order of the filter.
+    Wn : array_like
+        Critical frequency (lowpass/highpass) or the two band edges
+        (bandpass/bandstop). For digital filters `Wn` is in the same units
+        as `fs`; without `fs` it must be normalized so that
+        ``0 < Wn < 1``, where 1 is the Nyquist frequency. For analog
+        filters `Wn` is an angular frequency (e.g., rad/s).
+    rp : float, optional
+        Maximum passband ripple in dB. Only validated (must not be
+        negative); the Butterworth design does not use it.
+    rs : float, optional
+        Minimum stopband attenuation in dB. Only validated (must not be
+        negative); the Butterworth design does not use it.
+    btype : str, optional
+        Band type, looked up case-insensitively in `band_dict`. The value
+        it maps to must be one of 'lowpass', 'highpass', 'bandpass' or
+        'bandstop'. Default is 'band'.
+    analog : bool, optional
+        When True, return an analog filter, otherwise a digital filter is
+        returned.
+    ftype : str, optional
+        Filter family, looked up case-insensitively in `filter_dict`.
+        Default is 'butter'.
+    output : {'ba', 'zpk', 'sos'}, optional
+        Form of the result: numerator/denominator ('ba', the default),
+        pole-zero ('zpk') or second-order sections ('sos').
+    fs : float, optional
+        The sampling frequency of the digital system. When given, `Wn` is
+        normalized as ``2*Wn/fs``. Not allowed together with ``analog=True``.
+
+    Returns
+    -------
+    b, a : ndarray, ndarray
+        Numerator (`b`) and denominator (`a`) polynomials of the IIR filter.
+        Only returned if ``output='ba'``.
+    z, p, k : ndarray, ndarray, float
+        Zeros, poles, and system gain of the IIR filter transfer
+        function.  Only returned if ``output='zpk'``.
+    sos : ndarray
+        Second-order sections representation of the IIR filter.
+        Only returned if ``output=='sos'``.
+
+    Raises
+    ------
+    ValueError
+        If `fs` is combined with ``analog=True``; if `btype`, `ftype` or
+        `output` is not recognised; if `rp` or `rs` is negative; if a
+        digital critical frequency lies outside ``0 < Wn < 1`` after
+        normalization; or if the number of critical frequencies does not
+        match the band type.
+    NotImplementedError
+        If the selected prototype is not `buttap`, or the resolved band
+        type is not one of the four handled here.
+
+    See Also
+    --------
+    butter : Filter design using order and critical points
+    """
+    ftype = ftype.lower()
+    btype = btype.lower()
+    output = output.lower()
+
+    Wn = asarray(Wn)
+    if fs is not None:
+        if analog:
+            raise ValueError("fs cannot be specified for an analog filter")
+        Wn = 2 * Wn / fs
+
+    try:
+        btype = band_dict[btype]
+    except KeyError as err:
+        raise ValueError("'%s' is an invalid bandtype for filter." % btype) from err
+
+    try:
+        typefunc = filter_dict[ftype][0]
+    except KeyError as err:
+        raise ValueError("'%s' is not a valid basic IIR filter." % ftype) from err
+
+    if output not in ('ba', 'zpk', 'sos'):
+        raise ValueError("'%s' is not a valid output form." % output)
+
+    if rp is not None and rp < 0:
+        raise ValueError("passband ripple (rp) must be positive")
+
+    if rs is not None and rs < 0:
+        raise ValueError("stopband attenuation (rs) must be positive")
+
+    # Analog lowpass prototype: only the Butterworth one is available.
+    if not (typefunc == buttap):
+        raise NotImplementedError("'%s' not implemented in iirfilter." % ftype)
+    z, p, k = typefunc(N)
+
+    if analog:
+        warped = Wn
+    else:
+        # Digital design: validate, then pre-warp each critical frequency.
+        if numpy.any(Wn <= 0) or numpy.any(Wn >= 1):
+            if fs is not None:
+                raise ValueError("Digital filter critical frequencies "
+                                 "must be 0 < Wn < fs/2 (fs={} -> fs/2={})".format(fs, fs/2))
+            raise ValueError("Digital filter critical frequencies "
+                             "must be 0 < Wn < 1")
+        # From here on the frequencies are normalized, so fs is fixed at 2.
+        fs = 2.0
+        warped = np.array([2 * fs * math.tan(np.pi * freq / fs) for freq in Wn])
+
+    # Transform the prototype to the requested band type.
+    if btype in ('lowpass', 'highpass'):
+        if size(Wn) != 1:
+            raise ValueError('Must specify a single critical frequency Wn for lowpass or highpass filter')
+
+        if btype == 'lowpass':
+            z, p, k = lp2lp_zpk(z, p, k, wo=warped[0])
+        else:
+            z, p, k = lp2hp_zpk(z, p, k, wo=warped[0])
+    elif btype in ('bandpass', 'bandstop'):
+        try:
+            bw = warped[1] - warped[0]
+            wo = math.sqrt(warped[0] * warped[1])
+        except IndexError as err:
+            raise ValueError('Wn must specify start and stop frequencies for bandpass or bandstop '
+                             'filter') from err
+
+        if btype == 'bandpass':
+            z, p, k = lp2bp_zpk(z, p, k, wo=wo, bw=bw)
+        else:
+            z, p, k = lp2bs_zpk(z, p, k, wo=wo, bw=bw)
+    else:
+        raise NotImplementedError("'%s' not implemented in iirfilter." % btype)
+
+    # Find the discrete equivalent if necessary.
+    if not analog:
+        z, p, k = bilinear_zpk(z, p, k, fs=fs)
+
+    # Convert to the requested output form.
+    if output == 'zpk':
+        return z, p, k
+    if output == 'ba':
+        return zpk2tf(z, p, k)
+    if output == 'sos':
+        return zpk2sos(z, p, k)
+
+def zpk2tf(z, p, k):
+    """
+    Return polynomial transfer function representation from zeros and poles
+
+    Parameters
+    ----------
+    z : array_like
+        Zeros of the transfer function.
+    p : array_like
+        Poles of the transfer function.
+    k : float
+        System gain.
+
+    Returns
+    -------
+    b : ndarray
+        Numerator polynomial coefficients.
+    a : ndarray
+        Denominator polynomial coefficients.
+
+    Notes
+    -----
+    A complex coefficient array is reduced to its real part when its roots
+    contain at least one complex value and the roots with positive imaginary
+    part match, after sorting, the conjugates of those with negative
+    imaginary part.
+    """
+    def _conjugate_pairs_match(roots):
+        # True when the complex roots come in matching conjugate pairs, so the
+        # polynomial built from them has real coefficients.
+        roots = asarray(roots, complex)
+        upper = np.compress(roots.imag > 0, roots)
+        lower = np.conjugate(np.compress(roots.imag < 0, roots))
+        if len(upper) == 0 or len(upper) != len(lower):
+            return False
+        return np.all(np.sort_complex(lower) == np.sort_complex(upper))
+
+    z = atleast_1d(z)
+    k = atleast_1d(k)
+    if len(z.shape) > 1:
+        # One numerator polynomial per row of zeros.
+        temp = poly(z[0])
+        # NOTE(review): relies on the dtype exposing `.char` - confirm that
+        # this ulab build provides it before using 2-D zeros.
+        b = np.empty((z.shape[0], z.shape[1] + 1), temp.dtype.char)
+        if len(k) == 1:
+            k = [k[0]] * z.shape[0]
+        for i in range(z.shape[0]):
+            b[i] = k[i] * poly(z[i])
+    else:
+        b = k * poly(z)
+    a = atleast_1d(poly(p))
+
+    # Use real output if possible. The checks use their own locals so that the
+    # numerator check cannot overwrite the poles `p` before the denominator
+    # check reads them.
+    if b.dtype == np.complex and _conjugate_pairs_match(z):
+        b = b.real.copy()
+
+    if a.dtype == np.complex and _conjugate_pairs_match(p):
+        a = a.real.copy()
+
+    return b, a
+
+def _to_tuple(a):
+    """Split every complex element of `a` into a ``[real, imag]`` pair and return the list of pairs."""
+    return [[value.real, value.imag] for value in a]
+
+def _to_complex(a):
+    """Build a complex array from an iterable of ``[real, imag]`` pairs.
+
+    The inverse of `_to_tuple`. An empty input gives an empty complex array.
+    """
+    # Construct the array in one step: concatenating one element at a time
+    # re-copies the whole result on every iteration.
+    return np.array([complex(x[0] + x[1] * 1j) for x in a], dtype=np.complex)
+
+def lexsort(z):
+    """Return the indices that order `z` by real part, ties broken by absolute imaginary part."""
+    pairs = _to_tuple(z)
+
+    def sort_key(index):
+        real_part, imag_part = pairs[index]
+        return (real_part, abs(imag_part))
+
+    return sorted(range(len(pairs)), key=sort_key)
+
+def _lexsort(z):
+    """Return the elements of `z` as a complex array ordered by real part, then by absolute imaginary part."""
+    ordered = sorted(_to_tuple(z), key=lambda pair: (pair[0], abs(pair[1])))
+    return _to_complex(ordered)
+
+def _cplxreal(z, tol=None):
+    """
+    Split into complex and real parts, combining conjugate pairs.
+    The 1-D input vector `z` is split up into its complex (`zc`) and real (`zr`)
+    elements. Every complex element must be part of a complex-conjugate pair,
+    which are combined into a single number (with positive imaginary part) in
+    the output. Two complex numbers are considered a conjugate pair if their
+    real and imaginary parts differ in magnitude by less than ``tol * abs(z)``.
+    Parameters
+    ----------
+    z : array_like
+        Vector of complex numbers to be sorted and split
+    tol : float, optional
+        Relative tolerance for testing realness and conjugate equality.
+        Default is ``100 * spacing(1)`` of `z`'s data type (i.e., 2e-14 for
+        float64)
+    Returns
+    -------
+    zc : ndarray
+        Complex elements of `z`, with each pair represented by a single value
+        having positive imaginary part, sorted first by real part, and then
+        by magnitude of imaginary part. The pairs are averaged when combined
+        to reduce error.
+    zr : ndarray
+        Real elements of `z` (those having imaginary part less than
+        `tol` times their magnitude), sorted by value.
+    Raises
+    ------
+    ValueError
+        If there are any complex numbers in `z` for which a conjugate
+        cannot be found.
+    See Also
+    --------
+    _cplxpair
+    Notes
+    -----
+    The ordering comes from the initial `_lexsort` call only; the later
+    per-run re-sort by imaginary part is currently a no-op (see the review
+    note in the body).
+    Examples
+    --------
+    >>> a = [4, 3, 1, 2-2j, 2+2j, 2-1j, 2+1j, 2-1j, 2+1j, 1+1j, 1-1j]
+    >>> zc, zr = _cplxreal(a)
+    >>> print(zc)
+    [ 1.+1.j  2.+1.j  2.+1.j  2.+2.j]
+    >>> print(zr)
+    [ 1.  3.  4.]
+    """
+
+    z = atleast_1d(z)
+    if z.size == 0:
+        # Nothing to split: the empty input is returned for both outputs.
+        return z, z
+    elif len(np.ndarray(z, dtype=np.complex).shape) != 1:
+        raise ValueError('_cplxreal only accepts 1-D input')
+
+    if tol is None:
+        # Default tolerance: 100 times abs(7/3 - 4/3 - 1), a common way to
+        # obtain the machine epsilon of the interpreter's float type.
+        tol = 100 * abs(7./3 - 4./3 - 1) #np.finfo((1.0 * z).dtype).eps
+
+    # Sort by real part, magnitude of imaginary part (speed up further sorting)
+    z = _lexsort(z)
+    # Split reals from conjugate pairs
+    real_indices = abs(z.imag) <= tol * abs(z)
+    zr = z[real_indices].real
+
+    if len(zr) == len(z):
+        # Input is entirely real
+        return np.array([],dtype=np.float), zr
+
+    # Split positive and negative halves of conjugates
+    # (the mask is inverted element by element to select the non-real values)
+    inv_real_indices = np.array([not elem for elem in real_indices], dtype=np.bool)
+    z = z[inv_real_indices]
+    zp = z[z.imag > 0]
+    zn = z[z.imag < 0]
+
+    if len(zp) != len(zn):
+        raise ValueError('Array contains complex value with no matching '
+                         'conjugate.')
+
+    # Find runs of (approximately) the same real part
+    same_real = np.diff(zp.real) <= tol * abs(zp[:-1])
+    same_real = np.array(same_real * 1, dtype=np.uint8)
+    # Pad with a zero on both sides so that a run touching either end still
+    # produces a rising and a falling edge in the difference below.
+    a = np.array([0], dtype=np.uint8)
+    b = np.array([0], dtype=np.uint8)
+    x = np.concatenate((a, same_real, b))
+    diffs = numpy.diff(np.array(x, dtype=np.float))
+    # Rising edges (+1) mark where a run starts, falling edges (-1) where it ends.
+    start = np.array((diffs > 0) * 1, dtype=np.uint16)
+    stop = np.array((diffs < 0) * 1, dtype=np.uint16)
+    run_starts = nonzero(start)[0]
+    run_stops = nonzero(stop)[0]
+
+    # Sort each run by their imaginary parts
+    # NOTE(review): this loop is a placeholder. It only rebinds `start`, `stop`
+    # and `a` and never reorders `zp` or `zn`, so values whose real parts are
+    # merely close (within `tol`) keep the order given by `_lexsort`.
+    for i in range(len(run_starts)):
+        start = int(run_starts[i])
+        stop = int(run_stops[i]) + 1
+        for chunk in (zp[start:stop], zn[start:stop]):
+            a = 1
+
+
+    # Check that negatives match positives
+    if any(abs(zp - np.conjugate(zn)) > tol * abs(zn)):
+        raise ValueError('Array contains complex value with no matching '
+                         'conjugate.')
+
+    # Average out numerical inaccuracy in real vs imag parts of pairs
+    zc = (zp + np.conjugate(zn)) / 2
+
+    return zc, zr
+
+def zpk2sos(z, p, k, pairing='nearest'):
+    """
+    Return second-order sections from zeros, poles, and gain of a system
+
+    Parameters
+    ----------
+    z : array_like
+        Zeros of the transfer function.
+    p : array_like
+        Poles of the transfer function.
+    k : float
+        System gain.
+    pairing : {'nearest', 'keep_odd'}, optional
+        The method to use to combine pairs of poles and zeros into sections.
+        See Notes below.
+
+    Returns
+    -------
+    sos : ndarray
+        Array of second-order filter coefficients, with shape
+        ``(n_sections, 6)``. See `sosfilt` for the SOS filter format
+        specification.
+
+    See Also
+    --------
+    sosfilt
+
+    Notes
+    -----
+    The algorithm used to convert ZPK to SOS format is designed to
+    minimize errors due to numerical precision issues. The pairing
+    algorithm attempts to minimize the peak gain of each biquadratic
+    section. This is done by pairing poles with the nearest zeros, starting
+    with the poles closest to the unit circle.
+
+    *Algorithms*
+
+    The current algorithms are designed specifically for use with digital
+    filters. (The output coefficients are not correct for analog filters.)
+
+    The steps in the ``pairing='nearest'`` and ``pairing='keep_odd'``
+    algorithms are mostly shared. The ``nearest`` algorithm attempts to
+    minimize the peak gain, while ``'keep_odd'`` minimizes peak gain under
+    the constraint that odd-order systems should retain one section
+    as first order. The algorithm steps and are as follows:
+
+    As a pre-processing step, add poles or zeros to the origin as
+    necessary to obtain the same number of poles and zeros for pairing.
+    If ``pairing == 'nearest'`` and there are an odd number of poles,
+    add an additional pole and a zero at the origin.
+
+    The following steps are then iterated over until no more poles or
+    zeros remain:
+
+    1. Take the (next remaining) pole (complex or real) closest to the
+       unit circle to begin a new filter section.
+
+    2. If the pole is real and there are no other remaining real poles [#]_,
+       add the closest real zero to the section and leave it as a first
+       order section. Note that after this step we are guaranteed to be
+       left with an even number of real poles, complex poles, real zeros,
+       and complex zeros for subsequent pairing iterations.
+
+    3. Else:
+
+        1. If the pole is complex and the zero is the only remaining real
+           zero*, then pair the pole with the *next* closest zero
+           (guaranteed to be complex). This is necessary to ensure that
+           there will be a real zero remaining to eventually create a
+           first-order section (thus keeping the odd order).
+
+        2. Else pair the pole with the closest remaining zero (complex or
+           real).
+
+        3. Proceed to complete the second-order section by adding another
+           pole and zero to the current pole and zero in the section:
+
+            1. If the current pole and zero are both complex, add their
+               conjugates.
+
+            2. Else if the pole is complex and the zero is real, add the
+               conjugate pole and the next closest real zero.
+
+            3. Else if the pole is real and the zero is complex, add the
+               conjugate zero and the real pole closest to those zeros.
+
+            4. Else (we must have a real pole and real zero) add the next
+               real pole closest to the unit circle, and then add the real
+               zero closest to that pole.
+
+    .. [#] This conditional can only be met for specific odd-order inputs
+           with the ``pairing == 'keep_odd'`` method.
+
+    .. versionadded:: 0.16.0
+
+    Examples
+    --------
+
+    Design a 6th order low-pass elliptic digital filter for a system with a
+    sampling rate of 8000 Hz that has a pass-band corner frequency of
+    1000 Hz. The ripple in the pass-band should not exceed 0.087 dB, and
+    the attenuation in the stop-band should be at least 90 dB.
+
+    In the following call to `signal.ellip`, we could use ``output='sos'``,
+    but for this example, we'll use ``output='zpk'``, and then convert to SOS
+    format with `zpk2sos`:
+
+    >>> from scipy import signal
+    >>> z, p, k = signal.ellip(6, 0.087, 90, 1000/(0.5*8000), output='zpk')
+
+    Now convert to SOS format.
+
+    >>> sos = signal.zpk2sos(z, p, k)
+
+    The coefficients of the numerators of the sections:
+
+    >>> sos[:, :3]
+    array([[ 0.0014154 ,  0.00248707,  0.0014154 ],
+           [ 1.        ,  0.72965193,  1.        ],
+           [ 1.        ,  0.17594966,  1.        ]])
+
+    The symmetry in the coefficients occurs because all the zeros are on the
+    unit circle.
+
+    The coefficients of the denominators of the sections:
+
+    >>> sos[:, 3:]
+    array([[ 1.        , -1.32543251,  0.46989499],
+           [ 1.        , -1.26117915,  0.6262586 ],
+           [ 1.        , -1.25707217,  0.86199667]])
+
+    The next example shows the effect of the `pairing` option.  We have a
+    system with three poles and three zeros, so the SOS array will have
+    shape (2, 6). The means there is, in effect, an extra pole and an extra
+    zero at the origin in the SOS representation.
+
+    >>> z1 = np.array([-1, -0.5-0.5j, -0.5+0.5j])
+    >>> p1 = np.array([0.75, 0.8+0.1j, 0.8-0.1j])
+
+    With ``pairing='nearest'`` (the default), we obtain
+
+    >>> signal.zpk2sos(z1, p1, 1)
+    array([[ 1.  ,  1.  ,  0.5 ,  1.  , -0.75,  0.  ],
+           [ 1.  ,  1.  ,  0.  ,  1.  , -1.6 ,  0.65]])
+
+    The first section has the zeros {-0.5-0.5j, -0.5+0.5j} and the poles
+    {0, 0.75}, and the second section has the zeros {-1, 0} and poles
+    {0.8+0.1j, 0.8-0.1j}. Note that the extra pole and zero at the origin
+    have been assigned to different sections.
+
+    With ``pairing='keep_odd'``, we obtain:
+
+    >>> signal.zpk2sos(z1, p1, 1, pairing='keep_odd')
+    array([[ 1.  ,  1.  ,  0.  ,  1.  , -0.75,  0.  ],
+           [ 1.  ,  1.  ,  0.5 ,  1.  , -1.6 ,  0.65]])
+
+    The extra pole and zero at the origin are in the same section.
+    The first section is, in effect, a first-order section.
+
+    """
+    # TODO in the near future:
+    # 1. Add SOS capability to `filtfilt`, `freqz`, etc. somehow (#3259).
+    # 2. Make `decimate` use `sosfilt` instead of `lfilter`.
+    # 3. Make sosfilt automatically simplify sections to first order
+    #    when possible. Note this might make `sosfiltfilt` a bit harder (ICs).
+    # 4. Further optimizations of the section ordering / pole-zero pairing.
+    # See the wiki for other potential issues.
+
+    valid_pairings = ['nearest', 'keep_odd']
+    if pairing not in valid_pairings:
+        raise ValueError('pairing must be one of %s, not %s'
+                         % (valid_pairings, pairing))
+    if len(z) == len(p) == 0:
+        return np.array([[k, 0., 0., 1., 0., 0.]])
+
+    # ensure we have the same number of poles and zeros, and make copies
+
+    if len(z) != len(p):
+        if max(len(z) - len(p),0) > 0:  
+            p = np.concatenate((p, np.zeros((max(len(z) - len(p), 0)),dtype=np.complex)))
+        if max(len(p) - len(z),0) > 0:   
+            z = np.concatenate((z, np.zeros((max(len(p) - len(z), 0)), dtype=np.complex)))
+
+    n_sections = (max(len(p), len(z)) + 1) // 2
+    sos = np.zeros((n_sections, 6))
+    
+
+    if len(p) % 2 == 1 and pairing == 'nearest':
+        p = np.concatenate((p, np.array([0.],dtype=np.complex)))
+        z = np.concatenate((z, np.array([0.],dtype=np.complex)))
+    assert len(p) == len(z)
+
+    z = np.array(z, dtype=np.complex)
+
+
+    # Ensure we have complex conjugate pairs
+    # (note that _cplxreal only gives us one element of each complex pair):
+    cplx, rel = _cplxreal(p)
+    if len(rel) > 0 and len(cplx) > 0:
+        p = np.concatenate((cplx, np.array(rel, dtype=np.complex))) 
+    else:
+        p = cplx
+   
+    
+    cplx, rel = _cplxreal(z)
+    if len(rel) > 0 and len(cplx) > 0:
+        z = np.concatenate((cplx, np.array(rel, dtype=np.complex))) 
+    else:
+        z = rel
+
+    p_sos = np.zeros((n_sections, 2))
+    z_sos = zeros_like(p_sos)
+
+    for si in range(n_sections):
+        # Select the next "worst" pole
+        p1_idx = np.argmin(abs(1 - abs(p)))
+        p1 = p[p1_idx]
+        p = np.delete(p, p1_idx)
+        # Pair that pole with a zero
+        if isreal(p1) and np.sum([isreal(p)]) == 0:
+            # Special case to set a first-order section
+            z1_idx = _nearest_real_complex_idx(z, p1, 'real')
+            z1 = z[z1_idx]
+            z = np.delete(z, z1_idx)
+            p2 = z2 = 0
+        else:
+            if not isreal(p1) and np.sum(isreal(z)) == 1:
+                # Special case to ensure we choose a complex zero to pair
+                # with so later (setting up a first-order section)
+                z1_idx = _nearest_real_complex_idx(z, p1, 'complex')
+                assert not isreal(z[z1_idx])
+            else:
+                # Pair the pole with the closest zero (real or complex)
+                z1_idx = np.argmin(abs(p1 - z))
+            z1 = z[z1_idx]
+            z = np.delete(z, z1_idx)
+            # Now that we have p1 and z1, figure out what p2 and z2 need to be
+            if not isreal(p1):
+                if not isreal(z1):  # complex pole, complex zero
+                    p2 = np.conjugate(p1)
+                    z2 = np.conjugate(z1)
+                else:  # complex pole, real zero
+                    p2 = np.conjugate(p1) #p1.conj()
+                    z2_idx = _nearest_real_complex_idx(z, p1, 'real')
+                    z2 = z[z2_idx]
+                    assert isreal(z2)
+                    z = np.delete(z, z2_idx)
+            else:
+                if not isreal(z1):  # real pole, complex zero
+                    z2 = np.conjugate(z1)
+                    p2_idx = _nearest_real_complex_idx(p, z1, 'real')
+                    p2 = p[p2_idx]
+                    assert isreal(p2)
+                else:  # real pole, real zero
+                    # pick the next "worst" pole to use
+                    idx = nonzero(isreal(p))[0]
+                    assert len(idx) > 0
+ 
+                    a = abs(abs(p[idx[0]]) - 1)
+                    a = np.array([a])
+                    p2_idx = idx[np.argmin(a) - 1]
+                    p2 = p[p2_idx]
+                    # find a real zero to match the added pole
+                    assert isreal(p2)
+                    z2_idx = _nearest_real_complex_idx(z, p2, 'real')
+                    z2 = z[z2_idx]
+                    assert isreal(z2)
+                    z = np.delete(z, z2_idx)
+
+                p = np.delete(p, p2_idx)
+
+        p_sos = np.array(p_sos, dtype=np.complex)
+        p_sos[si] = np.array([p1, p2], dtype=np.complex)
+        z_sos[si] = np.array([z1, z2], dtype=np.float)
+
+    assert len(p) == len(z) == 0  # we've consumed all poles and zeros
+    del p, z
+
+    # Construct the system, reversing order so the "worst" are last
+    p_sos = p_sos[::-1].reshape((n_sections, 2))
+    z_sos = z_sos[::-1].reshape((n_sections, 2))
+
+    gains = np.ones(n_sections, dtype=np.float)
+    gains[0] = k
+    for si in range(n_sections):
+        x = zpk2tf(z_sos[si], p_sos[si], gains[si])
+        sos[si] = np.concatenate(x)
+    return sos
+
+def lp2bp_zpk(z, p, k, wo=1.0, bw=1.0):
+    r"""
+    Transform a lowpass filter prototype to a bandpass filter.
+
+    Return an analog band-pass filter with center frequency `wo` and
+    bandwidth `bw` from an analog low-pass filter prototype with unity
+    cutoff frequency, using zeros, poles, and gain ('zpk') representation.
+
+    Parameters
+    ----------
+    z : array_like
+        Zeros of the analog filter transfer function.
+    p : array_like
+        Poles of the analog filter transfer function.
+    k : float
+        System gain of the analog filter transfer function.
+    wo : float
+        Desired passband center, as angular frequency (e.g., rad/s).
+        Defaults to no change.
+    bw : float
+        Desired passband width, as angular frequency (e.g., rad/s).
+        Defaults to 1.
+
+    Returns
+    -------
+    z : ndarray
+        Zeros of the transformed band-pass filter transfer function.
+    p : ndarray
+        Poles of the transformed band-pass filter transfer function.
+    k : float
+        System gain of the transformed band-pass filter.
+
+    Raises
+    ------
+    ValueError
+        If there are more zeros than poles (raised by `_relative_degree`).
+
+    See Also
+    --------
+    lp2lp_zpk, lp2hp_zpk, lp2bs_zpk, bilinear
+    lp2bp
+
+    Notes
+    -----
+    This is derived from the s-plane substitution
+
+    .. math:: s \rightarrow \frac{s^2 + {\omega_0}^2}{s \cdot \mathrm{BW}}
+
+    This is the "wideband" transformation, producing a passband with
+    geometric (log frequency) symmetry about `wo`.
+
+    Every prototype root ``r`` is scaled to ``r*bw/2`` and then split into
+    ``r*bw/2 + sqrt((r*bw/2)**2 - wo**2)`` and the matching ``-sqrt`` value.
+    The outputs list all ``+sqrt`` values first, then all ``-sqrt`` values;
+    the zeros moved to the origin come last.
+
+    .. versionadded:: 1.1.0
+
+    """
+    z = atleast_1d(z)
+    p = atleast_1d(p)
+    wo = float(wo)
+    bw = float(bw)
+
+    degree = _relative_degree(z, p)
+
+    # Scale poles and zeros to desired bandwidth.  The arrays are built
+    # explicitly as complex because the square roots below need to produce
+    # complex results, not NaN.
+    z_lp = np.array([x * bw/2 for x in z], dtype=np.complex)
+    p_lp = np.array([x * bw/2 for x in p], dtype=np.complex)
+
+    # Duplicate poles and zeros and shift from baseband to +wo and -wo
+    if len(z_lp) > 0:
+        z_shift = np.sqrt(z_lp*z_lp - wo*wo, dtype=np.complex)
+        z_bp = np.concatenate((z_lp + z_shift, z_lp - z_shift))
+        # Move degree zeros to origin, leaving degree zeros at infinity for BPF.
+        # The dtype belongs to np.zeros: every other `append` call in this
+        # module passes only (arr, values).
+        z_bp = append(z_bp, np.zeros(degree, dtype=np.complex))
+    else:
+        # No finite zeros in the prototype: only the zeros at the origin remain.
+        z_bp = np.zeros(degree, dtype=np.complex)
+
+    p_shift = np.sqrt(p_lp*p_lp - wo*wo, dtype=np.complex)
+    p_bp = np.concatenate((p_lp + p_shift, p_lp - p_shift))
+
+    # Cancel out gain change from frequency scaling
+    k_bp = k * bw**degree
+
+    return z_bp, p_bp, k_bp
+
+def lp2bs_zpk(z, p, k, wo=1.0, bw=1.0):
+    r"""
+    Transform a lowpass filter prototype to a bandstop filter.
+
+    Starting from an analog low-pass prototype with unity cutoff frequency,
+    build the analog band-stop filter centred on `wo` whose stopband is `bw`
+    wide.  Input and output use zeros, poles, and gain ('zpk') form.
+
+    Parameters
+    ----------
+    z : array_like
+        Zeros of the analog filter transfer function.
+    p : array_like
+        Poles of the analog filter transfer function.
+    k : float
+        System gain of the analog filter transfer function.
+    wo : float
+        Desired stopband center, as angular frequency (e.g., rad/s).
+        Defaults to no change.
+    bw : float
+        Desired stopband width, as angular frequency (e.g., rad/s).
+        Defaults to 1.
+
+    Returns
+    -------
+    z : ndarray
+        Zeros of the transformed band-stop filter transfer function.
+    p : ndarray
+        Poles of the transformed band-stop filter transfer function.
+    k : float
+        System gain of the transformed band-stop filter.
+
+    See Also
+    --------
+    lp2lp_zpk, lp2hp_zpk, lp2bp_zpk, bilinear
+    lp2bs
+
+    Notes
+    -----
+    This is derived from the s-plane substitution
+
+    .. math:: s \rightarrow \frac{s \cdot \mathrm{BW}}{s^2 + {\omega_0}^2}
+
+    This is the "wideband" transformation, producing a stopband with
+    geometric (log frequency) symmetry about `wo`.
+
+    .. versionadded:: 1.1.0
+
+    """
+    z = atleast_1d(z)
+    p = atleast_1d(p)
+    wo, bw = float(wo), float(bw)
+    degree = _relative_degree(z, p)
+
+    # Low-pass -> high-pass inversion, scaled to half the requested bandwidth.
+    half_bw = bw/2
+    z_inv = half_bw / z
+    p_inv = half_bw / p
+
+    # NOTE(review): presumably guards against the division above yielding
+    # `inf` for a prototype without zeros -- confirm against the array backend.
+    if z_inv == float('inf'):
+        z_inv = []
+
+    # Force complex storage so the square roots below give complex values, not NaN.
+    z_inv = np.array(z_inv, dtype=np.complex)
+    p_inv = np.array(p_inv, dtype=np.complex)
+
+    # Each high-pass root r splits into r + sqrt(r*r - wo*wo) and r - sqrt(r*r - wo*wo).
+    if len(z_inv) > 0:
+        z_root = np.sqrt(z_inv*z_inv - wo*wo, dtype=np.complex)
+        z_bs = np.concatenate((z_inv + z_root, z_inv - z_root))
+    else:
+        z_bs = np.array([], dtype=np.complex)
+    p_root = np.sqrt(p_inv*p_inv - wo*wo, dtype=np.complex)
+    p_bs = np.concatenate((p_inv + p_root, p_inv - p_root))
+
+    # Zeros that sat at infinity land on the stopband centre, +j*wo then -j*wo.
+    for centre in (1j*wo, -1j*wo):
+        z_bs = append(z_bs, np.full(degree, centre, dtype=np.complex))
+
+    # Undo the gain change introduced by the inversion.
+    k_bs = k * (prod(-z) / prod(-p)).real
+
+    return z_bs, p_bs, k_bs
+
+def bilinear_zpk(z, p, k, fs):
+    r"""
+    Return a digital IIR filter from an analog one using a bilinear transform.
+
+    Transform a set of poles and zeros from the analog s-plane to the digital
+    z-plane using Tustin's method, which substitutes ``2*fs*(z-1) / (z+1)``
+    for ``s``, maintaining the shape of the frequency response.
+
+    Parameters
+    ----------
+    z : array_like
+        Zeros of the analog filter transfer function.
+    p : array_like
+        Poles of the analog filter transfer function.
+    k : float
+        System gain of the analog filter transfer function.
+    fs : float
+        Sample rate, as ordinary frequency (e.g., hertz). No prewarping is
+        done in this function.
+
+    Returns
+    -------
+    z : ndarray
+        Zeros of the transformed digital filter transfer function.
+    p : ndarray
+        Poles of the transformed digital filter transfer function.
+    k : float
+        System gain of the transformed digital filter.
+
+    See Also
+    --------
+    lp2lp_zpk, lp2hp_zpk, lp2bp_zpk, lp2bs_zpk
+    bilinear
+
+    Notes
+    -----
+    .. versionadded:: 1.1.0
+
+    Examples
+    --------
+    >>> from scipy import signal
+    >>> import matplotlib.pyplot as plt
+
+    >>> fs = 100
+    >>> bf = 2 * np.pi * np.array([7, 13])
+    >>> filts = signal.lti(*signal.butter(4, bf, btype='bandpass', analog=True,
+    ...                                   output='zpk'))
+    >>> filtz = signal.lti(*signal.bilinear_zpk(filts.zeros, filts.poles,
+    ...                                         filts.gain, fs))
+    >>> wz, hz = signal.freqz_zpk(filtz.zeros, filtz.poles, filtz.gain)
+    >>> ws, hs = signal.freqs_zpk(filts.zeros, filts.poles, filts.gain,
+    ...                           worN=fs*wz)
+    >>> plt.semilogx(wz*fs/(2*np.pi), 20*np.log10(np.abs(hz).clip(1e-15)),
+    ...              label=r'$|H_z(e^{j \omega})|$')
+    >>> plt.semilogx(wz*fs/(2*np.pi), 20*np.log10(np.abs(hs).clip(1e-15)),
+    ...              label=r'$|H(j \omega)|$')
+    >>> plt.legend()
+    >>> plt.xlabel('Frequency [Hz]')
+    >>> plt.ylabel('Magnitude [dB]')
+    >>> plt.grid()
+    """
+
+    z = atleast_1d(z)
+    p = atleast_1d(p)
+
+    degree = _relative_degree(z, p)
+
+    two_fs = 2.0*fs
+
+    # Tustin map applied element-wise: s -> (2*fs + s) / (2*fs - s)
+    zeros_d = (two_fs + z) / (two_fs - z)
+    poles_d = (two_fs + p) / (two_fs - p)
+
+    # One zero at -1 (the Nyquist frequency) for every analog zero at infinity;
+    # the `+ 0j` presumably promotes the padding to complex -- confirm.
+    nyquist_zeros = -np.ones(degree) + 0j
+    if len(nyquist_zeros) > 0:
+        zeros_d = append(zeros_d, nyquist_zeros)
+
+    # Compensate for the gain change, using the real part of the product ratio.
+    gain_d = k * (prod(two_fs - z) / prod(two_fs - p)).real
+
+    return zeros_d, poles_d, gain_d
+
+def _nearest_real_complex_idx(fro, to, which):
+    """Get the index in `fro` of the next closest real or complex element to `to`"""
+    assert which in ('real', 'complex')
+    a = np.array(abs(fro - to), dtype=np.float)
+    order = np.argsort(a, axis=0)   # Differs from numpy  TODO
+    # Elements of `fro` listed from nearest to farthest from `to`
+    sorted_array_list = [fro[i] for i in order]
+    mask = isreal(np.array(sorted_array_list, dtype=np.float))
+    if which == 'complex':
+        mask = ~mask
+    mask = np.array([mask], dtype=np.uint16)
+    nzm = np.array(nonzero(mask)[0],dtype=np.int8)
+    return order[nzm[0]]
+
+def _relative_degree(z, p):
+    """
+    Number of poles minus number of zeros of a transfer function.
+
+    Raises ValueError when there are more zeros than poles.
+    """
+    if len(z) > len(p):
+        raise ValueError("Improper transfer function. "
+                         "Must have at least as many poles as zeros.")
+    return len(p) - len(z)
+
+def lp2lp_zpk(z, p, k, wo=1.0):
+    r"""
+    Transform a lowpass filter prototype to a different frequency.
+
+    Return an analog low-pass filter with cutoff frequency `wo`
+    from an analog low-pass filter prototype with unity cutoff frequency,
+    using zeros, poles, and gain ('zpk') representation.
+
+    Parameters
+    ----------
+    z : array_like
+        Zeros of the analog filter transfer function.
+    p : array_like
+        Poles of the analog filter transfer function.
+    k : float
+        System gain of the analog filter transfer function.
+    wo : float
+        Desired cutoff, as angular frequency (e.g., rad/s).
+        Defaults to no change.
+
+    Returns
+    -------
+    z : ndarray
+        Zeros of the transformed low-pass filter transfer function.
+    p : ndarray
+        Poles of the transformed low-pass filter transfer function.
+    k : float
+        System gain of the transformed low-pass filter.
+
+    Raises
+    ------
+    ValueError
+        If there are more zeros than poles (raised by `_relative_degree`).
+
+    See Also
+    --------
+    lp2hp_zpk, lp2bp_zpk, lp2bs_zpk, bilinear
+    lp2lp
+
+    Notes
+    -----
+    This is derived from the s-plane substitution
+
+    .. math:: s \rightarrow \frac{s}{\omega_0}
+
+    .. versionadded:: 1.1.0
+
+    """
+    z = atleast_1d(z)
+    p = atleast_1d(p)
+    # Avoid int wraparound
+    wo = float(wo)
+
+    degree = _relative_degree(z, p)
+
+    # Scale all points radially from the origin to shift the cutoff frequency:
+    # each root is multiplied by wo and stored in a complex array.
+    z_lp = np.array([wo * root for root in z], dtype=np.complex)
+    p_lp = np.array([wo * root for root in p], dtype=np.complex)
+
+    # Every shifted pole lowers the gain by a factor wo and every shifted zero
+    # raises it by the same factor; wo**degree cancels the net change so the
+    # overall gain is preserved.
+    k_lp = k * wo**degree
+
+    return z_lp, p_lp, k_lp
+
+def lp2hp_zpk(z, p, k, wo=1.0):
+    r"""
+    Transform a lowpass filter prototype to a highpass filter.
+
+    Starting from an analog low-pass prototype with unity cutoff frequency,
+    build the analog high-pass filter whose cutoff frequency is `wo`.
+    Input and output use zeros, poles, and gain ('zpk') representation.
+
+    Parameters
+    ----------
+    z : array_like
+        Zeros of the analog filter transfer function.
+    p : array_like
+        Poles of the analog filter transfer function.
+    k : float
+        System gain of the analog filter transfer function.
+    wo : float
+        Desired cutoff, as angular frequency (e.g., rad/s).
+        Defaults to no change.
+
+    Returns
+    -------
+    z : ndarray
+        Zeros of the transformed high-pass filter transfer function.
+    p : ndarray
+        Poles of the transformed high-pass filter transfer function.
+    k : float
+        System gain of the transformed high-pass filter.
+
+    See Also
+    --------
+    lp2lp_zpk, lp2bp_zpk, lp2bs_zpk, bilinear
+    lp2hp
+
+    Notes
+    -----
+    This is derived from the s-plane substitution
+
+    .. math:: s \rightarrow \frac{\omega_0}{s}
+
+    This maintains symmetry of the lowpass and highpass responses on a
+    logarithmic scale.
+
+    .. versionadded:: 1.1.0
+
+    """
+    z = atleast_1d(z)
+    p = atleast_1d(p)
+    wo = float(wo)
+    degree = _relative_degree(z, p)
+
+    # s -> wo/s: invert positions radially about the unit circle (LPF -> HPF)
+    # and scale them radially from the origin to shift the cutoff frequency.
+    zeros_inv = wo / z
+    poles_inv = wo / p
+
+    # NOTE(review): presumably covers a prototype without finite zeros, where the
+    # division above apparently yields `inf` -- confirm against the array backend.
+    if zeros_inv == float('inf'):
+        zeros_inv = []
+
+    # Low-pass zeros at infinity end up at the origin after the inversion.
+    zeros_inv = append(zeros_inv, np.zeros(degree))
+
+    # Cancel out the gain change caused by the inversion.
+    gain = k * (prod(-z) / prod(-p)).real
+
+    return zeros_inv, poles_inv, gain
+
+def buttord(wp, ws, gpass, gstop, analog=False, fs=None):
+    """Butterworth filter order selection.
+
+    Return the order of the lowest order digital or analog Butterworth filter
+    that loses no more than `gpass` dB in the passband and has at least
+    `gstop` dB attenuation in the stopband.
+
+    Parameters
+    ----------
+    wp, ws : float
+        Passband and stopband edge frequencies.
+
+        For digital filters, these are in the same units as `fs`. By default,
+        `fs` is 2 half-cycles/sample, so these are normalized from 0 to 1,
+        where 1 is the Nyquist frequency. (`wp` and `ws` are thus in
+        half-cycles / sample.) For example:
+
+            - Lowpass:   wp = 0.2,          ws = 0.3
+            - Highpass:  wp = 0.3,          ws = 0.2
+            - Bandpass:  wp = [0.2, 0.5],   ws = [0.1, 0.6]
+            - Bandstop:  wp = [0.1, 0.6],   ws = [0.2, 0.5]
+
+        For analog filters, `wp` and `ws` are angular frequencies (e.g., rad/s).
+    gpass : float
+        The maximum loss in the passband (dB).
+    gstop : float
+        The minimum attenuation in the stopband (dB).
+    analog : bool, optional
+        When True, return an analog filter, otherwise a digital filter is
+        returned.
+    fs : float, optional
+        The sampling frequency of the digital system.
+
+        .. versionadded:: 1.2.0
+
+    Returns
+    -------
+    ord : int
+        The lowest order for a Butterworth filter which meets specs.
+    wn : ndarray or float
+        The Butterworth natural frequency (i.e. the "3dB frequency"). Should
+        be used with `butter` to give filter results. If `fs` is specified,
+        this is in the same units, and `fs` must also be passed to `butter`.
+
+    See Also
+    --------
+    butter : Filter design using order and critical points
+    cheb1ord : Find order and critical points from passband and stopband spec
+    cheb2ord, ellipord
+    iirfilter : General filter design using order and critical frequencies
+    iirdesign : General filter design using passband and stopband spec
+
+    Examples
+    --------
+    Design an analog bandpass filter with passband within 3 dB from 20 to
+    50 rad/s, while rejecting at least -40 dB below 14 and above 60 rad/s.
+    Plot its frequency response, showing the passband and stopband
+    constraints in gray.
+
+    >>> from scipy import signal
+    >>> import matplotlib.pyplot as plt
+
+    >>> N, Wn = signal.buttord([20, 50], [14, 60], 3, 40, True)
+    >>> b, a = signal.butter(N, Wn, 'band', True)
+    >>> w, h = signal.freqs(b, a, np.logspace(1, 2, 500))
+    >>> plt.semilogx(w, 20 * np.log10(abs(h)))
+    >>> plt.title('Butterworth bandpass filter fit to constraints')
+    >>> plt.xlabel('Frequency [radians / second]')
+    >>> plt.ylabel('Amplitude [dB]')
+    >>> plt.grid(which='both', axis='both')
+    >>> plt.fill([1,  14,  14,   1], [-40, -40, 99, 99], '0.9', lw=0) # stop
+    >>> plt.fill([20, 20,  50,  50], [-99, -3, -3, -99], '0.9', lw=0) # pass
+    >>> plt.fill([60, 60, 1e9, 1e9], [99, -40, -40, 99], '0.9', lw=0) # stop
+    >>> plt.axis([10, 100, -60, 3])
+    >>> plt.show()
+
+    """
+    _validate_gpass_gstop(gpass, gstop)
+
+    wp = atleast_1d(wp)
+    ws = atleast_1d(ws)
+    if fs is not None:
+        if analog:
+            raise ValueError("fs cannot be specified for an analog filter")
+        wp = 2*wp/fs
+        ws = 2*ws/fs
+
+    filter_type = 2 * (len(wp) - 1)
+    filter_type += 1
+    if wp[0] >= ws[0]:
+        filter_type += 1
+
+    # Pre-warp frequencies for digital filter design.  wp/ws are arrays and
+    # math.tan only accepts scalars, so it is mapped over the elements.
+    if not analog:
+        passb = np.array([math.tan(np.pi * w / 2.0) for w in wp])
+        stopb = np.array([math.tan(np.pi * w / 2.0) for w in ws])
+    else:
+        passb = wp * 1.0
+        stopb = ws * 1.0
+
+    if filter_type == 1:            # low
+        nat = stopb / passb
+    elif filter_type == 2:          # high
+        nat = passb / stopb
+    elif filter_type == 3:          # stop
+        wp0 = optimize.fminbound(band_stop_obj, passb[0], stopb[0] - 1e-12,   #TODO
+                                 args=(0, passb, stopb, gpass, gstop,
+                                       'butter'),
+                                 disp=0)
+        passb[0] = wp0
+        wp1 = optimize.fminbound(band_stop_obj, stopb[1] + 1e-12, passb[1], #TODO
+                                 args=(1, passb, stopb, gpass, gstop,
+                                       'butter'),
+                                 disp=0)
+        passb[1] = wp1
+        nat = ((stopb * (passb[0] - passb[1])) /
+               (stopb ** 2 - passb[0] * passb[1]))
+    elif filter_type == 4:          # pass
+        nat = ((stopb ** 2 - passb[0] * passb[1]) /
+               (stopb * (passb[0] - passb[1])))
+
+    nat = min(abs(nat))
+
+    GSTOP = 10 ** (0.1 * abs(gstop))
+    GPASS = 10 ** (0.1 * abs(gpass))
+    ord = int(math.ceil(math.log10((GSTOP - 1.0) / (GPASS - 1.0)) / (2 * math.log10(nat))))
+
+    # Find the Butterworth natural frequency WN (or the "3dB" frequency)
+    # to give exactly gpass at passb.
+    try:
+        W0 = (GPASS - 1.0) ** (-1.0 / (2.0 * ord))
+    except ZeroDivisionError:
+        W0 = 1.0
+        print("Warning, order is zero...check input parameters.")
+
+    # now convert this frequency back from lowpass prototype to the analog filter
+    if filter_type == 1:  # low
+        WN = W0 * passb
+    elif filter_type == 2:  # high
+        WN = passb / W0
+    elif filter_type == 3:  # stop
+        WN = numpy.empty(2, float)
+        discr = math.sqrt((passb[1] - passb[0]) ** 2 +
+                     4 * W0 ** 2 * passb[0] * passb[1])
+        WN[0] = ((passb[1] - passb[0]) + discr) / (2 * W0)
+        WN[1] = ((passb[1] - passb[0]) - discr) / (2 * W0)
+        WN = numpy.sort(abs(WN))
+    elif filter_type == 4:  # pass
+        W0 = numpy.array([-W0, W0], float)
+        # W0 is an array from here on, so the element-wise np.sqrt is needed
+        WN = (-W0 * (passb[1] - passb[0]) / 2.0 +
+              np.sqrt(W0 ** 2 / 4.0 * (passb[1] - passb[0]) ** 2 +
+                      passb[0] * passb[1]))
+        WN = numpy.sort(abs(WN))
+    else:
+        raise ValueError("Bad type: %s" % filter_type)
+
+    if not analog:
+        # WN is an array: map math.atan over it (`math` has no `arctan`)
+        wn = (2.0 / np.pi) * np.array([math.atan(w) for w in WN])
+    else:
+        wn = WN
+
+    if len(wn) == 1:
+        wn = wn[0]
+
+    if fs is not None:
+        wn = wn*fs/2
+
+    return ord, wn
+
+def buttap(N):
+    """Return (z,p,k) for analog prototype of Nth-order Butterworth filter.
+
+    The filter will have an angular (e.g., rad/s) cutoff frequency of 1.
+
+    See Also
+    --------
+    butter : Filter design function using this prototype
+
+    """
+    if abs(int(N)) != N:
+        raise ValueError("Filter order must be a nonnegative integer")
+    # Evenly spaced angles pi*m/(2N) for m = -N+1, -N+3, ..., N-1; the middle
+    # value is 0 when N is odd, which guarantees an exactly real pole.
+    steps = numpy.arange(-N+1, N, 2)
+    angles = np.pi * steps / (2 * N)
+    # Purely imaginary exponents, collected into a complex array.
+    exponents = np.array([1j * theta for theta in angles], dtype=np.complex)
+    # Poles are the negated complex exponentials of those exponents.
+    poles = -numpy.exp(exponents)
+    # No finite zeros and unity gain.
+    zeros = numpy.array([])
+    return zeros, poles, 1
+
+def butter_bandpass(lowcut, highcut, fs, order=5):
+    """Design a digital Butterworth band-pass filter and return it in 'sos' form."""
+    half_fs = 0.5 * fs
+    # Band edges expressed as fractions of half the sampling rate
+    edges = [lowcut / half_fs, highcut / half_fs]
+    return butter(order, edges, analog=False, btype='band', output='sos')
+
+def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
+    """Filter `data` with the band-pass design returned by `butter_bandpass`."""
+    # NOTE(review): `spy.sosfilter` is resolved outside this chunk -- confirm the name exists.
+    return spy.sosfilter(butter_bandpass(lowcut, highcut, fs, order=order), data)
+
+def fft(x):
+    """Recursive even/odd split FFT; returns a list, or `x` itself when len(x) <= 1."""
+    if len(x) <= 1:
+        return x
+    n, ev, od = len(x), fft(x[0::2]), fft(x[1::2])
+    tw = [math.e**(-2j*math.pi*m/n)*od[m] for m in range(n//2)]  # twiddled odd-index terms
+    return [ev[m] + tw[m] for m in range(n//2)] + [ev[m] - tw[m] for m in range(n//2)]
+
+# Filter family name -> [analog prototype function, order selection function].
+# Both spellings map to the Butterworth helpers defined in this module.
+filter_dict = {'butter': [buttap, buttord],
+               'butterworth': [buttap, buttord]
+               }
+
+# Accepted band-type spellings -> canonical band name
+# ('bandpass', 'bandstop', 'lowpass' or 'highpass').
+band_dict = {'band': 'bandpass',
+             'bandpass': 'bandpass',
+             'pass': 'bandpass',
+             'bp': 'bandpass',
+
+             'bs': 'bandstop',
+             'bandstop': 'bandstop',
+             'bands': 'bandstop',
+             'stop': 'bandstop',
+
+             'l': 'lowpass',
+             'low': 'lowpass',
+             'lowpass': 'lowpass',
+             'lp': 'lowpass',
+
+             'high': 'highpass',
+             'highpass': 'highpass',
+             'h': 'highpass',
+             'hp': 'highpass',
+             }
+
+
+
diff --git a/tulip/shared/ulab/snippets/tests/numpy/core/fromnumeric.py b/tulip/shared/ulab/snippets/tests/numpy/core/fromnumeric.py
new file mode 100644
index 000000000..3d851622d
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/numpy/core/fromnumeric.py
@@ -0,0 +1,27 @@
+import math
+import sys
+sys.path.append('.')
+
+from snippets import numpy
+from ulab import numpy as np
+
+a = np.array([[1,2,3],[4,5,6]])  # 2x3 input for the size() checks
+print(numpy.size(a))  # no axis argument
+print(numpy.size(a,1))  # second argument 1
+print(numpy.size(a,0))  # second argument 0
+
+print(numpy.prod([1, 10, 100, 5]))  # list of ints
+print(numpy.prod([]))  # empty list
+print(numpy.prod([1.,2.]))  # list of floats
+
+# nonzero() on a 1-D array, then on comparison results (2-D and 1-D)
+a = np.array([1,2,3])
+print(numpy.nonzero(a))
+b = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+print(numpy.nonzero(b > 3))
+
+c =  np.array([0,1,0,-1])
+print(numpy.nonzero(c > 0))
+print(numpy.nonzero(c < 0))
+
+
diff --git a/tulip/shared/ulab/snippets/tests/numpy/core/multiarray.py b/tulip/shared/ulab/snippets/tests/numpy/core/multiarray.py
new file mode 100644
index 000000000..785f43277
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/numpy/core/multiarray.py
@@ -0,0 +1,11 @@
+import math
+import sys
+sys.path.append('.')
+
+from snippets import numpy
+from ulab import numpy as np
+np.set_printoptions(threshold=100)
+# Print asarray() results for an int list, a float/complex list and a longer int/complex list
+print (numpy.asarray([1]))
+print (numpy.asarray([1.0, 2.0, 3j]))
+print (numpy.asarray([4, 3, 1, (2-2j), (2+2j), (2-1j), (2+1j), (2-1j), (2+1j), (1+1j), (1-1j)]))
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/tests/numpy/core/numeric.py b/tulip/shared/ulab/snippets/tests/numpy/core/numeric.py
new file mode 100644
index 000000000..b903598e5
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/numpy/core/numeric.py
@@ -0,0 +1,14 @@
+import math
+import sys
+sys.path.append('.')
+
+from ulab import numpy as np
+from snippets import numpy
+
+x = np.array([[0, 1, 2],
+           [3, 4, 5]])
+print(numpy.zeros_like(x))  # zeros_like on an array built without an explicit dtype
+# Same check on an array created with dtype=np.complex
+y = np.array([[0, 1j, -2j],[3, 4, 5]], dtype=np.complex)
+print(numpy.zeros_like(y))
+
diff --git a/tulip/shared/ulab/snippets/tests/numpy/core/shape_base.py b/tulip/shared/ulab/snippets/tests/numpy/core/shape_base.py
new file mode 100644
index 000000000..ee3c07804
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/numpy/core/shape_base.py
@@ -0,0 +1,15 @@
+import math
+import sys
+sys.path.append('.')
+
+from ulab import numpy as np
+from snippets import numpy
+
+print(numpy.atleast_1d(1.0))  # scalar input
+
+x = np.arange(9.0).reshape((3,3))  # 3x3 array input
+
+print(numpy.atleast_1d(x))
+print(numpy.atleast_1d(x) is x)  # prints whether the very same object comes back
+
+print(numpy.atleast_1d(1, [3, 4]))  # two positional inputs: a scalar and a list
diff --git a/tulip/shared/ulab/snippets/tests/numpy/lib/block.py b/tulip/shared/ulab/snippets/tests/numpy/lib/block.py
new file mode 100644
index 000000000..71bfb9203
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/numpy/lib/block.py
@@ -0,0 +1,19 @@
+from ulab.numpy import array, zeros, eye, ones
+from snippets import numpy
+
+a = array([[1, 1]])  # 1x2
+b = array([[2]])  # 1x1
+c = array(  # 3x2
+    [[3, 3],
+     [3, 3],
+     [3, 3]])
+d = array(  # 3x1
+    [[4],
+     [4],
+     [4]])
+print(numpy.block([[a, b], [c, d]]))  # first row of blocks: a, b; second row: c, d
+a = zeros((2, 3))  # second case: 2x3 zeros,
+b = eye(2) * 2  # 2x2 identity times 2,
+c = eye(3) * 5  # 3x3 identity times 5,
+d = ones((3, 2))  # and 3x2 ones
+print(numpy.block([[a, b], [c, d]]))
diff --git a/tulip/shared/ulab/snippets/tests/numpy/lib/function_base.py b/tulip/shared/ulab/snippets/tests/numpy/lib/function_base.py
new file mode 100644
index 000000000..76e0f8f44
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/numpy/lib/function_base.py
@@ -0,0 +1,11 @@
+import math
+import sys
+sys.path.append('.')
+
+from snippets import numpy
+from ulab import numpy as np
+
+print(numpy.append([1, 2, 3], [[4, 5, 6], [7, 8, 9]]))  # no axis argument: flat list plus nested list
+
+print(numpy.append([[1, 2, 3], [4, 5, 6]], [[7, 8, 9]], axis=0))  # axis=0 with nested-list inputs
+
diff --git a/tulip/shared/ulab/snippets/tests/numpy/lib/polynomial.py b/tulip/shared/ulab/snippets/tests/numpy/lib/polynomial.py
new file mode 100644
index 000000000..7b56212c3
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/numpy/lib/polynomial.py
@@ -0,0 +1,16 @@
+import math
+import sys
+sys.path.append('.')
+
+from snippets import numpy
+from ulab import numpy as np
+
+
+print(numpy.poly((0, 0, 0)))  # tuple of three zeros
+
+print(numpy.poly((-1./2, 0, 1./2)))  # tuple symmetric about zero
+
+print(numpy.poly((0.847, 0, 0.9883)))  # tuple with non-integer values
+# Disabled case with a nested (2-D) input, kept for reference:
+#P = np.array([[0, 1./3], [-1./2, 0]])
+#print(numpy.poly(P))
diff --git a/tulip/shared/ulab/snippets/tests/numpy/lib/type_check.py b/tulip/shared/ulab/snippets/tests/numpy/lib/type_check.py
new file mode 100644
index 000000000..1d23a3cfb
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/numpy/lib/type_check.py
@@ -0,0 +1,17 @@
+import math
+import sys
+sys.path.append('.')
+
+from snippets import numpy
+from ulab import numpy as np
+
+a = np.array([1+1j, 1+0j, 4.5, 3, 2, 2j], dtype=np.complex)  # mix of complex and real-valued entries
+print(numpy.isreal(a))
+
+# Array whose entries all have a non-zero imaginary part
+a = np.array([1+2j, 2+1j], dtype=np.complex)
+print(numpy.isreal(a))
+
+# Scalar inputs: a Python int and a Python complex
+print(numpy.isreal(1))
+print(numpy.isreal(1j))
\ No newline at end of file
diff --git a/tulip/shared/ulab/snippets/tests/scipy/signal/filter_design.py b/tulip/shared/ulab/snippets/tests/scipy/signal/filter_design.py
new file mode 100644
index 000000000..8861ba188
--- /dev/null
+++ b/tulip/shared/ulab/snippets/tests/scipy/signal/filter_design.py
@@ -0,0 +1,59 @@
+import math
+import sys
+sys.path.append('.')
+
+from snippets import scipy
+from ulab import numpy as np
+
+np.set_printoptions(threshold=100)
+
+a = [4, 3, 1, 2-2j, 2+2j, 2-1j, 2+1j, 2-1j, 2+1j, 1+1j, 1-1j]
+#print('_cplxreal: ', scipy.cplxreal(a))
+f = np.array([-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0], dtype=np.float)
+t = (0.9984772174419884+0.01125340518638924j)
+w = 'real'
+#print('nearest_real_complex_idx: ', scipy.nearest_real_complex_idx(f,t,w))
+
+
+nyquistRate = 48000 * 2
+centerFrequency_Hz = 480.0
+lowerCutoffFrequency_Hz = centerFrequency_Hz/math.sqrt(2)  # lower band edge: centre / sqrt(2)
+upperCutoffFrequency_Hz = centerFrequency_Hz*math.sqrt(2)  # upper band edge: centre * sqrt(2)
+wn = np.array([ lowerCutoffFrequency_Hz, upperCutoffFrequency_Hz])/nyquistRate  # both edges divided by nyquistRate
+
+z = [] 
+p = np.array([-0.1564344650402309+0.9876883405951379j, -0.4539904997395468+0.8910065241883679j, 
+-0.7071067811865476+0.7071067811865475j, -0.8910065241883679+0.4539904997395467j, -0.9876883405951379+0.1564344650402309j, 
+-0.9876883405951379-0.1564344650402309j, -0.8910065241883679-0.4539904997395467j, -0.7071067811865476-0.7071067811865475j, 
+-0.4539904997395468-0.8910065241883679j, -0.1564344650402309-0.9876883405951379j], dtype=np.complex) 
+k = 1
+wo = 0.1886352115099219
+
+print(scipy.lp2hp_zpk(z,p,k,wo))
+
+z = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float)
+p = np.array([-0.02950904840030544-0.1863127990340476j, -0.08563859394186457-0.1680752041469931j, 
+-0.1333852372292245-0.1333852372292244j, -0.1680752041469931-0.08563859394186453j, -0.1863127990340476-0.02950904840030543j,
+ -0.1863127990340476+0.02950904840030543j, -0.1680752041469931+0.08563859394186453j, -0.1333852372292245+0.1333852372292244j,
+  -0.08563859394186457+0.1680752041469931j, -0.02950904840030544+0.1863127990340476j], dtype=np.complex)
+k = 1.0
+fs = 2.0
+
+print(scipy.bilinear_zpk(z,p,k,fs)) 
+
+z = np.array([], dtype=np.float)
+p = np.array([-0.3826834323650898+0.9238795325112868j, 
+-0.9238795325112868+0.3826834323650898j, -0.9238795325112868-0.3826834323650898j, 
+-0.3826834323650898-0.9238795325112868j], dtype=np.complex)
+k = 1
+wo = 0.03141673402115484
+bw = 0.02221601345771878
+print(scipy.lp2bs_zpk(z, p, k, wo=wo, bw=bw))
+
+print(scipy.butter(N=4, Wn=wn, btype='bandpass', analog=False, output='ba'))
+print(scipy.butter(N=4, Wn=wn, btype='bandpass', analog=False, output='zpk'))
+print(scipy.butter(N=4, Wn=wn, btype='bandpass', analog=False, output='sos'))
+print(scipy.butter(N=4, Wn=wn, btype='bandstop', analog=False, output='ba'))
+print(scipy.butter(10, 15, 'lp', fs=1000, output='sos'))
+print(scipy.butter(10, 15, 'hp', fs=1000, output='sos'))
+
diff --git a/tulip/shared/ulab/test-common.sh b/tulip/shared/ulab/test-common.sh
new file mode 100755
index 000000000..45b9e94a6
--- /dev/null
+++ b/tulip/shared/ulab/test-common.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Run the ulab tests for every dimension level from 1d up to <dims>d.
+# Usage: test-common.sh <dims> <micropython-binary>
+set -e
+dims="$1"
+micropython="$2"
+for level1 in $(printf "%dd " $(seq 1 ${dims}))
+do
+    for level2 in numpy scipy utils complex; do
+        # Remove any *.exp files left in the current directory.
+        rm -f *.exp
+        # A level need not contain every category: skip the missing one and
+        # keep going, rather than abandoning the categories listed after it.
+        if [ ! -d tests/"$level1"/"$level2" ]; then
+            continue
+        fi
+        # Create a placeholder (single blank line) .exp for any script without one.
+        for file in tests/"$level1"/"$level2"/*.py; do
+            if [ ! -f "$file"".exp"  ]; then
+                echo "" > "$file"".exp"
+            fi
+        done
+        if ! env MICROPY_MICROPYTHON="$micropython" ./run-tests -d tests/"$level1"/"$level2"; then
+            for exp in *.exp; do
+                testbase=$(basename "$exp" .exp)
+                # printf, because echo's handling of "\n" differs between shells.
+                printf '\nFAILURE %s\n' "$testbase"
+                # diff exits non-zero when the files differ; "|| true" keeps
+                # "set -e" from aborting before every failure is reported.
+                diff -u "$testbase.exp" "$testbase.out" || true
+            done
+            exit 1
+        fi
+    done
+done
+
diff --git a/tulip/shared/ulab/test-snippets.sh b/tulip/shared/ulab/test-snippets.sh
new file mode 100755
index 000000000..fcbbfc833
--- /dev/null
+++ b/tulip/shared/ulab/test-snippets.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Run the snippet tests found under snippets/tests/<package>/<category>.
+# Usage: test-snippets.sh <micropython-binary>
+set -e
+micropython="$1"
+for level1 in numpy scipy;
+do
+    for level2 in core lib signal; do
+        # Remove any *.exp files left in the current directory.
+        rm -f *.exp
+        if ! env MICROPY_MICROPYTHON="$micropython" ./run-tests -d snippets/tests/"$level1"/"$level2"; then
+            for exp in *.exp; do
+                testbase=$(basename "$exp" .exp)
+                # printf, because "echo -e" is not portable under /bin/sh.
+                printf '\nFAILURE %s\n' "$testbase"
+                # diff exits non-zero when the files differ; "|| true" keeps
+                # "set -e" from aborting before every failure is reported.
+                diff -u "$testbase.exp" "$testbase.out" || true
+            done
+            exit 1
+        fi
+    done
+done
+
diff --git a/tulip/shared/ulab/tests/1d/complex/complex_exp.py b/tulip/shared/ulab/tests/1d/complex/complex_exp.py
new file mode 100644
index 000000000..979b5b8e1
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/complex/complex_exp.py
@@ -0,0 +1,17 @@
+# this test is meaningful only when the firmware supports complex arrays
+
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(4), dtype=dtype)
+    print('\narray:\n', a)
+    print('\nexponential:\n', np.exp(a))
+
+b = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+print('\narray:\n', b)
+print('\nexponential:\n', np.exp(b))
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/1d/complex/complex_sqrt.py b/tulip/shared/ulab/tests/1d/complex/complex_sqrt.py
new file mode 100644
index 000000000..aa709aef6
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/complex/complex_sqrt.py
@@ -0,0 +1,18 @@
+# this test is meaningful only when the firmware supports complex arrays
+
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(4), dtype=dtype)
+    outtype = np.float if dtype is not np.complex else np.complex
+    print('\narray:\n', a)
+    print('\nsquare root:\n', np.sqrt(a, dtype=outtype))
+
+b = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+print('\narray:\n', b)
+print('\nsquare root:\n', np.sqrt(b, dtype=np.complex))
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/1d/complex/imag_real.py b/tulip/shared/ulab/tests/1d/complex/imag_real.py
new file mode 100644
index 000000000..e05783b6b
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/complex/imag_real.py
@@ -0,0 +1,19 @@
+# this test is meaningful only when the firmware supports complex arrays
+
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(5), dtype=dtype)
+    print('real part: ', np.real(a))
+    print('imaginary part: ', np.imag(a))
+
+
+b = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+print('real part: ', np.real(b))
+print('imaginary part: ', np.imag(b))
+
diff --git a/tulip/shared/ulab/tests/1d/numpy/00smoke.py b/tulip/shared/ulab/tests/1d/numpy/00smoke.py
new file mode 100644
index 000000000..c75627399
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/00smoke.py
@@ -0,0 +1,3 @@
+from ulab import numpy as np
+
+print(np.ones(3))
diff --git a/tulip/shared/ulab/tests/1d/numpy/argminmax.py b/tulip/shared/ulab/tests/1d/numpy/argminmax.py
new file mode 100644
index 000000000..e2aa0bc99
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/argminmax.py
@@ -0,0 +1,62 @@
+from ulab import numpy as np
+
+# Adapted from https://docs.python.org/3.8/library/itertools.html#itertools.permutations
+def permutations(iterable, r=None):  # generator: yields r-length tuples of items taken from iterable
+    # permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
+    # permutations(range(3)) --> 012 021 102 120 201 210
+    pool = tuple(iterable)
+    n = len(pool)
+    r = n if r is None else r  # default: full-length permutations
+    if r > n:
+        return  # nothing is yielded when r exceeds the pool size
+    indices = list(range(n))
+    cycles = list(range(n, n-r, -1))  # one countdown per output position: n, n-1, ..., n-r+1
+    yield tuple(pool[i] for i in indices[:r])  # first result: the pool's leading r items in order
+    while n:  # an empty pool stops after the single yield above
+        for i in reversed(range(r)):  # scan positions from the rightmost one
+            cycles[i] -= 1
+            if cycles[i] == 0:
+                indices[i:] = indices[i+1:] + indices[i:i+1]  # rotate the tail left by one
+                cycles[i] = n - i  # reset this position's countdown
+            else:
+                j = cycles[i]
+                indices[i], indices[-j] = indices[-j], indices[i]  # swap position i with the j-th index from the end
+                yield tuple(pool[i] for i in indices[:r])
+                break  # restart the scan from the rightmost position
+        else:  # for-else: no position could advance, so the generator is finished
+            return
+
+# Combinations expected to throw
+try:
+    print(np.argmin([]))
+except ValueError:
+    print("ValueError")
+
+try:
+    print(np.argmax([]))
+except ValueError:
+    print("ValueError")
+
+# Combinations expected to succeed
+print(np.argmin([1]))
+print(np.argmax([1]))
+print(np.argmin(np.array([1])))
+print(np.argmax(np.array([1])))
+
+print()
+print("max tests")
+for p in permutations((100,200,300)):
+    m1 = np.argmax(p)
+    m2 = np.argmax(np.array(p))
+    print(p, m1, m2)
+    if m1 != m2 or p[m1] != max(p):
+        print("FAIL", p, m1, m2, max(p))
+
+print()
+print("min tests")
+for p in permutations((100,200,300)):
+    m1 = np.argmin(p)
+    m2 = np.argmin(np.array(p))
+    print(p, m1, m2)
+    if m1 != m2 or p[m1] != min(p):
+        print("FAIL", p, m1, m2, min(p))
diff --git a/tulip/shared/ulab/tests/1d/numpy/compare.py b/tulip/shared/ulab/tests/1d/numpy/compare.py
new file mode 100644
index 000000000..cd9fb98e4
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/compare.py
@@ -0,0 +1,13 @@
+from ulab import numpy as np
+
+a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)
+b = np.array([5, 4, 3, 2, 1], dtype=np.float)
+print(np.minimum(a, b))
+print(np.maximum(a, b))
+print(np.maximum(1, 5.5))
+
+a = np.array(range(9), dtype=np.uint8)
+print(np.clip(a, 3, 7))
+
+b = 3 * np.ones(len(a), dtype=np.float)
+print(np.clip(a, b, 7))
diff --git a/tulip/shared/ulab/tests/1d/numpy/convolve.py b/tulip/shared/ulab/tests/1d/numpy/convolve.py
new file mode 100644
index 000000000..93aa23f70
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/convolve.py
@@ -0,0 +1,15 @@
+import math
+
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+x = np.array((1,2,3))
+y = np.array((1,10,100,1000))
+result = (np.convolve(x, y))
+ref_result = np.array([1, 12, 123, 1230, 2300, 3000],dtype=np.float)
+cmp_result = []
+for p,q in zip(list(result), list(ref_result)):
+    cmp_result.append(math.isclose(p, q, rel_tol=1e-06, abs_tol=1e-06))
+print(cmp_result)
diff --git a/tulip/shared/ulab/tests/1d/numpy/fft.py b/tulip/shared/ulab/tests/1d/numpy/fft.py
new file mode 100644
index 000000000..6b79f74c8
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/fft.py
@@ -0,0 +1,45 @@
+import math
+try:
+    from ulab import numpy as np
+    use_ulab = True
+except ImportError:
+    import numpy as np
+    use_ulab = False
+
+x = np.linspace(-np.pi, np.pi, num=8)
+y = np.sin(x)
+
+if use_ulab:
+    if 'real' in dir(np):  # np.real is available: fft/ifft take and return a single array here
+        a = np.fft.fft(y)
+        c = np.real(np.fft.ifft(a))
+    else:  # otherwise fft/ifft results are unpacked as a pair of arrays
+        a, b = np.fft.fft(y)
+        c, d = np.fft.ifft(a, b)
+    # c should be equal to y
+    cmp_result = []
+    for p,q in zip(list(y), list(c)):
+        cmp_result.append(math.isclose(p, q, rel_tol=1e-09, abs_tol=1e-09))
+    print(cmp_result)
+
+    z = np.zeros(len(x))  # only passed to fft() in the else branch below
+    if 'real' in dir(np):  # NOTE(review): this branch repeats the first check verbatim; z is unused here
+        a = np.fft.fft(y)
+        c = np.real(np.fft.ifft(a))
+    else:
+        a, b = np.fft.fft(y, z)
+        c, d = np.fft.ifft(a, b)
+    # c should be equal to y
+    cmp_result = []
+    for p,q in zip(list(y), list(c)):
+        cmp_result.append(math.isclose(p, q, rel_tol=1e-09, abs_tol=1e-09))
+    print(cmp_result)
+
+else:  # ulab import failed: same round trip with the fallback numpy, comparing c.real
+    a = np.fft.fft(y)
+    c = np.fft.ifft(a)
+    # c should be equal to y
+    cmp_result = []
+    for p,q in zip(list(y), list(c.real)):
+        cmp_result.append(math.isclose(p, q, rel_tol=1e-09, abs_tol=1e-09))
+    print(cmp_result) 
diff --git a/tulip/shared/ulab/tests/1d/numpy/gc.py b/tulip/shared/ulab/tests/1d/numpy/gc.py
new file mode 100644
index 000000000..4dbf079cf
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/gc.py
@@ -0,0 +1,11 @@
+from ulab import numpy as np
+import gc
+
+data = np.ones(1000)[6:-6]
+print(sum(data))
+print(data)
+
+gc.collect()
+
+print(sum(data))
+print(data)
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/1d/numpy/interp.py b/tulip/shared/ulab/tests/1d/numpy/interp.py
new file mode 100644
index 000000000..09d3dc3c6
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/interp.py
@@ -0,0 +1,12 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+x = np.array([1, 2, 3, 4, 5])
+xp = np.array([1, 2, 3, 4])
+fp = np.array([1, 2, 3, 4])    
+print(np.interp(x, xp, fp))
+print(np.interp(x, xp, fp, left=0.0))
+print(np.interp(x, xp, fp, right=10.0))
+print(np.interp(x, xp, fp, left=0.0, right=10.0))
diff --git a/tulip/shared/ulab/tests/1d/numpy/optimize.py b/tulip/shared/ulab/tests/1d/numpy/optimize.py
new file mode 100644
index 000000000..fce86724c
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/optimize.py
@@ -0,0 +1,28 @@
+import math
+
+try:
+    from ulab import scipy as spy
+except ImportError:
+    import scipy as spy  # fall back to a top-level scipy module when ulab is not importable
+
+def f(x):
+    return x**2 - 2.0
+
+ref_result = 1.4142135623715149
+result = (spy.optimize.bisect(f, 1.0, 3.0))
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+ref_result = -7.105427357601002e-15
+result = spy.optimize.fmin(f, 3.0, fatol=1e-15)
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+ref_result = -7.105427357601002e-15
+result = spy.optimize.fmin(f, 3.0, xatol=1e-8, fatol=1e-15, maxiter=500)
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+ref_result = 1.41421826342255
+result = (spy.optimize.newton(f, 3.0, tol=0.001, rtol=0.01))
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (spy.optimize.newton(f, 3.0, tol=0.001, rtol=0.01, maxiter=100))
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
diff --git a/tulip/shared/ulab/tests/1d/numpy/poly.py b/tulip/shared/ulab/tests/1d/numpy/poly.py
new file mode 100644
index 000000000..02ce7f5be
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/poly.py
@@ -0,0 +1,51 @@
+import math
+
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+    
+p = [1, 1, 1, 0]
+x = [0, 1, 2, 3, 4]
+result = np.polyval(p, x)
+ref_result = np.array([0, 3, 14, 39, 84])
+for i in range(len(x)):
+    print(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+
+a = np.array(x)
+result = np.polyval(p, a)
+ref_result = np.array([0, 3, 14, 39, 84])
+for i in range(len(x)):
+    print(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+
+# linear fit
+x = np.linspace(-10, 10, 20)
+y = 1.5*x + 3
+result = np.polyfit(x, y, 1)
+ref_result = np.array([ 1.5, 3.0])
+for i in range(2):
+    print(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+
+# 2nd degree fit
+x = np.linspace(-10, 10, 20)
+y = x*x*2.5 - x*0.5 + 1.2
+result = np.polyfit(x, y, 2)
+ref_result = np.array([2.5,  -0.5, 1.2])
+for i in range(3):
+    print(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+
+# 3rd degree fit
+x = np.linspace(-10, 10, 20)
+y = x*x*x*1.255 + x*x*1.0 - x*0.75 + 0.0
+result = np.polyfit(x, y, 3)
+ref_result = np.array([1.255, 1.0, -0.75, 0.0])
+for i in range(4):
+    print(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+
+# 4th degree fit
+x = np.linspace(-10, 10, 20)
+y = x*x*x*x + x*x*x*1.255 + x*x*1.0 - x*0.75 + 0.0
+result = np.polyfit(x, y, 4)
+ref_result = np.array([1.0, 1.255, 1.0, -0.75, 0.0])
+for i in range(5):
+    print(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))    
diff --git a/tulip/shared/ulab/tests/1d/numpy/slicing.py b/tulip/shared/ulab/tests/1d/numpy/slicing.py
new file mode 100644
index 000000000..466c3b2c7
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/slicing.py
@@ -0,0 +1,23 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+for num in range(1,4):
+    for start in range(-num, num+1):
+        for end in range(-num, num+1):
+            for stride in (-3, -2, -1, 1, 2, 3):
+                l = list(range(num))
+                a = np.array(l, dtype=np.int8)
+                sl = l[start:end:stride]
+                ll = len(sl)
+                try:
+                    sa = list(a[start:end:stride])
+                except IndexError as e:
+                    sa = str(e)
+                print("%2d [% d:% d:% d]     %-24r %-24r%s" % (
+                    num, start, end, stride, sl, sa, " ***" if sa != sl else ""))
+
+                a[start:end:stride] = np.ones(len(sl)) * -1
+                print("%2d [% d:% d:% d]     %r" % (
+                    num, start, end, stride, list(a)))
diff --git a/tulip/shared/ulab/tests/1d/numpy/slicing2.py b/tulip/shared/ulab/tests/1d/numpy/slicing2.py
new file mode 100644
index 000000000..05b2d792d
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/slicing2.py
@@ -0,0 +1,8 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+a = np.array(range(9), dtype=np.float)
+print("a:\t", list(a))
+print("a < 5:\t", list(a[a < 5]))
diff --git a/tulip/shared/ulab/tests/1d/numpy/sum.py b/tulip/shared/ulab/tests/1d/numpy/sum.py
new file mode 100644
index 000000000..a0293136a
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/sum.py
@@ -0,0 +1,21 @@
+from ulab import numpy as np
+
+r = range(15)
+
+a = np.array(r, dtype=np.uint8)
+print(np.sum(a))
+
+a = np.array(r, dtype=np.int8)
+print(np.sum(a))
+
+a = np.array(r, dtype=np.uint16)
+print(np.sum(a))
+
+a = np.array(r, dtype=np.int16)
+print(np.sum(a))
+
+a = np.array(r, dtype=np.float)
+print(np.sum(a))
+
+a = np.array([False] + [True]*15, dtype=np.bool)
+print(np.sum(a))
diff --git a/tulip/shared/ulab/tests/1d/numpy/trapz.py b/tulip/shared/ulab/tests/1d/numpy/trapz.py
new file mode 100644
index 000000000..7060c12e1
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/trapz.py
@@ -0,0 +1,9 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+x = np.linspace(0, 9, num=10)
+y = x*x
+print(np.trapz(y))
+print(np.trapz(y, x=x))
diff --git a/tulip/shared/ulab/tests/1d/numpy/universal_functions.py b/tulip/shared/ulab/tests/1d/numpy/universal_functions.py
new file mode 100644
index 000000000..c41cf9481
--- /dev/null
+++ b/tulip/shared/ulab/tests/1d/numpy/universal_functions.py
@@ -0,0 +1,148 @@
+import math
+
+try:
+    from ulab import numpy as np
+    from ulab import scipy as spy
+except ImportError:
+    import numpy as np
+    import scipy as spy
+    
+result = (np.sin(np.pi/2))
+ref_result = 1.0
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.cos(np.pi/2))
+ref_result = 0.0
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.tan(np.pi/2))
+ref_result = 1.633123935319537e+16
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.sinh(np.pi/2))
+ref_result = 2.3012989023072947
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.cosh(np.pi/2))
+ref_result = 2.5091784786580567
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.tanh(np.pi/2))
+ref_result = 0.9171523356672744
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+ref_result = np.pi/2
+result = (np.asin(np.sin(np.pi/2)))
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.acos(np.cos(np.pi/2)))
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.atan(np.tan(np.pi/2)))
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.cosh(np.acosh(np.pi/2)))
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+result = (np.sinh(np.asinh(np.pi/2)))
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+    
+print(np.degrees(np.pi))
+print(np.radians(np.degrees(np.pi)))
+print(np.floor(np.pi))
+print(np.ceil(np.pi))
+print(np.sqrt(np.pi))
+print(np.exp(1))
+print(np.log(np.exp(1)))
+
+print(np.log2(2**1))
+
+print(np.log10(10**1))
+print(math.isclose(np.exp(1) - np.expm1(1), 1))
+
+x = np.array([-1, +1, +1, -1])
+y = np.array([-1, -1, +1, +1])
+result = (np.arctan2(y, x) * 180 / np.pi)
+ref_result = np.array([-135.0, -45.0, 45.0, 135.0], dtype=np.float)
+cmp_result = []
+for i in range(len(x)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+x = np.linspace(-2*np.pi, 2*np.pi, 5)
+result = np.sin(x)
+ref_result = np.array([2.4492936e-16, -1.2246468e-16,  0.0000000e+00,  1.2246468e-16, -2.4492936e-16], dtype=np.float)
+cmp_result = []
+for i in range(len(x)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+result = np.cos(x)
+ref_result = np.array([1., -1.,  1., -1.,  1.], dtype=np.float)
+cmp_result = []
+for i in range(len(x)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+result = np.tan(x)
+ref_result = np.array([2.4492936e-16, 1.2246468e-16, 0.0000000e+00, -1.2246468e-16, -2.4492936e-16], dtype=np.float)
+cmp_result = []
+for i in range(len(x)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+result = np.sinh(x)
+ref_result = np.array([-267.74489404, -11.54873936, 0., 11.54873936, 267.74489404], dtype=np.float)
+cmp_result = []
+for i in range(len(x)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)    
+
+result  = np.cosh(x)
+ref_result = np.array([267.74676148,  11.59195328, 1.0, 11.59195328, 267.74676148], dtype=np.float)
+cmp_result = []
+for i in range(len(x)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+result = np.tanh(x)
+ref_result = np.array([-0.9999930253396107, -0.99627207622075, 0.0, 0.99627207622075, 0.9999930253396107], dtype=np.float)
+cmp_result = []
+for i in range(len(x)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+result = np.sinc(x)
+ref_result = np.array([0.03935584386392389, -0.04359862862918773, 1.0, -0.04359862862918773, 0.03935584386392389])
+cmp_result = []
+for i in range(len(x)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+result = (spy.special.erf(np.linspace(-3, 3, num=5)))
+ref_result = np.array([-0.9999779095030014, -0.9661051464753108, 0.0, 0.9661051464753108, 0.9999779095030014], dtype=np.float)
+cmp_result = []
+for i in range(len(ref_result)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+result = (spy.special.erfc(np.linspace(-3, 3, num=5)))
+ref_result = np.array([1.99997791e+00, 1.96610515e+00, 1.00000000e+00, 3.38948535e-02, 2.20904970e-05], dtype=np.float)
+cmp_result = []
+for i in range(len(ref_result)):
+    cmp_result.append(math.isclose(result[i], ref_result[i], rel_tol=1E-6, abs_tol=1E-6))
+print(cmp_result)
+
+result = (spy.special.gamma(np.array([0, 0.5, 1, 5])))
+ref_result = np.array([1.77245385,  1.0, 24.0])  # expected values for inputs 0.5, 1 and 5; input 0 is checked with isinf
+cmp_result = []
+cmp_result.append(math.isinf(result[0]))
+for i in range(len(ref_result)):
+    cmp_result.append(math.isclose(result[i+1], ref_result[i], rel_tol=1E-9, abs_tol=1E-9))
+print(cmp_result)
+
+result = (spy.special.gammaln([0, -1, -2, -3, -4]))
+cmp_result = []
+for i in range(len(ref_result)):  # NOTE(review): ref_result is the 3-item array from the gamma check, so only 3 of the 5 values are tested; widening this changes the printed output
+    cmp_result.append(math.isinf(result[i]))
+print(cmp_result)
diff --git a/tulip/shared/ulab/tests/2d/complex/binary_op.py b/tulip/shared/ulab/tests/2d/complex/binary_op.py
new file mode 100644
index 000000000..36efa76f7
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/complex/binary_op.py
@@ -0,0 +1,26 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+n = 5
+a = np.array(range(n), dtype=np.complex)
+c = np.array(range(n), dtype=np.complex)
+
+print(a == c)
+print(a != c)
+print()
+
+c = np.array(range(n), dtype=np.complex) * 1j
+print(a == c)
+print(a != c)
+print()
+
+for dtype in dtypes:
+    b = np.array(range(n), dtype=dtype)
+    print(b == a)
+    print(b != a)
+    print()
+
diff --git a/tulip/shared/ulab/tests/2d/complex/complex_exp.py b/tulip/shared/ulab/tests/2d/complex/complex_exp.py
new file mode 100644
index 000000000..90b3adf72
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/complex/complex_exp.py
@@ -0,0 +1,24 @@
+# this test is meaningful only when the firmware supports complex arrays
+
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(4), dtype=dtype)
+    b = a.reshape((2, 2))
+    print('\narray:\n', a)
+    print('\nexponential:\n', np.exp(a))
+    print('\narray:\n', b)
+    print('\nexponential:\n', np.exp(b))
+
+b = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+print('\narray:\n', b)
+print('\nexponential:\n', np.exp(b))
+
+b = np.array([[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]], dtype=np.complex)
+print('\narray:\n', b)
+print('\nexponential:\n', np.exp(b))
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/2d/complex/complex_sqrt.py b/tulip/shared/ulab/tests/2d/complex/complex_sqrt.py
new file mode 100644
index 000000000..5baebb5f3
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/complex/complex_sqrt.py
@@ -0,0 +1,25 @@
+# this test is meaningful only when the firmware supports complex arrays
+
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(4), dtype=dtype)
+    b = a.reshape((2, 2))
+    outtype = np.float if dtype is not np.complex else np.complex
+    print('\narray:\n', a)
+    print('\nsquare root:\n', np.sqrt(a, dtype=outtype))
+    print('\narray:\n', b)
+    print('\nsquare root:\n', np.sqrt(b, dtype=outtype))
+
+b = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+print('\narray:\n', b)
+print('\nsquare root:\n', np.sqrt(b, dtype=np.complex))
+
+b = np.array([[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]], dtype=np.complex)
+print('\narray:\n', b)
+print('\nsquare root:\n', np.sqrt(b, dtype=np.complex))
diff --git a/tulip/shared/ulab/tests/2d/complex/conjugate.py b/tulip/shared/ulab/tests/2d/complex/conjugate.py
new file mode 100644
index 000000000..eafaf5747
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/complex/conjugate.py
@@ -0,0 +1,12 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    print(np.conjugate(np.array(range(5), dtype=dtype)))
+
+a = np.array([1, 2+2j, 3-3j, 4j], dtype=np.complex)
+print(np.conjugate(a))
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/2d/complex/imag_real.py b/tulip/shared/ulab/tests/2d/complex/imag_real.py
new file mode 100644
index 000000000..536d7297c
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/complex/imag_real.py
@@ -0,0 +1,28 @@
+# this test is meaningful only when the firmware supports complex arrays
+
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(4), dtype=dtype)
+    b = a.reshape((2, 2))
+    print('\narray:\n', a)
+    print('\nreal part:\n', np.real(a))
+    print('\nimaginary part:\n', np.imag(a))
+    print('\narray:\n', b)
+    print('\nreal part:\n', np.real(b))
+    print('\nimaginary part:\n', np.imag(b), '\n')
+
+
+b = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+print('\nreal part:\n', np.real(b))
+print('\nimaginary part:\n', np.imag(b))
+
+b = np.array([[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]], dtype=np.complex)
+print('\nreal part:\n', np.real(b))
+print('\nimaginary part:\n', np.imag(b))
+
diff --git a/tulip/shared/ulab/tests/2d/complex/sort_complex.py b/tulip/shared/ulab/tests/2d/complex/sort_complex.py
new file mode 100644
index 000000000..a4154730e
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/complex/sort_complex.py
@@ -0,0 +1,28 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    print(np.sort_complex(np.array(range(5, 0, -1), dtype=dtype)))
+    # these should all return an empty complex array
+    print(np.sort_complex(np.array(range(5, 0, 1), dtype=dtype)))
+
+print()
+n = 6
+a = np.array(range(n, 0, -1)) * 1j
+b = np.array([1] * n)
+print(np.sort_complex(a + b))
+
+a = np.array(range(n)) * 1j
+b = np.array([1] * n)
+print(np.sort_complex(a + b))
+
+print()
+a = np.array([0, -3j, 1+2j, 1-2j, 2j], dtype=np.complex)
+print(np.sort_complex(a))
+
+a = np.array([0, 3j, 1-2j, 1+2j, -2j], dtype=np.complex)
+print(np.sort_complex(a))
diff --git a/tulip/shared/ulab/tests/2d/numpy/00smoke.py b/tulip/shared/ulab/tests/2d/numpy/00smoke.py
new file mode 100644
index 000000000..bc7dcf8fd
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/00smoke.py
@@ -0,0 +1,3 @@
+from ulab import numpy as np
+
+print(np.eye(3))
diff --git a/tulip/shared/ulab/tests/2d/numpy/and.py b/tulip/shared/ulab/tests/2d/numpy/and.py
new file mode 100644
index 000000000..0c881a192
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/and.py
@@ -0,0 +1,21 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+
+for dtype_a in dtypes:
+    a = np.array(range(5), dtype=dtype_a)
+    for dtype_b in dtypes:
+        b = np.array(range(250, 255), dtype=dtype_b)
+        try:
+            print('a & b: ', a & b)
+        except Exception as e:
+            print(e)
+
+    b = np.array([False, True, False, True, False], dtype=np.bool)
+    try:
+        print('a & b (bool): ', a & b)
+    except Exception as e:
+        print(e)
diff --git a/tulip/shared/ulab/tests/2d/numpy/any_all.py b/tulip/shared/ulab/tests/2d/numpy/any_all.py
new file mode 100644
index 000000000..08788bc54
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/any_all.py
@@ -0,0 +1,11 @@
+from ulab import numpy as np
+
+a = np.array(range(12)).reshape((3, 4))
+
+print(np.all(a))
+print(np.all(a, axis=0))
+print(np.all(a, axis=1))
+
+print(np.any(a))
+print(np.any(a, axis=0))
+print(np.any(a, axis=1))
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/2d/numpy/arange.py b/tulip/shared/ulab/tests/2d/numpy/arange.py
new file mode 100644
index 000000000..79cd0b80f
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/arange.py
@@ -0,0 +1,25 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    print(np.arange(10, dtype=dtype))
+    print(np.arange(2, 10, dtype=dtype))
+    print(np.arange(2, 10, 3, dtype=dtype))
+    # test empty range
+    print(np.arange(0, 0, dtype=dtype))
+
+# test for ZeroDivisionError exception
+try:
+    np.arange(0, 10, 0)
+except ZeroDivisionError as e:
+    print('ZeroDivisionError: ', e)
+
+# test for NAN length exception
+try:
+    np.arange(0, np.nan)
+except ValueError as e:
+    print('ValueError: ', e)
diff --git a/tulip/shared/ulab/tests/2d/numpy/asarray.py b/tulip/shared/ulab/tests/2d/numpy/asarray.py
new file mode 100644
index 000000000..1166a05e3
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/asarray.py
@@ -0,0 +1,15 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    a = np.ones((2, 2), dtype=dtype)
+    print()
+    for _dtype in dtypes:
+        b = np.asarray(a, dtype=_dtype)
+        print('a: ', a)
+        print('b: ', b)
+        print('a is b: {}\n'.format(a is b))
diff --git a/tulip/shared/ulab/tests/2d/numpy/bitwise_and.py b/tulip/shared/ulab/tests/2d/numpy/bitwise_and.py
new file mode 100644
index 000000000..77b784d81
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/bitwise_and.py
@@ -0,0 +1,16 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+# AND two fixed operand vectors under every pairing of the integer dtypes
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+test_values1 = (0, 1, 0, 1, 2, 5)
+test_values2 = (0, 1, 1, 0, 2, 7)
+
+
+for dtype1 in dtypes:
+    x1 = np.array(test_values1, dtype=dtype1)
+    for dtype2 in dtypes:
+        x2 = np.array(test_values2, dtype=dtype2)
+        print(np.bitwise_and(x1, x2))
diff --git a/tulip/shared/ulab/tests/2d/numpy/bitwise_or.py b/tulip/shared/ulab/tests/2d/numpy/bitwise_or.py
new file mode 100644
index 000000000..6c13fccf8
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/bitwise_or.py
@@ -0,0 +1,16 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+# OR two fixed operand vectors under every pairing of the integer dtypes
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+test_values1 = (0, 1, 0, 1, 2, 5)
+test_values2 = (0, 1, 1, 0, 2, 7)
+
+
+for dtype1 in dtypes:
+    x1 = np.array(test_values1, dtype=dtype1)
+    for dtype2 in dtypes:
+        x2 = np.array(test_values2, dtype=dtype2)
+        print(np.bitwise_or(x1, x2))
diff --git a/tulip/shared/ulab/tests/2d/numpy/bitwise_xor.py b/tulip/shared/ulab/tests/2d/numpy/bitwise_xor.py
new file mode 100644
index 000000000..5e7af037c
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/bitwise_xor.py
@@ -0,0 +1,14 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+# XOR 0..4 with 5..1 under every pairing of the integer dtypes
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+
+for dtype1 in dtypes:
+    x1 = np.array(range(5), dtype=dtype1)
+    for dtype2 in dtypes:
+        x2 = np.array(range(5, 0, -1), dtype=dtype2)
+
+        print(np.bitwise_xor(x1, x2))
diff --git a/tulip/shared/ulab/tests/2d/numpy/buffer.py b/tulip/shared/ulab/tests/2d/numpy/buffer.py
new file mode 100644
index 000000000..5cce5b9c7
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/buffer.py
@@ -0,0 +1,17 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+def print_as_buffer(a):
+    print(len(memoryview(a)), list(memoryview(a)))  # length and contents of the array seen through memoryview
+
+print_as_buffer(np.ones(3))
+print_as_buffer(np.zeros(3))
+print_as_buffer(np.eye(4))
+print_as_buffer(np.ones(1, dtype=np.int8))
+print_as_buffer(np.ones(2, dtype=np.uint8))
+print_as_buffer(np.ones(3, dtype=np.int16))
+print_as_buffer(np.ones(4, dtype=np.uint16))
+print_as_buffer(np.ones(5, dtype=np.float))
+print_as_buffer(np.linspace(0, 1, 9))
diff --git a/tulip/shared/ulab/tests/2d/numpy/cholesky.py b/tulip/shared/ulab/tests/2d/numpy/cholesky.py
new file mode 100644
index 000000000..beab3c1d5
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/cholesky.py
@@ -0,0 +1,14 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+# Cholesky-factorise symmetric 2x2, 3x3 and 4x4 matrices and print each factor
+a = np.array([[1, 2], [2, 5]])
+print(np.linalg.cholesky(a))
+
+b = np.array([[25, 15, -5], [15, 18,  0], [-5,  0, 11]])
+print(np.linalg.cholesky(b))
+
+c = np.array([[18, 22,  54,  42], [22, 70,  86,  62], [54, 86, 174, 134], [42, 62, 134, 106]])
+print(np.linalg.cholesky(c))
diff --git a/tulip/shared/ulab/tests/2d/numpy/concatenate.py b/tulip/shared/ulab/tests/2d/numpy/concatenate.py
new file mode 100644
index 000000000..1a7a440a5
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/concatenate.py
@@ -0,0 +1,30 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+# test input types: none of these holds ndarrays, so each should raise a ValueError
+objects = [([1, 2], [3, 4]),
+           ((1, 2), (3, 4)),
+           (1, 2, 3)]
+
+for obj in objects:
+    try:
+        np.concatenate(obj)
+    except ValueError as e:
+        print('ValueError: {}; failed with object {}\n'.format(e, obj))
+
+
+a = np.array([1,2,3], dtype=np.float)
+b = np.array([4,5,6], dtype=np.float)
+
+print(np.concatenate((a,b)))          # one-dimensional inputs, default axis
+print(np.concatenate((a,b), axis=0))
+
+a = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype=np.float)
+b = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype=np.float)
+
+print(np.concatenate((a,b), axis=0))  # two-dimensional inputs, both axes, both operand orders
+print(np.concatenate((a,b), axis=1))
+print(np.concatenate((b,a), axis=0))
+print(np.concatenate((b,a), axis=1))
diff --git a/tulip/shared/ulab/tests/2d/numpy/delete.py b/tulip/shared/ulab/tests/2d/numpy/delete.py
new file mode 100644
index 000000000..fdb96a876
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/delete.py
@@ -0,0 +1,28 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+np.set_printoptions(threshold=200)
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:  # indices given as a list (including an empty one)
+    a = np.array(range(25), dtype=dtype).reshape((5,5))
+    print(np.delete(a, [1, 2], axis=0))
+    print(np.delete(a, [1, 2], axis=1))
+    print(np.delete(a, [], axis=1))
+    print(np.delete(a, [1, 5, 10]))
+    print(np.delete(a, []))
+
+for dtype in dtypes:  # a single non-negative index
+    a = np.array(range(25), dtype=dtype).reshape((5,5))
+    print(np.delete(a, 2, axis=0))
+    print(np.delete(a, 2, axis=1))
+    print(np.delete(a, 2))
+
+for dtype in dtypes:  # a single negative index
+    a = np.array(range(25), dtype=dtype).reshape((5,5))
+    print(np.delete(a, -3, axis=0))
+    print(np.delete(a, -3, axis=1))
+    print(np.delete(a, -3))
diff --git a/tulip/shared/ulab/tests/2d/numpy/diag.py b/tulip/shared/ulab/tests/2d/numpy/diag.py
new file mode 100644
index 000000000..5eed16339
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/diag.py
@@ -0,0 +1,29 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+a = np.arange(25).reshape((5,5))
+
+print(np.diag(a))         # 2-D input, default offset
+print(np.diag(a, k=2))
+print(np.diag(a, k=-2))
+print(np.diag(a, k=10))   # offsets larger than the 5x5 matrix itself
+print(np.diag(a, k=-10))
+
+a = np.arange(4)
+
+print(np.diag(a))         # 1-D input
+print(np.diag(a, k=2))
+print(np.diag(a, k=-2))
+
+# repeat the default-offset call for every dtype, with 1-D and then 2-D input
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    a = np.array(range(4), dtype=dtype)
+    print(np.diag(a))
+
+for dtype in dtypes:
+    a = np.array(range(16), dtype=dtype).reshape((4,4))
+    print(np.diag(a))
diff --git a/tulip/shared/ulab/tests/2d/numpy/eye.py b/tulip/shared/ulab/tests/2d/numpy/eye.py
new file mode 100644
index 000000000..630eed4e0
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/eye.py
@@ -0,0 +1,30 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+print(np.ones(3))
+print(np.ones((3,3)))
+
+print(np.eye(3))
+print(np.eye(3, M=4))        # N=3, M=4: sweep the offset k from -3 to 3
+print(np.eye(3, M=4, k=0))
+print(np.eye(3, M=4, k=-1))
+print(np.eye(3, M=4, k=-2))
+print(np.eye(3, M=4, k=-3))
+print(np.eye(3, M=4, k=1))
+print(np.eye(3, M=4, k=2))
+print(np.eye(3, M=4, k=3))
+print(np.eye(4, M=4))
+print(np.eye(4, M=3, k=0))   # N=4, M=3: the same sweep with the shape transposed
+print(np.eye(4, M=3, k=-1))
+print(np.eye(4, M=3, k=-2))
+print(np.eye(4, M=3, k=-3))
+print(np.eye(4, M=3, k=1))
+print(np.eye(4, M=3, k=2))
+print(np.eye(4, M=3, k=3))
+
+for dtype in dtypes:
+    print(np.eye(3, dtype=dtype))
diff --git a/tulip/shared/ulab/tests/2d/numpy/full.py b/tulip/shared/ulab/tests/2d/numpy/full.py
new file mode 100644
index 000000000..474f5185e
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/full.py
@@ -0,0 +1,9 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    print(np.full((2, 4), 3, dtype=dtype))  # 2x4 array filled with 3 in each dtype
diff --git a/tulip/shared/ulab/tests/2d/numpy/initialisation.py b/tulip/shared/ulab/tests/2d/numpy/initialisation.py
new file mode 100644
index 000000000..6136d5139
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/initialisation.py
@@ -0,0 +1,10 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype1 in dtypes:      # dtype of the inner (source) array
+    for dtype2 in dtypes:  # dtype requested for the array built from it
+        print(np.array(np.array(range(5), dtype=dtype1), dtype=dtype2))
diff --git a/tulip/shared/ulab/tests/2d/numpy/isinf.py b/tulip/shared/ulab/tests/2d/numpy/isinf.py
new file mode 100644
index 000000000..7beff9d6f
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/isinf.py
@@ -0,0 +1,24 @@
+# Print np.isinf for a scalar and for arrays holding nan, inf, integers and an overflowing literal
+from ulab import numpy as np
+
+print('isinf(0): ', np.isinf(0))
+
+a = np.array([1, 2, np.nan])
+print('\n' + '='*20)
+print('a:\n', a)
+print('\nisinf(a):\n', np.isinf(a))
+
+b = np.array([1, 2, np.inf])
+print('\n' + '='*20)
+print('b:\n', b)
+print('\nisinf(b):\n', np.isinf(b))
+
+c = np.array([1, 2, 3], dtype=np.uint16)
+print('\n' + '='*20)
+print('c:\n', c)
+print('\nisinf(c):\n', np.isinf(c))
+
+d = np.eye(5) * 1e999  # 1e999 is outside double range; presumably parsed as inf -- confirm on the target port
+print('\n' + '='*20)
+print('d:\n', d)
+print('\nisinf(d):\n', np.isinf(d))
diff --git a/tulip/shared/ulab/tests/2d/numpy/left_shift.py b/tulip/shared/ulab/tests/2d/numpy/left_shift.py
new file mode 100644
index 000000000..5deb7701f
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/left_shift.py
@@ -0,0 +1,21 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+np.set_printoptions(threshold=100)
+
+# shift a fixed 12-element vector left by 0, 1 and 2 bits under every pairing of the integer dtypes
+shift_values = (
+    (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
+    (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
+)
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+
+
+for shift_value in shift_values:
+    for dtype1 in dtypes:
+        x1 = np.array([0, 1, 2, 4, 8, 16, 32, 3, 5, 7, 11, 13], dtype=dtype1)
+        for dtype2 in dtypes:
+            x2 = np.array(shift_value, dtype=dtype2)
+            print(np.left_shift(x1, x2))
diff --git a/tulip/shared/ulab/tests/2d/numpy/linalg.py b/tulip/shared/ulab/tests/2d/numpy/linalg.py
new file mode 100644
index 000000000..ead6f1fee
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/linalg.py
@@ -0,0 +1,95 @@
+import math
+
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+def matrix_is_close(A, B, n):
+    # print, entry by entry, whether the leading n x n elements of A and B agree to within 1E-9
+    for row in range(n):
+        for col in range(n):
+            print(math.isclose(A[row][col], B[row][col], rel_tol=1E-9, abs_tol=1E-9))
+
+a = np.array([1,2,3], dtype=np.int16)
+b = np.array([4,5,6], dtype=np.int16)
+ab = np.dot(a.transpose(), b)
+print(math.isclose(ab, 32.0, rel_tol=1E-9, abs_tol=1E-9))
+
+a = np.array([1,2,3], dtype=np.int16)
+b = np.array([4,5,6], dtype=np.float)
+ab = np.dot(a.transpose(), b)
+print(math.isclose(ab, 32.0, rel_tol=1E-9, abs_tol=1E-9))
+
+a = np.array([[1, 2], [3, 4]])
+b = np.array([[5, 6], [7, 8]])
+
+c = np.array([[19, 22], [43, 50]])
+matrix_is_close(np.dot(a, b), c, 2)
+
+c = np.array([[26, 30], [38, 44]])
+matrix_is_close(np.dot(a.transpose(), b), c, 2)
+
+c = np.array([[17, 23], [39, 53]])
+matrix_is_close(np.dot(a, b.transpose()), c, 2)
+
+c = np.array([[23, 31], [34, 46]])
+matrix_is_close(np.dot(a.transpose(), b.transpose()), c, 2)
+
+a = np.array([[1., 2.], [3., 4.]])
+b = np.linalg.inv(a)
+ab = np.dot(a, b)
+c = np.eye(2)
+matrix_is_close(ab, c, 2)
+
+a = np.array([[1, 2, 3, 4], [4, 5, 6, 4], [7, 8.6, 9, 4], [3, 4, 5, 6]])
+b = np.linalg.inv(a)
+ab = np.dot(a, b)
+c = np.eye(4)
+matrix_is_close(ab, c, 4)
+
+a = np.array([[1, 2, 3, 4], [4, 5, 6, 4], [7, 8.6, 9, 4], [3, 4, 5, 6]])
+result = (np.linalg.det(a))
+ref_result = 7.199999999999995
+print(math.isclose(result, ref_result, rel_tol=1E-9, abs_tol=1E-9))
+
+a = np.array([1, 2, 3])
+w, v = np.linalg.eig(np.diag(a))
+for i in range(3):
+    print(math.isclose(w[i], a[i], rel_tol=1E-9, abs_tol=1E-9))
+for i in range(3):
+    for j in range(3):
+        if i == j:
+            print(math.isclose(v[i][j], 1.0, rel_tol=1E-9, abs_tol=1E-9))
+        else:
+            print(math.isclose(v[i][j], 0.0, rel_tol=1E-9, abs_tol=1E-9))
+
+
+a = np.array([[25, 15, -5], [15, 18,  0], [-5,  0, 11]])
+result = (np.linalg.cholesky(a))
+ref_result = np.array([[5.,  0.,  0.], [ 3.,  3.,  0.], [-1.,  1.,  3.]])
+for i in range(3):
+    for j in range(3):
+        print(math.isclose(result[i][j], ref_result[i][j], rel_tol=1E-9, abs_tol=1E-9))
+
+a = np.array([1,2,3,4,5], dtype=np.float)
+result = (np.linalg.norm(a))
+ref_result = 7.416198487095663
+print(math.isclose(result, ref_result, rel_tol=1E-9, abs_tol=1E-9))
+
+a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+result = (np.linalg.norm(a))  ## Here is a problem -- NOTE(review): cause not stated; the check below uses 1E-6 instead of 1E-9, presumably for this reason
+ref_result = 16.881943016134134
+print(math.isclose(result, ref_result, rel_tol=1E-6, abs_tol=1E-6))
+
+a = np.array([[0, 1, 2], [3, 4, 5], [5, 4, 8], [4, 4, 8]], dtype=np.int16)
+result = np.linalg.norm(a, axis=0)  # one norm per column; the reference carries 7 digits, hence the 1E-6 tolerance
+ref_result = np.array([7.071068, 7.0, 12.52996])
+for col in range(3):
+    print(math.isclose(result[col], ref_result[col], rel_tol=1E-6, abs_tol=1E-6))
+
+a = np.array([[0, 1, 2], [3, 4, 5], [5, 4, 8], [4, 4, 8]], dtype=np.int16)
+result = np.linalg.norm(a, axis=1)  # one norm per row; the reference carries 7 digits, hence the 1E-6 tolerance
+ref_result = np.array([2.236068, 7.071068, 10.24695, 9.797959])
+for row in range(4):
+    print(math.isclose(result[row], ref_result[row], rel_tol=1E-6, abs_tol=1E-6))
diff --git a/tulip/shared/ulab/tests/2d/numpy/linspace.py b/tulip/shared/ulab/tests/2d/numpy/linspace.py
new file mode 100644
index 000000000..c97199a2d
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/linspace.py
@@ -0,0 +1,10 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    print(np.linspace(0, 10, num=5, dtype=dtype))                 # endpoint left at its default
+    print(np.linspace(0, 10, num=5, endpoint=True, dtype=dtype))  # endpoint requested explicitly
diff --git a/tulip/shared/ulab/tests/2d/numpy/load_save.py b/tulip/shared/ulab/tests/2d/numpy/load_save.py
new file mode 100644
index 000000000..6fb9d2a35
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/load_save.py
@@ -0,0 +1,14 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    a = np.array(range(25), dtype=dtype)  # one-dimensional case
+    b = a.reshape((5, 5))                 # two-dimensional case
+    np.save('out.npy', a)                 # 'out.npy' is written to the working directory and reused
+    print(np.load('out.npy'))
+    np.save('out.npy', b)
+    print(np.load('out.npy'))
diff --git a/tulip/shared/ulab/tests/2d/numpy/loadtxt.py b/tulip/shared/ulab/tests/2d/numpy/loadtxt.py
new file mode 100644
index 000000000..f08a91649
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/loadtxt.py
@@ -0,0 +1,37 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+
+a = np.array(range(8)).reshape((2, 4))
+np.savetxt('loadtxt.dat', a, header='test file data')  # default delimiter and comment marker
+
+print(np.loadtxt('loadtxt.dat'))
+print()
+
+for dtype in dtypes:  # read the same file back as each integer dtype
+    print(np.loadtxt('loadtxt.dat', dtype=dtype))
+    print()
+
+np.savetxt('loadtxt.dat', a, delimiter=',', header='test file data')  # comma-separated
+
+print(np.loadtxt('loadtxt.dat', delimiter=','))
+print()
+
+np.savetxt('loadtxt.dat', a, delimiter=',', comments='!', header='test file data')  # custom comment marker
+
+print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!'))
+print()
+print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!', usecols=1))
+print()
+print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!', usecols=(0, 1)))
+print()
+
+a = np.array(range(36)).reshape((9, 4))
+np.savetxt('loadtxt.dat', a, header='9 data rows and a comment')
+print(np.loadtxt('loadtxt.dat', max_rows=5))
+
+print()
+print(np.loadtxt('loadtxt.dat', skiprows=5, dtype=np.uint16))
diff --git a/tulip/shared/ulab/tests/2d/numpy/logspace.py b/tulip/shared/ulab/tests/2d/numpy/logspace.py
new file mode 100644
index 000000000..e6f2047ba
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/logspace.py
@@ -0,0 +1,10 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    print(np.logspace(0, 10, num=5, endpoint=False, dtype=dtype))  # exponents 0..10, endpoint excluded
+    print(np.logspace(0, 10, num=5, endpoint=True, dtype=dtype))   # same range, endpoint included
diff --git a/tulip/shared/ulab/tests/2d/numpy/methods.py b/tulip/shared/ulab/tests/2d/numpy/methods.py
new file mode 100644
index 000000000..1c687d13c
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/methods.py
@@ -0,0 +1,44 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+a = np.array([1, 2, 3, 4], dtype=np.int8)  # copy(): 1-D int8, then 2-D int16, then 2-D float
+b = a.copy()
+print(b)
+a = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype=np.int16)
+b = a.copy()
+print(b)
+a = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype=np.float)
+b = a.copy()
+print(b)
+print(a.dtype)      # `a` is still the 3x3 float array from above
+print(a.flatten())
+print(np.array([1,2,3], dtype=np.uint8).itemsize)  # itemsize for each supported dtype
+print(np.array([1,2,3], dtype=np.uint16).itemsize)
+print(np.array([1,2,3], dtype=np.int8).itemsize)
+print(np.array([1,2,3], dtype=np.int16).itemsize)
+print(np.array([1,2,3], dtype=np.float).itemsize)
+print(np.array([1,2,3], dtype=np.float).shape)  # shape / reshape / size
+print(np.array([[1],[2],[3]], dtype=np.float).shape)
+print(np.array([[1],[2],[3]], dtype=np.float).reshape((1,3)))
+print(np.array([[1],[2],[3]]).size)
+print(np.array([1,2,3], dtype=np.float).size)
+print(np.array([1,2,3], dtype=np.uint8).tobytes())  # tobytes on single-byte dtypes
+print(np.array([1,2,3], dtype=np.int8).tobytes())
+print(np.array([1,2,3], dtype=np.float).transpose().shape)  # transpose of 1-D and of 3x1 input
+print(np.array([[1],[2],[3]], dtype=np.float).transpose().shape)
+a = np.array([1, 2, 3, 4, 5, 6], dtype=np.uint8)  # byteswap on a one-byte dtype
+b = a.byteswap(inplace=False)
+print(a)  # printed next to b to show what inplace=False did to the source array
+print(b)
+c = a.byteswap(inplace=True)
+print(a)  # printed next to c to show what inplace=True did to the source array
+print(c)
+a = np.array([1, 2, 3, 4, 5, 6], dtype=np.uint16)  # same sequence on a two-byte dtype
+b = a.byteswap(inplace=False)
+print(a)
+print(b)
+c = a.byteswap(inplace=True)
+print(a)
+print(c)
diff --git a/tulip/shared/ulab/tests/2d/numpy/nonzero.py b/tulip/shared/ulab/tests/2d/numpy/nonzero.py
new file mode 100644
index 000000000..510ef4156
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/nonzero.py
@@ -0,0 +1,16 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+array = np.array(range(16)).reshape((4,4))
+print(array)
+print(array < 5)
+print(np.nonzero(array < 5))  # nonzero applied to the comparison result printed above
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    array = (np.arange(2, 12, 3, dtype=dtype)).reshape((2,2)) - 2  # 2, 5, 8, 11 shifted down to 0, 3, 6, 9
+    print(array)
+    print(np.nonzero(array))
diff --git a/tulip/shared/ulab/tests/2d/numpy/numericals.py b/tulip/shared/ulab/tests/2d/numpy/numericals.py
new file mode 100644
index 000000000..909929f0a
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/numericals.py
@@ -0,0 +1,214 @@
+import math
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+print("Testing np.min:")
+print(np.min([1]))
+print(np.min(np.array([1], dtype=np.float)))
+a = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype=np.uint8)
+print(np.min(a))
+print(np.min(a, axis=0))
+print(np.min(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.uint8)
+print(np.min(a))
+print(np.min(a, axis=0))
+print(np.min(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.int8)
+print(np.min(a)) ## Problem here -- every input (235..254) exceeds int8's maximum of 127; NOTE(review): presumably that overflow is the problem, confirm
+print(np.min(a, axis=0))
+print(np.min(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.uint16)
+print(np.min(a))
+print(np.min(a, axis=0))
+print(np.min(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.int16)
+print(np.min(a))
+print(np.min(a, axis=0))
+print(np.min(a, axis=1))
+a = np.array([range(2**56-3, 2**56),range(2**16-3, 2**16),range(2**8-3, 2**8)], dtype=np.float)
+print(np.min(a))
+print(np.min(a, axis=0))
+print(np.min(a, axis=1))
+
+print("Testing np.max:")
+print(np.max([1]))
+print(np.max(np.array([1], dtype=np.float)))
+a = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype=np.uint8)
+print(np.max(a))
+print(np.max(a, axis=0))
+print(np.max(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.uint8)
+print(np.max(a))
+print(np.max(a, axis=0))
+print(np.max(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.int8)
+print(np.max(a)) ## Problem here -- every input (235..254) exceeds int8's maximum of 127; NOTE(review): presumably that overflow is the problem, confirm
+print(np.max(a, axis=0))
+print(np.max(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.uint16)
+print(np.max(a))
+print(np.max(a, axis=0))
+print(np.max(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.int16)
+print(np.max(a))
+print(np.max(a, axis=0))
+print(np.max(a, axis=1))
+a = np.array([range(2**56-3, 2**56),range(2**16-3, 2**16),range(2**8-3, 2**8)], dtype=np.float)
+print(np.max(a))
+print(np.max(a, axis=0))
+print(np.max(a, axis=1))
+
+print("Testing np.argmin:")
+print(np.argmin([1]))
+print(np.argmin(np.array([1], dtype=np.float)))
+a = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype=np.uint8)
+print(np.argmin(a))
+print(np.argmin(a, axis=0))
+print(np.argmin(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.uint8)
+print(np.argmin(a))
+print(np.argmin(a, axis=0))
+print(np.argmin(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.int8)
+print(np.argmin(a)) ## Problem here -- every input (235..254) exceeds int8's maximum of 127; NOTE(review): presumably that overflow is the problem, confirm
+print(np.argmin(a, axis=0))
+print(np.argmin(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.uint16)
+print(np.argmin(a))
+print(np.argmin(a, axis=0))
+print(np.argmin(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.int16)
+print(np.argmin(a))
+print(np.argmin(a, axis=0))
+print(np.argmin(a, axis=1))
+a = np.array([range(2**56-3, 2**56),range(2**16-3, 2**16),range(2**8-3, 2**8)], dtype=np.float)
+print(np.argmin(a))
+print(np.argmin(a, axis=0))
+print(np.argmin(a, axis=1))
+
+print("Testing np.argmax:")
+print(np.argmax([1]))
+print(np.argmax(np.array([1], dtype=np.float)))
+a = np.array([[1,2,3],[4,5,6],[7,8,9]], dtype=np.uint8)
+print(np.argmax(a))
+print(np.argmax(a, axis=0))
+print(np.argmax(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.uint8)
+print(np.argmax(a))
+print(np.argmax(a, axis=0))
+print(np.argmax(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.int8)
+print(np.argmax(a)) ## Problem here -- every input (235..254) exceeds int8's maximum of 127; NOTE(review): presumably that overflow is the problem, confirm
+print(np.argmax(a, axis=0))
+print(np.argmax(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.uint16)
+print(np.argmax(a))
+print(np.argmax(a, axis=0))
+print(np.argmax(a, axis=1))
+a = np.array([range(255-5, 255),range(240-5, 240),range(250-5,250)], dtype=np.int16)
+print(np.argmax(a))
+print(np.argmax(a, axis=0))
+print(np.argmax(a, axis=1))
+a = np.array([range(2**56-3, 2**56),range(2**16-3, 2**16),range(2**8-3, 2**8)], dtype=np.float)
+print(np.argmax(a))
+print(np.argmax(a, axis=0))
+print(np.argmax(a, axis=1))
+
+print("Testing np.minimum:")
+print(np.minimum(10, 9))
+print(np.minimum(10.0, 9.0))
+a = np.array([range(255-3, 255),range(240-3, 240),range(250-3,250)], dtype=np.float)
+b = np.array([range(2**56-3, 2**56),range(2**16-3, 2**16),range(2**8-3, 2**8)], dtype=np.float)
+print(np.minimum(a, b))
+
+print("Testing np.maximum:")
+print(np.maximum(a, b))
+print(np.maximum(10, 9))
+print(np.maximum(10.0, 9.0))
+a = np.array([range(255-3, 255),range(240-3, 240),range(250-3,250)], dtype=np.float)
+b = np.array([range(2**56-3, 2**56),range(2**16-3, 2**16),range(2**8-3, 2**8)], dtype=np.float)
+print(np.maximum(a, b))
+
+print("Testing np.sort:")
+a = np.array([range(255-3, 255),range(240-3, 240),range(250-3,250)], dtype=np.uint8)
+b = np.array([range(2**56-3, 2**56),range(2**16-3, 2**16),range(2**8-3, 2**8)], dtype=np.float)
+print(np.sort(a, axis=None))
+print(np.sort(b, axis=None))
+print(np.sort(a, axis=0))
+print(np.sort(b, axis=0))
+print(np.sort(a, axis=1))
+print(np.sort(b, axis=1))
+
+print("Testing np.sum:")
+a = np.array([253, 254, 255], dtype=np.uint8)
+print(np.sum(a))
+print(np.sum(a, axis=0))
+a = np.array([range(255-3, 255),range(240-3, 240),range(250-3,250)], dtype=np.float)
+print(np.sum(a))
+print(np.sum(a, axis=0))
+print(np.sum(a, axis=1))
+
+print("Testing np.mean:")
+a = np.array([253, 254, 255], dtype=np.uint8)
+print(np.mean(a))
+print(np.mean(a, axis=0))
+a = np.array([range(255-3, 255),range(240-3, 240),range(250-3,250)], dtype=np.float)
+#print(np.mean(a))
+print(math.isclose(np.mean(a), 246.3333333333333, rel_tol=1e-06, abs_tol=1e-06))
+#print(np.mean(a, axis=0))
+result = np.mean(a, axis=0)
+ref_result = [245.33333333, 246.33333333, 247.33333333]
+for p, q in zip(list(result), ref_result):
+    print(math.isclose(p, q, rel_tol=1e-06, abs_tol=1e-06))
+
+#print(np.mean(a, axis=1))
+result = np.mean(a, axis=1)
+ref_result = [253., 238., 248.]
+for p, q in zip(list(result), ref_result):
+    print(math.isclose(p, q, rel_tol=1e-06, abs_tol=1e-06))
+
+print("Testing np.std:")
+a = np.array([253, 254, 255], dtype=np.uint8)
+#print(np.std(a))
+print(math.isclose(np.std(a), 0.816496580927726, rel_tol=1e-06, abs_tol=1e-06))
+print(math.isclose(np.std(a, axis=0), 0.816496580927726, rel_tol=1e-06, abs_tol=1e-06))
+a = np.array([range(255-3, 255),range(240-3, 240),range(250-3,250)], dtype=np.float)
+#print(np.std(a))
+print(math.isclose(np.std(a), 6.289320754704403, rel_tol=1e-06, abs_tol=1e-06))
+#print(np.std(a, axis=0))
+result = np.std(a, axis=0)
+ref_result = [6.23609564, 6.23609564, 6.23609564]
+for p, q in zip(list(result), ref_result):
+    print(math.isclose(p, q, rel_tol=1e-06, abs_tol=1e-06))
+
+#print(np.std(a, axis=1))
+result = np.std(a, axis=1)
+ref_result = [0.81649658, 0.81649658, 0.81649658]
+for p, q in zip(list(result), ref_result):
+    print(math.isclose(p, q, rel_tol=1e-06, abs_tol=1e-06))
+
+print("Testing np.median:")
+a = np.array([253, 254, 255], dtype=np.uint8)
+print(np.median(a))
+print(np.median(a, axis=0))
+a = np.array([range(255-3, 255),range(240-3, 240),range(250-3,250)], dtype=np.float)
+print(np.median(a))
+print(np.median(a, axis=0))
+print(np.median(a, axis=1))
+print("Testing np.roll:")  ## Here is problem
+print(np.arange(10))
+print(np.roll(np.arange(10), 2))
+print(np.roll(np.arange(10), -2))
+a = np.array([1, 2, 3, 4, 5, 6, 7, 8])
+print(np.roll(a, 2))
+print(np.roll(a, -2))
+print("Testing np.clip:")
+print(np.clip(5, 3, 6))  ## Here is problem
+print(np.clip(7, 3, 6))
+print(np.clip(1, 3, 6))
+a = np.array([1,2,3,4,5,6,7], dtype=np.int8)
+print(np.clip(a, 3, 5))
+a = np.array([1,2,3,4,5,6,7], dtype=np.float)
+print(np.clip(a, 3, 5))
diff --git a/tulip/shared/ulab/tests/2d/numpy/ones.py b/tulip/shared/ulab/tests/2d/numpy/ones.py
new file mode 100644
index 000000000..f0aee8682
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/ones.py
@@ -0,0 +1,13 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+print(np.ones(3))      # integer shape, dtype left at its default
+print(np.ones((3,3)))  # tuple shape, dtype left at its default
+
+for dtype in dtypes:
+    print(np.ones((3,3), dtype=dtype))  # square
+    print(np.ones((4,2), dtype=dtype))  # non-square
diff --git a/tulip/shared/ulab/tests/2d/numpy/operators.py b/tulip/shared/ulab/tests/2d/numpy/operators.py
new file mode 100644
index 000000000..42d004326
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/operators.py
@@ -0,0 +1,181 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+  
+print(len(np.array([1, 2, 3, 4, 5], dtype=np.uint8)))
+print(len(np.array([[1, 2, 3],[4, 5, 6]])))
+
+print(~np.array([0, -1, -100], dtype=np.uint8))
+print(~np.array([0, -1, -100], dtype=np.uint16))
+print(~np.array([0, -1, -100], dtype=np.int8))
+print(~np.array([0, -1, -100], dtype=np.int16))
+
+print(abs(np.array([0, -1, -100], dtype=np.uint8)))
+print(abs(np.array([0, -1, -100], dtype=np.uint16)))
+print(abs(np.array([0, -1, -100], dtype=np.int8)))
+print(abs(np.array([0, -1, -100], dtype=np.int16)))
+print(abs(np.array([0, -1, -100], dtype=np.float)))
+
+print(-(np.array([0, -1, -100], dtype=np.uint8)))
+print(-(np.array([0, -1, -100], dtype=np.uint16)))
+print(-(np.array([0, -1, -100], dtype=np.int8)))
+print(-(np.array([0, -1, -100], dtype=np.int16)))
+print(-(np.array([0, -1, -100], dtype=np.float)))
+
+print(+(np.array([0, -1, -100], dtype=np.uint8)))
+print(+(np.array([0, -1, -100], dtype=np.uint16)))
+print(+(np.array([0, -1, -100], dtype=np.int8)))
+print(+(np.array([0, -1, -100], dtype=np.int16)))
+print(+(np.array([0, -1, -100], dtype=np.float)))
+
+print(np.array([1,2,3], dtype=np.float) > np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) > np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) > np.array([4,5,6], dtype=np.int16))
+print(np.array([1,2,3], dtype=np.float) < np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) < np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) < np.array([4,5,6], dtype=np.int16))
+
+print(np.array([1,2,3], dtype=np.float) >= np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) >= np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) >= np.array([4,5,6], dtype=np.int16))
+print(np.array([1,2,3], dtype=np.float) <= np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) <= np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) <= np.array([4,5,6], dtype=np.int16))
+
+print(np.array([1,2,3], dtype=np.float) > 4)
+print(np.array([1,2,3], dtype=np.float) > 4.0)
+print(np.array([1,2,3], dtype=np.float) < 4)
+print(np.array([1,2,3], dtype=np.float) < 4.0)
+
+print(np.array([1,2,3], dtype=np.float) == np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) == np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) == np.array([4,5,6], dtype=np.int16))
+print(np.array([1,2,3], dtype=np.float) != np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) != np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) != np.array([4,5,6], dtype=np.int16))
+
+print(np.array([1,2,3], dtype=np.float) == 4)
+print(np.array([1,2,3], dtype=np.float) == 4.0)
+print(np.array([1,2,3], dtype=np.float) != 4)
+print(np.array([1,2,3], dtype=np.float) != 4.0)
+
+print(np.array([1,2,3], dtype=np.float) - np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) - np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) - np.array([4,5,6], dtype=np.int16))
+
+print(np.array([1,2,3], dtype=np.float) + np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) + np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) + np.array([4,5,6], dtype=np.int16))
+
+print(np.array([1,2,3], dtype=np.float) * np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) * np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) * np.array([4,5,6], dtype=np.int16))
+
+print(np.array([1,2,3], dtype=np.float) ** np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) ** np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) ** np.array([4,5,6], dtype=np.int16))
+
+print(np.array([1,2,3], dtype=np.float) / np.array([4,5,6], dtype=np.float))
+print(np.array([1,2,3], dtype=np.float) / np.array([4,5,6], dtype=np.uint16))
+print(np.array([1,2,3], dtype=np.float) / np.array([4,5,6], dtype=np.int16))
+
+print(np.array([10,20,30], dtype=np.float) // np.array([4,5,6], dtype=np.float))
+print(np.array([10,20,30], dtype=np.float) // np.array([4,5,6], dtype=np.uint16))
+print(np.array([10,20,30], dtype=np.float) // np.array([4,5,6], dtype=np.int16))
+
+print(np.array([1,2,3], dtype=np.float) - 4)
+print(np.array([1,2,3], dtype=np.float) - 4.0)
+print(np.array([1,2,3], dtype=np.float) + 4)
+print(np.array([1,2,3], dtype=np.float) + 4.0)
+
+print(np.array([1,2,3], dtype=np.float) * 4)
+print(np.array([1,2,3], dtype=np.float) * 4.0)
+print(np.array([1,2,3], dtype=np.float) ** 4)
+print(np.array([1,2,3], dtype=np.float) ** 4.0)
+
+print(np.array([1,2,3], dtype=np.float) / 4)
+print(np.array([1,2,3], dtype=np.float) / 4.0)
+print(np.array([10,20,30], dtype=np.float) // 4)
+print(np.array([10,20,30], dtype=np.float) // 4.0)
+print(np.array([10,20,30], dtype=np.int8) // 4)
+print(np.array([10,20,30], dtype=np.int8) // 4.0)
+print(np.array([10,20,30], dtype=np.uint16) // 4)
+print(np.array([10,20,30], dtype=np.uint16) // 4.0)
+print(np.array([10,20,30], dtype=np.int16) // 4)
+print(np.array([10,20,30], dtype=np.int16) // 4.0)
+
+a = np.array([1,2,3], dtype=np.float)
+a -= np.array([4,5,6], dtype=np.float)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a -= np.array([4,5,6], dtype=np.uint16)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a -= np.array([4,5,6], dtype=np.int16)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a += np.array([4,5,6], dtype=np.float)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a += np.array([4,5,6], dtype=np.uint16)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a += np.array([4,5,6], dtype=np.int16)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a *= np.array([4,5,6], dtype=np.float)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a *= np.array([4,5,6], dtype=np.uint16)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a *= np.array([4,5,6], dtype=np.int16)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+#a /= np.array([4,5,6])  # in-place division is disabled here, so the print below shows `a` unchanged -- NOTE(review): reason not recorded, confirm
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a **= np.array([4,5,6], dtype=np.float)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a **= np.array([4,5,6], dtype=np.uint16)
+print(a)
+
+a = np.array([1,2,3], dtype=np.float)
+a **= np.array([4,5,6], dtype=np.int16)
+print(a)
+
+print(np.array([1,2,3],dtype=np.uint8) + np.array([4,5,6],dtype=np.uint8))
+print(np.array([1,2,3],dtype=np.uint8) + np.array([4,5,6],dtype=np.int8))
+print(np.array([1,2,3],dtype=np.int8) + np.array([4,5,6],dtype=np.int8))
+print(np.array([1,2,3],dtype=np.uint8) + np.array([4,5,6],dtype=np.uint16))
+print(np.array([1,2,3],dtype=np.int8) + np.array([4,5,6],dtype=np.uint16))
+print(np.array([1,2,3],dtype=np.uint8) + np.array([4,5,6],dtype=np.int16))
+print(np.array([1,2,3],dtype=np.int8) + np.array([4,5,6],dtype=np.int16))
+print(np.array([1,2,3],dtype=np.uint16) + np.array([4,5,6],dtype=np.uint16))
+print(np.array([1,2,3],dtype=np.int16) + np.array([4,5,6],dtype=np.int16))
+print(np.array([1,2,3],dtype=np.int16) + np.array([4,5,6],dtype=np.uint16))
+
+print(np.array([1,2,3],dtype=np.uint8) + np.array([4,5,6],dtype=np.float))
+print(np.array([1,2,3],dtype=np.int8) + np.array([4,5,6],dtype=np.float))
+print(np.array([1,2,3],dtype=np.uint16) + np.array([4,5,6],dtype=np.float))
+print(np.array([1,2,3],dtype=np.int16) + np.array([4,5,6],dtype=np.float))
+print(np.array([1,2,3],dtype=np.int16) + np.array([4,5,6],dtype=np.float))  # NOTE(review): identical to the previous line; possibly a different dtype pairing was intended -- confirm
+
+a = np.array([1, 2, 3, 4, 5], dtype=np.uint8)
+for i, _a in enumerate(a):
+    print("element %d in a:"%i, _a)
+
diff --git a/tulip/shared/ulab/tests/2d/numpy/or.py b/tulip/shared/ulab/tests/2d/numpy/or.py
new file mode 100644
index 000000000..5788843e9
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/or.py
@@ -0,0 +1,21 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+
+for dtype_a in dtypes:
+    a = np.array(range(5), dtype=dtype_a)
+    for dtype_b in dtypes:
+        b = np.array(range(250, 255), dtype=dtype_b)
+        try:
+            print('a | b: ', a | b)
+        except Exception as e:
+            print(e)
+
+    b = np.array([False, True, False, True, False], dtype=np.bool)
+    try:
+        print('a | b (bool): ', a | b)
+    except Exception as e:
+        print(e)
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/2d/numpy/reshape.py b/tulip/shared/ulab/tests/2d/numpy/reshape.py
new file mode 100644
index 000000000..7f4add6aa
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/reshape.py
@@ -0,0 +1,17 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    print()
+    print('=' * 50)
+    a = np.array(range(12), dtype=dtype).reshape((3, 4))
+    print(a)
+    b = a[0,:]
+    print(b.reshape((1,4)))
+    b = a[:,0]
+    print(b.reshape((1,3)))
+
diff --git a/tulip/shared/ulab/tests/2d/numpy/right_shift.py b/tulip/shared/ulab/tests/2d/numpy/right_shift.py
new file mode 100644
index 000000000..f5099f343
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/right_shift.py
@@ -0,0 +1,21 @@
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+np.set_printoptions(threshold=100)
+
+
+shift_values = (
+    (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+    (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
+    (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
+)
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+
+
+for shift_value in shift_values:
+    for dtype1 in dtypes:
+        x1 = np.array([0, 1, 2, 4, 8, 16, 32, 3, 5, 7, 11, 13], dtype=dtype1)
+        for dtype2 in dtypes:
+            x2 = np.array(shift_value, dtype=dtype2)
+            print(np.right_shift(x1, x2))
diff --git a/tulip/shared/ulab/tests/2d/numpy/savetxt.py b/tulip/shared/ulab/tests/2d/numpy/savetxt.py
new file mode 100644
index 000000000..857c910e6
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/savetxt.py
@@ -0,0 +1,38 @@
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+a = np.array(range(9))
+
+print('savetxt with linear arrays')
+np.savetxt('savetxt.dat', a)
+
+with open('savetxt.dat', 'r') as fin:
+    print(fin.read())
+
+a = a.reshape((3, 3))
+
+print('savetxt with no keyword arguments')
+np.savetxt('savetxt.dat', a)
+
+with open('savetxt.dat', 'r') as fin:
+    print(fin.read())
+
+print('savetxt with delimiter')
+np.savetxt('savetxt.dat', a, delimiter=',')
+
+with open('savetxt.dat', 'r') as fin:
+    print(fin.read())
+
+print('savetxt with header')
+np.savetxt('savetxt.dat', a, header='column1 column2 column3')
+
+with open('savetxt.dat', 'r') as fin:
+    print(fin.read())
+
+print('savetxt with footer')
+np.savetxt('savetxt.dat', a, footer='written data file')
+
+with open('savetxt.dat', 'r') as fin:
+    print(fin.read())
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/2d/numpy/signal.py b/tulip/shared/ulab/tests/2d/numpy/signal.py
new file mode 100644
index 000000000..d7a6412be
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/signal.py
@@ -0,0 +1,37 @@
+import math
+try:
+    from ulab import numpy as np
+    from ulab import scipy as spy
+except ImportError:
+    import numpy as np
+    import scipy as spy
+
+x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.float)
+sos = np.array([[1, 2, 3, 1, 5, 6], [1, 2, 3, 1, 5, 6]],dtype=np.float)
+result = spy.signal.sosfilt(sos, x)
+
+ref_result = np.array([0.0000e+00, 1.0000e+00, -4.0000e+00, 2.4000e+01, -1.0400e+02, 4.4000e+02, -1.7280e+03,  6.5320e+03, -2.3848e+04, 8.4864e+04], dtype=np.float)
+cmp_result = []
+for p,q in zip(list(result), list(ref_result)):
+    cmp_result.append(math.isclose(p, q, rel_tol=1e-06, abs_tol=1e-06))
+print(cmp_result)
+
+x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+sos = np.array([[1, 2, 3, 1, 5, 6], [1, 2, 3, 1, 5, 6]],dtype=np.float)
+zi = np.array([[1, 2], [3, 4]],dtype=np.float)
+y, zo = spy.signal.sosfilt(sos, x, zi=zi)
+
+y_ref = np.array([ 4.00000e+00, -1.60000e+01,  6.30000e+01, -2.27000e+02, 8.03000e+02, -2.75100e+03,  9.27100e+03, -3.07750e+04, 1.01067e+05, -3.28991e+05], dtype=np.float)
+zo_ref = np.array([[37242.0, 74835.],[1026187.0, 1936542.0]], dtype=np.float)
+cmp_result = []
+for p,q in zip(list(y), list(y_ref)):
+    cmp_result.append(math.isclose(p, q, rel_tol=1e-06, abs_tol=1e-06))
+print(cmp_result)
+
+cmp_result = []
+for i in range(2):
+    temp = []
+    for j in range(2):
+        temp.append(math.isclose(zo[i][j], zo_ref[i][j], rel_tol=1E-9, abs_tol=1E-9))
+    cmp_result.append(temp)
+print(cmp_result)
diff --git a/tulip/shared/ulab/tests/2d/numpy/size.py b/tulip/shared/ulab/tests/2d/numpy/size.py
new file mode 100644
index 000000000..636bf1f2d
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/size.py
@@ -0,0 +1,10 @@
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+a = np.zeros((3, 4))
+
+print(np.size(a, axis=0))
+print(np.size(a, axis=1))
+print(np.size(a, axis=None))
diff --git a/tulip/shared/ulab/tests/2d/numpy/sort.py b/tulip/shared/ulab/tests/2d/numpy/sort.py
new file mode 100644
index 000000000..d91bb6fde
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/sort.py
@@ -0,0 +1,18 @@
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    print()
+    a = np.array([], dtype=dtype)
+    print(np.sort(a, axis=0))
+    print(list(np.argsort(a, axis=0)))
+
+    a = np.array([4, 1, 3, 2], dtype=dtype)
+    print(np.sort(a, axis=0))
+    print(list(np.argsort(a, axis=0)))
+
+
diff --git a/tulip/shared/ulab/tests/2d/numpy/take.py b/tulip/shared/ulab/tests/2d/numpy/take.py
new file mode 100644
index 000000000..3bf5ab8b0
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/take.py
@@ -0,0 +1,30 @@
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+print('flattened array')
+for dtype in dtypes:
+    a = np.array(range(12), dtype=dtype).reshape((3, 4))
+    print(np.take(a, (0, 10)))
+
+print('\n1D arrays')
+for dtype in dtypes:
+    a = np.array(range(12), dtype=dtype)
+    print('\na:', a)
+    indices = (0, 2, 2, 1)
+    print(np.take(a, indices))
+    indices = np.array([0, 2, 2, 1], dtype=np.uint8)
+    print(np.take(a, indices))
+
+print('\n2D arrays')
+for dtype in dtypes:
+    a = np.array(range(12), dtype=dtype).reshape((3, 4))
+    print('\na:', a)
+    print('\nfirst axis')
+    print(np.take(a, (0, 2, 2, 1), axis=0))
+    print('\nsecond axis')
+    print(np.take(a, (0, 2, 2, 1), axis=1))
+
diff --git a/tulip/shared/ulab/tests/2d/numpy/vectorize.py b/tulip/shared/ulab/tests/2d/numpy/vectorize.py
new file mode 100644
index 000000000..8cb6e104c
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/vectorize.py
@@ -0,0 +1,18 @@
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+square = np.vectorize(lambda n: n*n)
+
+for dtype in dtypes:
+    a = np.array(range(9), dtype=dtype).reshape((3, 3))
+    print(a)
+    print(square(a))
+    
+    b = a[:,2]
+    print(square(b))
+    print()
diff --git a/tulip/shared/ulab/tests/2d/numpy/where.py b/tulip/shared/ulab/tests/2d/numpy/where.py
new file mode 100644
index 000000000..18bf1ccef
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/where.py
@@ -0,0 +1,18 @@
+from ulab import numpy as np
+
+
+a = np.array(range(8))
+
+print(np.where(a < 4, 1, 0))
+print(np.where(a < 4, 2 * a, 0))
+
+a = np.array(range(12)).reshape((3, 4))
+print(np.where(a < 6, a, -1))
+
+b = np.array(range(4))
+print(np.where(a < 6, 10 + b, -1))
+
+# test upcasting here
+b = np.array(range(4), dtype=np.uint8)
+c = np.array([25, 25, 25, 25], dtype=np.int16)
+print(np.where(a < 6, b, c))
diff --git a/tulip/shared/ulab/tests/2d/numpy/xor.py b/tulip/shared/ulab/tests/2d/numpy/xor.py
new file mode 100644
index 000000000..f571dce3e
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/xor.py
@@ -0,0 +1,21 @@
+try:
+    from ulab import numpy as np
+except ImportError:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+
+for dtype_a in dtypes:
+    a = np.array(range(5), dtype=dtype_a)
+    for dtype_b in dtypes:
+        b = np.array(range(250, 255), dtype=dtype_b)
+        try:
+            print('a ^ b: ', a ^ b)
+        except Exception as e:
+            print(e)
+
+    b = np.array([False, True, False, True, False], dtype=np.bool)
+    try:
+        print('a ^ b (bool): ', a ^ b)
+    except Exception as e:
+        print(e)
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/2d/numpy/zeros.py b/tulip/shared/ulab/tests/2d/numpy/zeros.py
new file mode 100644
index 000000000..8e86f9d3f
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/numpy/zeros.py
@@ -0,0 +1,28 @@
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+print(np.zeros(3))
+print(np.zeros((3,3)))
+
+for dtype in dtypes:
+    print(np.zeros((3,3), dtype=dtype))
+    print(np.zeros((4,2), dtype=dtype))
+
+try:
+    np.zeros((1<<31, 1<<31))
+except ValueError:
+    print("ValueError")
+
+try:
+    np.zeros((2147483653, 2147483649))
+except ValueError:
+    print("ValueError")
+
+try:
+    np.zeros((194899806, 189294637612))
+except ValueError:
+    print("ValueError")
diff --git a/tulip/shared/ulab/tests/2d/scipy/cho_solve.py b/tulip/shared/ulab/tests/2d/scipy/cho_solve.py
new file mode 100644
index 000000000..57643c810
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/scipy/cho_solve.py
@@ -0,0 +1,29 @@
+import math
+
+try:
+    from ulab import scipy, numpy as np
+except ImportError:
+    import scipy
+    import numpy as np
+
+## test cholesky solve
+L = np.array([[3, 0, 0, 0], [2, 1, 0, 0], [1, 0, 1, 0], [1, 2, 1, 8]])
+b = np.array([4, 2, 4, 2])
+
+# L needs to be a lower triangular matrix
+result = scipy.linalg.cho_solve(L, b)
+ref_result = np.array([-0.01388888888888906, -0.6458333333333331, 2.677083333333333, -0.01041666666666667])
+
+for i in range(4):
+        print(math.isclose(result[i], ref_result[i], rel_tol=1E-6, abs_tol=1E-6))
+
+## test cholesky and cho_solve together
+C = np.array([[18, 22,  54,  42], [22, 70,  86,  62], [54, 86, 174, 134], [42, 62, 134, 106]])
+L = np.linalg.cholesky(C)
+
+# L is a lower triangular matrix obtained by performing cholesky of positive-definite linear system
+result = scipy.linalg.cho_solve(L, b)
+ref_result = np.array([6.5625, 1.1875, -2.9375, 0.4375])
+
+for i in range(4):
+        print(math.isclose(result[i], ref_result[i], rel_tol=1E-6, abs_tol=1E-6))
diff --git a/tulip/shared/ulab/tests/2d/scipy/integrate.py b/tulip/shared/ulab/tests/2d/scipy/integrate.py
new file mode 100644
index 000000000..1d0edb7b4
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/scipy/integrate.py
@@ -0,0 +1,28 @@
+import sys
+from math import *
+
+try:
+    from ulab import scipy
+except ImportError:
+    import scipy
+
+f = lambda x: x * sin(x) * exp(x)  # integrand shared by all four integrators
+a = 1  # lower integration limit
+b = 2  # upper integration limit
+
+res, err = scipy.integrate.tanhsinh(f, a, b)
+if isclose(res, 7.11263821415851) and isclose(err, 5.438231077315757e-14):
+    print(res, err)  # printed only when both values match the reference
+
+res = scipy.integrate.romberg(f, a, b)
+if isclose(res, 7.112638214158507):
+    print(res)
+
+res = scipy.integrate.simpson(f, a, b)
+if isclose(res, 7.112638214158494):
+    print(res)
+
+res, err = scipy.integrate.quad(f, a, b)
+if isclose(res, 7.112638214158507) and isclose(err, 7.686723611780195e-14):
+    print(res, err)  # printed only when both values match the reference
+
diff --git a/tulip/shared/ulab/tests/2d/scipy/solve_triangular.py b/tulip/shared/ulab/tests/2d/scipy/solve_triangular.py
new file mode 100644
index 000000000..fdb674390
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/scipy/solve_triangular.py
@@ -0,0 +1,22 @@
+import math
+
+try:
+    from ulab import scipy, numpy as np
+except ImportError:
+    import scipy
+    import numpy as np
+
+A = np.array([[3, 0, 2, 6], [2, 1, 0, 1], [1, 0, 1, 4], [1, 2, 1, 8]])
+b = np.array([4, 2, 4, 2])
+
+# forward substitution
+result = scipy.linalg.solve_triangular(A, b, lower=True)
+ref_result = np.array([1.333333333, -0.666666666, 2.666666666, -0.083333333])
+for i in range(4):
+        print(math.isclose(result[i], ref_result[i], rel_tol=1E-6, abs_tol=1E-6))
+
+# backward substitution
+result = scipy.linalg.solve_triangular(A, b, lower=False)
+ref_result = np.array([-1.166666666, 1.75, 3.0, 0.25])
+for i in range(4):
+        print(math.isclose(result[i], ref_result[i], rel_tol=1E-6, abs_tol=1E-6))
diff --git a/tulip/shared/ulab/tests/2d/scipy/sosfilt.py b/tulip/shared/ulab/tests/2d/scipy/sosfilt.py
new file mode 100644
index 000000000..015d6728b
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/scipy/sosfilt.py
@@ -0,0 +1,13 @@
+try:
+    from ulab import numpy as np
+    from ulab import scipy as spy
+except ImportError:  # fall back to numpy/scipy when ulab cannot be imported
+    import numpy as np
+    import scipy as spy
+
+x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+sos = [[1, 2, 3, 1, 5, 6], [1, 2, 3, 1, 5, 6], [1, 2, 3, 1, 5, 6]]
+zi = np.array([[1, 2], [3, 4], [5, 6]],dtype=np.float)
+y, zo = spy.signal.sosfilt(sos, x, zi=zi)
+print('y: ', y)
+print('zo: ', zo)
diff --git a/tulip/shared/ulab/tests/2d/utils/from_buffer.py b/tulip/shared/ulab/tests/2d/utils/from_buffer.py
new file mode 100644
index 000000000..64a989721
--- /dev/null
+++ b/tulip/shared/ulab/tests/2d/utils/from_buffer.py
@@ -0,0 +1,22 @@
+from ulab import numpy as np
+from ulab import utils
+
+a = bytearray([1, 0, 0, 1, 0, 255, 255, 255])
+print(utils.from_uint16_buffer(a))
+a = bytearray([1, 0, 0, 1, 0, 255, 255, 255])
+print(utils.from_int16_buffer(a))
+
+a = bytearray([1, 0, 0, 1, 0, 255, 255, 255])
+print(utils.from_uint32_buffer(a))
+a = bytearray([1, 0, 0, 1, 0, 255, 255, 255])
+print(utils.from_int32_buffer(a))
+
+a = bytearray([1, 0, 0, 1, 0, 0, 255, 255])
+print(utils.from_uint32_buffer(a))
+a = bytearray([1, 0, 0, 1, 0, 0, 255, 255])
+print(utils.from_int32_buffer(a))
+
+a = bytearray([1, 0, 0, 0, 0, 0, 0, 1])
+print(utils.from_uint32_buffer(a))
+a = bytearray([1, 0, 0, 0, 0, 0, 0, 1])
+print(utils.from_int32_buffer(a))
diff --git a/tulip/shared/ulab/tests/3d/complex/complex_exp.py b/tulip/shared/ulab/tests/3d/complex/complex_exp.py
new file mode 100644
index 000000000..ef36e2263
--- /dev/null
+++ b/tulip/shared/ulab/tests/3d/complex/complex_exp.py
@@ -0,0 +1,24 @@
+# This test is meaningful only when the firmware supports complex arrays.
+
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(4), dtype=dtype)
+    b = a.reshape((2, 2))
+    print('\narray:\n', a)
+    print('\nexponential:\n', np.exp(a))
+    print('\narray:\n', b)
+    print('\nexponential:\n', np.exp(b))
+
+a = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+b = np.array([[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]], dtype=np.complex)
+c = np.array([[[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]], [[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]]], dtype=np.complex)
+
+for m in (a, b, c):
+    print('\n\narray:\n', m)
+    print('\nexponential:\n', np.exp(m))
diff --git a/tulip/shared/ulab/tests/3d/complex/complex_sqrt.py b/tulip/shared/ulab/tests/3d/complex/complex_sqrt.py
new file mode 100644
index 000000000..4bc9def0d
--- /dev/null
+++ b/tulip/shared/ulab/tests/3d/complex/complex_sqrt.py
@@ -0,0 +1,26 @@
+# This test is meaningful only when the firmware supports complex arrays.
+
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(8), dtype=dtype)
+    b = a.reshape((2, 2, 2))
+    outtype = np.float if dtype is not np.complex else np.complex
+    print('\narray:\n', a)
+    print('\nsquare root:\n', np.sqrt(a, dtype=outtype))
+    print('\narray:\n', b)
+    print('\nsquare root:\n', np.sqrt(b, dtype=outtype))
+
+
+a = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+b = np.array([0, 1j, 2+2j, 3-3j] * 2, dtype=np.complex).reshape((2, 4))
+c = np.array([0, 1j, 2+2j, 3-3j] * 2, dtype=np.complex).reshape((2, 2, 2))
+
+for m in (a, b, c):
+    print('\n\narray:\n', m)
+    print('\nsquare root:\n', np.sqrt(m, dtype=np.complex))
diff --git a/tulip/shared/ulab/tests/3d/complex/imag_real.py b/tulip/shared/ulab/tests/3d/complex/imag_real.py
new file mode 100644
index 000000000..1e12a8dfe
--- /dev/null
+++ b/tulip/shared/ulab/tests/3d/complex/imag_real.py
@@ -0,0 +1,28 @@
+# This test is meaningful only when the firmware supports complex arrays.
+
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(8), dtype=dtype)
+    print('\narray:\n', a)
+    print('\nreal part:\n', np.real(a))
+    print('\nimaginary part:\n', np.imag(a))
+    for m in (a.reshape((2, 4)), a.reshape((2, 2, 2))):
+        print('\narray:\n', m)
+        print('\nreal part:\n', np.real(m))
+        print('\nimaginary part:\n', np.imag(m), '\n')
+
+
+a = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+b = np.array([[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]], dtype=np.complex)
+c = np.array([[[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]], [[0, 1j, 2+2j, 3-3j], [0, 1j, 2+2j, 3-3j]]], dtype=np.complex)
+
+for m in (a, b, c):
+    print('\n\narray:\n', m)
+    print('\nreal part:\n', np.real(m))
+    print('\nimaginary part:\n', np.imag(m))
diff --git a/tulip/shared/ulab/tests/3d/numpy/create.py b/tulip/shared/ulab/tests/3d/numpy/create.py
new file mode 100644
index 000000000..a5c1fa157
--- /dev/null
+++ b/tulip/shared/ulab/tests/3d/numpy/create.py
@@ -0,0 +1,2 @@
+from ulab import numpy as np
+print(sum(np.ones((3,2,4))))
diff --git a/tulip/shared/ulab/tests/4d/complex/complex_exp.py b/tulip/shared/ulab/tests/4d/complex/complex_exp.py
new file mode 100644
index 000000000..63ed87326
--- /dev/null
+++ b/tulip/shared/ulab/tests/4d/complex/complex_exp.py
@@ -0,0 +1,26 @@
+# This test is meaningful only when the firmware supports complex arrays.
+
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(4), dtype=dtype)
+    b = a.reshape((2, 2))
+    print('\narray:\n', a)
+    print('\nexponential:\n', np.exp(a))
+    print('\narray:\n', b)
+    print('\nexponential:\n', np.exp(b))
+
+
+a = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+b = np.array([0, 1j, 2+2j, 3-3j] * 2, dtype=np.complex).reshape((2, 4))
+c = np.array([0, 1j, 2+2j, 3-3j] * 2, dtype=np.complex).reshape((2, 2, 2))
+d = np.array([0, 1j, 2+2j, 3-3j] * 4, dtype=np.complex).reshape((2, 2, 2, 2))
+
+for m in (a, b, c, d):
+    print('\n\narray:\n', m)
+    print('\nexponential:\n', np.exp(m))
diff --git a/tulip/shared/ulab/tests/4d/complex/complex_sqrt.py b/tulip/shared/ulab/tests/4d/complex/complex_sqrt.py
new file mode 100644
index 000000000..052a07d73
--- /dev/null
+++ b/tulip/shared/ulab/tests/4d/complex/complex_sqrt.py
@@ -0,0 +1,27 @@
+# This test is meaningful only when the firmware supports complex arrays.
+
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(16), dtype=dtype)
+    b = a.reshape((2, 2, 2, 2))
+    outtype = np.float if dtype is not np.complex else np.complex
+    print('\narray:\n', a)
+    print('\nsquare root:\n', np.sqrt(a, dtype=outtype))
+    print('\narray:\n', b)
+    print('\nsquare root:\n', np.sqrt(b, dtype=outtype))
+
+
+a = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+b = np.array([0, 1j, 2+2j, 3-3j] * 2, dtype=np.complex).reshape((2, 4))
+c = np.array([0, 1j, 2+2j, 3-3j] * 2, dtype=np.complex).reshape((2, 2, 2))
+d = np.array([0, 1j, 2+2j, 3-3j] * 4, dtype=np.complex).reshape((2, 2, 2, 2))
+
+for m in (a, b, c, d):
+    print('\n\narray:\n', m)
+    print('\nsquare root:\n', np.sqrt(m, dtype=np.complex))
diff --git a/tulip/shared/ulab/tests/4d/complex/imag_real.py b/tulip/shared/ulab/tests/4d/complex/imag_real.py
new file mode 100644
index 000000000..63b9da5e2
--- /dev/null
+++ b/tulip/shared/ulab/tests/4d/complex/imag_real.py
@@ -0,0 +1,29 @@
+# This test is meaningful only when the firmware supports complex arrays.
+
+try:
+    from ulab import numpy as np
+except ImportError:  # fall back to numpy when ulab cannot be imported
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float, np.complex)
+
+for dtype in dtypes:
+    a = np.array(range(16), dtype=dtype)
+    print('\narray:\n', a)
+    print('\nreal part:\n', np.real(a))
+    print('\nimaginary part:\n', np.imag(a))
+    for m in (a.reshape((4, 4)), a.reshape((2, 2, 4)), a.reshape((2, 2, 2, 2))):
+        print('\narray:\n', m)
+        print('\nreal part:\n', np.real(m))
+        print('\nimaginary part:\n', np.imag(m), '\n')
+
+
+a = np.array([0, 1j, 2+2j, 3-3j], dtype=np.complex)
+b = np.array([0, 1j, 2+2j, 3-3j] * 2, dtype=np.complex).reshape((2, 4))
+c = np.array([0, 1j, 2+2j, 3-3j] * 2, dtype=np.complex).reshape((2, 2, 2))
+d = np.array([0, 1j, 2+2j, 3-3j] * 4, dtype=np.complex).reshape((2, 2, 2, 2))
+
+for m in (a, b, c, d):
+    print('\n\narray:\n', m)
+    print('\nreal part:\n', np.real(m))
+    print('\nimaginary part:\n', np.imag(m))
\ No newline at end of file
diff --git a/tulip/shared/ulab/tests/4d/numpy/create.py b/tulip/shared/ulab/tests/4d/numpy/create.py
new file mode 100644
index 000000000..64c344c2c
--- /dev/null
+++ b/tulip/shared/ulab/tests/4d/numpy/create.py
@@ -0,0 +1,2 @@
+from ulab import numpy as np
+print(sum(np.ones((3,4,2,5))))
diff --git a/tulip/web/Makefile b/tulip/web/Makefile
index 276b645dd..18e685989 100644
--- a/tulip/web/Makefile
+++ b/tulip/web/Makefile
@@ -18,7 +18,6 @@ VARIANT ?= standard
 VARIANT_DIR ?= variants/$(VARIANT)
 endif
 
-
 ifeq ($(wildcard $(VARIANT_DIR)/.),)
 $(error Invalid VARIANT specified: $(VARIANT_DIR))
 endif
@@ -74,6 +73,7 @@ TERSER ?= npx terser
 
 INC += -I.
 INC += -I../shared/
+INC += -I../shared/ulab/code/
 INC += -I../shared/desktop/
 INC += -I$(TOP)
 INC += -I$(BUILD)
@@ -85,7 +85,7 @@ CFLAGS += -Wall -Werror -Wdouble-promotion -Wfloat-conversion -DSTATIC=static -W
 CFLAGS += -O3 -DNDEBUG
 CWARN += -Wextra -Wno-unused-parameter -Wno-unused-but-set-parameter -Wpointer-arith -Wno-double-promotion -Wfloat-conversion -Wno-missing-declarations  -Wno-unused-but-set-variable -Wno-sign-compare -Wno-gnu-variable-sized-type-not-at-end -Wno-undefined-internal
 CFLAGS += $(INC) $(CWARN)
-CFLAGS += -DTULIP_WEB -DMA_ENABLE_AUDIO_WORKLETS -sUSE_SDL=2 -DAMY_IS_EXTERNAL
+CFLAGS += -DTULIP_WEB -DMA_ENABLE_AUDIO_WORKLETS -sUSE_SDL=2 -DAMY_IS_EXTERNAL -DMODULE_ULAB_ENABLED=1 -DAMY_HAS_AUDIO_IN
 
 EXPORTED_FUNCTIONS_EXTRA += ,\
 	_mp_js_do_exec,\
@@ -168,6 +168,8 @@ SRC_C += \
 
 
 TULIP_EXTMOD_DIR = ../shared
+ULAB_DIR = ../shared/ulab/code
+
 EXTMOD_SRC_C += $(addprefix $(TULIP_EXTMOD_DIR)/, \
 	modtulip.c \
 	polyfills.c \
@@ -188,6 +190,42 @@ EXTMOD_SRC_C += $(addprefix $(TULIP_EXTMOD_DIR)/, \
 	desktop/unix_display.c \
 	)
 
+EXTMOD_SRC_C += $(addprefix $(ULAB_DIR)/, \
+	scipy/integrate/integrate.c \
+	scipy/linalg/linalg.c \
+	scipy/optimize/optimize.c \
+	scipy/signal/signal.c \
+	scipy/special/special.c \
+	ndarray_operators.c \
+	ulab_tools.c \
+	ndarray.c \
+	numpy/ndarray/ndarray_iter.c \
+	ndarray_properties.c \
+	numpy/approx.c \
+	numpy/bitwise.c \
+	numpy/compare.c \
+	numpy/carray/carray.c \
+	numpy/carray/carray_tools.c \
+	numpy/create.c \
+	numpy/fft/fft.c \
+	numpy/fft/fft_tools.c \
+	numpy/filter.c \
+	numpy/io/io.c \
+	numpy/linalg/linalg.c \
+	numpy/linalg/linalg_tools.c \
+	numpy/numerical.c \
+	numpy/poly.c \
+	numpy/random/random.c \
+	numpy/stats.c \
+	numpy/transform.c \
+	numpy/vector.c \
+	numpy/numpy.c \
+	scipy/scipy.c \
+	user/user.c \
+	utils/utils.c \
+	ulab.c \
+	)
+
 # List of sources for qstr extraction.
 SRC_QSTR += $(SRC_C) $(SRC_SHARED) $(EXTMOD_SRC_C)
 
diff --git a/tulip/web/build.sh b/tulip/web/build.sh
index 7dcc7654b..04a481b67 100755
--- a/tulip/web/build.sh
+++ b/tulip/web/build.sh
@@ -6,11 +6,11 @@ set -e
 source ../shared/grab_submodules.sh
 
 cd ../../amy
-make docs/amy.js
+make docs/amy-audioin.js
 cd ../tulip/web
-cp ../../amy/docs/amy.js ../../www/run/
-cp ../../amy/docs/amy.wasm ../../www/run/
-cp ../../amy/docs/amy.aw.js ../../www/run/
+cp ../../amy/docs/amy-audioin.js ../../www/run/
+cp ../../amy/docs/amy-audioin.wasm ../../www/run/
+cp ../../amy/docs/amy-audioin.aw.js ../../www/run/
 
 make
 cp build-standard/tulip/obj/micropython.wasm ../../www/run/
diff --git a/tulip/web/variants/manifest.py b/tulip/web/variants/manifest.py
index cc84dd1e2..1bc2cd7d2 100644
--- a/tulip/web/variants/manifest.py
+++ b/tulip/web/variants/manifest.py
@@ -5,7 +5,9 @@
 freeze("../../shared/py")
 freeze("../../../amy", "amy.py")
 freeze("../../../amy", "juno.py")
-
+freeze("../../../amy", "amy_wave.py")
+freeze("../../../amy/experiments", "tulip_piano.py")
+freeze("../../../amy/experiments", "piano_params.py")
 
 package(
     "asyncio",
diff --git a/www/run/amy-audioin.aw.js b/www/run/amy-audioin.aw.js
new file mode 100644
index 000000000..818a1fbc4
--- /dev/null
+++ b/www/run/amy-audioin.aw.js
@@ -0,0 +1 @@
+function createWasmAudioWorkletProcessor(audioParams){class WasmAudioWorkletProcessor extends AudioWorkletProcessor{constructor(args){super();globalThis.stackAlloc=Module["stackAlloc"];globalThis.stackSave=Module["stackSave"];globalThis.stackRestore=Module["stackRestore"];globalThis.HEAPU32=Module["HEAPU32"];globalThis.HEAPF32=Module["HEAPF32"];let opts=args.processorOptions;this.callbackFunction=Module["wasmTable"].get(opts["cb"]);this.userData=opts["ud"]}static get parameterDescriptors(){return audioParams}process(inputList,outputList,parameters){let numInputs=inputList.length,numOutputs=outputList.length,numParams=0,i,j,k,dataPtr,stackMemoryNeeded=(numInputs+numOutputs)*8,oldStackPtr=stackSave(),inputsPtr,outputsPtr,outputDataPtr,paramsPtr,didProduceAudio,paramArray;for(i of inputList)stackMemoryNeeded+=i.length*512;for(i of outputList)stackMemoryNeeded+=i.length*512;for(i in parameters)stackMemoryNeeded+=parameters[i].byteLength+8,++numParams;inputsPtr=stackAlloc(stackMemoryNeeded);k=inputsPtr>>2;dataPtr=inputsPtr+numInputs*8;for(i of inputList){HEAPU32[k++]=i.length;HEAPU32[k++]=dataPtr;for(j of i){HEAPF32.set(j,dataPtr>>2);dataPtr+=512}}outputsPtr=dataPtr;k=outputsPtr>>2;outputDataPtr=(dataPtr+=numOutputs*8)>>2;for(i of outputList){HEAPU32[k++]=i.length;HEAPU32[k++]=dataPtr;dataPtr+=512*i.length}paramsPtr=dataPtr;k=paramsPtr>>2;dataPtr+=numParams*8;for(i=0;paramArray=parameters[i++];){HEAPU32[k++]=paramArray.length;HEAPU32[k++]=dataPtr;HEAPF32.set(paramArray,dataPtr>>2);dataPtr+=paramArray.length*4}if(didProduceAudio=this.callbackFunction(numInputs,inputsPtr,numOutputs,outputsPtr,numParams,paramsPtr,this.userData)){for(i of outputList){for(j of i){for(k=0;k<128;++k){j[k]=HEAPF32[outputDataPtr++]}}}}stackRestore(oldStackPtr);return!!didProduceAudio}}return WasmAudioWorkletProcessor}class BootstrapMessages extends 
AudioWorkletProcessor{constructor(arg){super();globalThis.Module=arg["processorOptions"];globalThis.Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasm"],info);receiveInstance(instance,Module["wasm"]);return instance.exports};let p=globalThis["messagePort"]=this.port;p.onmessage=msg=>{let d=msg.data;if(d["_wpn"]){if(globalThis.AudioWorkletModule){AudioWorkletModule(Module);delete globalThis.AudioWorkletModule}registerProcessor(d["_wpn"],createWasmAudioWorkletProcessor(d["audioParams"]));p.postMessage({"_wsc":d["callback"],"x":[d["contextHandle"],1,d["userData"]]})}else if(d["_wsc"]){Module["wasmTable"].get(d["_wsc"])(...d["x"])}}}process(){}}registerProcessor("message",BootstrapMessages);
diff --git a/www/run/amy-audioin.js b/www/run/amy-audioin.js
new file mode 100644
index 000000000..b046c4d89
--- /dev/null
+++ b/www/run/amy-audioin.js
@@ -0,0 +1,19 @@
+
+var amyModule = (() => {
+  var _scriptDir = typeof document !== 'undefined' && document.currentScript ? document.currentScript.src : undefined;
+  if (typeof __filename !== 'undefined') _scriptDir = _scriptDir || __filename;
+  return (
+function(moduleArg = {}) {
+
+function GROWABLE_HEAP_I8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP8}function GROWABLE_HEAP_U8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU8}function GROWABLE_HEAP_I16(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP16}function GROWABLE_HEAP_I32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP32}function GROWABLE_HEAP_U32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU32}function GROWABLE_HEAP_F32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF32}function GROWABLE_HEAP_F64(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF64}var Module=moduleArg;var readyPromiseResolve,readyPromiseReject;Module["ready"]=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject});["_amy_play_message","_amy_reset_sysclock","_amy_live_start","_amy_start","_sequencer_ticks","_malloc","_free","_emscripten_wasm_worker_initialize","___set_thread_state","___indirect_function_table","_ma_device__on_notification_unlocked","_ma_malloc_emscripten","_ma_free_emscripten","_ma_device_process_pcm_frames_capture__webaudio","_ma_device_process_pcm_frames_playback__webaudio","_fflush","___start_em_asm","___stop_em_asm","onRuntimeInitialized"].forEach(prop=>{if(!Object.getOwnPropertyDescriptor(Module["ready"],prop)){Object.defineProperty(Module["ready"],prop,{get:()=>abort("You are getting "+prop+" on the Promise object, instead of the instance. Use .then() to get called back with the instance, see the MODULARIZE docs in src/settings.js"),set:()=>abort("You are setting "+prop+" on the Promise object, instead of the instance. 
Use .then() to get called back with the instance, see the MODULARIZE docs in src/settings.js")})}});var moduleOverrides=Object.assign({},Module);var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var ENVIRONMENT_IS_AUDIO_WORKLET=typeof AudioWorkletGlobalScope!=="undefined";var ENVIRONMENT_IS_WEB=typeof window=="object";var ENVIRONMENT_IS_WORKER=typeof importScripts=="function";var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";var ENVIRONMENT_IS_SHELL=!ENVIRONMENT_IS_WEB&&!ENVIRONMENT_IS_NODE&&!ENVIRONMENT_IS_WORKER&&!ENVIRONMENT_IS_AUDIO_WORKLET;if(Module["ENVIRONMENT"]){throw new Error("Module.ENVIRONMENT has been deprecated. To force the environment, use the ENVIRONMENT compile-time option (for example, -sENVIRONMENT=web or -sENVIRONMENT=node)")}var ENVIRONMENT_IS_WASM_WORKER=Module["$ww"];var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var read_,readAsync,readBinary;if(ENVIRONMENT_IS_NODE){if(typeof process=="undefined"||!process.release||process.release.name!=="node")throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)");var nodeVersion=process.versions.node;var numericVersion=nodeVersion.split(".").slice(0,3);numericVersion=numericVersion[0]*1e4+numericVersion[1]*100+numericVersion[2].split("-")[0]*1;if(numericVersion<16e4){throw new Error("This emscripten-generated code requires node v16.0.0 (detected v"+nodeVersion+")")}var fs=require("fs");var nodePath=require("path");if(ENVIRONMENT_IS_WORKER){scriptDirectory=nodePath.dirname(scriptDirectory)+"/"}else{scriptDirectory=__dirname+"/"}read_=(filename,binary)=>{filename=isFileURI(filename)?new URL(filename):nodePath.normalize(filename);return 
fs.readFileSync(filename,binary?undefined:"utf8")};readBinary=filename=>{var ret=read_(filename,true);if(!ret.buffer){ret=new Uint8Array(ret)}assert(ret.buffer);return ret};readAsync=(filename,onload,onerror,binary=true)=>{filename=isFileURI(filename)?new URL(filename):nodePath.normalize(filename);fs.readFile(filename,binary?undefined:"utf8",(err,data)=>{if(err)onerror(err);else onload(binary?data.buffer:data)})};if(!Module["thisProgram"]&&process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow};Module["inspect"]=()=>"[Emscripten Module object]";let nodeWorkerThreads;try{nodeWorkerThreads=require("worker_threads")}catch(e){console.error('The "worker_threads" module is not supported in this node.js build - perhaps a newer version is needed?');throw e}global.Worker=nodeWorkerThreads.Worker}else if(ENVIRONMENT_IS_SHELL){if(typeof process=="object"&&typeof require==="function"||typeof window=="object"||typeof importScripts=="function")throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)");if(typeof read!="undefined"){read_=read}readBinary=f=>{if(typeof readbuffer=="function"){return new Uint8Array(readbuffer(f))}let data=read(f,"binary");assert(typeof data=="object");return data};readAsync=(f,onload,onerror)=>{setTimeout(()=>onload(readBinary(f)))};if(typeof clearTimeout=="undefined"){globalThis.clearTimeout=id=>{}}if(typeof setTimeout=="undefined"){globalThis.setTimeout=f=>typeof f=="function"?f():abort()}if(typeof scriptArgs!="undefined"){arguments_=scriptArgs}else if(typeof arguments!="undefined"){arguments_=arguments}if(typeof quit=="function"){quit_=(status,toThrow)=>{setTimeout(()=>{if(!(toThrow instanceof ExitStatus)){let toLog=toThrow;if(toThrow&&typeof 
toThrow=="object"&&toThrow.stack){toLog=[toThrow,toThrow.stack]}err(`exiting due to exception: ${toLog}`)}quit(status)});throw toThrow}}if(typeof print!="undefined"){if(typeof console=="undefined")console={};console.log=print;console.warn=console.error=typeof printErr!="undefined"?printErr:print}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){if(ENVIRONMENT_IS_WORKER){scriptDirectory=self.location.href}else if(typeof document!="undefined"&&document.currentScript){scriptDirectory=document.currentScript.src}if(_scriptDir){scriptDirectory=_scriptDir}if(scriptDirectory.indexOf("blob:")!==0){scriptDirectory=scriptDirectory.substr(0,scriptDirectory.replace(/[?#].*/,"").lastIndexOf("/")+1)}else{scriptDirectory=""}if(!(typeof window=="object"||typeof importScripts=="function"))throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)");{read_=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.send(null);return xhr.responseText};if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=(url,onload,onerror)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){onload(xhr.response);return}onerror()};xhr.onerror=onerror;xhr.send(null)}}}else if(!ENVIRONMENT_IS_AUDIO_WORKLET){throw new Error("environment detection error")}var out=Module["print"]||console.log.bind(console);var 
err=Module["printErr"]||console.error.bind(console);Object.assign(Module,moduleOverrides);moduleOverrides=null;checkIncomingModuleAPI();if(Module["arguments"])arguments_=Module["arguments"];legacyModuleProp("arguments","arguments_");if(Module["thisProgram"])thisProgram=Module["thisProgram"];legacyModuleProp("thisProgram","thisProgram");if(Module["quit"])quit_=Module["quit"];legacyModuleProp("quit","quit_");assert(typeof Module["memoryInitializerPrefixURL"]=="undefined","Module.memoryInitializerPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["pthreadMainPrefixURL"]=="undefined","Module.pthreadMainPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["cdInitializerPrefixURL"]=="undefined","Module.cdInitializerPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["filePackagePrefixURL"]=="undefined","Module.filePackagePrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["read"]=="undefined","Module.read option was removed (modify read_ in JS)");assert(typeof Module["readAsync"]=="undefined","Module.readAsync option was removed (modify readAsync in JS)");assert(typeof Module["readBinary"]=="undefined","Module.readBinary option was removed (modify readBinary in JS)");assert(typeof Module["setWindowTitle"]=="undefined","Module.setWindowTitle option was removed (modify emscripten_set_window_title in JS)");assert(typeof Module["TOTAL_MEMORY"]=="undefined","Module.TOTAL_MEMORY has been renamed Module.INITIAL_MEMORY");legacyModuleProp("asm","wasmExports");legacyModuleProp("read","read_");legacyModuleProp("readAsync","readAsync");legacyModuleProp("readBinary","readBinary");legacyModuleProp("setWindowTitle","setWindowTitle");assert(!ENVIRONMENT_IS_SHELL,"shell environment detected but not enabled at build time.  
Add 'shell' to `-sENVIRONMENT` to enable.");var wasmBinary;if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];legacyModuleProp("wasmBinary","wasmBinary");if(typeof WebAssembly!="object"){abort("no native wasm support detected")}var wasmMemory;var wasmModule;var ABORT=false;var EXITSTATUS;function assert(condition,text){if(!condition){abort("Assertion failed"+(text?": "+text:""))}}var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);Module["HEAP16"]=HEAP16=new Int16Array(b);Module["HEAPU8"]=HEAPU8=new Uint8Array(b);Module["HEAPU16"]=HEAPU16=new Uint16Array(b);Module["HEAP32"]=HEAP32=new Int32Array(b);Module["HEAPU32"]=HEAPU32=new Uint32Array(b);Module["HEAPF32"]=HEAPF32=new Float32Array(b);Module["HEAPF64"]=HEAPF64=new Float64Array(b)}assert(!Module["STACK_SIZE"],"STACK_SIZE can no longer be set at runtime.  Use -sSTACK_SIZE at link time");assert(typeof Int32Array!="undefined"&&typeof Float64Array!=="undefined"&&Int32Array.prototype.subarray!=undefined&&Int32Array.prototype.set!=undefined,"JS engine does not provide full typed array support");var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||268435456;legacyModuleProp("INITIAL_MEMORY","INITIAL_MEMORY");assert(INITIAL_MEMORY>=134217728,"INITIAL_MEMORY should be larger than STACK_SIZE, was "+INITIAL_MEMORY+"! 
(STACK_SIZE="+134217728+")");if(Module["wasmMemory"]){wasmMemory=Module["wasmMemory"]}else{wasmMemory=new WebAssembly.Memory({"initial":INITIAL_MEMORY/65536,"maximum":2147483648/65536,"shared":true});if(!(wasmMemory.buffer instanceof SharedArrayBuffer)){err("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag");if(ENVIRONMENT_IS_NODE){err("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)")}throw Error("bad memory")}}updateMemoryViews();INITIAL_MEMORY=wasmMemory.buffer.byteLength;assert(INITIAL_MEMORY%65536===0);function writeStackCookie(){var max=_emscripten_stack_get_end();assert((max&3)==0);if(max==0){max+=4}GROWABLE_HEAP_U32()[max>>2]=34821223;GROWABLE_HEAP_U32()[max+4>>2]=2310721022;GROWABLE_HEAP_U32()[0>>2]=1668509029}function checkStackCookie(){if(ABORT)return;var max=_emscripten_stack_get_end();if(max==0){max+=4}var cookie1=GROWABLE_HEAP_U32()[max>>2];var cookie2=GROWABLE_HEAP_U32()[max+4>>2];if(cookie1!=34821223||cookie2!=2310721022){abort(`Stack overflow! Stack cookie has been overwritten at ${ptrToString(max)}, expected hex dwords 0x89BACDFE and 0x2135467, but received ${ptrToString(cookie2)} ${ptrToString(cookie1)}`)}if(GROWABLE_HEAP_U32()[0>>2]!=1668509029){abort("Runtime error: The application has corrupted its heap memory area (address zero)!")}}(function(){var h16=new Int16Array(1);var h8=new Int8Array(h16.buffer);h16[0]=25459;if(h8[0]!==115||h8[1]!==99)throw"Runtime error: expected the system to be little-endian! 
(Run with -sSUPPORT_BIG_ENDIAN to bypass)"})();var __ATPRERUN__=[];var __ATINIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}function initRuntime(){assert(!runtimeInitialized);runtimeInitialized=true;if(ENVIRONMENT_IS_WASM_WORKER)return _wasmWorkerInitializeRuntime();checkStackCookie();callRuntimeCallbacks(__ATINIT__)}function postRun(){checkStackCookie();if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}function addOnInit(cb){__ATINIT__.unshift(cb)}function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}assert(Math.imul,"This browser does not support Math.imul(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.fround,"This browser does not support Math.fround(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.clz32,"This browser does not support Math.clz32(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.trunc,"This browser does not support Math.trunc(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;var runDependencyTracking={};function addRunDependency(id){runDependencies++;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}if(id){assert(!runDependencyTracking[id]);runDependencyTracking[id]=1;if(runDependencyWatcher===null&&typeof 
setInterval!="undefined"){runDependencyWatcher=setInterval(()=>{if(ABORT){clearInterval(runDependencyWatcher);runDependencyWatcher=null;return}var shown=false;for(var dep in runDependencyTracking){if(!shown){shown=true;err("still waiting on run dependencies:")}err(`dependency: ${dep}`)}if(shown){err("(end of list)")}},1e4)}}else{err("warning: run dependency added without ID")}}function removeRunDependency(id){runDependencies--;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}if(id){assert(runDependencyTracking[id]);delete runDependencyTracking[id]}else{err("warning: run dependency removed without ID")}if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}function abort(what){if(Module["onAbort"]){Module["onAbort"](what)}what="Aborted("+what+")";err(what);ABORT=true;EXITSTATUS=1;if(what.indexOf("RuntimeError: unreachable")>=0){what+='. "unreachable" may be due to ASYNCIFY_STACK_SIZE not being large enough (try increasing it)'}var e=new WebAssembly.RuntimeError(what);readyPromiseReject(e);throw e}var FS={error(){abort("Filesystem support (FS) was not included. The problem is that you are using files from JS, but files were not used from C/C++, so filesystem support was not auto-included. 
You can force-include filesystem support with -sFORCE_FILESYSTEM")},init(){FS.error()},createDataFile(){FS.error()},createPreloadedFile(){FS.error()},createLazyFile(){FS.error()},open(){FS.error()},mkdev(){FS.error()},registerDevice(){FS.error()},analyzePath(){FS.error()},ErrnoError(){FS.error()}};Module["FS_createDataFile"]=FS.createDataFile;Module["FS_createPreloadedFile"]=FS.createPreloadedFile;var dataURIPrefix="data:application/octet-stream;base64,";var isDataURI=filename=>filename.startsWith(dataURIPrefix);var isFileURI=filename=>filename.startsWith("file://");function createExportWrapper(name){return function(){assert(runtimeInitialized,`native function \`${name}\` called before runtime initialization`);var f=wasmExports[name];assert(f,`exported native function \`${name}\` not found`);return f.apply(null,arguments)}}var wasmBinaryFile;wasmBinaryFile="amy-audioin.wasm";if(!isDataURI(wasmBinaryFile)){wasmBinaryFile=locateFile(wasmBinaryFile)}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}function getBinaryPromise(binaryFile){if(!wasmBinary&&(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER)){if(typeof fetch=="function"&&!isFileURI(binaryFile)){return fetch(binaryFile,{credentials:"same-origin"}).then(response=>{if(!response["ok"]){throw"failed to load wasm binary file at '"+binaryFile+"'"}return response["arrayBuffer"]()}).catch(()=>getBinarySync(binaryFile))}else if(readAsync){return new Promise((resolve,reject)=>{readAsync(binaryFile,response=>resolve(new Uint8Array(response)),reject)})}}return Promise.resolve().then(()=>getBinarySync(binaryFile))}function instantiateArrayBuffer(binaryFile,imports,receiver){return getBinaryPromise(binaryFile).then(binary=>WebAssembly.instantiate(binary,imports)).then(instance=>instance).then(receiver,reason=>{err(`failed to asynchronously prepare wasm: 
${reason}`);if(isFileURI(wasmBinaryFile)){err(`warning: Loading from a file URI (${wasmBinaryFile}) is not supported in most browsers. See https://emscripten.org/docs/getting_started/FAQ.html#how-do-i-run-a-local-webserver-for-testing-why-does-my-program-stall-in-downloading-or-preparing`)}abort(reason)})}function instantiateAsync(binary,binaryFile,imports,callback){if(!binary&&typeof WebAssembly.instantiateStreaming=="function"&&!isDataURI(binaryFile)&&!isFileURI(binaryFile)&&!ENVIRONMENT_IS_NODE&&typeof fetch=="function"){return fetch(binaryFile,{credentials:"same-origin"}).then(response=>{var result=WebAssembly.instantiateStreaming(response,imports);return result.then(callback,function(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation");return instantiateArrayBuffer(binaryFile,imports,callback)})})}return instantiateArrayBuffer(binaryFile,imports,callback)}function createWasm(){var info={"env":wasmImports,"wasi_snapshot_preview1":wasmImports};function receiveInstance(instance,module){wasmExports=instance.exports;wasmExports=Asyncify.instrumentWasmExports(wasmExports);wasmTable=wasmExports["__indirect_function_table"];assert(wasmTable,"table not found in wasm exports");Module["wasmTable"]=wasmTable;addOnInit(wasmExports["__wasm_call_ctors"]);wasmModule=module;removeRunDependency("wasm-instantiate");return wasmExports}addRunDependency("wasm-instantiate");var trueModule=Module;function receiveInstantiationResult(result){assert(Module===trueModule,"the Module object should not be replaced during async compilation - perhaps the order of HTML elements is wrong?");trueModule=null;receiveInstance(result["instance"],result["module"])}if(Module["instantiateWasm"]){try{return Module["instantiateWasm"](info,receiveInstance)}catch(e){err(`Module.instantiateWasm callback failed with error: 
${e}`);readyPromiseReject(e)}}instantiateAsync(wasmBinary,wasmBinaryFile,info,receiveInstantiationResult).catch(readyPromiseReject);return{}}function legacyModuleProp(prop,newName,incomming=true){if(!Object.getOwnPropertyDescriptor(Module,prop)){Object.defineProperty(Module,prop,{configurable:true,get(){let extra=incomming?" (the initial value can be provided on Module, but after startup the value is only looked for on a local variable of that name)":"";abort(`\`Module.${prop}\` has been replaced by \`${newName}\``+extra)}})}}function ignoredModuleProp(prop){if(Object.getOwnPropertyDescriptor(Module,prop)){abort(`\`Module.${prop}\` was supplied but \`${prop}\` not included in INCOMING_MODULE_JS_API`)}}function isExportedByForceFilesystem(name){return name==="FS_createPath"||name==="FS_createDataFile"||name==="FS_createPreloadedFile"||name==="FS_unlink"||name==="addRunDependency"||name==="FS_createLazyFile"||name==="FS_createDevice"||name==="removeRunDependency"}function missingGlobal(sym,msg){if(typeof globalThis!=="undefined"){Object.defineProperty(globalThis,sym,{configurable:true,get(){warnOnce(`\`${sym}\` is not longer defined by emscripten. ${msg}`);return undefined}})}}missingGlobal("buffer","Please use HEAP8.buffer or wasmMemory.buffer");missingGlobal("asm","Please use wasmExports instead");function missingLibrarySymbol(sym){if(typeof globalThis!=="undefined"&&!Object.getOwnPropertyDescriptor(globalThis,sym)){Object.defineProperty(globalThis,sym,{configurable:true,get(){var msg=`\`${sym}\` is a library symbol and not included by default; add it to your library.js __deps or to DEFAULT_LIBRARY_FUNCS_TO_INCLUDE on the command line`;var librarySymbol=sym;if(!librarySymbol.startsWith("_")){librarySymbol="$"+sym}msg+=` (e.g. -sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE='${librarySymbol}')`;if(isExportedByForceFilesystem(sym)){msg+=". 
Alternatively, forcing filesystem support (-sFORCE_FILESYSTEM) can export this for you"}warnOnce(msg);return undefined}})}unexportedRuntimeSymbol(sym)}function unexportedRuntimeSymbol(sym){if(!Object.getOwnPropertyDescriptor(Module,sym)){Object.defineProperty(Module,sym,{configurable:true,get(){var msg=`'${sym}' was not exported. add it to EXPORTED_RUNTIME_METHODS (see the Emscripten FAQ)`;if(isExportedByForceFilesystem(sym)){msg+=". Alternatively, forcing filesystem support (-sFORCE_FILESYSTEM) can export this for you"}abort(msg)}})}}var ASM_CONSTS={1110016:$0=>{amy_sequencer_js_hook($0)},1110047:($0,$1,$2,$3,$4)=>{if(typeof window==="undefined"||(window.AudioContext||window.webkitAudioContext)===undefined){return 0}if(typeof window.miniaudio==="undefined"){window.miniaudio={referenceCount:0};window.miniaudio.device_type={};window.miniaudio.device_type.playback=$0;window.miniaudio.device_type.capture=$1;window.miniaudio.device_type.duplex=$2;window.miniaudio.device_state={};window.miniaudio.device_state.stopped=$3;window.miniaudio.device_state.started=$4;miniaudio.devices=[];miniaudio.track_device=function(device){for(var iDevice=0;iDevice<miniaudio.devices.length;++iDevice){if(miniaudio.devices[iDevice]==null){miniaudio.devices[iDevice]=device;return iDevice}}miniaudio.devices.push(device);return miniaudio.devices.length-1};miniaudio.untrack_device_by_index=function(deviceIndex){miniaudio.devices[deviceIndex]=null;while(miniaudio.devices.length>0){if(miniaudio.devices[miniaudio.devices.length-1]==null){miniaudio.devices.pop()}else{break}}};miniaudio.untrack_device=function(device){for(var iDevice=0;iDevice<miniaudio.devices.length;++iDevice){if(miniaudio.devices[iDevice]==device){return miniaudio.untrack_device_by_index(iDevice)}}};miniaudio.get_device_by_index=function(deviceIndex){return miniaudio.devices[deviceIndex]};miniaudio.unlock_event_types=function(){return["touchend","click"]}();miniaudio.unlock=function(){for(var 
i=0;i<miniaudio.devices.length;++i){var device=miniaudio.devices[i];if(device!=null&&device.webaudio!=null&&device.state===window.miniaudio.device_state.started){device.webaudio.resume().then(()=>{Module._ma_device__on_notification_unlocked(device.pDevice)},error=>{console.error("Failed to resume audiocontext",error)})}}miniaudio.unlock_event_types.map(function(event_type){document.removeEventListener(event_type,miniaudio.unlock,true)})};miniaudio.unlock_event_types.map(function(event_type){document.addEventListener(event_type,miniaudio.unlock,true)})}window.miniaudio.referenceCount+=1;return 1},1112205:()=>{if(typeof window.miniaudio!=="undefined"){window.miniaudio.referenceCount-=1;if(window.miniaudio.referenceCount===0){delete window.miniaudio}}},1112369:()=>navigator.mediaDevices!==undefined&&navigator.mediaDevices.getUserMedia!==undefined,1112473:()=>{try{var temp=new(window.AudioContext||window.webkitAudioContext);var sampleRate=temp.sampleRate;temp.close();return sampleRate}catch(e){return 0}},1112644:$0=>miniaudio.track_device({webaudio:emscriptenGetAudioObject($0),state:1}),1112733:($0,$1)=>{var getUserMediaResult=0;var audioWorklet=emscriptenGetAudioObject($0);var audioContext=emscriptenGetAudioObject($1);navigator.mediaDevices.getUserMedia({audio:true,video:false}).then(function(stream){audioContext.streamNode=audioContext.createMediaStreamSource(stream);audioContext.streamNode.connect(audioWorklet);audioWorklet.connect(audioContext.destination);getUserMediaResult=0}).catch(function(error){console.log("navigator.mediaDevices.getUserMedia Failed: "+error);getUserMediaResult=-1});return getUserMediaResult},1113295:($0,$1)=>{var audioWorklet=emscriptenGetAudioObject($0);var audioContext=emscriptenGetAudioObject($1);audioWorklet.connect(audioContext.destination);return 0},1113455:$0=>emscriptenGetAudioObject($0).sampleRate,1113507:$0=>{var 
device=miniaudio.get_device_by_index($0);if(device.streamNode!==undefined){device.streamNode.disconnect();device.streamNode=undefined}},1113663:$0=>{miniaudio.untrack_device_by_index($0)},1113706:$0=>{var device=miniaudio.get_device_by_index($0);device.webaudio.resume();device.state=miniaudio.device_state.started},1113831:$0=>{var device=miniaudio.get_device_by_index($0);device.webaudio.suspend();device.state=miniaudio.device_state.stopped}};function ExitStatus(status){this.name="ExitStatus";this.message=`Program terminated with exit(${status})`;this.status=status}var _wasmWorkerDelayedMessageQueue=[];var wasmTableMirror=[];var wasmTable;var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){if(funcPtr>=wasmTableMirror.length)wasmTableMirror.length=funcPtr+1;wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}assert(wasmTable.get(funcPtr)==func,"JavaScript-side Wasm function table mirror is out of date!");return func};var _wasmWorkerRunPostMessage=e=>{let data=ENVIRONMENT_IS_NODE?e:e.data;let wasmCall=data["_wsc"];wasmCall&&getWasmTableEntry(wasmCall)(...data["x"])};var _wasmWorkerAppendToQueue=e=>{_wasmWorkerDelayedMessageQueue.push(e)};var _wasmWorkerInitializeRuntime=()=>{let m=Module;assert(m["sb"]%16==0);assert(m["sz"]%16==0);_emscripten_wasm_worker_initialize(m["sb"],m["sz"]);if(typeof AudioWorkletGlobalScope==="undefined"){removeEventListener("message",_wasmWorkerAppendToQueue);_wasmWorkerDelayedMessageQueue=_wasmWorkerDelayedMessageQueue.forEach(_wasmWorkerRunPostMessage);addEventListener("message",_wasmWorkerRunPostMessage)}};var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var noExitRuntime=Module["noExitRuntime"]||true;var ptrToString=ptr=>{assert(typeof ptr==="number");ptr>>>=0;return"0x"+ptr.toString(16).padStart(8,"0")};var warnOnce=text=>{if(!warnOnce.shown)warnOnce.shown={};if(!warnOnce.shown[text]){warnOnce.shown[text]=1;if(ENVIRONMENT_IS_NODE)text="warning: "+text;err(text)}};var 
UTF8Decoder=typeof TextDecoder!="undefined"?new TextDecoder("utf8"):undefined;var UTF8ArrayToString=(heapOrArray,idx,maxBytesToRead)=>{var endIdx=idx+maxBytesToRead;var endPtr=idx;while(heapOrArray[endPtr]&&!(endPtr>=endIdx))++endPtr;if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.buffer instanceof SharedArrayBuffer?heapOrArray.slice(idx,endPtr):heapOrArray.subarray(idx,endPtr))}var str="";while(idx<endPtr){var u0=heapOrArray[idx++];if(!(u0&128)){str+=String.fromCharCode(u0);continue}var u1=heapOrArray[idx++]&63;if((u0&224)==192){str+=String.fromCharCode((u0&31)<<6|u1);continue}var u2=heapOrArray[idx++]&63;if((u0&240)==224){u0=(u0&15)<<12|u1<<6|u2}else{if((u0&248)!=240)warnOnce("Invalid UTF-8 leading byte "+ptrToString(u0)+" encountered when deserializing a UTF-8 string in wasm memory to a JS string!");u0=(u0&7)<<18|u1<<12|u2<<6|heapOrArray[idx++]&63}if(u0<65536){str+=String.fromCharCode(u0)}else{var ch=u0-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}}return str};var UTF8ToString=(ptr,maxBytesToRead)=>{assert(typeof ptr=="number",`UTF8ToString expects a number (got ${typeof ptr})`);return ptr?UTF8ArrayToString(GROWABLE_HEAP_U8(),ptr,maxBytesToRead):""};var ___assert_fail=(condition,filename,line,func)=>{abort(`Assertion failed: ${UTF8ToString(condition)}, at: `+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"])};var nowIsMonotonic=1;var __emscripten_get_now_is_monotonic=()=>nowIsMonotonic;var _abort=()=>{abort("native code called abort()")};var readEmAsmArgsArray=[];var readEmAsmArgs=(sigPtr,buf)=>{assert(Array.isArray(readEmAsmArgsArray));assert(buf%16==0);readEmAsmArgsArray.length=0;var ch;while(ch=GROWABLE_HEAP_U8()[sigPtr++]){var chr=String.fromCharCode(ch);var validChars=["d","f","i","p"];assert(validChars.includes(chr),`Invalid character ${ch}("${chr}") in readEmAsmArgs! 
Use only [${validChars}], and do not specify "v" for void return argument.`);var wide=ch!=105;wide&=ch!=112;buf+=wide&&buf%8?4:0;readEmAsmArgsArray.push(ch==112?GROWABLE_HEAP_U32()[buf>>2]:ch==105?GROWABLE_HEAP_I32()[buf>>2]:GROWABLE_HEAP_F64()[buf>>3]);buf+=wide?8:4}return readEmAsmArgsArray};var runEmAsmFunction=(code,sigPtr,argbuf)=>{var args=readEmAsmArgs(sigPtr,argbuf);assert(ASM_CONSTS.hasOwnProperty(code),`No EM_ASM constant found at address ${code}.  The loaded WebAssembly file is likely out of sync with the generated JavaScript.`);return ASM_CONSTS[code].apply(null,args)};var _emscripten_asm_const_int=(code,sigPtr,argbuf)=>runEmAsmFunction(code,sigPtr,argbuf);var EmAudio={};var EmAudioCounter=0;var emscriptenRegisterAudioObject=object=>{assert(object,"Called emscriptenRegisterAudioObject() with a null object handle!");EmAudio[++EmAudioCounter]=object;return EmAudioCounter};var emscriptenGetAudioObject=objectHandle=>EmAudio[objectHandle];var _emscripten_create_audio_context=options=>{let ctx=window.AudioContext||window.webkitAudioContext;if(!ctx)console.error("emscripten_create_audio_context failed! 
Web Audio is not supported.");options>>=2;let opts=options?{latencyHint:GROWABLE_HEAP_U32()[options]?UTF8ToString(GROWABLE_HEAP_U32()[options]):void 0,sampleRate:GROWABLE_HEAP_I32()[options+1]||void 0}:void 0;return ctx&&emscriptenRegisterAudioObject(new ctx(opts))};var _emscripten_create_wasm_audio_worklet_node=(contextHandle,name,options,callback,userData)=>{assert(contextHandle,`Called emscripten_create_wasm_audio_worklet_node() with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_create_wasm_audio_worklet_node() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_create_wasm_audio_worklet_node() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);options>>=2;function readChannelCountArray(heapIndex,numOutputs){let channelCounts=[];while(numOutputs--)channelCounts.push(GROWABLE_HEAP_U32()[heapIndex++]);return channelCounts}let opts=options?{numberOfInputs:GROWABLE_HEAP_I32()[options],numberOfOutputs:GROWABLE_HEAP_I32()[options+1],outputChannelCount:GROWABLE_HEAP_U32()[options+2]?readChannelCountArray(GROWABLE_HEAP_U32()[options+2]>>2,GROWABLE_HEAP_I32()[options+1]):void 0,processorOptions:{"cb":callback,"ud":userData}}:void 0;return emscriptenRegisterAudioObject(new AudioWorkletNode(EmAudio[contextHandle],UTF8ToString(name),opts))};var _emscripten_create_wasm_audio_worklet_processor_async=(contextHandle,options,callback,userData)=>{assert(contextHandle,`Called emscripten_create_wasm_audio_worklet_processor_async() with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_create_wasm_audio_worklet_processor_async() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called 
emscripten_create_wasm_audio_worklet_processor_async() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);options>>=2;let audioParams=[],numAudioParams=GROWABLE_HEAP_U32()[options+1],audioParamDescriptors=GROWABLE_HEAP_U32()[options+2]>>2,i=0;while(numAudioParams--){audioParams.push({name:i++,defaultValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],minValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],maxValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],automationRate:["a","k"][GROWABLE_HEAP_U32()[audioParamDescriptors++]]+"-rate"})}EmAudio[contextHandle].audioWorklet.bootstrapMessage.port.postMessage({_wpn:UTF8ToString(GROWABLE_HEAP_U32()[options]),audioParams:audioParams,contextHandle:contextHandle,callback:callback,userData:userData})};var _emscripten_date_now=()=>Date.now();var _emscripten_destroy_audio_context=contextHandle=>{assert(EmAudio[contextHandle],`Called emscripten_destroy_audio_context() on an already freed context handle ${contextHandle}`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_destroy_audio_context() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);EmAudio[contextHandle].suspend();delete EmAudio[contextHandle]};var _emscripten_destroy_web_audio_node=objectHandle=>{assert(EmAudio[objectHandle],`Called emscripten_destroy_web_audio_node() on a nonexisting/already freed object handle ${objectHandle}`);assert(EmAudio[objectHandle].disconnect,`Called emscripten_destroy_web_audio_node() on a handle ${objectHandle} that is not an Web Audio Node, but of type ${typeof EmAudio[objectHandle]}`);EmAudio[objectHandle].disconnect();delete EmAudio[objectHandle]};var _emscripten_get_now;if(typeof performance!="undefined"&&performance.now){_emscripten_get_now=()=>performance.now()}else{_emscripten_get_now=Date.now}var getHeapMax=()=>2147483648;var growMemory=size=>{var 
b=wasmMemory.buffer;var pages=(size-b.byteLength+65535)/65536;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){err(`growMemory: Attempted to grow heap from ${b.byteLength} bytes to ${size} bytes, but got error: ${e}`)}};var _emscripten_resize_heap=requestedSize=>{var oldSize=GROWABLE_HEAP_U8().length;requestedSize>>>=0;if(requestedSize<=oldSize){return false}var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){err(`Cannot enlarge memory, requested ${requestedSize} bytes, but the limit is ${maxHeapSize} bytes!`);return false}var alignUp=(x,multiple)=>x+(multiple-x%multiple)%multiple;for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignUp(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}err(`Failed to grow the heap from ${oldSize} bytes to ${newSize} bytes, not enough memory!`);return false};var handleException=e=>{if(e instanceof ExitStatus||e=="unwind"){return EXITSTATUS}checkStackCookie();if(e instanceof WebAssembly.RuntimeError){if(_emscripten_stack_get_current()<=0){err("Stack overflow detected.  
You can try increasing -sSTACK_SIZE (currently set to 134217728)")}}quit_(1,e)};var runtimeKeepaliveCounter=0;var keepRuntimeAlive=()=>noExitRuntime||runtimeKeepaliveCounter>0;var SYSCALLS={varargs:undefined,get(){assert(SYSCALLS.varargs!=undefined);var ret=GROWABLE_HEAP_I32()[+SYSCALLS.varargs>>2];SYSCALLS.varargs+=4;return ret},getp(){return SYSCALLS.get()},getStr(ptr){var ret=UTF8ToString(ptr);return ret}};var _proc_exit=code=>{EXITSTATUS=code;if(!keepRuntimeAlive()){if(Module["onExit"])Module["onExit"](code);ABORT=true}quit_(code,new ExitStatus(code))};var exitJS=(status,implicit)=>{EXITSTATUS=status;checkUnflushedContent();if(keepRuntimeAlive()&&!implicit){var msg=`program exited (with status: ${status}), but keepRuntimeAlive() is set (counter=${runtimeKeepaliveCounter}) due to an async operation, so halting execution but not exiting the runtime or preventing further async execution (you can use emscripten_force_exit, if you want to force a true shutdown)`;readyPromiseReject(msg);err(msg)}_proc_exit(status)};var _exit=exitJS;var maybeExit=()=>{if(!keepRuntimeAlive()){try{_exit(EXITSTATUS)}catch(e){handleException(e)}}};var callUserCallback=func=>{if(ABORT){err("user callback triggered after runtime exited or application aborted.  
Ignoring.");return}try{func();maybeExit()}catch(e){handleException(e)}};var safeSetTimeout=(func,timeout)=>setTimeout(()=>{callUserCallback(func)},timeout);var preloadPlugins=Module["preloadPlugins"]||[];var Browser={mainLoop:{running:false,scheduler:null,method:"",currentlyRunningMainloop:0,func:null,arg:0,timingMode:0,timingValue:0,currentFrameNumber:0,queue:[],pause(){Browser.mainLoop.scheduler=null;Browser.mainLoop.currentlyRunningMainloop++},resume(){Browser.mainLoop.currentlyRunningMainloop++;var timingMode=Browser.mainLoop.timingMode;var timingValue=Browser.mainLoop.timingValue;var func=Browser.mainLoop.func;Browser.mainLoop.func=null;setMainLoop(func,0,false,Browser.mainLoop.arg,true);_emscripten_set_main_loop_timing(timingMode,timingValue);Browser.mainLoop.scheduler()},updateStatus(){if(Module["setStatus"]){var message=Module["statusMessage"]||"Please wait...";var remaining=Browser.mainLoop.remainingBlockers;var expected=Browser.mainLoop.expectedBlockers;if(remaining){if(remaining<expected){Module["setStatus"](message+" ("+(expected-remaining)+"/"+expected+")")}else{Module["setStatus"](message)}}else{Module["setStatus"]("")}}},runIter(func){if(ABORT)return;if(Module["preMainLoop"]){var preRet=Module["preMainLoop"]();if(preRet===false){return}}callUserCallback(func);if(Module["postMainLoop"])Module["postMainLoop"]()}},isFullscreen:false,pointerLock:false,moduleContextCreatedCallbacks:[],workers:[],init(){if(Browser.initted)return;Browser.initted=true;var imagePlugin={};imagePlugin["canHandle"]=function imagePlugin_canHandle(name){return!Module.noImageDecoding&&/\.(jpg|jpeg|png|bmp)$/i.test(name)};imagePlugin["handle"]=function imagePlugin_handle(byteArray,name,onload,onerror){var b=new Blob([byteArray],{type:Browser.getMimetype(name)});if(b.size!==byteArray.length){b=new Blob([new Uint8Array(byteArray).buffer],{type:Browser.getMimetype(name)})}var url=URL.createObjectURL(b);assert(typeof url=="string","createObjectURL must return a url as a string");var 
img=new Image;img.onload=()=>{assert(img.complete,`Image ${name} could not be decoded`);var canvas=document.createElement("canvas");canvas.width=img.width;canvas.height=img.height;var ctx=canvas.getContext("2d");ctx.drawImage(img,0,0);preloadedImages[name]=canvas;URL.revokeObjectURL(url);if(onload)onload(byteArray)};img.onerror=event=>{err(`Image ${url} could not be decoded`);if(onerror)onerror()};img.src=url};preloadPlugins.push(imagePlugin);var audioPlugin={};audioPlugin["canHandle"]=function audioPlugin_canHandle(name){return!Module.noAudioDecoding&&name.substr(-4)in{".ogg":1,".wav":1,".mp3":1}};audioPlugin["handle"]=function audioPlugin_handle(byteArray,name,onload,onerror){var done=false;function finish(audio){if(done)return;done=true;preloadedAudios[name]=audio;if(onload)onload(byteArray)}var b=new Blob([byteArray],{type:Browser.getMimetype(name)});var url=URL.createObjectURL(b);assert(typeof url=="string","createObjectURL must return a url as a string");var audio=new Audio;audio.addEventListener("canplaythrough",()=>finish(audio),false);audio.onerror=function audio_onerror(event){if(done)return;err(`warning: browser could not fully decode audio ${name}, trying slower base64 approach`);function encode64(data){var BASE="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";var PAD="=";var ret="";var leftchar=0;var leftbits=0;for(var i=0;i<data.length;i++){leftchar=leftchar<<8|data[i];leftbits+=8;while(leftbits>=6){var curr=leftchar>>leftbits-6&63;leftbits-=6;ret+=BASE[curr]}}if(leftbits==2){ret+=BASE[(leftchar&3)<<4];ret+=PAD+PAD}else if(leftbits==4){ret+=BASE[(leftchar&15)<<2];ret+=PAD}return ret}audio.src="data:audio/x-"+name.substr(-3)+";base64,"+encode64(byteArray);finish(audio)};audio.src=url;safeSetTimeout(()=>{finish(audio)},1e4)};preloadPlugins.push(audioPlugin);function 
pointerLockChange(){Browser.pointerLock=document["pointerLockElement"]===Module["canvas"]||document["mozPointerLockElement"]===Module["canvas"]||document["webkitPointerLockElement"]===Module["canvas"]||document["msPointerLockElement"]===Module["canvas"]}var canvas=Module["canvas"];if(canvas){canvas.requestPointerLock=canvas["requestPointerLock"]||canvas["mozRequestPointerLock"]||canvas["webkitRequestPointerLock"]||canvas["msRequestPointerLock"]||(()=>{});canvas.exitPointerLock=document["exitPointerLock"]||document["mozExitPointerLock"]||document["webkitExitPointerLock"]||document["msExitPointerLock"]||(()=>{});canvas.exitPointerLock=canvas.exitPointerLock.bind(document);document.addEventListener("pointerlockchange",pointerLockChange,false);document.addEventListener("mozpointerlockchange",pointerLockChange,false);document.addEventListener("webkitpointerlockchange",pointerLockChange,false);document.addEventListener("mspointerlockchange",pointerLockChange,false);if(Module["elementPointerLock"]){canvas.addEventListener("click",ev=>{if(!Browser.pointerLock&&Module["canvas"].requestPointerLock){Module["canvas"].requestPointerLock();ev.preventDefault()}},false)}}},createContext(canvas,useWebGL,setInModule,webGLContextAttributes){if(useWebGL&&Module.ctx&&canvas==Module.canvas)return Module.ctx;var ctx;var contextHandle;if(useWebGL){var contextAttributes={antialias:false,alpha:false,majorVersion:1};if(webGLContextAttributes){for(var attribute in webGLContextAttributes){contextAttributes[attribute]=webGLContextAttributes[attribute]}}if(typeof GL!="undefined"){contextHandle=GL.createContext(canvas,contextAttributes);if(contextHandle){ctx=GL.getContext(contextHandle).GLctx}}}else{ctx=canvas.getContext("2d")}if(!ctx)return null;if(setInModule){if(!useWebGL)assert(typeof GLctx=="undefined","cannot set in module if GLctx is used, but we are a non-GL context that would replace 
it");Module.ctx=ctx;if(useWebGL)GL.makeContextCurrent(contextHandle);Module.useWebGL=useWebGL;Browser.moduleContextCreatedCallbacks.forEach(callback=>callback());Browser.init()}return ctx},destroyContext(canvas,useWebGL,setInModule){},fullscreenHandlersInstalled:false,lockPointer:undefined,resizeCanvas:undefined,requestFullscreen(lockPointer,resizeCanvas){Browser.lockPointer=lockPointer;Browser.resizeCanvas=resizeCanvas;if(typeof Browser.lockPointer=="undefined")Browser.lockPointer=true;if(typeof Browser.resizeCanvas=="undefined")Browser.resizeCanvas=false;var canvas=Module["canvas"];function fullscreenChange(){Browser.isFullscreen=false;var canvasContainer=canvas.parentNode;if((document["fullscreenElement"]||document["mozFullScreenElement"]||document["msFullscreenElement"]||document["webkitFullscreenElement"]||document["webkitCurrentFullScreenElement"])===canvasContainer){canvas.exitFullscreen=Browser.exitFullscreen;if(Browser.lockPointer)canvas.requestPointerLock();Browser.isFullscreen=true;if(Browser.resizeCanvas){Browser.setFullscreenCanvasSize()}else{Browser.updateCanvasDimensions(canvas)}}else{canvasContainer.parentNode.insertBefore(canvas,canvasContainer);canvasContainer.parentNode.removeChild(canvasContainer);if(Browser.resizeCanvas){Browser.setWindowedCanvasSize()}else{Browser.updateCanvasDimensions(canvas)}}if(Module["onFullScreen"])Module["onFullScreen"](Browser.isFullscreen);if(Module["onFullscreen"])Module["onFullscreen"](Browser.isFullscreen)}if(!Browser.fullscreenHandlersInstalled){Browser.fullscreenHandlersInstalled=true;document.addEventListener("fullscreenchange",fullscreenChange,false);document.addEventListener("mozfullscreenchange",fullscreenChange,false);document.addEventListener("webkitfullscreenchange",fullscreenChange,false);document.addEventListener("MSFullscreenChange",fullscreenChange,false)}var 
canvasContainer=document.createElement("div");canvas.parentNode.insertBefore(canvasContainer,canvas);canvasContainer.appendChild(canvas);canvasContainer.requestFullscreen=canvasContainer["requestFullscreen"]||canvasContainer["mozRequestFullScreen"]||canvasContainer["msRequestFullscreen"]||(canvasContainer["webkitRequestFullscreen"]?()=>canvasContainer["webkitRequestFullscreen"](Element["ALLOW_KEYBOARD_INPUT"]):null)||(canvasContainer["webkitRequestFullScreen"]?()=>canvasContainer["webkitRequestFullScreen"](Element["ALLOW_KEYBOARD_INPUT"]):null);canvasContainer.requestFullscreen()},requestFullScreen(){abort("Module.requestFullScreen has been replaced by Module.requestFullscreen (without a capital S)")},exitFullscreen(){if(!Browser.isFullscreen){return false}var CFS=document["exitFullscreen"]||document["cancelFullScreen"]||document["mozCancelFullScreen"]||document["msExitFullscreen"]||document["webkitCancelFullScreen"]||(()=>{});CFS.apply(document,[]);return true},nextRAF:0,fakeRequestAnimationFrame(func){var now=Date.now();if(Browser.nextRAF===0){Browser.nextRAF=now+1e3/60}else{while(now+2>=Browser.nextRAF){Browser.nextRAF+=1e3/60}}var delay=Math.max(Browser.nextRAF-now,0);setTimeout(func,delay)},requestAnimationFrame(func){if(typeof requestAnimationFrame=="function"){requestAnimationFrame(func);return}var RAF=Browser.fakeRequestAnimationFrame;RAF(func)},safeSetTimeout(func,timeout){return safeSetTimeout(func,timeout)},safeRequestAnimationFrame(func){return Browser.requestAnimationFrame(()=>{callUserCallback(func)})},getMimetype(name){return{"jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png","bmp":"image/bmp","ogg":"audio/ogg","wav":"audio/wav","mp3":"audio/mpeg"}[name.substr(name.lastIndexOf(".")+1)]},getUserMedia(func){if(!window.getUserMedia){window.getUserMedia=navigator["getUserMedia"]||navigator["mozGetUserMedia"]}window.getUserMedia(func)},getMovementX(event){return 
event["movementX"]||event["mozMovementX"]||event["webkitMovementX"]||0},getMovementY(event){return event["movementY"]||event["mozMovementY"]||event["webkitMovementY"]||0},getMouseWheelDelta(event){var delta=0;switch(event.type){case"DOMMouseScroll":delta=event.detail/3;break;case"mousewheel":delta=event.wheelDelta/120;break;case"wheel":delta=event.deltaY;switch(event.deltaMode){case 0:delta/=100;break;case 1:delta/=3;break;case 2:delta*=80;break;default:throw"unrecognized mouse wheel delta mode: "+event.deltaMode}break;default:throw"unrecognized mouse wheel event: "+event.type}return delta},mouseX:0,mouseY:0,mouseMovementX:0,mouseMovementY:0,touches:{},lastTouches:{},calculateMouseEvent(event){if(Browser.pointerLock){if(event.type!="mousemove"&&"mozMovementX"in event){Browser.mouseMovementX=Browser.mouseMovementY=0}else{Browser.mouseMovementX=Browser.getMovementX(event);Browser.mouseMovementY=Browser.getMovementY(event)}if(typeof SDL!="undefined"){Browser.mouseX=SDL.mouseX+Browser.mouseMovementX;Browser.mouseY=SDL.mouseY+Browser.mouseMovementY}else{Browser.mouseX+=Browser.mouseMovementX;Browser.mouseY+=Browser.mouseMovementY}}else{var rect=Module["canvas"].getBoundingClientRect();var cw=Module["canvas"].width;var ch=Module["canvas"].height;var scrollX=typeof window.scrollX!="undefined"?window.scrollX:window.pageXOffset;var scrollY=typeof window.scrollY!="undefined"?window.scrollY:window.pageYOffset;assert(typeof scrollX!="undefined"&&typeof scrollY!="undefined","Unable to retrieve scroll position, mouse positions likely broken.");if(event.type==="touchstart"||event.type==="touchend"||event.type==="touchmove"){var touch=event.touch;if(touch===undefined){return}var adjustedX=touch.pageX-(scrollX+rect.left);var adjustedY=touch.pageY-(scrollY+rect.top);adjustedX=adjustedX*(cw/rect.width);adjustedY=adjustedY*(ch/rect.height);var 
coords={x:adjustedX,y:adjustedY};if(event.type==="touchstart"){Browser.lastTouches[touch.identifier]=coords;Browser.touches[touch.identifier]=coords}else if(event.type==="touchend"||event.type==="touchmove"){var last=Browser.touches[touch.identifier];if(!last)last=coords;Browser.lastTouches[touch.identifier]=last;Browser.touches[touch.identifier]=coords}return}var x=event.pageX-(scrollX+rect.left);var y=event.pageY-(scrollY+rect.top);x=x*(cw/rect.width);y=y*(ch/rect.height);Browser.mouseMovementX=x-Browser.mouseX;Browser.mouseMovementY=y-Browser.mouseY;Browser.mouseX=x;Browser.mouseY=y}},resizeListeners:[],updateResizeListeners(){var canvas=Module["canvas"];Browser.resizeListeners.forEach(listener=>listener(canvas.width,canvas.height))},setCanvasSize(width,height,noUpdates){var canvas=Module["canvas"];Browser.updateCanvasDimensions(canvas,width,height);if(!noUpdates)Browser.updateResizeListeners()},windowedWidth:0,windowedHeight:0,setFullscreenCanvasSize(){if(typeof SDL!="undefined"){var flags=GROWABLE_HEAP_U32()[SDL.screen>>2];flags=flags|8388608;GROWABLE_HEAP_I32()[SDL.screen>>2]=flags}Browser.updateCanvasDimensions(Module["canvas"]);Browser.updateResizeListeners()},setWindowedCanvasSize(){if(typeof SDL!="undefined"){var flags=GROWABLE_HEAP_U32()[SDL.screen>>2];flags=flags&~8388608;GROWABLE_HEAP_I32()[SDL.screen>>2]=flags}Browser.updateCanvasDimensions(Module["canvas"]);Browser.updateResizeListeners()},updateCanvasDimensions(canvas,wNative,hNative){if(wNative&&hNative){canvas.widthNative=wNative;canvas.heightNative=hNative}else{wNative=canvas.widthNative;hNative=canvas.heightNative}var w=wNative;var 
h=hNative;if(Module["forcedAspectRatio"]&&Module["forcedAspectRatio"]>0){if(w/h<Module["forcedAspectRatio"]){w=Math.round(h*Module["forcedAspectRatio"])}else{h=Math.round(w/Module["forcedAspectRatio"])}}if((document["fullscreenElement"]||document["mozFullScreenElement"]||document["msFullscreenElement"]||document["webkitFullscreenElement"]||document["webkitCurrentFullScreenElement"])===canvas.parentNode&&typeof screen!="undefined"){var factor=Math.min(screen.width/w,screen.height/h);w=Math.round(w*factor);h=Math.round(h*factor)}if(Browser.resizeCanvas){if(canvas.width!=w)canvas.width=w;if(canvas.height!=h)canvas.height=h;if(typeof canvas.style!="undefined"){canvas.style.removeProperty("width");canvas.style.removeProperty("height")}}else{if(canvas.width!=wNative)canvas.width=wNative;if(canvas.height!=hNative)canvas.height=hNative;if(typeof canvas.style!="undefined"){if(w!=wNative||h!=hNative){canvas.style.setProperty("width",w+"px","important");canvas.style.setProperty("height",h+"px","important")}else{canvas.style.removeProperty("width");canvas.style.removeProperty("height")}}}}};var _emscripten_set_main_loop_timing=(mode,value)=>{Browser.mainLoop.timingMode=mode;Browser.mainLoop.timingValue=value;if(!Browser.mainLoop.func){err("emscripten_set_main_loop_timing: Cannot set timing mode for main loop since a main loop does not exist! 
Call emscripten_set_main_loop first to set one up.");return 1}if(!Browser.mainLoop.running){Browser.mainLoop.running=true}if(mode==0){Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_setTimeout(){var timeUntilNextTick=Math.max(0,Browser.mainLoop.tickStartTime+value-_emscripten_get_now())|0;setTimeout(Browser.mainLoop.runner,timeUntilNextTick)};Browser.mainLoop.method="timeout"}else if(mode==1){Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_rAF(){Browser.requestAnimationFrame(Browser.mainLoop.runner)};Browser.mainLoop.method="rAF"}else if(mode==2){if(typeof Browser.setImmediate=="undefined"){if(typeof setImmediate=="undefined"){var setImmediates=[];var emscriptenMainLoopMessageId="setimmediate";var Browser_setImmediate_messageHandler=event=>{if(event.data===emscriptenMainLoopMessageId||event.data.target===emscriptenMainLoopMessageId){event.stopPropagation();setImmediates.shift()()}};addEventListener("message",Browser_setImmediate_messageHandler,true);Browser.setImmediate=function Browser_emulated_setImmediate(func){setImmediates.push(func);if(ENVIRONMENT_IS_WORKER){if(Module["setImmediates"]===undefined)Module["setImmediates"]=[];Module["setImmediates"].push(func);postMessage({target:emscriptenMainLoopMessageId})}else postMessage(emscriptenMainLoopMessageId,"*")}}else{Browser.setImmediate=setImmediate}}Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_setImmediate(){Browser.setImmediate(Browser.mainLoop.runner)};Browser.mainLoop.method="immediate"}return 0};var setMainLoop=(browserIterationFunc,fps,simulateInfiniteLoop,arg,noSetTiming)=>{assert(!Browser.mainLoop.func,"emscripten_set_main_loop: there can only be one main loop function at once: call emscripten_cancel_main_loop to cancel the previous one before setting a new one with different parameters.");Browser.mainLoop.func=browserIterationFunc;Browser.mainLoop.arg=arg;var thisMainLoopId=Browser.mainLoop.currentlyRunningMainloop;function 
checkIsRunning(){if(thisMainLoopId<Browser.mainLoop.currentlyRunningMainloop){return false}return true}Browser.mainLoop.running=false;Browser.mainLoop.runner=function Browser_mainLoop_runner(){if(ABORT)return;if(Browser.mainLoop.queue.length>0){var start=Date.now();var blocker=Browser.mainLoop.queue.shift();blocker.func(blocker.arg);if(Browser.mainLoop.remainingBlockers){var remaining=Browser.mainLoop.remainingBlockers;var next=remaining%1==0?remaining-1:Math.floor(remaining);if(blocker.counted){Browser.mainLoop.remainingBlockers=next}else{next=next+.5;Browser.mainLoop.remainingBlockers=(8*remaining+next)/9}}Browser.mainLoop.updateStatus();if(!checkIsRunning())return;setTimeout(Browser.mainLoop.runner,0);return}if(!checkIsRunning())return;Browser.mainLoop.currentFrameNumber=Browser.mainLoop.currentFrameNumber+1|0;if(Browser.mainLoop.timingMode==1&&Browser.mainLoop.timingValue>1&&Browser.mainLoop.currentFrameNumber%Browser.mainLoop.timingValue!=0){Browser.mainLoop.scheduler();return}else if(Browser.mainLoop.timingMode==0){Browser.mainLoop.tickStartTime=_emscripten_get_now()}if(Browser.mainLoop.method==="timeout"&&Module.ctx){warnOnce("Looks like you are rendering without using requestAnimationFrame for the main loop. 
You should use 0 for the frame rate in emscripten_set_main_loop in order to use requestAnimationFrame, as that can greatly improve your frame rates!");Browser.mainLoop.method=""}Browser.mainLoop.runIter(browserIterationFunc);checkStackCookie();if(!checkIsRunning())return;if(typeof SDL=="object"&&SDL.audio&&SDL.audio.queueNewAudioData)SDL.audio.queueNewAudioData();Browser.mainLoop.scheduler()};if(!noSetTiming){if(fps&&fps>0){_emscripten_set_main_loop_timing(0,1e3/fps)}else{_emscripten_set_main_loop_timing(1,1)}Browser.mainLoop.scheduler()}if(simulateInfiniteLoop){throw"unwind"}};var _emscripten_set_main_loop=(func,fps,simulateInfiniteLoop)=>{var browserIterationFunc=()=>dynCall_v.call(null,func);setMainLoop(browserIterationFunc,fps,simulateInfiniteLoop)};var _emscripten_sleep=ms=>Asyncify.handleSleep(wakeUp=>safeSetTimeout(wakeUp,ms));_emscripten_sleep.isAsync=true;var _wasmWorkersID=1;var _EmAudioDispatchProcessorCallback=e=>{let data=e.data,wasmCall=data["_wsc"];wasmCall&&getWasmTableEntry(wasmCall)(...data["x"])};var _emscripten_start_wasm_audio_worklet_thread_async=(contextHandle,stackLowestAddress,stackSize,callback,userData)=>{assert(contextHandle,`Called emscripten_start_wasm_audio_worklet_thread_async() with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_start_wasm_audio_worklet_thread_async() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_start_wasm_audio_worklet_thread_async() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);let audioContext=EmAudio[contextHandle],audioWorklet=audioContext.audioWorklet;assert(stackLowestAddress!=0,"AudioWorklets require a dedicated stack space for audio data marshalling between Wasm and JS!");assert(stackLowestAddress%16==0,`AudioWorklet stack should be aligned to 16 bytes! 
(was ${stackLowestAddress} == ${stackLowestAddress%16} mod 16) Use e.g. memalign(16, stackSize) to align the stack!`);assert(stackSize!=0,"AudioWorklets require a dedicated stack space for audio data marshalling between Wasm and JS!");assert(stackSize%16==0,`AudioWorklet stack size should be a multiple of 16 bytes! (was ${stackSize} == ${stackSize%16} mod 16)`);assert(!audioContext.audioWorkletInitialized,"emscripten_create_wasm_audio_worklet() was already called for AudioContext "+contextHandle+"! Only call this function once per AudioContext!");audioContext.audioWorkletInitialized=1;let audioWorkletCreationFailed=()=>{((a1,a2,a3)=>dynCall_viii.apply(null,[callback,a1,a2,a3]))(contextHandle,0,userData)};if(!audioWorklet){return audioWorkletCreationFailed()}audioWorklet.addModule("amy-audioin.aw.js").then(()=>{audioWorklet.bootstrapMessage=new AudioWorkletNode(audioContext,"message",{processorOptions:{"$ww":_wasmWorkersID++,"wasm":wasmModule,"wasmMemory":wasmMemory,"sb":stackLowestAddress,"sz":stackSize}});audioWorklet.bootstrapMessage.port.onmessage=_EmAudioDispatchProcessorCallback;return audioWorklet.addModule(Module["mainScriptUrlOrBlob"]||_scriptDir)}).then(()=>{((a1,a2,a3)=>dynCall_viii.apply(null,[callback,a1,a2,a3]))(contextHandle,1,userData)}).catch(audioWorkletCreationFailed)};var _fd_close=fd=>{abort("fd_close called without SYSCALLS_REQUIRE_FILESYSTEM")};var convertI32PairToI53Checked=(lo,hi)=>{assert(lo==lo>>>0||lo==(lo|0));assert(hi===(hi|0));return hi+2097152>>>0<4194305-!!lo?(lo>>>0)+hi*4294967296:NaN};function _fd_seek(fd,offset_low,offset_high,whence,newOffset){var offset=convertI32PairToI53Checked(offset_low,offset_high);return 70}var printCharBuffers=[null,[],[]];var printChar=(stream,curr)=>{var buffer=printCharBuffers[stream];assert(buffer);if(curr===0||curr===10){(stream===1?out:err)(UTF8ArrayToString(buffer,0));buffer.length=0}else{buffer.push(curr)}};var 
flush_NO_FILESYSTEM=()=>{_fflush(0);if(printCharBuffers[1].length)printChar(1,10);if(printCharBuffers[2].length)printChar(2,10)};var _fd_write=(fd,iov,iovcnt,pnum)=>{var num=0;for(var i=0;i<iovcnt;i++){var ptr=GROWABLE_HEAP_U32()[iov>>2];var len=GROWABLE_HEAP_U32()[iov+4>>2];iov+=8;for(var j=0;j<len;j++){printChar(fd,GROWABLE_HEAP_U8()[ptr+j])}num+=len}GROWABLE_HEAP_U32()[pnum>>2]=num;return 0};var runAndAbortIfError=func=>{try{return func()}catch(e){abort(e)}};var runtimeKeepalivePush=()=>{runtimeKeepaliveCounter+=1};var runtimeKeepalivePop=()=>{assert(runtimeKeepaliveCounter>0);runtimeKeepaliveCounter-=1};var Asyncify={instrumentWasmImports(imports){var importPattern=/^(invoke_.*|__asyncjs__.*)$/;for(var x in imports){(function(x){var original=imports[x];var sig=original.sig;if(typeof original=="function"){var isAsyncifyImport=original.isAsync||importPattern.test(x);imports[x]=function(){var originalAsyncifyState=Asyncify.state;try{return original.apply(null,arguments)}finally{var changedToDisabled=originalAsyncifyState===Asyncify.State.Normal&&Asyncify.state===Asyncify.State.Disabled;var ignoredInvoke=x.startsWith("invoke_")&&true;if(Asyncify.state!==originalAsyncifyState&&!isAsyncifyImport&&!changedToDisabled&&!ignoredInvoke){throw new Error(`import ${x} was not in ASYNCIFY_IMPORTS, but changed the state`)}}}}})(x)}},instrumentWasmExports(exports){var ret={};for(var x in exports){(function(x){var original=exports[x];if(typeof original=="function"){ret[x]=function(){Asyncify.exportCallStack.push(x);try{return original.apply(null,arguments)}finally{if(!ABORT){var y=Asyncify.exportCallStack.pop();assert(y===x);Asyncify.maybeStopUnwind()}}}}else{ret[x]=original}})(x)}return ret},State:{Normal:0,Unwinding:1,Rewinding:2,Disabled:3},state:0,StackSize:128e3,currData:null,handleSleepReturnValue:0,exportCallStack:[],callStackNameToId:{},callStackIdToName:{},callStackId:0,asyncPromiseHandlers:null,sleepCallbacks:[],getCallStackId(funcName){var 
id=Asyncify.callStackNameToId[funcName];if(id===undefined){id=Asyncify.callStackId++;Asyncify.callStackNameToId[funcName]=id;Asyncify.callStackIdToName[id]=funcName}return id},maybeStopUnwind(){if(Asyncify.currData&&Asyncify.state===Asyncify.State.Unwinding&&Asyncify.exportCallStack.length===0){Asyncify.state=Asyncify.State.Normal;runAndAbortIfError(_asyncify_stop_unwind);if(typeof Fibers!="undefined"){Fibers.trampoline()}}},whenDone(){assert(Asyncify.currData,"Tried to wait for an async operation when none is in progress.");assert(!Asyncify.asyncPromiseHandlers,"Cannot have multiple async operations in flight at once");return new Promise((resolve,reject)=>{Asyncify.asyncPromiseHandlers={resolve:resolve,reject:reject}})},allocateData(){var ptr=_malloc(12+Asyncify.StackSize);Asyncify.setDataHeader(ptr,ptr+12,Asyncify.StackSize);Asyncify.setDataRewindFunc(ptr);return ptr},setDataHeader(ptr,stack,stackSize){GROWABLE_HEAP_U32()[ptr>>2]=stack;GROWABLE_HEAP_U32()[ptr+4>>2]=stack+stackSize},setDataRewindFunc(ptr){var bottomOfCallStack=Asyncify.exportCallStack[0];var rewindId=Asyncify.getCallStackId(bottomOfCallStack);GROWABLE_HEAP_I32()[ptr+8>>2]=rewindId},getDataRewindFunc(ptr){var id=GROWABLE_HEAP_I32()[ptr+8>>2];var name=Asyncify.callStackIdToName[id];var func=wasmExports[name];return func},doRewind(ptr){var start=Asyncify.getDataRewindFunc(ptr);return start()},handleSleep(startAsync){assert(Asyncify.state!==Asyncify.State.Disabled,"Asyncify cannot be done during or after the runtime exits");if(ABORT)return;if(Asyncify.state===Asyncify.State.Normal){var reachedCallback=false;var reachedAfterCallback=false;startAsync((handleSleepReturnValue=0)=>{assert(!handleSleepReturnValue||typeof handleSleepReturnValue=="number"||typeof handleSleepReturnValue=="boolean");if(ABORT)return;Asyncify.handleSleepReturnValue=handleSleepReturnValue;reachedCallback=true;if(!reachedAfterCallback){return}assert(!Asyncify.exportCallStack.length,"Waking up (starting to rewind) must be done from 
JS, without compiled code on the stack.");Asyncify.state=Asyncify.State.Rewinding;runAndAbortIfError(()=>_asyncify_start_rewind(Asyncify.currData));if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.resume()}var asyncWasmReturnValue,isError=false;try{asyncWasmReturnValue=Asyncify.doRewind(Asyncify.currData)}catch(err){asyncWasmReturnValue=err;isError=true}var handled=false;if(!Asyncify.currData){var asyncPromiseHandlers=Asyncify.asyncPromiseHandlers;if(asyncPromiseHandlers){Asyncify.asyncPromiseHandlers=null;(isError?asyncPromiseHandlers.reject:asyncPromiseHandlers.resolve)(asyncWasmReturnValue);handled=true}}if(isError&&!handled){throw asyncWasmReturnValue}});reachedAfterCallback=true;if(!reachedCallback){Asyncify.state=Asyncify.State.Unwinding;Asyncify.currData=Asyncify.allocateData();if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.pause()}runAndAbortIfError(()=>_asyncify_start_unwind(Asyncify.currData))}}else if(Asyncify.state===Asyncify.State.Rewinding){Asyncify.state=Asyncify.State.Normal;runAndAbortIfError(_asyncify_stop_rewind);_free(Asyncify.currData);Asyncify.currData=null;Asyncify.sleepCallbacks.forEach(func=>callUserCallback(func))}else{abort(`invalid state: ${Asyncify.state}`)}return Asyncify.handleSleepReturnValue},handleAsync(startAsync){return Asyncify.handleSleep(wakeUp=>{startAsync().then(wakeUp)})}};var getCFunc=ident=>{var func=Module["_"+ident];assert(func,"Cannot call unknown function "+ident+", make sure it is exported");return func};var writeArrayToMemory=(array,buffer)=>{assert(array.length>=0,"writeArrayToMemory array must have a length (should be an array or typed array)");GROWABLE_HEAP_I8().set(array,buffer)};var lengthBytesUTF8=str=>{var len=0;for(var i=0;i<str.length;++i){var c=str.charCodeAt(i);if(c<=127){len++}else if(c<=2047){len+=2}else if(c>=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{assert(typeof 
str==="string",`stringToUTF8Array expects a string (got ${typeof str})`);if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i<str.length;++i){var u=str.charCodeAt(i);if(u>=55296&&u<=57343){var u1=str.charCodeAt(++i);u=65536+((u&1023)<<10)|u1&1023}if(u<=127){if(outIdx>=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;if(u>1114111)warnOnce("Invalid Unicode code point "+ptrToString(u)+" encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x10FFFF).");heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}}heap[outIdx]=0;return outIdx-startIdx};var stringToUTF8=(str,outPtr,maxBytesToWrite)=>{assert(typeof maxBytesToWrite=="number","stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!");return stringToUTF8Array(str,GROWABLE_HEAP_U8(),outPtr,maxBytesToWrite)};var stringToUTF8OnStack=str=>{var size=lengthBytesUTF8(str)+1;var ret=stackAlloc(size);stringToUTF8(str,ret,size);return ret};var ccall=(ident,returnType,argTypes,args,opts)=>{var toC={"string":str=>{var ret=0;if(str!==null&&str!==undefined&&str!==0){ret=stringToUTF8OnStack(str)}return ret},"array":arr=>{var ret=stackAlloc(arr.length);writeArrayToMemory(arr,ret);return ret}};function convertReturnValue(ret){if(returnType==="string"){return UTF8ToString(ret)}if(returnType==="boolean")return Boolean(ret);return ret}var func=getCFunc(ident);var cArgs=[];var stack=0;assert(returnType!=="array",'Return type should not be "array".');if(args){for(var i=0;i<args.length;i++){var 
converter=toC[argTypes[i]];if(converter){if(stack===0)stack=stackSave();cArgs[i]=converter(args[i])}else{cArgs[i]=args[i]}}}var previousAsync=Asyncify.currData;var ret=func.apply(null,cArgs);function onDone(ret){runtimeKeepalivePop();if(stack!==0)stackRestore(stack);return convertReturnValue(ret)}var asyncMode=opts&&opts.async;runtimeKeepalivePush();if(Asyncify.currData!=previousAsync){assert(!(previousAsync&&Asyncify.currData),"We cannot start an async operation when one is already flight");assert(!(previousAsync&&!Asyncify.currData),"We cannot stop an async operation in flight");assert(asyncMode,"The call to "+ident+" is running asynchronously. If this was intended, add the async option to the ccall/cwrap call.");return Asyncify.whenDone().then(onDone)}ret=onDone(ret);if(asyncMode)return Promise.resolve(ret);return ret};var cwrap=(ident,returnType,argTypes,opts)=>function(){return ccall(ident,returnType,argTypes,arguments,opts)};Module["requestFullscreen"]=Browser.requestFullscreen;Module["requestFullScreen"]=Browser.requestFullScreen;Module["requestAnimationFrame"]=Browser.requestAnimationFrame;Module["setCanvasSize"]=Browser.setCanvasSize;Module["pauseMainLoop"]=Browser.mainLoop.pause;Module["resumeMainLoop"]=Browser.mainLoop.resume;Module["getUserMedia"]=Browser.getUserMedia;Module["createContext"]=Browser.createContext;var preloadedImages={};var preloadedAudios={};function checkIncomingModuleAPI(){ignoredModuleProp("fetchSettings")}var 
wasmImports={__assert_fail:___assert_fail,_emscripten_get_now_is_monotonic:__emscripten_get_now_is_monotonic,abort:_abort,emscripten_asm_const_int:_emscripten_asm_const_int,emscripten_create_audio_context:_emscripten_create_audio_context,emscripten_create_wasm_audio_worklet_node:_emscripten_create_wasm_audio_worklet_node,emscripten_create_wasm_audio_worklet_processor_async:_emscripten_create_wasm_audio_worklet_processor_async,emscripten_date_now:_emscripten_date_now,emscripten_destroy_audio_context:_emscripten_destroy_audio_context,emscripten_destroy_web_audio_node:_emscripten_destroy_web_audio_node,emscripten_get_now:_emscripten_get_now,emscripten_resize_heap:_emscripten_resize_heap,emscripten_set_main_loop:_emscripten_set_main_loop,emscripten_sleep:_emscripten_sleep,emscripten_start_wasm_audio_worklet_thread_async:_emscripten_start_wasm_audio_worklet_thread_async,exit:_exit,fd_close:_fd_close,fd_seek:_fd_seek,fd_write:_fd_write,memory:wasmMemory};Asyncify.instrumentWasmImports(wasmImports);var wasmExports=createWasm();var ___wasm_call_ctors=createExportWrapper("__wasm_call_ctors");var _free=Module["_free"]=createExportWrapper("free");var _malloc=Module["_malloc"]=createExportWrapper("malloc");var _amy_start=Module["_amy_start"]=createExportWrapper("amy_start");var _amy_reset_sysclock=Module["_amy_reset_sysclock"]=createExportWrapper("amy_reset_sysclock");var _amy_play_message=Module["_amy_play_message"]=createExportWrapper("amy_play_message");var _sequencer_ticks=Module["_sequencer_ticks"]=createExportWrapper("sequencer_ticks");var ___errno_location=createExportWrapper("__errno_location");var _ma_device__on_notification_unlocked=Module["_ma_device__on_notification_unlocked"]=createExportWrapper("ma_device__on_notification_unlocked");var _ma_malloc_emscripten=Module["_ma_malloc_emscripten"]=createExportWrapper("ma_malloc_emscripten");var _ma_free_emscripten=Module["_ma_free_emscripten"]=createExportWrapper("ma_free_emscripten");var 
_ma_device_process_pcm_frames_capture__webaudio=Module["_ma_device_process_pcm_frames_capture__webaudio"]=createExportWrapper("ma_device_process_pcm_frames_capture__webaudio");var _ma_device_process_pcm_frames_playback__webaudio=Module["_ma_device_process_pcm_frames_playback__webaudio"]=createExportWrapper("ma_device_process_pcm_frames_playback__webaudio");var _amy_live_start=Module["_amy_live_start"]=createExportWrapper("amy_live_start");var _fflush=Module["_fflush"]=createExportWrapper("fflush");var _emscripten_stack_init=()=>(_emscripten_stack_init=wasmExports["emscripten_stack_init"])();var _emscripten_stack_get_free=()=>(_emscripten_stack_get_free=wasmExports["emscripten_stack_get_free"])();var _emscripten_stack_get_base=()=>(_emscripten_stack_get_base=wasmExports["emscripten_stack_get_base"])();var _emscripten_stack_get_end=()=>(_emscripten_stack_get_end=wasmExports["emscripten_stack_get_end"])();var _emscripten_wasm_worker_initialize=Module["_emscripten_wasm_worker_initialize"]=createExportWrapper("emscripten_wasm_worker_initialize");var stackSave=createExportWrapper("stackSave");var stackRestore=createExportWrapper("stackRestore");var stackAlloc=createExportWrapper("stackAlloc");var _emscripten_stack_get_current=()=>(_emscripten_stack_get_current=wasmExports["emscripten_stack_get_current"])();var dynCall_ii=Module["dynCall_ii"]=createExportWrapper("dynCall_ii");var dynCall_vii=Module["dynCall_vii"]=createExportWrapper("dynCall_vii");var dynCall_iiii=Module["dynCall_iiii"]=createExportWrapper("dynCall_iiii");var dynCall_iii=Module["dynCall_iii"]=createExportWrapper("dynCall_iii");var dynCall_iiiii=Module["dynCall_iiiii"]=createExportWrapper("dynCall_iiiii");var dynCall_viii=Module["dynCall_viii"]=createExportWrapper("dynCall_viii");var dynCall_viiii=Module["dynCall_viiii"]=createExportWrapper("dynCall_viiii");var dynCall_v=Module["dynCall_v"]=createExportWrapper("dynCall_v");var 
dynCall_iiiiiiii=Module["dynCall_iiiiiiii"]=createExportWrapper("dynCall_iiiiiiii");var dynCall_iiiji=Module["dynCall_iiiji"]=createExportWrapper("dynCall_iiiji");var dynCall_iiiiiii=Module["dynCall_iiiiiii"]=createExportWrapper("dynCall_iiiiiii");var dynCall_jii=Module["dynCall_jii"]=createExportWrapper("dynCall_jii");var dynCall_jiji=Module["dynCall_jiji"]=createExportWrapper("dynCall_jiji");var dynCall_iidiiii=Module["dynCall_iidiiii"]=createExportWrapper("dynCall_iidiiii");var _asyncify_start_unwind=createExportWrapper("asyncify_start_unwind");var _asyncify_stop_unwind=createExportWrapper("asyncify_stop_unwind");var _asyncify_start_rewind=createExportWrapper("asyncify_start_rewind");var _asyncify_stop_rewind=createExportWrapper("asyncify_stop_rewind");Module["stackAlloc"]=stackAlloc;Module["stackSave"]=stackSave;Module["stackRestore"]=stackRestore;Module["ccall"]=ccall;Module["cwrap"]=cwrap;var missingLibrarySymbols=["writeI53ToI64","writeI53ToI64Clamped","writeI53ToI64Signaling","writeI53ToU64Clamped","writeI53ToU64Signaling","readI53FromI64","readI53FromU64","convertI32PairToI53","convertU32PairToI53","zeroMemory","isLeapYear","ydayFromDate","arraySum","addDays","setErrNo","inetPton4","inetNtop4","inetPton6","inetNtop6","readSockaddr","writeSockaddr","getHostByName","initRandomFill","randomFill","getCallstack","emscriptenLog","convertPCtoSourceLocation","runMainThreadEmAsm","jstoi_q","jstoi_s","getExecutableName","listenOnce","autoResumeAudioContext","dynCallLegacy","getDynCaller","dynCall","asmjsMangle","asyncLoad","alignMemory","mmapAlloc","handleAllocatorInit","HandleAllocator","getNativeTypeSize","STACK_SIZE","STACK_ALIGN","POINTER_SIZE","ASSERTIONS","uleb128Encode","generateFuncType","convertJsFunctionToWasm","getEmptyTableSlot","updateTableMap","getFunctionAddress","addFunction","removeFunction","reallyNegative","unSign","strLen","reSign","formatString","intArrayFromString","intArrayToString","AsciiToString","stringToAscii","UTF16ToString","stringToUTF16
","lengthBytesUTF16","UTF32ToString","stringToUTF32","lengthBytesUTF32","stringToNewUTF8","registerKeyEventCallback","maybeCStringToJsString","findEventTarget","findCanvasEventTarget","getBoundingClientRect","fillMouseEventData","registerMouseEventCallback","registerWheelEventCallback","registerUiEventCallback","registerFocusEventCallback","fillDeviceOrientationEventData","registerDeviceOrientationEventCallback","fillDeviceMotionEventData","registerDeviceMotionEventCallback","screenOrientation","fillOrientationChangeEventData","registerOrientationChangeEventCallback","fillFullscreenChangeEventData","registerFullscreenChangeEventCallback","JSEvents_requestFullscreen","JSEvents_resizeCanvasForFullscreen","registerRestoreOldStyle","hideEverythingExceptGivenElement","restoreHiddenElements","setLetterbox","softFullscreenResizeWebGLRenderTarget","doRequestFullscreen","fillPointerlockChangeEventData","registerPointerlockChangeEventCallback","registerPointerlockErrorEventCallback","requestPointerLock","fillVisibilityChangeEventData","registerVisibilityChangeEventCallback","registerTouchEventCallback","fillGamepadEventData","registerGamepadEventCallback","registerBeforeUnloadEventCallback","fillBatteryEventData","battery","registerBatteryEventCallback","setCanvasElementSize","getCanvasElementSize","demangle","demangleAll","jsStackTrace","stackTrace","getEnvStrings","checkWasiClock","wasiRightsToMuslOFlags","wasiOFlagsToMuslOFlags","createDyncallWrapper","setImmediateWrapped","clearImmediateWrapped","polyfillSetImmediate","getPromise","makePromise","idsToPromises","makePromiseCallback","ExceptionInfo","findMatchingCatch","getSocketFromFD","getSocketAddress","FS_createPreloadedFile","FS_modeStringToFlags","FS_getMode","FS_stdin_getChar","FS_createDataFile","FS_unlink","FS_mkdirTree","_setNetworkCallback","heapObjectForWebGLType","heapAccessShiftForWebGLHeap","webgl_enable_ANGLE_instanced_arrays","webgl_enable_OES_vertex_array_object","webgl_enable_WEBGL_draw_buffers","webgl_en
able_WEBGL_multi_draw","emscriptenWebGLGet","computeUnpackAlignedImageSize","colorChannelsInGlTextureFormat","emscriptenWebGLGetTexPixelData","__glGenObject","emscriptenWebGLGetUniform","webglGetUniformLocation","webglPrepareUniformLocationsBeforeFirstUse","webglGetLeftBracePos","emscriptenWebGLGetVertexAttrib","__glGetActiveAttribOrUniform","writeGLArray","registerWebGlEventCallback","SDL_unicode","SDL_ttfContext","SDL_audio","ALLOC_NORMAL","ALLOC_STACK","allocate","writeStringToMemory","writeAsciiToMemory","_wasmWorkerPostFunction1","_wasmWorkerPostFunction2","_wasmWorkerPostFunction3","emscripten_audio_worklet_post_function_1","emscripten_audio_worklet_post_function_2","emscripten_audio_worklet_post_function_3"];missingLibrarySymbols.forEach(missingLibrarySymbol);var unexportedSymbols=["run","addOnPreRun","addOnInit","addOnPreMain","addOnExit","addOnPostRun","addRunDependency","removeRunDependency","FS_createFolder","FS_createPath","FS_createLazyFile","FS_createLink","FS_createDevice","FS_readFile","out","err","callMain","abort","wasmMemory","wasmExports","getTempRet0","setTempRet0","writeStackCookie","checkStackCookie","convertI32PairToI53Checked","ptrToString","exitJS","getHeapMax","growMemory","ENV","MONTH_DAYS_REGULAR","MONTH_DAYS_LEAP","MONTH_DAYS_REGULAR_CUMULATIVE","MONTH_DAYS_LEAP_CUMULATIVE","ERRNO_CODES","ERRNO_MESSAGES","DNS","Protocols","Sockets","timers","warnOnce","UNWIND_CACHE","readEmAsmArgsArray","readEmAsmArgs","runEmAsmFunction","handleException","keepRuntimeAlive","runtimeKeepalivePush","runtimeKeepalivePop","callUserCallback","maybeExit","wasmTable","noExitRuntime","getCFunc","sigToWasmTypes","freeTableIndexes","functionsInTableMap","setValue","getValue","PATH","PATH_FS","UTF8Decoder","UTF8ArrayToString","UTF8ToString","stringToUTF8Array","stringToUTF8","lengthBytesUTF8","UTF16Decoder","stringToUTF8OnStack","writeArrayToMemory","JSEvents","specialHTMLTargets","currentFullscreenStrategy","restoreOldWindowedStyle","ExitStatus","flush_NO_FILESYS
TEM","safeSetTimeout","promiseMap","uncaughtExceptionCount","exceptionLast","exceptionCaught","Browser","setMainLoop","wget","SYSCALLS","preloadPlugins","FS_stdin_getChar_buffer","FS","MEMFS","TTY","PIPEFS","SOCKFS","tempFixedLengthArray","miniTempWebGLFloatBuffers","miniTempWebGLIntBuffers","GL","emscripten_webgl_power_preferences","AL","GLUT","EGL","GLEW","IDBStore","runAndAbortIfError","Asyncify","Fibers","SDL","SDL_gfx","allocateUTF8","allocateUTF8OnStack","_wasmWorkers","_wasmWorkersID","_wasmWorkerDelayedMessageQueue","_wasmWorkerAppendToQueue","_wasmWorkerRunPostMessage","_wasmWorkerInitializeRuntime","EmAudio","EmAudioCounter","emscriptenRegisterAudioObject","emscriptenDestroyAudioContext","emscriptenGetAudioObject","_EmAudioDispatchProcessorCallback"];unexportedSymbols.forEach(unexportedRuntimeSymbol);var calledRun;dependenciesFulfilled=function runCaller(){if(!calledRun)run();if(!calledRun)dependenciesFulfilled=runCaller};function stackCheckInit(){_emscripten_stack_init();writeStackCookie()}function run(){if(runDependencies>0){return}stackCheckInit();if(ENVIRONMENT_IS_WASM_WORKER){readyPromiseResolve(Module);return initRuntime()}preRun();if(runDependencies>0){return}function doRun(){if(calledRun)return;calledRun=true;Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve(Module);if(Module["onRuntimeInitialized"])Module["onRuntimeInitialized"]();assert(!Module["_main"],'compiled without a main, but one is present. if you added it from JS, use Module["onRuntimeInitialized"]');postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(function(){setTimeout(function(){Module["setStatus"]("")},1);doRun()},1)}else{doRun()}checkStackCookie()}function checkUnflushedContent(){var oldOut=out;var oldErr=err;var has=false;out=err=x=>{has=true};try{flush_NO_FILESYSTEM()}catch(e){}out=oldOut;err=oldErr;if(has){warnOnce("stdio streams had content in them that was not flushed. 
you should set EXIT_RUNTIME to 1 (see the Emscripten FAQ), or make sure to emit a newline when you printf etc.");warnOnce("(this may also be due to not including full filesystem support - try building with -sFORCE_FILESYSTEM)")}}if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}run();
+
+
+  return moduleArg.ready
+}
+);
+})();
+globalThis.AudioWorkletModule = amyModule;
+if (typeof exports === 'object' && typeof module === 'object')
+  module.exports = amyModule;
+else if (typeof define === 'function' && define['amd'])
+  define([], () => amyModule);
diff --git a/www/run/amy-audioin.wasm b/www/run/amy-audioin.wasm
new file mode 100755
index 000000000..ddd295b60
Binary files /dev/null and b/www/run/amy-audioin.wasm differ
diff --git a/www/run/amy.aw.js b/www/run/amy.aw.js
index 2ff898627..818a1fbc4 100644
--- a/www/run/amy.aw.js
+++ b/www/run/amy.aw.js
@@ -1 +1 @@
-function createWasmAudioWorkletProcessor(audioParams){class WasmAudioWorkletProcessor extends AudioWorkletProcessor{constructor(args){super();globalThis.stackAlloc=Module["stackAlloc"];globalThis.stackSave=Module["stackSave"];globalThis.stackRestore=Module["stackRestore"];globalThis.HEAPU32=Module["HEAPU32"];globalThis.HEAPF32=Module["HEAPF32"];let opts=args.processorOptions;this.callbackFunction=Module["wasmTable"].get(opts["cb"]);this.userData=opts["ud"]}static get parameterDescriptors(){return audioParams}process(inputList,outputList,parameters){let numInputs=inputList.length,numOutputs=outputList.length,numParams=0,i,j,k,dataPtr,stackMemoryNeeded=(numInputs+numOutputs)*8,oldStackPtr=stackSave(),inputsPtr,outputsPtr,outputDataPtr,paramsPtr,didProduceAudio,paramArray;for(i of inputList)stackMemoryNeeded+=i.length*512;for(i of outputList)stackMemoryNeeded+=i.length*512;for(i in parameters)stackMemoryNeeded+=parameters[i].byteLength+8,++numParams;inputsPtr=stackAlloc(stackMemoryNeeded);k=inputsPtr>>2;dataPtr=inputsPtr+numInputs*8;for(i of inputList){HEAPU32[k++]=i.length;HEAPU32[k++]=dataPtr;for(j of i){HEAPF32.set(j,dataPtr>>2);dataPtr+=512}}outputsPtr=dataPtr;k=outputsPtr>>2;outputDataPtr=(dataPtr+=numOutputs*8)>>2;for(i of outputList){HEAPU32[k++]=i.length;HEAPU32[k++]=dataPtr;dataPtr+=512*i.length}paramsPtr=dataPtr;k=paramsPtr>>2;dataPtr+=numParams*8;for(i=0;paramArray=parameters[i++];){HEAPU32[k++]=paramArray.length;HEAPU32[k++]=dataPtr;HEAPF32.set(paramArray,dataPtr>>2);dataPtr+=paramArray.length*4}if(didProduceAudio=this.callbackFunction(numInputs,inputsPtr,numOutputs,outputsPtr,numParams,paramsPtr,this.userData)){for(i of outputList){for(j of i){for(k=0;k<128;++k){j[k]=HEAPF32[outputDataPtr++]}}}}stackRestore(oldStackPtr);return!!didProduceAudio}}return WasmAudioWorkletProcessor}class BootstrapMessages extends 
AudioWorkletProcessor{constructor(arg){super();globalThis.Module=arg["processorOptions"];globalThis.Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasm"],info);receiveInstance(instance,Module["wasm"]);return instance.exports};let p=globalThis["messagePort"]=this.port;p.onmessage=async msg=>{let d=msg.data;if(d["_wpn"]){if(globalThis.AudioWorkletModule){globalThis.Module=await AudioWorkletModule(Module);delete globalThis.AudioWorkletModule}registerProcessor(d["_wpn"],createWasmAudioWorkletProcessor(d["audioParams"]));p.postMessage({_wsc:d["callback"],x:[d["contextHandle"],1,d["userData"]]})}else if(d["_wsc"]){Module["wasmTable"].get(d["_wsc"])(...d["x"])}}}process(){}}registerProcessor("message",BootstrapMessages);
+function createWasmAudioWorkletProcessor(audioParams){class WasmAudioWorkletProcessor extends AudioWorkletProcessor{constructor(args){super();globalThis.stackAlloc=Module["stackAlloc"];globalThis.stackSave=Module["stackSave"];globalThis.stackRestore=Module["stackRestore"];globalThis.HEAPU32=Module["HEAPU32"];globalThis.HEAPF32=Module["HEAPF32"];let opts=args.processorOptions;this.callbackFunction=Module["wasmTable"].get(opts["cb"]);this.userData=opts["ud"]}static get parameterDescriptors(){return audioParams}process(inputList,outputList,parameters){let numInputs=inputList.length,numOutputs=outputList.length,numParams=0,i,j,k,dataPtr,stackMemoryNeeded=(numInputs+numOutputs)*8,oldStackPtr=stackSave(),inputsPtr,outputsPtr,outputDataPtr,paramsPtr,didProduceAudio,paramArray;for(i of inputList)stackMemoryNeeded+=i.length*512;for(i of outputList)stackMemoryNeeded+=i.length*512;for(i in parameters)stackMemoryNeeded+=parameters[i].byteLength+8,++numParams;inputsPtr=stackAlloc(stackMemoryNeeded);k=inputsPtr>>2;dataPtr=inputsPtr+numInputs*8;for(i of inputList){HEAPU32[k++]=i.length;HEAPU32[k++]=dataPtr;for(j of i){HEAPF32.set(j,dataPtr>>2);dataPtr+=512}}outputsPtr=dataPtr;k=outputsPtr>>2;outputDataPtr=(dataPtr+=numOutputs*8)>>2;for(i of outputList){HEAPU32[k++]=i.length;HEAPU32[k++]=dataPtr;dataPtr+=512*i.length}paramsPtr=dataPtr;k=paramsPtr>>2;dataPtr+=numParams*8;for(i=0;paramArray=parameters[i++];){HEAPU32[k++]=paramArray.length;HEAPU32[k++]=dataPtr;HEAPF32.set(paramArray,dataPtr>>2);dataPtr+=paramArray.length*4}if(didProduceAudio=this.callbackFunction(numInputs,inputsPtr,numOutputs,outputsPtr,numParams,paramsPtr,this.userData)){for(i of outputList){for(j of i){for(k=0;k<128;++k){j[k]=HEAPF32[outputDataPtr++]}}}}stackRestore(oldStackPtr);return!!didProduceAudio}}return WasmAudioWorkletProcessor}class BootstrapMessages extends 
AudioWorkletProcessor{constructor(arg){super();globalThis.Module=arg["processorOptions"];globalThis.Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasm"],info);receiveInstance(instance,Module["wasm"]);return instance.exports};let p=globalThis["messagePort"]=this.port;p.onmessage=msg=>{let d=msg.data;if(d["_wpn"]){if(globalThis.AudioWorkletModule){AudioWorkletModule(Module);delete globalThis.AudioWorkletModule}registerProcessor(d["_wpn"],createWasmAudioWorkletProcessor(d["audioParams"]));p.postMessage({"_wsc":d["callback"],"x":[d["contextHandle"],1,d["userData"]]})}else if(d["_wsc"]){Module["wasmTable"].get(d["_wsc"])(...d["x"])}}}process(){}}registerProcessor("message",BootstrapMessages);
diff --git a/www/run/amy.js b/www/run/amy.js
index 28029f9c6..152a571f8 100644
--- a/www/run/amy.js
+++ b/www/run/amy.js
@@ -1,15 +1,14 @@
 
 var amyModule = (() => {
-  var _scriptName = typeof document != 'undefined' ? document.currentScript?.src : undefined;
-  if (typeof __filename != 'undefined') _scriptName = _scriptName || __filename;
+  var _scriptDir = typeof document !== 'undefined' && document.currentScript ? document.currentScript.src : undefined;
+  if (typeof __filename !== 'undefined') _scriptDir = _scriptDir || __filename;
   return (
 function(moduleArg = {}) {
-  var moduleRtn;
 
-function GROWABLE_HEAP_I8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP8}function GROWABLE_HEAP_U8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU8}function GROWABLE_HEAP_I16(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP16}function GROWABLE_HEAP_I32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP32}function GROWABLE_HEAP_U32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU32}function GROWABLE_HEAP_F32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF32}function GROWABLE_HEAP_F64(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF64}var Module=moduleArg;var readyPromiseResolve,readyPromiseReject;var readyPromise=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject});["_amy_play_message","_amy_reset_sysclock","_amy_live_start","_amy_start","_sequencer_ticks","_malloc","_free","___indirect_function_table","_ma_device__on_notification_unlocked","_ma_malloc_emscripten","_ma_free_emscripten","_ma_device_process_pcm_frames_capture__webaudio","_ma_device_process_pcm_frames_playback__webaudio","onRuntimeInitialized"].forEach(prop=>{if(!Object.getOwnPropertyDescriptor(readyPromise,prop)){Object.defineProperty(readyPromise,prop,{get:()=>abort("You are getting "+prop+" on the Promise object, instead of the instance. Use .then() to get called back with the instance, see the MODULARIZE docs in src/settings.js"),set:()=>abort("You are setting "+prop+" on the Promise object, instead of the instance. 
Use .then() to get called back with the instance, see the MODULARIZE docs in src/settings.js")})}});var ENVIRONMENT_IS_AUDIO_WORKLET=typeof AudioWorkletGlobalScope!=="undefined";var ENVIRONMENT_IS_WEB=typeof window=="object";var ENVIRONMENT_IS_WORKER=typeof importScripts=="function";var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";var ENVIRONMENT_IS_SHELL=!ENVIRONMENT_IS_WEB&&!ENVIRONMENT_IS_NODE&&!ENVIRONMENT_IS_WORKER&&!ENVIRONMENT_IS_AUDIO_WORKLET;if(Module["ENVIRONMENT"]){throw new Error("Module.ENVIRONMENT has been deprecated. To force the environment, use the ENVIRONMENT compile-time option (for example, -sENVIRONMENT=web or -sENVIRONMENT=node)")}if(ENVIRONMENT_IS_NODE){var worker_threads=require("worker_threads");global.Worker=worker_threads.Worker;ENVIRONMENT_IS_WORKER=!worker_threads.isMainThread}var ENVIRONMENT_IS_WASM_WORKER=Module["$ww"];var moduleOverrides=Object.assign({},Module);var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_NODE){if(typeof process=="undefined"||!process.release||process.release.name!=="node")throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)");var nodeVersion=process.versions.node;var numericVersion=nodeVersion.split(".").slice(0,3);numericVersion=numericVersion[0]*1e4+numericVersion[1]*100+numericVersion[2].split("-")[0]*1;if(numericVersion<16e4){throw new Error("This emscripten-generated code requires node v16.0.0 (detected v"+nodeVersion+")")}var fs=require("fs");var 
nodePath=require("path");scriptDirectory=__dirname+"/";readBinary=filename=>{filename=isFileURI(filename)?new URL(filename):nodePath.normalize(filename);var ret=fs.readFileSync(filename);assert(ret.buffer);return ret};readAsync=(filename,binary=true)=>{filename=isFileURI(filename)?new URL(filename):nodePath.normalize(filename);return new Promise((resolve,reject)=>{fs.readFile(filename,binary?undefined:"utf8",(err,data)=>{if(err)reject(err);else resolve(binary?data.buffer:data)})})};if(!Module["thisProgram"]&&process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow}}else if(ENVIRONMENT_IS_SHELL){if(typeof process=="object"&&typeof require==="function"||typeof window=="object"||typeof importScripts=="function")throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)")}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){if(ENVIRONMENT_IS_WORKER){scriptDirectory=self.location.href}else if(typeof document!="undefined"&&document.currentScript){scriptDirectory=document.currentScript.src}if(_scriptName){scriptDirectory=_scriptName}if(scriptDirectory.startsWith("blob:")){scriptDirectory=""}else{scriptDirectory=scriptDirectory.substr(0,scriptDirectory.replace(/[?#].*/,"").lastIndexOf("/")+1)}if(!(typeof window=="object"||typeof importScripts=="function"))throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)");{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=url=>{if(isFileURI(url)){return new Promise((resolve,reject)=>{var xhr=new 
XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){resolve(xhr.response);return}reject(xhr.status)};xhr.onerror=reject;xhr.send(null)})}return fetch(url,{credentials:"same-origin"}).then(response=>{if(response.ok){return response.arrayBuffer()}return Promise.reject(new Error(response.status+" : "+response.url))})}}}else if(!ENVIRONMENT_IS_AUDIO_WORKLET){throw new Error("environment detection error")}var out=Module["print"]||console.log.bind(console);var err=Module["printErr"]||console.error.bind(console);Object.assign(Module,moduleOverrides);moduleOverrides=null;checkIncomingModuleAPI();if(Module["arguments"])arguments_=Module["arguments"];legacyModuleProp("arguments","arguments_");if(Module["thisProgram"])thisProgram=Module["thisProgram"];legacyModuleProp("thisProgram","thisProgram");assert(typeof Module["memoryInitializerPrefixURL"]=="undefined","Module.memoryInitializerPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["pthreadMainPrefixURL"]=="undefined","Module.pthreadMainPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["cdInitializerPrefixURL"]=="undefined","Module.cdInitializerPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["filePackagePrefixURL"]=="undefined","Module.filePackagePrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["read"]=="undefined","Module.read option was removed");assert(typeof Module["readAsync"]=="undefined","Module.readAsync option was removed (modify readAsync in JS)");assert(typeof Module["readBinary"]=="undefined","Module.readBinary option was removed (modify readBinary in JS)");assert(typeof Module["setWindowTitle"]=="undefined","Module.setWindowTitle option was removed (modify emscripten_set_window_title in JS)");assert(typeof Module["TOTAL_MEMORY"]=="undefined","Module.TOTAL_MEMORY has been renamed 
Module.INITIAL_MEMORY");legacyModuleProp("asm","wasmExports");legacyModuleProp("readAsync","readAsync");legacyModuleProp("readBinary","readBinary");legacyModuleProp("setWindowTitle","setWindowTitle");assert(!ENVIRONMENT_IS_SHELL,"shell environment detected but not enabled at build time.  Add `shell` to `-sENVIRONMENT` to enable.");var wasmBinary=Module["wasmBinary"];legacyModuleProp("wasmBinary","wasmBinary");if(typeof WebAssembly!="object"){err("no native wasm support detected")}var wasmMemory;var wasmModule;var ABORT=false;var EXITSTATUS;function assert(condition,text){if(!condition){abort("Assertion failed"+(text?": "+text:""))}}var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);Module["HEAP16"]=HEAP16=new Int16Array(b);Module["HEAPU8"]=HEAPU8=new Uint8Array(b);Module["HEAPU16"]=HEAPU16=new Uint16Array(b);Module["HEAP32"]=HEAP32=new Int32Array(b);Module["HEAPU32"]=HEAPU32=new Uint32Array(b);Module["HEAPF32"]=HEAPF32=new Float32Array(b);Module["HEAPF64"]=HEAPF64=new Float64Array(b)}assert(!Module["STACK_SIZE"],"STACK_SIZE can no longer be set at runtime.  Use -sSTACK_SIZE at link time");assert(typeof Int32Array!="undefined"&&typeof Float64Array!=="undefined"&&Int32Array.prototype.subarray!=undefined&&Int32Array.prototype.set!=undefined,"JS engine does not provide full typed array support");if(Module["wasmMemory"]){wasmMemory=Module["wasmMemory"]}else{var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||134217728;legacyModuleProp("INITIAL_MEMORY","INITIAL_MEMORY");assert(INITIAL_MEMORY>=67108864,"INITIAL_MEMORY should be larger than STACK_SIZE, was "+INITIAL_MEMORY+"! 
(STACK_SIZE="+67108864+")");wasmMemory=new WebAssembly.Memory({initial:INITIAL_MEMORY/65536,maximum:32768,shared:true});if(!(wasmMemory.buffer instanceof SharedArrayBuffer)){err("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag");if(ENVIRONMENT_IS_NODE){err("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)")}throw Error("bad memory")}}updateMemoryViews();function writeStackCookie(){var max=_emscripten_stack_get_end();assert((max&3)==0);if(max==0){max+=4}GROWABLE_HEAP_U32()[max>>2]=34821223;GROWABLE_HEAP_U32()[max+4>>2]=2310721022;GROWABLE_HEAP_U32()[0>>2]=1668509029}function checkStackCookie(){if(ABORT)return;var max=_emscripten_stack_get_end();if(max==0){max+=4}var cookie1=GROWABLE_HEAP_U32()[max>>2];var cookie2=GROWABLE_HEAP_U32()[max+4>>2];if(cookie1!=34821223||cookie2!=2310721022){abort(`Stack overflow! 
Stack cookie has been overwritten at ${ptrToString(max)}, expected hex dwords 0x89BACDFE and 0x2135467, but received ${ptrToString(cookie2)} ${ptrToString(cookie1)}`)}if(GROWABLE_HEAP_U32()[0>>2]!=1668509029){abort("Runtime error: The application has corrupted its heap memory area (address zero)!")}}var __ATPRERUN__=[];var __ATINIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}function initRuntime(){assert(!runtimeInitialized);runtimeInitialized=true;if(ENVIRONMENT_IS_WASM_WORKER)return _wasmWorkerInitializeRuntime();checkStackCookie();callRuntimeCallbacks(__ATINIT__)}function postRun(){checkStackCookie();if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}function addOnInit(cb){__ATINIT__.unshift(cb)}function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}assert(Math.imul,"This browser does not support Math.imul(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.fround,"This browser does not support Math.fround(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.clz32,"This browser does not support Math.clz32(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.trunc,"This browser does not support Math.trunc(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;var runDependencyTracking={};function 
addRunDependency(id){runDependencies++;Module["monitorRunDependencies"]?.(runDependencies);if(id){assert(!runDependencyTracking[id]);runDependencyTracking[id]=1;if(runDependencyWatcher===null&&typeof setInterval!="undefined"){runDependencyWatcher=setInterval(()=>{if(ABORT){clearInterval(runDependencyWatcher);runDependencyWatcher=null;return}var shown=false;for(var dep in runDependencyTracking){if(!shown){shown=true;err("still waiting on run dependencies:")}err(`dependency: ${dep}`)}if(shown){err("(end of list)")}},1e4)}}else{err("warning: run dependency added without ID")}}function removeRunDependency(id){runDependencies--;Module["monitorRunDependencies"]?.(runDependencies);if(id){assert(runDependencyTracking[id]);delete runDependencyTracking[id]}else{err("warning: run dependency removed without ID")}if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;if(what.indexOf("RuntimeError: unreachable")>=0){what+='. "unreachable" may be due to ASYNCIFY_STACK_SIZE not being large enough (try increasing it)'}var e=new WebAssembly.RuntimeError(what);readyPromiseReject(e);throw e}var FS={error(){abort("Filesystem support (FS) was not included. The problem is that you are using files from JS, but files were not used from C/C++, so filesystem support was not auto-included. 
You can force-include filesystem support with -sFORCE_FILESYSTEM")},init(){FS.error()},createDataFile(){FS.error()},createPreloadedFile(){FS.error()},createLazyFile(){FS.error()},open(){FS.error()},mkdev(){FS.error()},registerDevice(){FS.error()},analyzePath(){FS.error()},ErrnoError(){FS.error()}};Module["FS_createDataFile"]=FS.createDataFile;Module["FS_createPreloadedFile"]=FS.createPreloadedFile;var dataURIPrefix="data:application/octet-stream;base64,";var isDataURI=filename=>filename.startsWith(dataURIPrefix);var isFileURI=filename=>filename.startsWith("file://");function createExportWrapper(name,nargs){return(...args)=>{assert(runtimeInitialized,`native function \`${name}\` called before runtime initialization`);var f=wasmExports[name];assert(f,`exported native function \`${name}\` not found`);assert(args.length<=nargs,`native function \`${name}\` called with ${args.length} args but expects ${nargs}`);return f(...args)}}function findWasmBinary(){var f="amy.wasm";if(!isDataURI(f)){return locateFile(f)}return f}var wasmBinaryFile;function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}function getBinaryPromise(binaryFile){if(!wasmBinary){return readAsync(binaryFile).then(response=>new Uint8Array(response),()=>getBinarySync(binaryFile))}return Promise.resolve().then(()=>getBinarySync(binaryFile))}function instantiateArrayBuffer(binaryFile,imports,receiver){return getBinaryPromise(binaryFile).then(binary=>WebAssembly.instantiate(binary,imports)).then(receiver,reason=>{err(`failed to asynchronously prepare wasm: ${reason}`);if(isFileURI(wasmBinaryFile)){err(`warning: Loading from a file URI (${wasmBinaryFile}) is not supported in most browsers. 
See https://emscripten.org/docs/getting_started/FAQ.html#how-do-i-run-a-local-webserver-for-testing-why-does-my-program-stall-in-downloading-or-preparing`)}abort(reason)})}function instantiateAsync(binary,binaryFile,imports,callback){if(!binary&&typeof WebAssembly.instantiateStreaming=="function"&&!isDataURI(binaryFile)&&!isFileURI(binaryFile)&&!ENVIRONMENT_IS_NODE&&typeof fetch=="function"){return fetch(binaryFile,{credentials:"same-origin"}).then(response=>{var result=WebAssembly.instantiateStreaming(response,imports);return result.then(callback,function(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation");return instantiateArrayBuffer(binaryFile,imports,callback)})})}return instantiateArrayBuffer(binaryFile,imports,callback)}function getWasmImports(){Asyncify.instrumentWasmImports(wasmImports);return{env:wasmImports,wasi_snapshot_preview1:wasmImports}}function createWasm(){var info=getWasmImports();function receiveInstance(instance,module){wasmExports=instance.exports;wasmExports=Asyncify.instrumentWasmExports(wasmExports);wasmTable=wasmExports["__indirect_function_table"];Module["wasmTable"]=wasmTable;assert(wasmTable,"table not found in wasm exports");addOnInit(wasmExports["__wasm_call_ctors"]);wasmModule=module;removeRunDependency("wasm-instantiate");return wasmExports}addRunDependency("wasm-instantiate");var trueModule=Module;function receiveInstantiationResult(result){assert(Module===trueModule,"the Module object should not be replaced during async compilation - perhaps the order of HTML elements is wrong?");trueModule=null;receiveInstance(result["instance"],result["module"])}if(Module["instantiateWasm"]){try{return Module["instantiateWasm"](info,receiveInstance)}catch(e){err(`Module.instantiateWasm callback failed with error: 
${e}`);readyPromiseReject(e)}}if(!wasmBinaryFile)wasmBinaryFile=findWasmBinary();instantiateAsync(wasmBinary,wasmBinaryFile,info,receiveInstantiationResult).catch(readyPromiseReject);return{}}(()=>{var h16=new Int16Array(1);var h8=new Int8Array(h16.buffer);h16[0]=25459;if(h8[0]!==115||h8[1]!==99)throw"Runtime error: expected the system to be little-endian! (Run with -sSUPPORT_BIG_ENDIAN to bypass)"})();function legacyModuleProp(prop,newName,incoming=true){if(!Object.getOwnPropertyDescriptor(Module,prop)){Object.defineProperty(Module,prop,{configurable:true,get(){let extra=incoming?" (the initial value can be provided on Module, but after startup the value is only looked for on a local variable of that name)":"";abort(`\`Module.${prop}\` has been replaced by \`${newName}\``+extra)}})}}function ignoredModuleProp(prop){if(Object.getOwnPropertyDescriptor(Module,prop)){abort(`\`Module.${prop}\` was supplied but \`${prop}\` not included in INCOMING_MODULE_JS_API`)}}function isExportedByForceFilesystem(name){return name==="FS_createPath"||name==="FS_createDataFile"||name==="FS_createPreloadedFile"||name==="FS_unlink"||name==="addRunDependency"||name==="FS_createLazyFile"||name==="FS_createDevice"||name==="removeRunDependency"}function missingGlobal(sym,msg){if(typeof globalThis!="undefined"){Object.defineProperty(globalThis,sym,{configurable:true,get(){warnOnce(`\`${sym}\` is not longer defined by emscripten. 
${msg}`);return undefined}})}}missingGlobal("buffer","Please use HEAP8.buffer or wasmMemory.buffer");missingGlobal("asm","Please use wasmExports instead");function missingLibrarySymbol(sym){if(typeof globalThis!="undefined"&&!Object.getOwnPropertyDescriptor(globalThis,sym)){Object.defineProperty(globalThis,sym,{configurable:true,get(){var msg=`\`${sym}\` is a library symbol and not included by default; add it to your library.js __deps or to DEFAULT_LIBRARY_FUNCS_TO_INCLUDE on the command line`;var librarySymbol=sym;if(!librarySymbol.startsWith("_")){librarySymbol="$"+sym}msg+=` (e.g. -sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE='${librarySymbol}')`;if(isExportedByForceFilesystem(sym)){msg+=". Alternatively, forcing filesystem support (-sFORCE_FILESYSTEM) can export this for you"}warnOnce(msg);return undefined}})}unexportedRuntimeSymbol(sym)}function unexportedRuntimeSymbol(sym){if(!Object.getOwnPropertyDescriptor(Module,sym)){Object.defineProperty(Module,sym,{configurable:true,get(){var msg=`'${sym}' was not exported. add it to EXPORTED_RUNTIME_METHODS (see the Emscripten FAQ)`;if(isExportedByForceFilesystem(sym)){msg+=". 
Alternatively, forcing filesystem support (-sFORCE_FILESYSTEM) can export this for you"}abort(msg)}})}}var ASM_CONSTS={1109840:$0=>{amy_sequencer_js_hook($0)},1109871:($0,$1,$2,$3,$4)=>{if(typeof window==="undefined"||(window.AudioContext||window.webkitAudioContext)===undefined){return 0}if(typeof window.miniaudio==="undefined"){window.miniaudio={referenceCount:0};window.miniaudio.device_type={};window.miniaudio.device_type.playback=$0;window.miniaudio.device_type.capture=$1;window.miniaudio.device_type.duplex=$2;window.miniaudio.device_state={};window.miniaudio.device_state.stopped=$3;window.miniaudio.device_state.started=$4;miniaudio.devices=[];miniaudio.track_device=function(device){for(var iDevice=0;iDevice<miniaudio.devices.length;++iDevice){if(miniaudio.devices[iDevice]==null){miniaudio.devices[iDevice]=device;return iDevice}}miniaudio.devices.push(device);return miniaudio.devices.length-1};miniaudio.untrack_device_by_index=function(deviceIndex){miniaudio.devices[deviceIndex]=null;while(miniaudio.devices.length>0){if(miniaudio.devices[miniaudio.devices.length-1]==null){miniaudio.devices.pop()}else{break}}};miniaudio.untrack_device=function(device){for(var iDevice=0;iDevice<miniaudio.devices.length;++iDevice){if(miniaudio.devices[iDevice]==device){return miniaudio.untrack_device_by_index(iDevice)}}};miniaudio.get_device_by_index=function(deviceIndex){return miniaudio.devices[deviceIndex]};miniaudio.unlock_event_types=function(){return["touchend","click"]}();miniaudio.unlock=function(){for(var i=0;i<miniaudio.devices.length;++i){var device=miniaudio.devices[i];if(device!=null&&device.webaudio!=null&&device.state===window.miniaudio.device_state.started){device.webaudio.resume().then(()=>{Module._ma_device__on_notification_unlocked(device.pDevice)},error=>{console.error("Failed to resume 
audiocontext",error)})}}miniaudio.unlock_event_types.map(function(event_type){document.removeEventListener(event_type,miniaudio.unlock,true)})};miniaudio.unlock_event_types.map(function(event_type){document.addEventListener(event_type,miniaudio.unlock,true)})}window.miniaudio.referenceCount+=1;return 1},1112029:()=>{if(typeof window.miniaudio!=="undefined"){window.miniaudio.referenceCount-=1;if(window.miniaudio.referenceCount===0){delete window.miniaudio}}},1112193:()=>navigator.mediaDevices!==undefined&&navigator.mediaDevices.getUserMedia!==undefined,1112297:()=>{try{var temp=new(window.AudioContext||window.webkitAudioContext);var sampleRate=temp.sampleRate;temp.close();return sampleRate}catch(e){return 0}},1112468:$0=>miniaudio.track_device({webaudio:emscriptenGetAudioObject($0),state:1}),1112557:($0,$1)=>{var getUserMediaResult=0;var audioWorklet=emscriptenGetAudioObject($0);var audioContext=emscriptenGetAudioObject($1);navigator.mediaDevices.getUserMedia({audio:true,video:false}).then(function(stream){audioContext.streamNode=audioContext.createMediaStreamSource(stream);audioContext.streamNode.connect(audioWorklet);audioWorklet.connect(audioContext.destination);getUserMediaResult=0}).catch(function(error){console.log("navigator.mediaDevices.getUserMedia Failed: "+error);getUserMediaResult=-1});return getUserMediaResult},1113119:($0,$1)=>{var audioWorklet=emscriptenGetAudioObject($0);var audioContext=emscriptenGetAudioObject($1);audioWorklet.connect(audioContext.destination);return 0},1113279:$0=>emscriptenGetAudioObject($0).sampleRate,1113331:$0=>{var device=miniaudio.get_device_by_index($0);if(device.streamNode!==undefined){device.streamNode.disconnect();device.streamNode=undefined}},1113487:$0=>{miniaudio.untrack_device_by_index($0)},1113530:$0=>{var device=miniaudio.get_device_by_index($0);device.webaudio.resume();device.state=miniaudio.device_state.started},1113655:$0=>{var 
device=miniaudio.get_device_by_index($0);device.webaudio.suspend();device.state=miniaudio.device_state.stopped}};function ExitStatus(status){this.name="ExitStatus";this.message=`Program terminated with exit(${status})`;this.status=status}var _wasmWorkerDelayedMessageQueue=[];var wasmTableMirror=[];var wasmTable;var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){if(funcPtr>=wasmTableMirror.length)wasmTableMirror.length=funcPtr+1;wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}assert(wasmTable.get(funcPtr)==func,"JavaScript-side Wasm function table mirror is out of date!");return func};var _wasmWorkerRunPostMessage=e=>{let data=ENVIRONMENT_IS_NODE?e:e.data;let wasmCall=data["_wsc"];wasmCall&&getWasmTableEntry(wasmCall)(...data["x"])};var _wasmWorkerAppendToQueue=e=>{_wasmWorkerDelayedMessageQueue.push(e)};var _wasmWorkerInitializeRuntime=()=>{let m=Module;assert(m["sb"]%16==0);assert(m["sz"]%16==0);__emscripten_wasm_worker_initialize(m["sb"],m["sz"]);if(typeof AudioWorkletGlobalScope==="undefined"){removeEventListener("message",_wasmWorkerAppendToQueue);_wasmWorkerDelayedMessageQueue=_wasmWorkerDelayedMessageQueue.forEach(_wasmWorkerRunPostMessage);addEventListener("message",_wasmWorkerRunPostMessage)}};var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var noExitRuntime=Module["noExitRuntime"]||true;var ptrToString=ptr=>{assert(typeof ptr==="number");ptr>>>=0;return"0x"+ptr.toString(16).padStart(8,"0")};var stackRestore=val=>__emscripten_stack_restore(val);var stackSave=()=>_emscripten_stack_get_current();var warnOnce=text=>{warnOnce.shown||={};if(!warnOnce.shown[text]){warnOnce.shown[text]=1;if(ENVIRONMENT_IS_NODE)text="warning: "+text;err(text)}};var UTF8Decoder=typeof TextDecoder!="undefined"?new TextDecoder:undefined;var UTF8ArrayToString=(heapOrArray,idx,maxBytesToRead)=>{var endIdx=idx+maxBytesToRead;var 
endPtr=idx;while(heapOrArray[endPtr]&&!(endPtr>=endIdx))++endPtr;if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.buffer instanceof SharedArrayBuffer?heapOrArray.slice(idx,endPtr):heapOrArray.subarray(idx,endPtr))}var str="";while(idx<endPtr){var u0=heapOrArray[idx++];if(!(u0&128)){str+=String.fromCharCode(u0);continue}var u1=heapOrArray[idx++]&63;if((u0&224)==192){str+=String.fromCharCode((u0&31)<<6|u1);continue}var u2=heapOrArray[idx++]&63;if((u0&240)==224){u0=(u0&15)<<12|u1<<6|u2}else{if((u0&248)!=240)warnOnce("Invalid UTF-8 leading byte "+ptrToString(u0)+" encountered when deserializing a UTF-8 string in wasm memory to a JS string!");u0=(u0&7)<<18|u1<<12|u2<<6|heapOrArray[idx++]&63}if(u0<65536){str+=String.fromCharCode(u0)}else{var ch=u0-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}}return str};var UTF8ToString=(ptr,maxBytesToRead)=>{assert(typeof ptr=="number",`UTF8ToString expects a number (got ${typeof ptr})`);return ptr?UTF8ArrayToString(GROWABLE_HEAP_U8(),ptr,maxBytesToRead):""};var ___assert_fail=(condition,filename,line,func)=>{abort(`Assertion failed: ${UTF8ToString(condition)}, at: `+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"])};var __abort_js=()=>{abort("native code called abort()")};var nowIsMonotonic=1;var __emscripten_get_now_is_monotonic=()=>nowIsMonotonic;var readEmAsmArgsArray=[];var readEmAsmArgs=(sigPtr,buf)=>{assert(Array.isArray(readEmAsmArgsArray));assert(buf%16==0);readEmAsmArgsArray.length=0;var ch;while(ch=GROWABLE_HEAP_U8()[sigPtr++]){var chr=String.fromCharCode(ch);var validChars=["d","f","i","p"];assert(validChars.includes(chr),`Invalid character ${ch}("${chr}") in readEmAsmArgs! 
Use only [${validChars}], and do not specify "v" for void return argument.`);var wide=ch!=105;wide&=ch!=112;buf+=wide&&buf%8?4:0;readEmAsmArgsArray.push(ch==112?GROWABLE_HEAP_U32()[buf>>2]:ch==105?GROWABLE_HEAP_I32()[buf>>2]:GROWABLE_HEAP_F64()[buf>>3]);buf+=wide?8:4}return readEmAsmArgsArray};var runEmAsmFunction=(code,sigPtr,argbuf)=>{var args=readEmAsmArgs(sigPtr,argbuf);assert(ASM_CONSTS.hasOwnProperty(code),`No EM_ASM constant found at address ${code}.  The loaded WebAssembly file is likely out of sync with the generated JavaScript.`);return ASM_CONSTS[code](...args)};var _emscripten_asm_const_int=(code,sigPtr,argbuf)=>runEmAsmFunction(code,sigPtr,argbuf);var EmAudio={};var EmAudioCounter=0;var emscriptenRegisterAudioObject=object=>{assert(object,"Called emscriptenRegisterAudioObject() with a null object handle!");EmAudio[++EmAudioCounter]=object;return EmAudioCounter};var emscriptenGetAudioObject=objectHandle=>EmAudio[objectHandle];var _emscripten_create_audio_context=options=>{let ctx=window.AudioContext||window.webkitAudioContext;if(!ctx)console.error("emscripten_create_audio_context failed! 
Web Audio is not supported.");options>>=2;let opts=options?{latencyHint:GROWABLE_HEAP_U32()[options]?UTF8ToString(GROWABLE_HEAP_U32()[options]):void 0,sampleRate:GROWABLE_HEAP_I32()[options+1]||void 0}:void 0;return ctx&&emscriptenRegisterAudioObject(new ctx(opts))};var _emscripten_create_wasm_audio_worklet_node=(contextHandle,name,options,callback,userData)=>{assert(contextHandle,`Called emscripten_create_wasm_audio_worklet_node() with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_create_wasm_audio_worklet_node() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_create_wasm_audio_worklet_node() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);options>>=2;function readChannelCountArray(heapIndex,numOutputs){let channelCounts=[];while(numOutputs--)channelCounts.push(GROWABLE_HEAP_U32()[heapIndex++]);return channelCounts}let opts=options?{numberOfInputs:GROWABLE_HEAP_I32()[options],numberOfOutputs:GROWABLE_HEAP_I32()[options+1],outputChannelCount:GROWABLE_HEAP_U32()[options+2]?readChannelCountArray(GROWABLE_HEAP_U32()[options+2]>>2,GROWABLE_HEAP_I32()[options+1]):void 0,processorOptions:{cb:callback,ud:userData}}:void 0;return emscriptenRegisterAudioObject(new AudioWorkletNode(EmAudio[contextHandle],UTF8ToString(name),opts))};var _emscripten_create_wasm_audio_worklet_processor_async=(contextHandle,options,callback,userData)=>{assert(contextHandle,`Called emscripten_create_wasm_audio_worklet_processor_async() with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_create_wasm_audio_worklet_processor_async() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called 
emscripten_create_wasm_audio_worklet_processor_async() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);options>>=2;let audioParams=[],numAudioParams=GROWABLE_HEAP_U32()[options+1],audioParamDescriptors=GROWABLE_HEAP_U32()[options+2]>>2,i=0;while(numAudioParams--){audioParams.push({name:i++,defaultValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],minValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],maxValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],automationRate:["a","k"][GROWABLE_HEAP_U32()[audioParamDescriptors++]]+"-rate"})}EmAudio[contextHandle].audioWorklet.bootstrapMessage.port.postMessage({_wpn:UTF8ToString(GROWABLE_HEAP_U32()[options]),audioParams,contextHandle,callback,userData})};var _emscripten_date_now=()=>Date.now();var _emscripten_destroy_audio_context=contextHandle=>{assert(EmAudio[contextHandle],`Called emscripten_destroy_audio_context() on an already freed context handle ${contextHandle}`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_destroy_audio_context() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);EmAudio[contextHandle].suspend();delete EmAudio[contextHandle]};var _emscripten_destroy_web_audio_node=objectHandle=>{assert(EmAudio[objectHandle],`Called emscripten_destroy_web_audio_node() on a nonexisting/already freed object handle ${objectHandle}`);assert(EmAudio[objectHandle].disconnect,`Called emscripten_destroy_web_audio_node() on a handle ${objectHandle} that is not an Web Audio Node, but of type ${typeof EmAudio[objectHandle]}`);EmAudio[objectHandle].disconnect();delete EmAudio[objectHandle]};var _emscripten_get_now;if(typeof performance!="undefined"&&performance.now){_emscripten_get_now=()=>performance.now()}else{_emscripten_get_now=Date.now}var getHeapMax=()=>2147483648;var alignMemory=(size,alignment)=>{assert(alignment,"alignment argument is 
required");return Math.ceil(size/alignment)*alignment};var growMemory=size=>{var b=wasmMemory.buffer;var pages=(size-b.byteLength+65535)/65536;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){err(`growMemory: Attempted to grow heap from ${b.byteLength} bytes to ${size} bytes, but got error: ${e}`)}};var _emscripten_resize_heap=requestedSize=>{var oldSize=GROWABLE_HEAP_U8().length;requestedSize>>>=0;if(requestedSize<=oldSize){return false}var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){err(`Cannot enlarge memory, requested ${requestedSize} bytes, but the limit is ${maxHeapSize} bytes!`);return false}for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}err(`Failed to grow the heap from ${oldSize} bytes to ${newSize} bytes, not enough memory!`);return false};var handleException=e=>{if(e instanceof ExitStatus||e=="unwind"){return EXITSTATUS}checkStackCookie();if(e instanceof WebAssembly.RuntimeError){if(_emscripten_stack_get_current()<=0){err("Stack overflow detected.  
You can try increasing -sSTACK_SIZE (currently set to 67108864)")}}quit_(1,e)};var runtimeKeepaliveCounter=0;var keepRuntimeAlive=()=>noExitRuntime||runtimeKeepaliveCounter>0;var _proc_exit=code=>{EXITSTATUS=code;if(!keepRuntimeAlive()){Module["onExit"]?.(code);ABORT=true}quit_(code,new ExitStatus(code))};var exitJS=(status,implicit)=>{EXITSTATUS=status;checkUnflushedContent();if(keepRuntimeAlive()&&!implicit){var msg=`program exited (with status: ${status}), but keepRuntimeAlive() is set (counter=${runtimeKeepaliveCounter}) due to an async operation, so halting execution but not exiting the runtime or preventing further async execution (you can use emscripten_force_exit, if you want to force a true shutdown)`;readyPromiseReject(msg);err(msg)}_proc_exit(status)};var _exit=exitJS;var maybeExit=()=>{if(!keepRuntimeAlive()){try{_exit(EXITSTATUS)}catch(e){handleException(e)}}};var callUserCallback=func=>{if(ABORT){err("user callback triggered after runtime exited or application aborted.  Ignoring.");return}try{func();maybeExit()}catch(e){handleException(e)}};var safeSetTimeout=(func,timeout)=>setTimeout(()=>{callUserCallback(func)},timeout);var preloadPlugins=Module["preloadPlugins"]||[];var Browser={mainLoop:{running:false,scheduler:null,method:"",currentlyRunningMainloop:0,func:null,arg:0,timingMode:0,timingValue:0,currentFrameNumber:0,queue:[],pause(){Browser.mainLoop.scheduler=null;Browser.mainLoop.currentlyRunningMainloop++},resume(){Browser.mainLoop.currentlyRunningMainloop++;var timingMode=Browser.mainLoop.timingMode;var timingValue=Browser.mainLoop.timingValue;var func=Browser.mainLoop.func;Browser.mainLoop.func=null;setMainLoop(func,0,false,Browser.mainLoop.arg,true);_emscripten_set_main_loop_timing(timingMode,timingValue);Browser.mainLoop.scheduler()},updateStatus(){if(Module["setStatus"]){var message=Module["statusMessage"]||"Please wait...";var remaining=Browser.mainLoop.remainingBlockers;var 
expected=Browser.mainLoop.expectedBlockers;if(remaining){if(remaining<expected){Module["setStatus"](`{message} ({expected - remaining}/{expected})`)}else{Module["setStatus"](message)}}else{Module["setStatus"]("")}}},runIter(func){if(ABORT)return;if(Module["preMainLoop"]){var preRet=Module["preMainLoop"]();if(preRet===false){return}}callUserCallback(func);Module["postMainLoop"]?.()}},useWebGL:false,isFullscreen:false,pointerLock:false,moduleContextCreatedCallbacks:[],workers:[],init(){if(Browser.initted)return;Browser.initted=true;var imagePlugin={};imagePlugin["canHandle"]=function imagePlugin_canHandle(name){return!Module["noImageDecoding"]&&/\.(jpg|jpeg|png|bmp|webp)$/i.test(name)};imagePlugin["handle"]=function imagePlugin_handle(byteArray,name,onload,onerror){var b=new Blob([byteArray],{type:Browser.getMimetype(name)});if(b.size!==byteArray.length){b=new Blob([new Uint8Array(byteArray).buffer],{type:Browser.getMimetype(name)})}var url=URL.createObjectURL(b);assert(typeof url=="string","createObjectURL must return a url as a string");var img=new Image;img.onload=()=>{assert(img.complete,`Image ${name} could not be decoded`);var canvas=document.createElement("canvas");canvas.width=img.width;canvas.height=img.height;var ctx=canvas.getContext("2d");ctx.drawImage(img,0,0);preloadedImages[name]=canvas;URL.revokeObjectURL(url);onload?.(byteArray)};img.onerror=event=>{err(`Image ${url} could not be decoded`);onerror?.()};img.src=url};preloadPlugins.push(imagePlugin);var audioPlugin={};audioPlugin["canHandle"]=function audioPlugin_canHandle(name){return!Module["noAudioDecoding"]&&name.substr(-4)in{".ogg":1,".wav":1,".mp3":1}};audioPlugin["handle"]=function audioPlugin_handle(byteArray,name,onload,onerror){var done=false;function finish(audio){if(done)return;done=true;preloadedAudios[name]=audio;onload?.(byteArray)}var b=new Blob([byteArray],{type:Browser.getMimetype(name)});var url=URL.createObjectURL(b);assert(typeof url=="string","createObjectURL must return a url as 
a string");var audio=new Audio;audio.addEventListener("canplaythrough",()=>finish(audio),false);audio.onerror=function audio_onerror(event){if(done)return;err(`warning: browser could not fully decode audio ${name}, trying slower base64 approach`);function encode64(data){var BASE="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";var PAD="=";var ret="";var leftchar=0;var leftbits=0;for(var i=0;i<data.length;i++){leftchar=leftchar<<8|data[i];leftbits+=8;while(leftbits>=6){var curr=leftchar>>leftbits-6&63;leftbits-=6;ret+=BASE[curr]}}if(leftbits==2){ret+=BASE[(leftchar&3)<<4];ret+=PAD+PAD}else if(leftbits==4){ret+=BASE[(leftchar&15)<<2];ret+=PAD}return ret}audio.src="data:audio/x-"+name.substr(-3)+";base64,"+encode64(byteArray);finish(audio)};audio.src=url;safeSetTimeout(()=>{finish(audio)},1e4)};preloadPlugins.push(audioPlugin);function pointerLockChange(){Browser.pointerLock=document["pointerLockElement"]===Module["canvas"]||document["mozPointerLockElement"]===Module["canvas"]||document["webkitPointerLockElement"]===Module["canvas"]||document["msPointerLockElement"]===Module["canvas"]}var 
canvas=Module["canvas"];if(canvas){canvas.requestPointerLock=canvas["requestPointerLock"]||canvas["mozRequestPointerLock"]||canvas["webkitRequestPointerLock"]||canvas["msRequestPointerLock"]||(()=>{});canvas.exitPointerLock=document["exitPointerLock"]||document["mozExitPointerLock"]||document["webkitExitPointerLock"]||document["msExitPointerLock"]||(()=>{});canvas.exitPointerLock=canvas.exitPointerLock.bind(document);document.addEventListener("pointerlockchange",pointerLockChange,false);document.addEventListener("mozpointerlockchange",pointerLockChange,false);document.addEventListener("webkitpointerlockchange",pointerLockChange,false);document.addEventListener("mspointerlockchange",pointerLockChange,false);if(Module["elementPointerLock"]){canvas.addEventListener("click",ev=>{if(!Browser.pointerLock&&Module["canvas"].requestPointerLock){Module["canvas"].requestPointerLock();ev.preventDefault()}},false)}}},createContext(canvas,useWebGL,setInModule,webGLContextAttributes){if(useWebGL&&Module.ctx&&canvas==Module.canvas)return Module.ctx;var ctx;var contextHandle;if(useWebGL){var contextAttributes={antialias:false,alpha:false,majorVersion:1};if(webGLContextAttributes){for(var attribute in webGLContextAttributes){contextAttributes[attribute]=webGLContextAttributes[attribute]}}if(typeof GL!="undefined"){contextHandle=GL.createContext(canvas,contextAttributes);if(contextHandle){ctx=GL.getContext(contextHandle).GLctx}}}else{ctx=canvas.getContext("2d")}if(!ctx)return null;if(setInModule){if(!useWebGL)assert(typeof GLctx=="undefined","cannot set in module if GLctx is used, but we are a non-GL context that would replace it");Module.ctx=ctx;if(useWebGL)GL.makeContextCurrent(contextHandle);Browser.useWebGL=useWebGL;Browser.moduleContextCreatedCallbacks.forEach(callback=>callback());Browser.init()}return 
ctx},fullscreenHandlersInstalled:false,lockPointer:undefined,resizeCanvas:undefined,requestFullscreen(lockPointer,resizeCanvas){Browser.lockPointer=lockPointer;Browser.resizeCanvas=resizeCanvas;if(typeof Browser.lockPointer=="undefined")Browser.lockPointer=true;if(typeof Browser.resizeCanvas=="undefined")Browser.resizeCanvas=false;var canvas=Module["canvas"];function fullscreenChange(){Browser.isFullscreen=false;var canvasContainer=canvas.parentNode;if((document["fullscreenElement"]||document["mozFullScreenElement"]||document["msFullscreenElement"]||document["webkitFullscreenElement"]||document["webkitCurrentFullScreenElement"])===canvasContainer){canvas.exitFullscreen=Browser.exitFullscreen;if(Browser.lockPointer)canvas.requestPointerLock();Browser.isFullscreen=true;if(Browser.resizeCanvas){Browser.setFullscreenCanvasSize()}else{Browser.updateCanvasDimensions(canvas)}}else{canvasContainer.parentNode.insertBefore(canvas,canvasContainer);canvasContainer.parentNode.removeChild(canvasContainer);if(Browser.resizeCanvas){Browser.setWindowedCanvasSize()}else{Browser.updateCanvasDimensions(canvas)}}Module["onFullScreen"]?.(Browser.isFullscreen);Module["onFullscreen"]?.(Browser.isFullscreen)}if(!Browser.fullscreenHandlersInstalled){Browser.fullscreenHandlersInstalled=true;document.addEventListener("fullscreenchange",fullscreenChange,false);document.addEventListener("mozfullscreenchange",fullscreenChange,false);document.addEventListener("webkitfullscreenchange",fullscreenChange,false);document.addEventListener("MSFullscreenChange",fullscreenChange,false)}var 
canvasContainer=document.createElement("div");canvas.parentNode.insertBefore(canvasContainer,canvas);canvasContainer.appendChild(canvas);canvasContainer.requestFullscreen=canvasContainer["requestFullscreen"]||canvasContainer["mozRequestFullScreen"]||canvasContainer["msRequestFullscreen"]||(canvasContainer["webkitRequestFullscreen"]?()=>canvasContainer["webkitRequestFullscreen"](Element["ALLOW_KEYBOARD_INPUT"]):null)||(canvasContainer["webkitRequestFullScreen"]?()=>canvasContainer["webkitRequestFullScreen"](Element["ALLOW_KEYBOARD_INPUT"]):null);canvasContainer.requestFullscreen()},requestFullScreen(){abort("Module.requestFullScreen has been replaced by Module.requestFullscreen (without a capital S)")},exitFullscreen(){if(!Browser.isFullscreen){return false}var CFS=document["exitFullscreen"]||document["cancelFullScreen"]||document["mozCancelFullScreen"]||document["msExitFullscreen"]||document["webkitCancelFullScreen"]||(()=>{});CFS.apply(document,[]);return true},nextRAF:0,fakeRequestAnimationFrame(func){var now=Date.now();if(Browser.nextRAF===0){Browser.nextRAF=now+1e3/60}else{while(now+2>=Browser.nextRAF){Browser.nextRAF+=1e3/60}}var delay=Math.max(Browser.nextRAF-now,0);setTimeout(func,delay)},requestAnimationFrame(func){if(typeof requestAnimationFrame=="function"){requestAnimationFrame(func);return}var RAF=Browser.fakeRequestAnimationFrame;RAF(func)},safeSetTimeout(func,timeout){return safeSetTimeout(func,timeout)},safeRequestAnimationFrame(func){return Browser.requestAnimationFrame(()=>{callUserCallback(func)})},getMimetype(name){return{jpg:"image/jpeg",jpeg:"image/jpeg",png:"image/png",bmp:"image/bmp",ogg:"audio/ogg",wav:"audio/wav",mp3:"audio/mpeg"}[name.substr(name.lastIndexOf(".")+1)]},getUserMedia(func){window.getUserMedia||=navigator["getUserMedia"]||navigator["mozGetUserMedia"];window.getUserMedia(func)},getMovementX(event){return event["movementX"]||event["mozMovementX"]||event["webkitMovementX"]||0},getMovementY(event){return 
event["movementY"]||event["mozMovementY"]||event["webkitMovementY"]||0},getMouseWheelDelta(event){var delta=0;switch(event.type){case"DOMMouseScroll":delta=event.detail/3;break;case"mousewheel":delta=event.wheelDelta/120;break;case"wheel":delta=event.deltaY;switch(event.deltaMode){case 0:delta/=100;break;case 1:delta/=3;break;case 2:delta*=80;break;default:throw"unrecognized mouse wheel delta mode: "+event.deltaMode}break;default:throw"unrecognized mouse wheel event: "+event.type}return delta},mouseX:0,mouseY:0,mouseMovementX:0,mouseMovementY:0,touches:{},lastTouches:{},calculateMouseCoords(pageX,pageY){var rect=Module["canvas"].getBoundingClientRect();var cw=Module["canvas"].width;var ch=Module["canvas"].height;var scrollX=typeof window.scrollX!="undefined"?window.scrollX:window.pageXOffset;var scrollY=typeof window.scrollY!="undefined"?window.scrollY:window.pageYOffset;assert(typeof scrollX!="undefined"&&typeof scrollY!="undefined","Unable to retrieve scroll position, mouse positions likely broken.");var adjustedX=pageX-(scrollX+rect.left);var adjustedY=pageY-(scrollY+rect.top);adjustedX=adjustedX*(cw/rect.width);adjustedY=adjustedY*(ch/rect.height);return{x:adjustedX,y:adjustedY}},setMouseCoords(pageX,pageY){const{x,y}=Browser.calculateMouseCoords(pageX,pageY);Browser.mouseMovementX=x-Browser.mouseX;Browser.mouseMovementY=y-Browser.mouseY;Browser.mouseX=x;Browser.mouseY=y},calculateMouseEvent(event){if(Browser.pointerLock){if(event.type!="mousemove"&&"mozMovementX"in event){Browser.mouseMovementX=Browser.mouseMovementY=0}else{Browser.mouseMovementX=Browser.getMovementX(event);Browser.mouseMovementY=Browser.getMovementY(event)}Browser.mouseX+=Browser.mouseMovementX;Browser.mouseY+=Browser.mouseMovementY}else{if(event.type==="touchstart"||event.type==="touchend"||event.type==="touchmove"){var touch=event.touch;if(touch===undefined){return}var 
coords=Browser.calculateMouseCoords(touch.pageX,touch.pageY);if(event.type==="touchstart"){Browser.lastTouches[touch.identifier]=coords;Browser.touches[touch.identifier]=coords}else if(event.type==="touchend"||event.type==="touchmove"){var last=Browser.touches[touch.identifier];last||=coords;Browser.lastTouches[touch.identifier]=last;Browser.touches[touch.identifier]=coords}return}Browser.setMouseCoords(event.pageX,event.pageY)}},resizeListeners:[],updateResizeListeners(){var canvas=Module["canvas"];Browser.resizeListeners.forEach(listener=>listener(canvas.width,canvas.height))},setCanvasSize(width,height,noUpdates){var canvas=Module["canvas"];Browser.updateCanvasDimensions(canvas,width,height);if(!noUpdates)Browser.updateResizeListeners()},windowedWidth:0,windowedHeight:0,setFullscreenCanvasSize(){if(typeof SDL!="undefined"){var flags=GROWABLE_HEAP_U32()[SDL.screen>>2];flags=flags|8388608;GROWABLE_HEAP_I32()[SDL.screen>>2]=flags}Browser.updateCanvasDimensions(Module["canvas"]);Browser.updateResizeListeners()},setWindowedCanvasSize(){if(typeof SDL!="undefined"){var flags=GROWABLE_HEAP_U32()[SDL.screen>>2];flags=flags&~8388608;GROWABLE_HEAP_I32()[SDL.screen>>2]=flags}Browser.updateCanvasDimensions(Module["canvas"]);Browser.updateResizeListeners()},updateCanvasDimensions(canvas,wNative,hNative){if(wNative&&hNative){canvas.widthNative=wNative;canvas.heightNative=hNative}else{wNative=canvas.widthNative;hNative=canvas.heightNative}var w=wNative;var h=hNative;if(Module["forcedAspectRatio"]&&Module["forcedAspectRatio"]>0){if(w/h<Module["forcedAspectRatio"]){w=Math.round(h*Module["forcedAspectRatio"])}else{h=Math.round(w/Module["forcedAspectRatio"])}}if((document["fullscreenElement"]||document["mozFullScreenElement"]||document["msFullscreenElement"]||document["webkitFullscreenElement"]||document["webkitCurrentFullScreenElement"])===canvas.parentNode&&typeof screen!="undefined"){var 
factor=Math.min(screen.width/w,screen.height/h);w=Math.round(w*factor);h=Math.round(h*factor)}if(Browser.resizeCanvas){if(canvas.width!=w)canvas.width=w;if(canvas.height!=h)canvas.height=h;if(typeof canvas.style!="undefined"){canvas.style.removeProperty("width");canvas.style.removeProperty("height")}}else{if(canvas.width!=wNative)canvas.width=wNative;if(canvas.height!=hNative)canvas.height=hNative;if(typeof canvas.style!="undefined"){if(w!=wNative||h!=hNative){canvas.style.setProperty("width",w+"px","important");canvas.style.setProperty("height",h+"px","important")}else{canvas.style.removeProperty("width");canvas.style.removeProperty("height")}}}}};var _emscripten_set_main_loop_timing=(mode,value)=>{Browser.mainLoop.timingMode=mode;Browser.mainLoop.timingValue=value;if(!Browser.mainLoop.func){err("emscripten_set_main_loop_timing: Cannot set timing mode for main loop since a main loop does not exist! Call emscripten_set_main_loop first to set one up.");return 1}if(!Browser.mainLoop.running){Browser.mainLoop.running=true}if(mode==0){Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_setTimeout(){var timeUntilNextTick=Math.max(0,Browser.mainLoop.tickStartTime+value-_emscripten_get_now())|0;setTimeout(Browser.mainLoop.runner,timeUntilNextTick)};Browser.mainLoop.method="timeout"}else if(mode==1){Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_rAF(){Browser.requestAnimationFrame(Browser.mainLoop.runner)};Browser.mainLoop.method="rAF"}else if(mode==2){if(typeof Browser.setImmediate=="undefined"){if(typeof setImmediate=="undefined"){var setImmediates=[];var emscriptenMainLoopMessageId="setimmediate";var 
Browser_setImmediate_messageHandler=event=>{if(event.data===emscriptenMainLoopMessageId||event.data.target===emscriptenMainLoopMessageId){event.stopPropagation();setImmediates.shift()()}};addEventListener("message",Browser_setImmediate_messageHandler,true);Browser.setImmediate=func=>{setImmediates.push(func);if(ENVIRONMENT_IS_WORKER){Module["setImmediates"]??=[];Module["setImmediates"].push(func);postMessage({target:emscriptenMainLoopMessageId})}else postMessage(emscriptenMainLoopMessageId,"*")}}else{Browser.setImmediate=setImmediate}}Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_setImmediate(){Browser.setImmediate(Browser.mainLoop.runner)};Browser.mainLoop.method="immediate"}return 0};var setMainLoop=(browserIterationFunc,fps,simulateInfiniteLoop,arg,noSetTiming)=>{assert(!Browser.mainLoop.func,"emscripten_set_main_loop: there can only be one main loop function at once: call emscripten_cancel_main_loop to cancel the previous one before setting a new one with different parameters.");Browser.mainLoop.func=browserIterationFunc;Browser.mainLoop.arg=arg;var thisMainLoopId=Browser.mainLoop.currentlyRunningMainloop;function checkIsRunning(){if(thisMainLoopId<Browser.mainLoop.currentlyRunningMainloop){maybeExit();return false}return true}Browser.mainLoop.running=false;Browser.mainLoop.runner=function Browser_mainLoop_runner(){if(ABORT)return;if(Browser.mainLoop.queue.length>0){var start=Date.now();var blocker=Browser.mainLoop.queue.shift();blocker.func(blocker.arg);if(Browser.mainLoop.remainingBlockers){var remaining=Browser.mainLoop.remainingBlockers;var 
next=remaining%1==0?remaining-1:Math.floor(remaining);if(blocker.counted){Browser.mainLoop.remainingBlockers=next}else{next=next+.5;Browser.mainLoop.remainingBlockers=(8*remaining+next)/9}}Browser.mainLoop.updateStatus();if(!checkIsRunning())return;setTimeout(Browser.mainLoop.runner,0);return}if(!checkIsRunning())return;Browser.mainLoop.currentFrameNumber=Browser.mainLoop.currentFrameNumber+1|0;if(Browser.mainLoop.timingMode==1&&Browser.mainLoop.timingValue>1&&Browser.mainLoop.currentFrameNumber%Browser.mainLoop.timingValue!=0){Browser.mainLoop.scheduler();return}else if(Browser.mainLoop.timingMode==0){Browser.mainLoop.tickStartTime=_emscripten_get_now()}if(Browser.mainLoop.method==="timeout"&&Module.ctx){warnOnce("Looks like you are rendering without using requestAnimationFrame for the main loop. You should use 0 for the frame rate in emscripten_set_main_loop in order to use requestAnimationFrame, as that can greatly improve your frame rates!");Browser.mainLoop.method=""}Browser.mainLoop.runIter(browserIterationFunc);checkStackCookie();if(!checkIsRunning())return;if(typeof SDL=="object")SDL.audio?.queueNewAudioData?.();Browser.mainLoop.scheduler()};if(!noSetTiming){if(fps&&fps>0){_emscripten_set_main_loop_timing(0,1e3/fps)}else{_emscripten_set_main_loop_timing(1,1)}Browser.mainLoop.scheduler()}if(simulateInfiniteLoop){throw"unwind"}};var _emscripten_set_main_loop=(func,fps,simulateInfiniteLoop)=>{var browserIterationFunc=()=>dynCall_v(func);setMainLoop(browserIterationFunc,fps,simulateInfiniteLoop)};var _emscripten_sleep=ms=>Asyncify.handleSleep(wakeUp=>safeSetTimeout(wakeUp,ms));_emscripten_sleep.isAsync=true;var _wasmWorkersID=1;var _EmAudioDispatchProcessorCallback=e=>{let data=e.data;let wasmCall=data["_wsc"];wasmCall&&getWasmTableEntry(wasmCall)(...data["x"])};var _emscripten_start_wasm_audio_worklet_thread_async=(contextHandle,stackLowestAddress,stackSize,callback,userData)=>{assert(contextHandle,`Called emscripten_start_wasm_audio_worklet_thread_async() 
with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_start_wasm_audio_worklet_thread_async() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_start_wasm_audio_worklet_thread_async() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);let audioContext=EmAudio[contextHandle],audioWorklet=audioContext.audioWorklet;assert(stackLowestAddress!=0,"AudioWorklets require a dedicated stack space for audio data marshalling between Wasm and JS!");assert(stackLowestAddress%16==0,`AudioWorklet stack should be aligned to 16 bytes! (was ${stackLowestAddress} == ${stackLowestAddress%16} mod 16) Use e.g. memalign(16, stackSize) to align the stack!`);assert(stackSize!=0,"AudioWorklets require a dedicated stack space for audio data marshalling between Wasm and JS!");assert(stackSize%16==0,`AudioWorklet stack size should be a multiple of 16 bytes! (was ${stackSize} == ${stackSize%16} mod 16)`);assert(!audioContext.audioWorkletInitialized,"emscripten_create_wasm_audio_worklet() was already called for AudioContext "+contextHandle+"! 
Only call this function once per AudioContext!");audioContext.audioWorkletInitialized=1;let audioWorkletCreationFailed=()=>{((a1,a2,a3)=>dynCall_viii(callback,a1,a2,a3))(contextHandle,0,userData)};if(!audioWorklet){return audioWorkletCreationFailed()}audioWorklet.addModule("amy.aw.js").then(()=>{audioWorklet.bootstrapMessage=new AudioWorkletNode(audioContext,"message",{processorOptions:{$ww:_wasmWorkersID++,wasm:wasmModule,wasmMemory,sb:stackLowestAddress,sz:stackSize}});audioWorklet.bootstrapMessage.port.onmessage=_EmAudioDispatchProcessorCallback;return audioWorklet.addModule(Module["mainScriptUrlOrBlob"]||_scriptName)}).then(()=>{((a1,a2,a3)=>dynCall_viii(callback,a1,a2,a3))(contextHandle,1,userData)}).catch(audioWorkletCreationFailed)};var _fd_close=fd=>{abort("fd_close called without SYSCALLS_REQUIRE_FILESYSTEM")};var convertI32PairToI53Checked=(lo,hi)=>{assert(lo==lo>>>0||lo==(lo|0));assert(hi===(hi|0));return hi+2097152>>>0<4194305-!!lo?(lo>>>0)+hi*4294967296:NaN};function _fd_seek(fd,offset_low,offset_high,whence,newOffset){var offset=convertI32PairToI53Checked(offset_low,offset_high);return 70}var printCharBuffers=[null,[],[]];var printChar=(stream,curr)=>{var buffer=printCharBuffers[stream];assert(buffer);if(curr===0||curr===10){(stream===1?out:err)(UTF8ArrayToString(buffer,0));buffer.length=0}else{buffer.push(curr)}};var flush_NO_FILESYSTEM=()=>{_fflush(0);if(printCharBuffers[1].length)printChar(1,10);if(printCharBuffers[2].length)printChar(2,10)};var _fd_write=(fd,iov,iovcnt,pnum)=>{var num=0;for(var i=0;i<iovcnt;i++){var ptr=GROWABLE_HEAP_U32()[iov>>2];var len=GROWABLE_HEAP_U32()[iov+4>>2];iov+=8;for(var j=0;j<len;j++){printChar(fd,GROWABLE_HEAP_U8()[ptr+j])}num+=len}GROWABLE_HEAP_U32()[pnum>>2]=num;return 0};var runAndAbortIfError=func=>{try{return func()}catch(e){abort(e)}};var runtimeKeepalivePush=()=>{runtimeKeepaliveCounter+=1};var runtimeKeepalivePop=()=>{assert(runtimeKeepaliveCounter>0);runtimeKeepaliveCounter-=1};var 
Asyncify={instrumentWasmImports(imports){var importPattern=/^(invoke_.*|__asyncjs__.*)$/;for(let[x,original]of Object.entries(imports)){if(typeof original=="function"){let isAsyncifyImport=original.isAsync||importPattern.test(x);imports[x]=(...args)=>{var originalAsyncifyState=Asyncify.state;try{return original(...args)}finally{var changedToDisabled=originalAsyncifyState===Asyncify.State.Normal&&Asyncify.state===Asyncify.State.Disabled;var ignoredInvoke=x.startsWith("invoke_")&&true;if(Asyncify.state!==originalAsyncifyState&&!isAsyncifyImport&&!changedToDisabled&&!ignoredInvoke){throw new Error(`import ${x} was not in ASYNCIFY_IMPORTS, but changed the state`)}}}}}},instrumentWasmExports(exports){var ret={};for(let[x,original]of Object.entries(exports)){if(typeof original=="function"){ret[x]=(...args)=>{Asyncify.exportCallStack.push(x);try{return original(...args)}finally{if(!ABORT){var y=Asyncify.exportCallStack.pop();assert(y===x);Asyncify.maybeStopUnwind()}}}}else{ret[x]=original}}return ret},State:{Normal:0,Unwinding:1,Rewinding:2,Disabled:3},state:0,StackSize:4096,currData:null,handleSleepReturnValue:0,exportCallStack:[],callStackNameToId:{},callStackIdToName:{},callStackId:0,asyncPromiseHandlers:null,sleepCallbacks:[],getCallStackId(funcName){var id=Asyncify.callStackNameToId[funcName];if(id===undefined){id=Asyncify.callStackId++;Asyncify.callStackNameToId[funcName]=id;Asyncify.callStackIdToName[id]=funcName}return id},maybeStopUnwind(){if(Asyncify.currData&&Asyncify.state===Asyncify.State.Unwinding&&Asyncify.exportCallStack.length===0){Asyncify.state=Asyncify.State.Normal;runAndAbortIfError(_asyncify_stop_unwind);if(typeof Fibers!="undefined"){Fibers.trampoline()}}},whenDone(){assert(Asyncify.currData,"Tried to wait for an async operation when none is in progress.");assert(!Asyncify.asyncPromiseHandlers,"Cannot have multiple async operations in flight at once");return new 
Promise((resolve,reject)=>{Asyncify.asyncPromiseHandlers={resolve,reject}})},allocateData(){var ptr=_malloc(12+Asyncify.StackSize);Asyncify.setDataHeader(ptr,ptr+12,Asyncify.StackSize);Asyncify.setDataRewindFunc(ptr);return ptr},setDataHeader(ptr,stack,stackSize){GROWABLE_HEAP_U32()[ptr>>2]=stack;GROWABLE_HEAP_U32()[ptr+4>>2]=stack+stackSize},setDataRewindFunc(ptr){var bottomOfCallStack=Asyncify.exportCallStack[0];var rewindId=Asyncify.getCallStackId(bottomOfCallStack);GROWABLE_HEAP_I32()[ptr+8>>2]=rewindId},getDataRewindFuncName(ptr){var id=GROWABLE_HEAP_I32()[ptr+8>>2];var name=Asyncify.callStackIdToName[id];return name},getDataRewindFunc(name){var func=wasmExports[name];return func},doRewind(ptr){var name=Asyncify.getDataRewindFuncName(ptr);var func=Asyncify.getDataRewindFunc(name);return func()},handleSleep(startAsync){assert(Asyncify.state!==Asyncify.State.Disabled,"Asyncify cannot be done during or after the runtime exits");if(ABORT)return;if(Asyncify.state===Asyncify.State.Normal){var reachedCallback=false;var reachedAfterCallback=false;startAsync((handleSleepReturnValue=0)=>{assert(!handleSleepReturnValue||typeof handleSleepReturnValue=="number"||typeof handleSleepReturnValue=="boolean");if(ABORT)return;Asyncify.handleSleepReturnValue=handleSleepReturnValue;reachedCallback=true;if(!reachedAfterCallback){return}assert(!Asyncify.exportCallStack.length,"Waking up (starting to rewind) must be done from JS, without compiled code on the stack.");Asyncify.state=Asyncify.State.Rewinding;runAndAbortIfError(()=>_asyncify_start_rewind(Asyncify.currData));if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.resume()}var asyncWasmReturnValue,isError=false;try{asyncWasmReturnValue=Asyncify.doRewind(Asyncify.currData)}catch(err){asyncWasmReturnValue=err;isError=true}var handled=false;if(!Asyncify.currData){var 
asyncPromiseHandlers=Asyncify.asyncPromiseHandlers;if(asyncPromiseHandlers){Asyncify.asyncPromiseHandlers=null;(isError?asyncPromiseHandlers.reject:asyncPromiseHandlers.resolve)(asyncWasmReturnValue);handled=true}}if(isError&&!handled){throw asyncWasmReturnValue}});reachedAfterCallback=true;if(!reachedCallback){Asyncify.state=Asyncify.State.Unwinding;Asyncify.currData=Asyncify.allocateData();if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.pause()}runAndAbortIfError(()=>_asyncify_start_unwind(Asyncify.currData))}}else if(Asyncify.state===Asyncify.State.Rewinding){Asyncify.state=Asyncify.State.Normal;runAndAbortIfError(_asyncify_stop_rewind);_free(Asyncify.currData);Asyncify.currData=null;Asyncify.sleepCallbacks.forEach(callUserCallback)}else{abort(`invalid state: ${Asyncify.state}`)}return Asyncify.handleSleepReturnValue},handleAsync(startAsync){return Asyncify.handleSleep(wakeUp=>{startAsync().then(wakeUp)})}};var getCFunc=ident=>{var func=Module["_"+ident];assert(func,"Cannot call unknown function "+ident+", make sure it is exported");return func};var writeArrayToMemory=(array,buffer)=>{assert(array.length>=0,"writeArrayToMemory array must have a length (should be an array or typed array)");GROWABLE_HEAP_I8().set(array,buffer)};var lengthBytesUTF8=str=>{var len=0;for(var i=0;i<str.length;++i){var c=str.charCodeAt(i);if(c<=127){len++}else if(c<=2047){len+=2}else if(c>=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{assert(typeof str==="string",`stringToUTF8Array expects a string (got ${typeof str})`);if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i<str.length;++i){var u=str.charCodeAt(i);if(u>=55296&&u<=57343){var u1=str.charCodeAt(++i);u=65536+((u&1023)<<10)|u1&1023}if(u<=127){if(outIdx>=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else 
if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;if(u>1114111)warnOnce("Invalid Unicode code point "+ptrToString(u)+" encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x10FFFF).");heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}}heap[outIdx]=0;return outIdx-startIdx};var stringToUTF8=(str,outPtr,maxBytesToWrite)=>{assert(typeof maxBytesToWrite=="number","stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!");return stringToUTF8Array(str,GROWABLE_HEAP_U8(),outPtr,maxBytesToWrite)};var stackAlloc=sz=>__emscripten_stack_alloc(sz);var stringToUTF8OnStack=str=>{var size=lengthBytesUTF8(str)+1;var ret=stackAlloc(size);stringToUTF8(str,ret,size);return ret};var ccall=(ident,returnType,argTypes,args,opts)=>{var toC={string:str=>{var ret=0;if(str!==null&&str!==undefined&&str!==0){ret=stringToUTF8OnStack(str)}return ret},array:arr=>{var ret=stackAlloc(arr.length);writeArrayToMemory(arr,ret);return ret}};function convertReturnValue(ret){if(returnType==="string"){return UTF8ToString(ret)}if(returnType==="boolean")return Boolean(ret);return ret}var func=getCFunc(ident);var cArgs=[];var stack=0;assert(returnType!=="array",'Return type should not be "array".');if(args){for(var i=0;i<args.length;i++){var converter=toC[argTypes[i]];if(converter){if(stack===0)stack=stackSave();cArgs[i]=converter(args[i])}else{cArgs[i]=args[i]}}}var previousAsync=Asyncify.currData;var ret=func(...cArgs);function onDone(ret){runtimeKeepalivePop();if(stack!==0)stackRestore(stack);return convertReturnValue(ret)}var asyncMode=opts?.async;runtimeKeepalivePush();if(Asyncify.currData!=previousAsync){assert(!(previousAsync&&Asyncify.currData),"We cannot start an async operation when one is already 
flight");assert(!(previousAsync&&!Asyncify.currData),"We cannot stop an async operation in flight");assert(asyncMode,"The call to "+ident+" is running asynchronously. If this was intended, add the async option to the ccall/cwrap call.");return Asyncify.whenDone().then(onDone)}ret=onDone(ret);if(asyncMode)return Promise.resolve(ret);return ret};var cwrap=(ident,returnType,argTypes,opts)=>(...args)=>ccall(ident,returnType,argTypes,args,opts);Module["requestFullscreen"]=Browser.requestFullscreen;Module["requestFullScreen"]=Browser.requestFullScreen;Module["requestAnimationFrame"]=Browser.requestAnimationFrame;Module["setCanvasSize"]=Browser.setCanvasSize;Module["pauseMainLoop"]=Browser.mainLoop.pause;Module["resumeMainLoop"]=Browser.mainLoop.resume;Module["getUserMedia"]=Browser.getUserMedia;Module["createContext"]=Browser.createContext;var preloadedImages={};var preloadedAudios={};function checkIncomingModuleAPI(){ignoredModuleProp("fetchSettings")}var wasmImports={__assert_fail:___assert_fail,_abort_js:__abort_js,_emscripten_get_now_is_monotonic:__emscripten_get_now_is_monotonic,emscripten_asm_const_int:_emscripten_asm_const_int,emscripten_create_audio_context:_emscripten_create_audio_context,emscripten_create_wasm_audio_worklet_node:_emscripten_create_wasm_audio_worklet_node,emscripten_create_wasm_audio_worklet_processor_async:_emscripten_create_wasm_audio_worklet_processor_async,emscripten_date_now:_emscripten_date_now,emscripten_destroy_audio_context:_emscripten_destroy_audio_context,emscripten_destroy_web_audio_node:_emscripten_destroy_web_audio_node,emscripten_get_now:_emscripten_get_now,emscripten_resize_heap:_emscripten_resize_heap,emscripten_set_main_loop:_emscripten_set_main_loop,emscripten_sleep:_emscripten_sleep,emscripten_start_wasm_audio_worklet_thread_async:_emscripten_start_wasm_audio_worklet_thread_async,exit:_exit,fd_close:_fd_close,fd_seek:_fd_seek,fd_write:_fd_write,memory:wasmMemory};var wasmExports=createWasm();var 
___wasm_call_ctors=createExportWrapper("__wasm_call_ctors",0);var _free=Module["_free"]=createExportWrapper("free",1);var _malloc=Module["_malloc"]=createExportWrapper("malloc",1);var _amy_start=Module["_amy_start"]=createExportWrapper("amy_start",4);var _amy_reset_sysclock=Module["_amy_reset_sysclock"]=createExportWrapper("amy_reset_sysclock",0);var _amy_play_message=Module["_amy_play_message"]=createExportWrapper("amy_play_message",1);var _sequencer_ticks=Module["_sequencer_ticks"]=createExportWrapper("sequencer_ticks",0);var _ma_device__on_notification_unlocked=Module["_ma_device__on_notification_unlocked"]=createExportWrapper("ma_device__on_notification_unlocked",1);var _ma_malloc_emscripten=Module["_ma_malloc_emscripten"]=createExportWrapper("ma_malloc_emscripten",2);var _ma_free_emscripten=Module["_ma_free_emscripten"]=createExportWrapper("ma_free_emscripten",2);var _ma_device_process_pcm_frames_capture__webaudio=Module["_ma_device_process_pcm_frames_capture__webaudio"]=createExportWrapper("ma_device_process_pcm_frames_capture__webaudio",3);var _ma_device_process_pcm_frames_playback__webaudio=Module["_ma_device_process_pcm_frames_playback__webaudio"]=createExportWrapper("ma_device_process_pcm_frames_playback__webaudio",3);var _amy_live_start=Module["_amy_live_start"]=createExportWrapper("amy_live_start",0);var _fflush=createExportWrapper("fflush",1);var _strerror=createExportWrapper("strerror",1);var _emscripten_stack_init=()=>(_emscripten_stack_init=wasmExports["emscripten_stack_init"])();var _emscripten_stack_get_free=()=>(_emscripten_stack_get_free=wasmExports["emscripten_stack_get_free"])();var _emscripten_stack_get_base=()=>(_emscripten_stack_get_base=wasmExports["emscripten_stack_get_base"])();var _emscripten_stack_get_end=()=>(_emscripten_stack_get_end=wasmExports["emscripten_stack_get_end"])();var __emscripten_wasm_worker_initialize=createExportWrapper("_emscripten_wasm_worker_initialize",2);var 
__emscripten_stack_restore=a0=>(__emscripten_stack_restore=wasmExports["_emscripten_stack_restore"])(a0);var __emscripten_stack_alloc=a0=>(__emscripten_stack_alloc=wasmExports["_emscripten_stack_alloc"])(a0);var _emscripten_stack_get_current=()=>(_emscripten_stack_get_current=wasmExports["emscripten_stack_get_current"])();var dynCall_ii=Module["dynCall_ii"]=createExportWrapper("dynCall_ii",2);var dynCall_vii=Module["dynCall_vii"]=createExportWrapper("dynCall_vii",3);var dynCall_iiii=Module["dynCall_iiii"]=createExportWrapper("dynCall_iiii",4);var dynCall_iii=Module["dynCall_iii"]=createExportWrapper("dynCall_iii",3);var dynCall_iiiii=Module["dynCall_iiiii"]=createExportWrapper("dynCall_iiiii",5);var dynCall_viii=Module["dynCall_viii"]=createExportWrapper("dynCall_viii",4);var dynCall_viiii=Module["dynCall_viiii"]=createExportWrapper("dynCall_viiii",5);var dynCall_v=Module["dynCall_v"]=createExportWrapper("dynCall_v",1);var dynCall_iiiiiiii=Module["dynCall_iiiiiiii"]=createExportWrapper("dynCall_iiiiiiii",8);var dynCall_iiiji=Module["dynCall_iiiji"]=createExportWrapper("dynCall_iiiji",6);var dynCall_iiiiiii=Module["dynCall_iiiiiii"]=createExportWrapper("dynCall_iiiiiii",7);var dynCall_jii=Module["dynCall_jii"]=createExportWrapper("dynCall_jii",3);var dynCall_jiji=Module["dynCall_jiji"]=createExportWrapper("dynCall_jiji",5);var dynCall_iidiiii=Module["dynCall_iidiiii"]=createExportWrapper("dynCall_iidiiii",7);var _asyncify_start_unwind=createExportWrapper("asyncify_start_unwind",1);var _asyncify_stop_unwind=createExportWrapper("asyncify_stop_unwind",0);var _asyncify_start_rewind=createExportWrapper("asyncify_start_rewind",1);var _asyncify_stop_rewind=createExportWrapper("asyncify_stop_rewind",0);Module["stackSave"]=stackSave;Module["stackRestore"]=stackRestore;Module["stackAlloc"]=stackAlloc;Module["wasmTable"]=wasmTable;Module["ccall"]=ccall;Module["cwrap"]=cwrap;var 
missingLibrarySymbols=["writeI53ToI64","writeI53ToI64Clamped","writeI53ToI64Signaling","writeI53ToU64Clamped","writeI53ToU64Signaling","readI53FromI64","readI53FromU64","convertI32PairToI53","convertU32PairToI53","getTempRet0","setTempRet0","zeroMemory","strError","inetPton4","inetNtop4","inetPton6","inetNtop6","readSockaddr","writeSockaddr","initRandomFill","randomFill","emscriptenLog","runMainThreadEmAsm","jstoi_q","getExecutableName","listenOnce","autoResumeAudioContext","dynCallLegacy","getDynCaller","dynCall","asmjsMangle","asyncLoad","mmapAlloc","HandleAllocator","getNativeTypeSize","STACK_SIZE","STACK_ALIGN","POINTER_SIZE","ASSERTIONS","uleb128Encode","generateFuncType","convertJsFunctionToWasm","getEmptyTableSlot","updateTableMap","getFunctionAddress","addFunction","removeFunction","reallyNegative","unSign","strLen","reSign","formatString","intArrayFromString","intArrayToString","AsciiToString","stringToAscii","UTF16ToString","stringToUTF16","lengthBytesUTF16","UTF32ToString","stringToUTF32","lengthBytesUTF32","stringToNewUTF8","registerKeyEventCallback","maybeCStringToJsString","findEventTarget","getBoundingClientRect","fillMouseEventData","registerMouseEventCallback","registerWheelEventCallback","registerUiEventCallback","registerFocusEventCallback","fillDeviceOrientationEventData","registerDeviceOrientationEventCallback","fillDeviceMotionEventData","registerDeviceMotionEventCallback","screenOrientation","fillOrientationChangeEventData","registerOrientationChangeEventCallback","fillFullscreenChangeEventData","registerFullscreenChangeEventCallback","JSEvents_requestFullscreen","JSEvents_resizeCanvasForFullscreen","registerRestoreOldStyle","hideEverythingExceptGivenElement","restoreHiddenElements","setLetterbox","softFullscreenResizeWebGLRenderTarget","doRequestFullscreen","fillPointerlockChangeEventData","registerPointerlockChangeEventCallback","registerPointerlockErrorEventCallback","requestPointerLock","fillVisibilityChangeEventData","registerVisibilityCh
angeEventCallback","registerTouchEventCallback","fillGamepadEventData","registerGamepadEventCallback","registerBeforeUnloadEventCallback","fillBatteryEventData","battery","registerBatteryEventCallback","setCanvasElementSize","getCanvasElementSize","jsStackTrace","getCallstack","convertPCtoSourceLocation","getEnvStrings","checkWasiClock","wasiRightsToMuslOFlags","wasiOFlagsToMuslOFlags","createDyncallWrapper","setImmediateWrapped","clearImmediateWrapped","polyfillSetImmediate","getPromise","makePromise","idsToPromises","makePromiseCallback","ExceptionInfo","findMatchingCatch","Browser_asyncPrepareDataCounter","isLeapYear","ydayFromDate","arraySum","addDays","getSocketFromFD","getSocketAddress","FS_createPreloadedFile","FS_modeStringToFlags","FS_getMode","FS_stdin_getChar","FS_unlink","FS_createDataFile","FS_mkdirTree","_setNetworkCallback","heapObjectForWebGLType","toTypedArrayIndex","webgl_enable_ANGLE_instanced_arrays","webgl_enable_OES_vertex_array_object","webgl_enable_WEBGL_draw_buffers","webgl_enable_WEBGL_multi_draw","webgl_enable_EXT_polygon_offset_clamp","webgl_enable_EXT_clip_control","webgl_enable_WEBGL_polygon_mode","emscriptenWebGLGet","computeUnpackAlignedImageSize","colorChannelsInGlTextureFormat","emscriptenWebGLGetTexPixelData","emscriptenWebGLGetUniform","webglGetUniformLocation","webglPrepareUniformLocationsBeforeFirstUse","webglGetLeftBracePos","emscriptenWebGLGetVertexAttrib","__glGetActiveAttribOrUniform","writeGLArray","registerWebGlEventCallback","ALLOC_NORMAL","ALLOC_STACK","allocate","writeStringToMemory","writeAsciiToMemory","setErrNo","demangle","stackTrace","_wasmWorkerPostFunction1","_wasmWorkerPostFunction2","_wasmWorkerPostFunction3","emscripten_audio_worklet_post_function_1","emscripten_audio_worklet_post_function_2","emscripten_audio_worklet_post_function_3"];missingLibrarySymbols.forEach(missingLibrarySymbol);var 
unexportedSymbols=["run","addOnPreRun","addOnInit","addOnPreMain","addOnExit","addOnPostRun","addRunDependency","removeRunDependency","out","err","callMain","abort","wasmMemory","wasmExports","writeStackCookie","checkStackCookie","convertI32PairToI53Checked","ptrToString","exitJS","getHeapMax","growMemory","ENV","ERRNO_CODES","DNS","Protocols","Sockets","timers","warnOnce","readEmAsmArgsArray","readEmAsmArgs","runEmAsmFunction","jstoi_s","handleException","keepRuntimeAlive","runtimeKeepalivePush","runtimeKeepalivePop","callUserCallback","maybeExit","alignMemory","noExitRuntime","getCFunc","sigToWasmTypes","freeTableIndexes","functionsInTableMap","setValue","getValue","PATH","PATH_FS","UTF8Decoder","UTF8ArrayToString","UTF8ToString","stringToUTF8Array","stringToUTF8","lengthBytesUTF8","UTF16Decoder","stringToUTF8OnStack","writeArrayToMemory","JSEvents","specialHTMLTargets","findCanvasEventTarget","currentFullscreenStrategy","restoreOldWindowedStyle","UNWIND_CACHE","ExitStatus","flush_NO_FILESYSTEM","safeSetTimeout","promiseMap","uncaughtExceptionCount","exceptionLast","exceptionCaught","Browser","setMainLoop","getPreloadedImageData__data","wget","MONTH_DAYS_REGULAR","MONTH_DAYS_LEAP","MONTH_DAYS_REGULAR_CUMULATIVE","MONTH_DAYS_LEAP_CUMULATIVE","SYSCALLS","preloadPlugins","FS_stdin_getChar_buffer","FS_createPath","FS_createDevice","FS_readFile","FS","FS_createLazyFile","MEMFS","TTY","PIPEFS","SOCKFS","tempFixedLengthArray","miniTempWebGLFloatBuffers","miniTempWebGLIntBuffers","GL","AL","GLUT","EGL","GLEW","IDBStore","runAndAbortIfError","Asyncify","Fibers","SDL","SDL_gfx","allocateUTF8","allocateUTF8OnStack","print","printErr","_wasmWorkers","_wasmWorkersID","_wasmWorkerDelayedMessageQueue","_wasmWorkerAppendToQueue","_wasmWorkerRunPostMessage","_wasmWorkerInitializeRuntime","EmAudio","EmAudioCounter","emscriptenRegisterAudioObject","emscriptenDestroyAudioContext","emscriptenGetAudioObject","_EmAudioDispatchProcessorCallback"];unexportedSymbols.forEach(unexportedRunti
meSymbol);var calledRun;dependenciesFulfilled=function runCaller(){if(!calledRun)run();if(!calledRun)dependenciesFulfilled=runCaller};function stackCheckInit(){_emscripten_stack_init();writeStackCookie()}function run(){if(runDependencies>0){return}stackCheckInit();if(ENVIRONMENT_IS_WASM_WORKER){readyPromiseResolve(Module);return initRuntime()}preRun();if(runDependencies>0){return}function doRun(){if(calledRun)return;calledRun=true;Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve(Module);Module["onRuntimeInitialized"]?.();assert(!Module["_main"],'compiled without a main, but one is present. if you added it from JS, use Module["onRuntimeInitialized"]');postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}checkStackCookie()}function checkUnflushedContent(){var oldOut=out;var oldErr=err;var has=false;out=err=x=>{has=true};try{flush_NO_FILESYSTEM()}catch(e){}out=oldOut;err=oldErr;if(has){warnOnce("stdio streams had content in them that was not flushed. you should set EXIT_RUNTIME to 1 (see the Emscripten FAQ), or make sure to emit a newline when you printf etc.");warnOnce("(this may also be due to not including full filesystem support - try building with -sFORCE_FILESYSTEM)")}}if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}run();moduleRtn=readyPromise;for(const prop of Object.keys(Module)){if(!(prop in moduleArg)){Object.defineProperty(moduleArg,prop,{configurable:true,get(){abort(`Access to module property ('${prop}') is no longer possible via the module constructor argument; Instead, use the result of the module constructor.`)}})}}
+function GROWABLE_HEAP_I8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP8}function GROWABLE_HEAP_U8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU8}function GROWABLE_HEAP_I16(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP16}function GROWABLE_HEAP_I32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP32}function GROWABLE_HEAP_U32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU32}function GROWABLE_HEAP_F32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF32}function GROWABLE_HEAP_F64(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF64}var Module=moduleArg;var readyPromiseResolve,readyPromiseReject;Module["ready"]=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject});["_amy_play_message","_amy_reset_sysclock","_amy_live_start","_amy_start","_sequencer_ticks","_malloc","_free","_emscripten_wasm_worker_initialize","___set_thread_state","___indirect_function_table","_ma_device__on_notification_unlocked","_ma_malloc_emscripten","_ma_free_emscripten","_ma_device_process_pcm_frames_capture__webaudio","_ma_device_process_pcm_frames_playback__webaudio","_fflush","___start_em_asm","___stop_em_asm","onRuntimeInitialized"].forEach(prop=>{if(!Object.getOwnPropertyDescriptor(Module["ready"],prop)){Object.defineProperty(Module["ready"],prop,{get:()=>abort("You are getting "+prop+" on the Promise object, instead of the instance. Use .then() to get called back with the instance, see the MODULARIZE docs in src/settings.js"),set:()=>abort("You are setting "+prop+" on the Promise object, instead of the instance. 
Use .then() to get called back with the instance, see the MODULARIZE docs in src/settings.js")})}});var moduleOverrides=Object.assign({},Module);var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var ENVIRONMENT_IS_AUDIO_WORKLET=typeof AudioWorkletGlobalScope!=="undefined";var ENVIRONMENT_IS_WEB=typeof window=="object";var ENVIRONMENT_IS_WORKER=typeof importScripts=="function";var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";var ENVIRONMENT_IS_SHELL=!ENVIRONMENT_IS_WEB&&!ENVIRONMENT_IS_NODE&&!ENVIRONMENT_IS_WORKER&&!ENVIRONMENT_IS_AUDIO_WORKLET;if(Module["ENVIRONMENT"]){throw new Error("Module.ENVIRONMENT has been deprecated. To force the environment, use the ENVIRONMENT compile-time option (for example, -sENVIRONMENT=web or -sENVIRONMENT=node)")}var ENVIRONMENT_IS_WASM_WORKER=Module["$ww"];var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var read_,readAsync,readBinary;if(ENVIRONMENT_IS_NODE){if(typeof process=="undefined"||!process.release||process.release.name!=="node")throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)");var nodeVersion=process.versions.node;var numericVersion=nodeVersion.split(".").slice(0,3);numericVersion=numericVersion[0]*1e4+numericVersion[1]*100+numericVersion[2].split("-")[0]*1;if(numericVersion<16e4){throw new Error("This emscripten-generated code requires node v16.0.0 (detected v"+nodeVersion+")")}var fs=require("fs");var nodePath=require("path");if(ENVIRONMENT_IS_WORKER){scriptDirectory=nodePath.dirname(scriptDirectory)+"/"}else{scriptDirectory=__dirname+"/"}read_=(filename,binary)=>{filename=isFileURI(filename)?new URL(filename):nodePath.normalize(filename);return 
fs.readFileSync(filename,binary?undefined:"utf8")};readBinary=filename=>{var ret=read_(filename,true);if(!ret.buffer){ret=new Uint8Array(ret)}assert(ret.buffer);return ret};readAsync=(filename,onload,onerror,binary=true)=>{filename=isFileURI(filename)?new URL(filename):nodePath.normalize(filename);fs.readFile(filename,binary?undefined:"utf8",(err,data)=>{if(err)onerror(err);else onload(binary?data.buffer:data)})};if(!Module["thisProgram"]&&process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow};Module["inspect"]=()=>"[Emscripten Module object]";let nodeWorkerThreads;try{nodeWorkerThreads=require("worker_threads")}catch(e){console.error('The "worker_threads" module is not supported in this node.js build - perhaps a newer version is needed?');throw e}global.Worker=nodeWorkerThreads.Worker}else if(ENVIRONMENT_IS_SHELL){if(typeof process=="object"&&typeof require==="function"||typeof window=="object"||typeof importScripts=="function")throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)");if(typeof read!="undefined"){read_=read}readBinary=f=>{if(typeof readbuffer=="function"){return new Uint8Array(readbuffer(f))}let data=read(f,"binary");assert(typeof data=="object");return data};readAsync=(f,onload,onerror)=>{setTimeout(()=>onload(readBinary(f)))};if(typeof clearTimeout=="undefined"){globalThis.clearTimeout=id=>{}}if(typeof setTimeout=="undefined"){globalThis.setTimeout=f=>typeof f=="function"?f():abort()}if(typeof scriptArgs!="undefined"){arguments_=scriptArgs}else if(typeof arguments!="undefined"){arguments_=arguments}if(typeof quit=="function"){quit_=(status,toThrow)=>{setTimeout(()=>{if(!(toThrow instanceof ExitStatus)){let toLog=toThrow;if(toThrow&&typeof 
toThrow=="object"&&toThrow.stack){toLog=[toThrow,toThrow.stack]}err(`exiting due to exception: ${toLog}`)}quit(status)});throw toThrow}}if(typeof print!="undefined"){if(typeof console=="undefined")console={};console.log=print;console.warn=console.error=typeof printErr!="undefined"?printErr:print}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){if(ENVIRONMENT_IS_WORKER){scriptDirectory=self.location.href}else if(typeof document!="undefined"&&document.currentScript){scriptDirectory=document.currentScript.src}if(_scriptDir){scriptDirectory=_scriptDir}if(scriptDirectory.indexOf("blob:")!==0){scriptDirectory=scriptDirectory.substr(0,scriptDirectory.replace(/[?#].*/,"").lastIndexOf("/")+1)}else{scriptDirectory=""}if(!(typeof window=="object"||typeof importScripts=="function"))throw new Error("not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)");{read_=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.send(null);return xhr.responseText};if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=(url,onload,onerror)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){onload(xhr.response);return}onerror()};xhr.onerror=onerror;xhr.send(null)}}}else if(!ENVIRONMENT_IS_AUDIO_WORKLET){throw new Error("environment detection error")}var out=Module["print"]||console.log.bind(console);var 
err=Module["printErr"]||console.error.bind(console);Object.assign(Module,moduleOverrides);moduleOverrides=null;checkIncomingModuleAPI();if(Module["arguments"])arguments_=Module["arguments"];legacyModuleProp("arguments","arguments_");if(Module["thisProgram"])thisProgram=Module["thisProgram"];legacyModuleProp("thisProgram","thisProgram");if(Module["quit"])quit_=Module["quit"];legacyModuleProp("quit","quit_");assert(typeof Module["memoryInitializerPrefixURL"]=="undefined","Module.memoryInitializerPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["pthreadMainPrefixURL"]=="undefined","Module.pthreadMainPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["cdInitializerPrefixURL"]=="undefined","Module.cdInitializerPrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["filePackagePrefixURL"]=="undefined","Module.filePackagePrefixURL option was removed, use Module.locateFile instead");assert(typeof Module["read"]=="undefined","Module.read option was removed (modify read_ in JS)");assert(typeof Module["readAsync"]=="undefined","Module.readAsync option was removed (modify readAsync in JS)");assert(typeof Module["readBinary"]=="undefined","Module.readBinary option was removed (modify readBinary in JS)");assert(typeof Module["setWindowTitle"]=="undefined","Module.setWindowTitle option was removed (modify emscripten_set_window_title in JS)");assert(typeof Module["TOTAL_MEMORY"]=="undefined","Module.TOTAL_MEMORY has been renamed Module.INITIAL_MEMORY");legacyModuleProp("asm","wasmExports");legacyModuleProp("read","read_");legacyModuleProp("readAsync","readAsync");legacyModuleProp("readBinary","readBinary");legacyModuleProp("setWindowTitle","setWindowTitle");assert(!ENVIRONMENT_IS_SHELL,"shell environment detected but not enabled at build time.  
Add 'shell' to `-sENVIRONMENT` to enable.");var wasmBinary;if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];legacyModuleProp("wasmBinary","wasmBinary");if(typeof WebAssembly!="object"){abort("no native wasm support detected")}var wasmMemory;var wasmModule;var ABORT=false;var EXITSTATUS;function assert(condition,text){if(!condition){abort("Assertion failed"+(text?": "+text:""))}}var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);Module["HEAP16"]=HEAP16=new Int16Array(b);Module["HEAPU8"]=HEAPU8=new Uint8Array(b);Module["HEAPU16"]=HEAPU16=new Uint16Array(b);Module["HEAP32"]=HEAP32=new Int32Array(b);Module["HEAPU32"]=HEAPU32=new Uint32Array(b);Module["HEAPF32"]=HEAPF32=new Float32Array(b);Module["HEAPF64"]=HEAPF64=new Float64Array(b)}assert(!Module["STACK_SIZE"],"STACK_SIZE can no longer be set at runtime.  Use -sSTACK_SIZE at link time");assert(typeof Int32Array!="undefined"&&typeof Float64Array!=="undefined"&&Int32Array.prototype.subarray!=undefined&&Int32Array.prototype.set!=undefined,"JS engine does not provide full typed array support");var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||268435456;legacyModuleProp("INITIAL_MEMORY","INITIAL_MEMORY");assert(INITIAL_MEMORY>=134217728,"INITIAL_MEMORY should be larger than STACK_SIZE, was "+INITIAL_MEMORY+"! 
(STACK_SIZE="+134217728+")");if(Module["wasmMemory"]){wasmMemory=Module["wasmMemory"]}else{wasmMemory=new WebAssembly.Memory({"initial":INITIAL_MEMORY/65536,"maximum":2147483648/65536,"shared":true});if(!(wasmMemory.buffer instanceof SharedArrayBuffer)){err("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag");if(ENVIRONMENT_IS_NODE){err("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)")}throw Error("bad memory")}}updateMemoryViews();INITIAL_MEMORY=wasmMemory.buffer.byteLength;assert(INITIAL_MEMORY%65536===0);function writeStackCookie(){var max=_emscripten_stack_get_end();assert((max&3)==0);if(max==0){max+=4}GROWABLE_HEAP_U32()[max>>2]=34821223;GROWABLE_HEAP_U32()[max+4>>2]=2310721022;GROWABLE_HEAP_U32()[0>>2]=1668509029}function checkStackCookie(){if(ABORT)return;var max=_emscripten_stack_get_end();if(max==0){max+=4}var cookie1=GROWABLE_HEAP_U32()[max>>2];var cookie2=GROWABLE_HEAP_U32()[max+4>>2];if(cookie1!=34821223||cookie2!=2310721022){abort(`Stack overflow! Stack cookie has been overwritten at ${ptrToString(max)}, expected hex dwords 0x89BACDFE and 0x2135467, but received ${ptrToString(cookie2)} ${ptrToString(cookie1)}`)}if(GROWABLE_HEAP_U32()[0>>2]!=1668509029){abort("Runtime error: The application has corrupted its heap memory area (address zero)!")}}(function(){var h16=new Int16Array(1);var h8=new Int8Array(h16.buffer);h16[0]=25459;if(h8[0]!==115||h8[1]!==99)throw"Runtime error: expected the system to be little-endian! 
(Run with -sSUPPORT_BIG_ENDIAN to bypass)"})();var __ATPRERUN__=[];var __ATINIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}function initRuntime(){assert(!runtimeInitialized);runtimeInitialized=true;if(ENVIRONMENT_IS_WASM_WORKER)return _wasmWorkerInitializeRuntime();checkStackCookie();callRuntimeCallbacks(__ATINIT__)}function postRun(){checkStackCookie();if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}function addOnInit(cb){__ATINIT__.unshift(cb)}function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}assert(Math.imul,"This browser does not support Math.imul(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.fround,"This browser does not support Math.fround(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.clz32,"This browser does not support Math.clz32(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");assert(Math.trunc,"This browser does not support Math.trunc(), build with LEGACY_VM_SUPPORT or POLYFILL_OLD_MATH_FUNCTIONS to add in a polyfill");var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;var runDependencyTracking={};function addRunDependency(id){runDependencies++;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}if(id){assert(!runDependencyTracking[id]);runDependencyTracking[id]=1;if(runDependencyWatcher===null&&typeof 
setInterval!="undefined"){runDependencyWatcher=setInterval(()=>{if(ABORT){clearInterval(runDependencyWatcher);runDependencyWatcher=null;return}var shown=false;for(var dep in runDependencyTracking){if(!shown){shown=true;err("still waiting on run dependencies:")}err(`dependency: ${dep}`)}if(shown){err("(end of list)")}},1e4)}}else{err("warning: run dependency added without ID")}}function removeRunDependency(id){runDependencies--;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}if(id){assert(runDependencyTracking[id]);delete runDependencyTracking[id]}else{err("warning: run dependency removed without ID")}if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}function abort(what){if(Module["onAbort"]){Module["onAbort"](what)}what="Aborted("+what+")";err(what);ABORT=true;EXITSTATUS=1;if(what.indexOf("RuntimeError: unreachable")>=0){what+='. "unreachable" may be due to ASYNCIFY_STACK_SIZE not being large enough (try increasing it)'}var e=new WebAssembly.RuntimeError(what);readyPromiseReject(e);throw e}var FS={error(){abort("Filesystem support (FS) was not included. The problem is that you are using files from JS, but files were not used from C/C++, so filesystem support was not auto-included. 
You can force-include filesystem support with -sFORCE_FILESYSTEM")},init(){FS.error()},createDataFile(){FS.error()},createPreloadedFile(){FS.error()},createLazyFile(){FS.error()},open(){FS.error()},mkdev(){FS.error()},registerDevice(){FS.error()},analyzePath(){FS.error()},ErrnoError(){FS.error()}};Module["FS_createDataFile"]=FS.createDataFile;Module["FS_createPreloadedFile"]=FS.createPreloadedFile;var dataURIPrefix="data:application/octet-stream;base64,";var isDataURI=filename=>filename.startsWith(dataURIPrefix);var isFileURI=filename=>filename.startsWith("file://");function createExportWrapper(name){return function(){assert(runtimeInitialized,`native function \`${name}\` called before runtime initialization`);var f=wasmExports[name];assert(f,`exported native function \`${name}\` not found`);return f.apply(null,arguments)}}var wasmBinaryFile;wasmBinaryFile="amy.wasm";if(!isDataURI(wasmBinaryFile)){wasmBinaryFile=locateFile(wasmBinaryFile)}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}function getBinaryPromise(binaryFile){if(!wasmBinary&&(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER)){if(typeof fetch=="function"&&!isFileURI(binaryFile)){return fetch(binaryFile,{credentials:"same-origin"}).then(response=>{if(!response["ok"]){throw"failed to load wasm binary file at '"+binaryFile+"'"}return response["arrayBuffer"]()}).catch(()=>getBinarySync(binaryFile))}else if(readAsync){return new Promise((resolve,reject)=>{readAsync(binaryFile,response=>resolve(new Uint8Array(response)),reject)})}}return Promise.resolve().then(()=>getBinarySync(binaryFile))}function instantiateArrayBuffer(binaryFile,imports,receiver){return getBinaryPromise(binaryFile).then(binary=>WebAssembly.instantiate(binary,imports)).then(instance=>instance).then(receiver,reason=>{err(`failed to asynchronously prepare wasm: 
${reason}`);if(isFileURI(wasmBinaryFile)){err(`warning: Loading from a file URI (${wasmBinaryFile}) is not supported in most browsers. See https://emscripten.org/docs/getting_started/FAQ.html#how-do-i-run-a-local-webserver-for-testing-why-does-my-program-stall-in-downloading-or-preparing`)}abort(reason)})}function instantiateAsync(binary,binaryFile,imports,callback){if(!binary&&typeof WebAssembly.instantiateStreaming=="function"&&!isDataURI(binaryFile)&&!isFileURI(binaryFile)&&!ENVIRONMENT_IS_NODE&&typeof fetch=="function"){return fetch(binaryFile,{credentials:"same-origin"}).then(response=>{var result=WebAssembly.instantiateStreaming(response,imports);return result.then(callback,function(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation");return instantiateArrayBuffer(binaryFile,imports,callback)})})}return instantiateArrayBuffer(binaryFile,imports,callback)}function createWasm(){var info={"env":wasmImports,"wasi_snapshot_preview1":wasmImports};function receiveInstance(instance,module){wasmExports=instance.exports;wasmExports=Asyncify.instrumentWasmExports(wasmExports);wasmTable=wasmExports["__indirect_function_table"];assert(wasmTable,"table not found in wasm exports");Module["wasmTable"]=wasmTable;addOnInit(wasmExports["__wasm_call_ctors"]);wasmModule=module;removeRunDependency("wasm-instantiate");return wasmExports}addRunDependency("wasm-instantiate");var trueModule=Module;function receiveInstantiationResult(result){assert(Module===trueModule,"the Module object should not be replaced during async compilation - perhaps the order of HTML elements is wrong?");trueModule=null;receiveInstance(result["instance"],result["module"])}if(Module["instantiateWasm"]){try{return Module["instantiateWasm"](info,receiveInstance)}catch(e){err(`Module.instantiateWasm callback failed with error: 
${e}`);readyPromiseReject(e)}}instantiateAsync(wasmBinary,wasmBinaryFile,info,receiveInstantiationResult).catch(readyPromiseReject);return{}}function legacyModuleProp(prop,newName,incomming=true){if(!Object.getOwnPropertyDescriptor(Module,prop)){Object.defineProperty(Module,prop,{configurable:true,get(){let extra=incomming?" (the initial value can be provided on Module, but after startup the value is only looked for on a local variable of that name)":"";abort(`\`Module.${prop}\` has been replaced by \`${newName}\``+extra)}})}}function ignoredModuleProp(prop){if(Object.getOwnPropertyDescriptor(Module,prop)){abort(`\`Module.${prop}\` was supplied but \`${prop}\` not included in INCOMING_MODULE_JS_API`)}}function isExportedByForceFilesystem(name){return name==="FS_createPath"||name==="FS_createDataFile"||name==="FS_createPreloadedFile"||name==="FS_unlink"||name==="addRunDependency"||name==="FS_createLazyFile"||name==="FS_createDevice"||name==="removeRunDependency"}function missingGlobal(sym,msg){if(typeof globalThis!=="undefined"){Object.defineProperty(globalThis,sym,{configurable:true,get(){warnOnce(`\`${sym}\` is not longer defined by emscripten. ${msg}`);return undefined}})}}missingGlobal("buffer","Please use HEAP8.buffer or wasmMemory.buffer");missingGlobal("asm","Please use wasmExports instead");function missingLibrarySymbol(sym){if(typeof globalThis!=="undefined"&&!Object.getOwnPropertyDescriptor(globalThis,sym)){Object.defineProperty(globalThis,sym,{configurable:true,get(){var msg=`\`${sym}\` is a library symbol and not included by default; add it to your library.js __deps or to DEFAULT_LIBRARY_FUNCS_TO_INCLUDE on the command line`;var librarySymbol=sym;if(!librarySymbol.startsWith("_")){librarySymbol="$"+sym}msg+=` (e.g. -sDEFAULT_LIBRARY_FUNCS_TO_INCLUDE='${librarySymbol}')`;if(isExportedByForceFilesystem(sym)){msg+=". 
Alternatively, forcing filesystem support (-sFORCE_FILESYSTEM) can export this for you"}warnOnce(msg);return undefined}})}unexportedRuntimeSymbol(sym)}function unexportedRuntimeSymbol(sym){if(!Object.getOwnPropertyDescriptor(Module,sym)){Object.defineProperty(Module,sym,{configurable:true,get(){var msg=`'${sym}' was not exported. add it to EXPORTED_RUNTIME_METHODS (see the Emscripten FAQ)`;if(isExportedByForceFilesystem(sym)){msg+=". Alternatively, forcing filesystem support (-sFORCE_FILESYSTEM) can export this for you"}abort(msg)}})}}var ASM_CONSTS={1110016:$0=>{amy_sequencer_js_hook($0)},1110047:($0,$1,$2,$3,$4)=>{if(typeof window==="undefined"||(window.AudioContext||window.webkitAudioContext)===undefined){return 0}if(typeof window.miniaudio==="undefined"){window.miniaudio={referenceCount:0};window.miniaudio.device_type={};window.miniaudio.device_type.playback=$0;window.miniaudio.device_type.capture=$1;window.miniaudio.device_type.duplex=$2;window.miniaudio.device_state={};window.miniaudio.device_state.stopped=$3;window.miniaudio.device_state.started=$4;miniaudio.devices=[];miniaudio.track_device=function(device){for(var iDevice=0;iDevice<miniaudio.devices.length;++iDevice){if(miniaudio.devices[iDevice]==null){miniaudio.devices[iDevice]=device;return iDevice}}miniaudio.devices.push(device);return miniaudio.devices.length-1};miniaudio.untrack_device_by_index=function(deviceIndex){miniaudio.devices[deviceIndex]=null;while(miniaudio.devices.length>0){if(miniaudio.devices[miniaudio.devices.length-1]==null){miniaudio.devices.pop()}else{break}}};miniaudio.untrack_device=function(device){for(var iDevice=0;iDevice<miniaudio.devices.length;++iDevice){if(miniaudio.devices[iDevice]==device){return miniaudio.untrack_device_by_index(iDevice)}}};miniaudio.get_device_by_index=function(deviceIndex){return miniaudio.devices[deviceIndex]};miniaudio.unlock_event_types=function(){return["touchend","click"]}();miniaudio.unlock=function(){for(var 
i=0;i<miniaudio.devices.length;++i){var device=miniaudio.devices[i];if(device!=null&&device.webaudio!=null&&device.state===window.miniaudio.device_state.started){device.webaudio.resume().then(()=>{Module._ma_device__on_notification_unlocked(device.pDevice)},error=>{console.error("Failed to resume audiocontext",error)})}}miniaudio.unlock_event_types.map(function(event_type){document.removeEventListener(event_type,miniaudio.unlock,true)})};miniaudio.unlock_event_types.map(function(event_type){document.addEventListener(event_type,miniaudio.unlock,true)})}window.miniaudio.referenceCount+=1;return 1},1112205:()=>{if(typeof window.miniaudio!=="undefined"){window.miniaudio.referenceCount-=1;if(window.miniaudio.referenceCount===0){delete window.miniaudio}}},1112369:()=>navigator.mediaDevices!==undefined&&navigator.mediaDevices.getUserMedia!==undefined,1112473:()=>{try{var temp=new(window.AudioContext||window.webkitAudioContext);var sampleRate=temp.sampleRate;temp.close();return sampleRate}catch(e){return 0}},1112644:$0=>miniaudio.track_device({webaudio:emscriptenGetAudioObject($0),state:1}),1112733:($0,$1)=>{var getUserMediaResult=0;var audioWorklet=emscriptenGetAudioObject($0);var audioContext=emscriptenGetAudioObject($1);navigator.mediaDevices.getUserMedia({audio:true,video:false}).then(function(stream){audioContext.streamNode=audioContext.createMediaStreamSource(stream);audioContext.streamNode.connect(audioWorklet);audioWorklet.connect(audioContext.destination);getUserMediaResult=0}).catch(function(error){console.log("navigator.mediaDevices.getUserMedia Failed: "+error);getUserMediaResult=-1});return getUserMediaResult},1113295:($0,$1)=>{var audioWorklet=emscriptenGetAudioObject($0);var audioContext=emscriptenGetAudioObject($1);audioWorklet.connect(audioContext.destination);return 0},1113455:$0=>emscriptenGetAudioObject($0).sampleRate,1113507:$0=>{var 
device=miniaudio.get_device_by_index($0);if(device.streamNode!==undefined){device.streamNode.disconnect();device.streamNode=undefined}},1113663:$0=>{miniaudio.untrack_device_by_index($0)},1113706:$0=>{var device=miniaudio.get_device_by_index($0);device.webaudio.resume();device.state=miniaudio.device_state.started},1113831:$0=>{var device=miniaudio.get_device_by_index($0);device.webaudio.suspend();device.state=miniaudio.device_state.stopped}};function ExitStatus(status){this.name="ExitStatus";this.message=`Program terminated with exit(${status})`;this.status=status}var _wasmWorkerDelayedMessageQueue=[];var wasmTableMirror=[];var wasmTable;var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){if(funcPtr>=wasmTableMirror.length)wasmTableMirror.length=funcPtr+1;wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}assert(wasmTable.get(funcPtr)==func,"JavaScript-side Wasm function table mirror is out of date!");return func};var _wasmWorkerRunPostMessage=e=>{let data=ENVIRONMENT_IS_NODE?e:e.data;let wasmCall=data["_wsc"];wasmCall&&getWasmTableEntry(wasmCall)(...data["x"])};var _wasmWorkerAppendToQueue=e=>{_wasmWorkerDelayedMessageQueue.push(e)};var _wasmWorkerInitializeRuntime=()=>{let m=Module;assert(m["sb"]%16==0);assert(m["sz"]%16==0);_emscripten_wasm_worker_initialize(m["sb"],m["sz"]);if(typeof AudioWorkletGlobalScope==="undefined"){removeEventListener("message",_wasmWorkerAppendToQueue);_wasmWorkerDelayedMessageQueue=_wasmWorkerDelayedMessageQueue.forEach(_wasmWorkerRunPostMessage);addEventListener("message",_wasmWorkerRunPostMessage)}};var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var noExitRuntime=Module["noExitRuntime"]||true;var ptrToString=ptr=>{assert(typeof ptr==="number");ptr>>>=0;return"0x"+ptr.toString(16).padStart(8,"0")};var warnOnce=text=>{if(!warnOnce.shown)warnOnce.shown={};if(!warnOnce.shown[text]){warnOnce.shown[text]=1;if(ENVIRONMENT_IS_NODE)text="warning: "+text;err(text)}};var 
UTF8Decoder=typeof TextDecoder!="undefined"?new TextDecoder("utf8"):undefined;var UTF8ArrayToString=(heapOrArray,idx,maxBytesToRead)=>{var endIdx=idx+maxBytesToRead;var endPtr=idx;while(heapOrArray[endPtr]&&!(endPtr>=endIdx))++endPtr;if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.buffer instanceof SharedArrayBuffer?heapOrArray.slice(idx,endPtr):heapOrArray.subarray(idx,endPtr))}var str="";while(idx<endPtr){var u0=heapOrArray[idx++];if(!(u0&128)){str+=String.fromCharCode(u0);continue}var u1=heapOrArray[idx++]&63;if((u0&224)==192){str+=String.fromCharCode((u0&31)<<6|u1);continue}var u2=heapOrArray[idx++]&63;if((u0&240)==224){u0=(u0&15)<<12|u1<<6|u2}else{if((u0&248)!=240)warnOnce("Invalid UTF-8 leading byte "+ptrToString(u0)+" encountered when deserializing a UTF-8 string in wasm memory to a JS string!");u0=(u0&7)<<18|u1<<12|u2<<6|heapOrArray[idx++]&63}if(u0<65536){str+=String.fromCharCode(u0)}else{var ch=u0-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}}return str};var UTF8ToString=(ptr,maxBytesToRead)=>{assert(typeof ptr=="number",`UTF8ToString expects a number (got ${typeof ptr})`);return ptr?UTF8ArrayToString(GROWABLE_HEAP_U8(),ptr,maxBytesToRead):""};var ___assert_fail=(condition,filename,line,func)=>{abort(`Assertion failed: ${UTF8ToString(condition)}, at: `+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"])};var nowIsMonotonic=1;var __emscripten_get_now_is_monotonic=()=>nowIsMonotonic;var _abort=()=>{abort("native code called abort()")};var readEmAsmArgsArray=[];var readEmAsmArgs=(sigPtr,buf)=>{assert(Array.isArray(readEmAsmArgsArray));assert(buf%16==0);readEmAsmArgsArray.length=0;var ch;while(ch=GROWABLE_HEAP_U8()[sigPtr++]){var chr=String.fromCharCode(ch);var validChars=["d","f","i","p"];assert(validChars.includes(chr),`Invalid character ${ch}("${chr}") in readEmAsmArgs! 
Use only [${validChars}], and do not specify "v" for void return argument.`);var wide=ch!=105;wide&=ch!=112;buf+=wide&&buf%8?4:0;readEmAsmArgsArray.push(ch==112?GROWABLE_HEAP_U32()[buf>>2]:ch==105?GROWABLE_HEAP_I32()[buf>>2]:GROWABLE_HEAP_F64()[buf>>3]);buf+=wide?8:4}return readEmAsmArgsArray};var runEmAsmFunction=(code,sigPtr,argbuf)=>{var args=readEmAsmArgs(sigPtr,argbuf);assert(ASM_CONSTS.hasOwnProperty(code),`No EM_ASM constant found at address ${code}.  The loaded WebAssembly file is likely out of sync with the generated JavaScript.`);return ASM_CONSTS[code].apply(null,args)};var _emscripten_asm_const_int=(code,sigPtr,argbuf)=>runEmAsmFunction(code,sigPtr,argbuf);var EmAudio={};var EmAudioCounter=0;var emscriptenRegisterAudioObject=object=>{assert(object,"Called emscriptenRegisterAudioObject() with a null object handle!");EmAudio[++EmAudioCounter]=object;return EmAudioCounter};var emscriptenGetAudioObject=objectHandle=>EmAudio[objectHandle];var _emscripten_create_audio_context=options=>{let ctx=window.AudioContext||window.webkitAudioContext;if(!ctx)console.error("emscripten_create_audio_context failed! 
Web Audio is not supported.");options>>=2;let opts=options?{latencyHint:GROWABLE_HEAP_U32()[options]?UTF8ToString(GROWABLE_HEAP_U32()[options]):void 0,sampleRate:GROWABLE_HEAP_I32()[options+1]||void 0}:void 0;return ctx&&emscriptenRegisterAudioObject(new ctx(opts))};var _emscripten_create_wasm_audio_worklet_node=(contextHandle,name,options,callback,userData)=>{assert(contextHandle,`Called emscripten_create_wasm_audio_worklet_node() with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_create_wasm_audio_worklet_node() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_create_wasm_audio_worklet_node() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);options>>=2;function readChannelCountArray(heapIndex,numOutputs){let channelCounts=[];while(numOutputs--)channelCounts.push(GROWABLE_HEAP_U32()[heapIndex++]);return channelCounts}let opts=options?{numberOfInputs:GROWABLE_HEAP_I32()[options],numberOfOutputs:GROWABLE_HEAP_I32()[options+1],outputChannelCount:GROWABLE_HEAP_U32()[options+2]?readChannelCountArray(GROWABLE_HEAP_U32()[options+2]>>2,GROWABLE_HEAP_I32()[options+1]):void 0,processorOptions:{"cb":callback,"ud":userData}}:void 0;return emscriptenRegisterAudioObject(new AudioWorkletNode(EmAudio[contextHandle],UTF8ToString(name),opts))};var _emscripten_create_wasm_audio_worklet_processor_async=(contextHandle,options,callback,userData)=>{assert(contextHandle,`Called emscripten_create_wasm_audio_worklet_processor_async() with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_create_wasm_audio_worklet_processor_async() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called 
emscripten_create_wasm_audio_worklet_processor_async() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);options>>=2;let audioParams=[],numAudioParams=GROWABLE_HEAP_U32()[options+1],audioParamDescriptors=GROWABLE_HEAP_U32()[options+2]>>2,i=0;while(numAudioParams--){audioParams.push({name:i++,defaultValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],minValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],maxValue:GROWABLE_HEAP_F32()[audioParamDescriptors++],automationRate:["a","k"][GROWABLE_HEAP_U32()[audioParamDescriptors++]]+"-rate"})}EmAudio[contextHandle].audioWorklet.bootstrapMessage.port.postMessage({_wpn:UTF8ToString(GROWABLE_HEAP_U32()[options]),audioParams:audioParams,contextHandle:contextHandle,callback:callback,userData:userData})};var _emscripten_date_now=()=>Date.now();var _emscripten_destroy_audio_context=contextHandle=>{assert(EmAudio[contextHandle],`Called emscripten_destroy_audio_context() on an already freed context handle ${contextHandle}`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_destroy_audio_context() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);EmAudio[contextHandle].suspend();delete EmAudio[contextHandle]};var _emscripten_destroy_web_audio_node=objectHandle=>{assert(EmAudio[objectHandle],`Called emscripten_destroy_web_audio_node() on a nonexisting/already freed object handle ${objectHandle}`);assert(EmAudio[objectHandle].disconnect,`Called emscripten_destroy_web_audio_node() on a handle ${objectHandle} that is not an Web Audio Node, but of type ${typeof EmAudio[objectHandle]}`);EmAudio[objectHandle].disconnect();delete EmAudio[objectHandle]};var _emscripten_get_now;if(typeof performance!="undefined"&&performance.now){_emscripten_get_now=()=>performance.now()}else{_emscripten_get_now=Date.now}var getHeapMax=()=>2147483648;var growMemory=size=>{var 
b=wasmMemory.buffer;var pages=(size-b.byteLength+65535)/65536;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){err(`growMemory: Attempted to grow heap from ${b.byteLength} bytes to ${size} bytes, but got error: ${e}`)}};var _emscripten_resize_heap=requestedSize=>{var oldSize=GROWABLE_HEAP_U8().length;requestedSize>>>=0;if(requestedSize<=oldSize){return false}var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){err(`Cannot enlarge memory, requested ${requestedSize} bytes, but the limit is ${maxHeapSize} bytes!`);return false}var alignUp=(x,multiple)=>x+(multiple-x%multiple)%multiple;for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignUp(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}err(`Failed to grow the heap from ${oldSize} bytes to ${newSize} bytes, not enough memory!`);return false};var handleException=e=>{if(e instanceof ExitStatus||e=="unwind"){return EXITSTATUS}checkStackCookie();if(e instanceof WebAssembly.RuntimeError){if(_emscripten_stack_get_current()<=0){err("Stack overflow detected.  
You can try increasing -sSTACK_SIZE (currently set to 134217728)")}}quit_(1,e)};var runtimeKeepaliveCounter=0;var keepRuntimeAlive=()=>noExitRuntime||runtimeKeepaliveCounter>0;var SYSCALLS={varargs:undefined,get(){assert(SYSCALLS.varargs!=undefined);var ret=GROWABLE_HEAP_I32()[+SYSCALLS.varargs>>2];SYSCALLS.varargs+=4;return ret},getp(){return SYSCALLS.get()},getStr(ptr){var ret=UTF8ToString(ptr);return ret}};var _proc_exit=code=>{EXITSTATUS=code;if(!keepRuntimeAlive()){if(Module["onExit"])Module["onExit"](code);ABORT=true}quit_(code,new ExitStatus(code))};var exitJS=(status,implicit)=>{EXITSTATUS=status;checkUnflushedContent();if(keepRuntimeAlive()&&!implicit){var msg=`program exited (with status: ${status}), but keepRuntimeAlive() is set (counter=${runtimeKeepaliveCounter}) due to an async operation, so halting execution but not exiting the runtime or preventing further async execution (you can use emscripten_force_exit, if you want to force a true shutdown)`;readyPromiseReject(msg);err(msg)}_proc_exit(status)};var _exit=exitJS;var maybeExit=()=>{if(!keepRuntimeAlive()){try{_exit(EXITSTATUS)}catch(e){handleException(e)}}};var callUserCallback=func=>{if(ABORT){err("user callback triggered after runtime exited or application aborted.  
Ignoring.");return}try{func();maybeExit()}catch(e){handleException(e)}};var safeSetTimeout=(func,timeout)=>setTimeout(()=>{callUserCallback(func)},timeout);var preloadPlugins=Module["preloadPlugins"]||[];var Browser={mainLoop:{running:false,scheduler:null,method:"",currentlyRunningMainloop:0,func:null,arg:0,timingMode:0,timingValue:0,currentFrameNumber:0,queue:[],pause(){Browser.mainLoop.scheduler=null;Browser.mainLoop.currentlyRunningMainloop++},resume(){Browser.mainLoop.currentlyRunningMainloop++;var timingMode=Browser.mainLoop.timingMode;var timingValue=Browser.mainLoop.timingValue;var func=Browser.mainLoop.func;Browser.mainLoop.func=null;setMainLoop(func,0,false,Browser.mainLoop.arg,true);_emscripten_set_main_loop_timing(timingMode,timingValue);Browser.mainLoop.scheduler()},updateStatus(){if(Module["setStatus"]){var message=Module["statusMessage"]||"Please wait...";var remaining=Browser.mainLoop.remainingBlockers;var expected=Browser.mainLoop.expectedBlockers;if(remaining){if(remaining<expected){Module["setStatus"](message+" ("+(expected-remaining)+"/"+expected+")")}else{Module["setStatus"](message)}}else{Module["setStatus"]("")}}},runIter(func){if(ABORT)return;if(Module["preMainLoop"]){var preRet=Module["preMainLoop"]();if(preRet===false){return}}callUserCallback(func);if(Module["postMainLoop"])Module["postMainLoop"]()}},isFullscreen:false,pointerLock:false,moduleContextCreatedCallbacks:[],workers:[],init(){if(Browser.initted)return;Browser.initted=true;var imagePlugin={};imagePlugin["canHandle"]=function imagePlugin_canHandle(name){return!Module.noImageDecoding&&/\.(jpg|jpeg|png|bmp)$/i.test(name)};imagePlugin["handle"]=function imagePlugin_handle(byteArray,name,onload,onerror){var b=new Blob([byteArray],{type:Browser.getMimetype(name)});if(b.size!==byteArray.length){b=new Blob([new Uint8Array(byteArray).buffer],{type:Browser.getMimetype(name)})}var url=URL.createObjectURL(b);assert(typeof url=="string","createObjectURL must return a url as a string");var 
img=new Image;img.onload=()=>{assert(img.complete,`Image ${name} could not be decoded`);var canvas=document.createElement("canvas");canvas.width=img.width;canvas.height=img.height;var ctx=canvas.getContext("2d");ctx.drawImage(img,0,0);preloadedImages[name]=canvas;URL.revokeObjectURL(url);if(onload)onload(byteArray)};img.onerror=event=>{err(`Image ${url} could not be decoded`);if(onerror)onerror()};img.src=url};preloadPlugins.push(imagePlugin);var audioPlugin={};audioPlugin["canHandle"]=function audioPlugin_canHandle(name){return!Module.noAudioDecoding&&name.substr(-4)in{".ogg":1,".wav":1,".mp3":1}};audioPlugin["handle"]=function audioPlugin_handle(byteArray,name,onload,onerror){var done=false;function finish(audio){if(done)return;done=true;preloadedAudios[name]=audio;if(onload)onload(byteArray)}var b=new Blob([byteArray],{type:Browser.getMimetype(name)});var url=URL.createObjectURL(b);assert(typeof url=="string","createObjectURL must return a url as a string");var audio=new Audio;audio.addEventListener("canplaythrough",()=>finish(audio),false);audio.onerror=function audio_onerror(event){if(done)return;err(`warning: browser could not fully decode audio ${name}, trying slower base64 approach`);function encode64(data){var BASE="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";var PAD="=";var ret="";var leftchar=0;var leftbits=0;for(var i=0;i<data.length;i++){leftchar=leftchar<<8|data[i];leftbits+=8;while(leftbits>=6){var curr=leftchar>>leftbits-6&63;leftbits-=6;ret+=BASE[curr]}}if(leftbits==2){ret+=BASE[(leftchar&3)<<4];ret+=PAD+PAD}else if(leftbits==4){ret+=BASE[(leftchar&15)<<2];ret+=PAD}return ret}audio.src="data:audio/x-"+name.substr(-3)+";base64,"+encode64(byteArray);finish(audio)};audio.src=url;safeSetTimeout(()=>{finish(audio)},1e4)};preloadPlugins.push(audioPlugin);function 
pointerLockChange(){Browser.pointerLock=document["pointerLockElement"]===Module["canvas"]||document["mozPointerLockElement"]===Module["canvas"]||document["webkitPointerLockElement"]===Module["canvas"]||document["msPointerLockElement"]===Module["canvas"]}var canvas=Module["canvas"];if(canvas){canvas.requestPointerLock=canvas["requestPointerLock"]||canvas["mozRequestPointerLock"]||canvas["webkitRequestPointerLock"]||canvas["msRequestPointerLock"]||(()=>{});canvas.exitPointerLock=document["exitPointerLock"]||document["mozExitPointerLock"]||document["webkitExitPointerLock"]||document["msExitPointerLock"]||(()=>{});canvas.exitPointerLock=canvas.exitPointerLock.bind(document);document.addEventListener("pointerlockchange",pointerLockChange,false);document.addEventListener("mozpointerlockchange",pointerLockChange,false);document.addEventListener("webkitpointerlockchange",pointerLockChange,false);document.addEventListener("mspointerlockchange",pointerLockChange,false);if(Module["elementPointerLock"]){canvas.addEventListener("click",ev=>{if(!Browser.pointerLock&&Module["canvas"].requestPointerLock){Module["canvas"].requestPointerLock();ev.preventDefault()}},false)}}},createContext(canvas,useWebGL,setInModule,webGLContextAttributes){if(useWebGL&&Module.ctx&&canvas==Module.canvas)return Module.ctx;var ctx;var contextHandle;if(useWebGL){var contextAttributes={antialias:false,alpha:false,majorVersion:1};if(webGLContextAttributes){for(var attribute in webGLContextAttributes){contextAttributes[attribute]=webGLContextAttributes[attribute]}}if(typeof GL!="undefined"){contextHandle=GL.createContext(canvas,contextAttributes);if(contextHandle){ctx=GL.getContext(contextHandle).GLctx}}}else{ctx=canvas.getContext("2d")}if(!ctx)return null;if(setInModule){if(!useWebGL)assert(typeof GLctx=="undefined","cannot set in module if GLctx is used, but we are a non-GL context that would replace 
it");Module.ctx=ctx;if(useWebGL)GL.makeContextCurrent(contextHandle);Module.useWebGL=useWebGL;Browser.moduleContextCreatedCallbacks.forEach(callback=>callback());Browser.init()}return ctx},destroyContext(canvas,useWebGL,setInModule){},fullscreenHandlersInstalled:false,lockPointer:undefined,resizeCanvas:undefined,requestFullscreen(lockPointer,resizeCanvas){Browser.lockPointer=lockPointer;Browser.resizeCanvas=resizeCanvas;if(typeof Browser.lockPointer=="undefined")Browser.lockPointer=true;if(typeof Browser.resizeCanvas=="undefined")Browser.resizeCanvas=false;var canvas=Module["canvas"];function fullscreenChange(){Browser.isFullscreen=false;var canvasContainer=canvas.parentNode;if((document["fullscreenElement"]||document["mozFullScreenElement"]||document["msFullscreenElement"]||document["webkitFullscreenElement"]||document["webkitCurrentFullScreenElement"])===canvasContainer){canvas.exitFullscreen=Browser.exitFullscreen;if(Browser.lockPointer)canvas.requestPointerLock();Browser.isFullscreen=true;if(Browser.resizeCanvas){Browser.setFullscreenCanvasSize()}else{Browser.updateCanvasDimensions(canvas)}}else{canvasContainer.parentNode.insertBefore(canvas,canvasContainer);canvasContainer.parentNode.removeChild(canvasContainer);if(Browser.resizeCanvas){Browser.setWindowedCanvasSize()}else{Browser.updateCanvasDimensions(canvas)}}if(Module["onFullScreen"])Module["onFullScreen"](Browser.isFullscreen);if(Module["onFullscreen"])Module["onFullscreen"](Browser.isFullscreen)}if(!Browser.fullscreenHandlersInstalled){Browser.fullscreenHandlersInstalled=true;document.addEventListener("fullscreenchange",fullscreenChange,false);document.addEventListener("mozfullscreenchange",fullscreenChange,false);document.addEventListener("webkitfullscreenchange",fullscreenChange,false);document.addEventListener("MSFullscreenChange",fullscreenChange,false)}var 
canvasContainer=document.createElement("div");canvas.parentNode.insertBefore(canvasContainer,canvas);canvasContainer.appendChild(canvas);canvasContainer.requestFullscreen=canvasContainer["requestFullscreen"]||canvasContainer["mozRequestFullScreen"]||canvasContainer["msRequestFullscreen"]||(canvasContainer["webkitRequestFullscreen"]?()=>canvasContainer["webkitRequestFullscreen"](Element["ALLOW_KEYBOARD_INPUT"]):null)||(canvasContainer["webkitRequestFullScreen"]?()=>canvasContainer["webkitRequestFullScreen"](Element["ALLOW_KEYBOARD_INPUT"]):null);canvasContainer.requestFullscreen()},requestFullScreen(){abort("Module.requestFullScreen has been replaced by Module.requestFullscreen (without a capital S)")},exitFullscreen(){if(!Browser.isFullscreen){return false}var CFS=document["exitFullscreen"]||document["cancelFullScreen"]||document["mozCancelFullScreen"]||document["msExitFullscreen"]||document["webkitCancelFullScreen"]||(()=>{});CFS.apply(document,[]);return true},nextRAF:0,fakeRequestAnimationFrame(func){var now=Date.now();if(Browser.nextRAF===0){Browser.nextRAF=now+1e3/60}else{while(now+2>=Browser.nextRAF){Browser.nextRAF+=1e3/60}}var delay=Math.max(Browser.nextRAF-now,0);setTimeout(func,delay)},requestAnimationFrame(func){if(typeof requestAnimationFrame=="function"){requestAnimationFrame(func);return}var RAF=Browser.fakeRequestAnimationFrame;RAF(func)},safeSetTimeout(func,timeout){return safeSetTimeout(func,timeout)},safeRequestAnimationFrame(func){return Browser.requestAnimationFrame(()=>{callUserCallback(func)})},getMimetype(name){return{"jpg":"image/jpeg","jpeg":"image/jpeg","png":"image/png","bmp":"image/bmp","ogg":"audio/ogg","wav":"audio/wav","mp3":"audio/mpeg"}[name.substr(name.lastIndexOf(".")+1)]},getUserMedia(func){if(!window.getUserMedia){window.getUserMedia=navigator["getUserMedia"]||navigator["mozGetUserMedia"]}window.getUserMedia(func)},getMovementX(event){return 
event["movementX"]||event["mozMovementX"]||event["webkitMovementX"]||0},getMovementY(event){return event["movementY"]||event["mozMovementY"]||event["webkitMovementY"]||0},getMouseWheelDelta(event){var delta=0;switch(event.type){case"DOMMouseScroll":delta=event.detail/3;break;case"mousewheel":delta=event.wheelDelta/120;break;case"wheel":delta=event.deltaY;switch(event.deltaMode){case 0:delta/=100;break;case 1:delta/=3;break;case 2:delta*=80;break;default:throw"unrecognized mouse wheel delta mode: "+event.deltaMode}break;default:throw"unrecognized mouse wheel event: "+event.type}return delta},mouseX:0,mouseY:0,mouseMovementX:0,mouseMovementY:0,touches:{},lastTouches:{},calculateMouseEvent(event){if(Browser.pointerLock){if(event.type!="mousemove"&&"mozMovementX"in event){Browser.mouseMovementX=Browser.mouseMovementY=0}else{Browser.mouseMovementX=Browser.getMovementX(event);Browser.mouseMovementY=Browser.getMovementY(event)}if(typeof SDL!="undefined"){Browser.mouseX=SDL.mouseX+Browser.mouseMovementX;Browser.mouseY=SDL.mouseY+Browser.mouseMovementY}else{Browser.mouseX+=Browser.mouseMovementX;Browser.mouseY+=Browser.mouseMovementY}}else{var rect=Module["canvas"].getBoundingClientRect();var cw=Module["canvas"].width;var ch=Module["canvas"].height;var scrollX=typeof window.scrollX!="undefined"?window.scrollX:window.pageXOffset;var scrollY=typeof window.scrollY!="undefined"?window.scrollY:window.pageYOffset;assert(typeof scrollX!="undefined"&&typeof scrollY!="undefined","Unable to retrieve scroll position, mouse positions likely broken.");if(event.type==="touchstart"||event.type==="touchend"||event.type==="touchmove"){var touch=event.touch;if(touch===undefined){return}var adjustedX=touch.pageX-(scrollX+rect.left);var adjustedY=touch.pageY-(scrollY+rect.top);adjustedX=adjustedX*(cw/rect.width);adjustedY=adjustedY*(ch/rect.height);var 
coords={x:adjustedX,y:adjustedY};if(event.type==="touchstart"){Browser.lastTouches[touch.identifier]=coords;Browser.touches[touch.identifier]=coords}else if(event.type==="touchend"||event.type==="touchmove"){var last=Browser.touches[touch.identifier];if(!last)last=coords;Browser.lastTouches[touch.identifier]=last;Browser.touches[touch.identifier]=coords}return}var x=event.pageX-(scrollX+rect.left);var y=event.pageY-(scrollY+rect.top);x=x*(cw/rect.width);y=y*(ch/rect.height);Browser.mouseMovementX=x-Browser.mouseX;Browser.mouseMovementY=y-Browser.mouseY;Browser.mouseX=x;Browser.mouseY=y}},resizeListeners:[],updateResizeListeners(){var canvas=Module["canvas"];Browser.resizeListeners.forEach(listener=>listener(canvas.width,canvas.height))},setCanvasSize(width,height,noUpdates){var canvas=Module["canvas"];Browser.updateCanvasDimensions(canvas,width,height);if(!noUpdates)Browser.updateResizeListeners()},windowedWidth:0,windowedHeight:0,setFullscreenCanvasSize(){if(typeof SDL!="undefined"){var flags=GROWABLE_HEAP_U32()[SDL.screen>>2];flags=flags|8388608;GROWABLE_HEAP_I32()[SDL.screen>>2]=flags}Browser.updateCanvasDimensions(Module["canvas"]);Browser.updateResizeListeners()},setWindowedCanvasSize(){if(typeof SDL!="undefined"){var flags=GROWABLE_HEAP_U32()[SDL.screen>>2];flags=flags&~8388608;GROWABLE_HEAP_I32()[SDL.screen>>2]=flags}Browser.updateCanvasDimensions(Module["canvas"]);Browser.updateResizeListeners()},updateCanvasDimensions(canvas,wNative,hNative){if(wNative&&hNative){canvas.widthNative=wNative;canvas.heightNative=hNative}else{wNative=canvas.widthNative;hNative=canvas.heightNative}var w=wNative;var 
h=hNative;if(Module["forcedAspectRatio"]&&Module["forcedAspectRatio"]>0){if(w/h<Module["forcedAspectRatio"]){w=Math.round(h*Module["forcedAspectRatio"])}else{h=Math.round(w/Module["forcedAspectRatio"])}}if((document["fullscreenElement"]||document["mozFullScreenElement"]||document["msFullscreenElement"]||document["webkitFullscreenElement"]||document["webkitCurrentFullScreenElement"])===canvas.parentNode&&typeof screen!="undefined"){var factor=Math.min(screen.width/w,screen.height/h);w=Math.round(w*factor);h=Math.round(h*factor)}if(Browser.resizeCanvas){if(canvas.width!=w)canvas.width=w;if(canvas.height!=h)canvas.height=h;if(typeof canvas.style!="undefined"){canvas.style.removeProperty("width");canvas.style.removeProperty("height")}}else{if(canvas.width!=wNative)canvas.width=wNative;if(canvas.height!=hNative)canvas.height=hNative;if(typeof canvas.style!="undefined"){if(w!=wNative||h!=hNative){canvas.style.setProperty("width",w+"px","important");canvas.style.setProperty("height",h+"px","important")}else{canvas.style.removeProperty("width");canvas.style.removeProperty("height")}}}}};var _emscripten_set_main_loop_timing=(mode,value)=>{Browser.mainLoop.timingMode=mode;Browser.mainLoop.timingValue=value;if(!Browser.mainLoop.func){err("emscripten_set_main_loop_timing: Cannot set timing mode for main loop since a main loop does not exist! 
Call emscripten_set_main_loop first to set one up.");return 1}if(!Browser.mainLoop.running){Browser.mainLoop.running=true}if(mode==0){Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_setTimeout(){var timeUntilNextTick=Math.max(0,Browser.mainLoop.tickStartTime+value-_emscripten_get_now())|0;setTimeout(Browser.mainLoop.runner,timeUntilNextTick)};Browser.mainLoop.method="timeout"}else if(mode==1){Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_rAF(){Browser.requestAnimationFrame(Browser.mainLoop.runner)};Browser.mainLoop.method="rAF"}else if(mode==2){if(typeof Browser.setImmediate=="undefined"){if(typeof setImmediate=="undefined"){var setImmediates=[];var emscriptenMainLoopMessageId="setimmediate";var Browser_setImmediate_messageHandler=event=>{if(event.data===emscriptenMainLoopMessageId||event.data.target===emscriptenMainLoopMessageId){event.stopPropagation();setImmediates.shift()()}};addEventListener("message",Browser_setImmediate_messageHandler,true);Browser.setImmediate=function Browser_emulated_setImmediate(func){setImmediates.push(func);if(ENVIRONMENT_IS_WORKER){if(Module["setImmediates"]===undefined)Module["setImmediates"]=[];Module["setImmediates"].push(func);postMessage({target:emscriptenMainLoopMessageId})}else postMessage(emscriptenMainLoopMessageId,"*")}}else{Browser.setImmediate=setImmediate}}Browser.mainLoop.scheduler=function Browser_mainLoop_scheduler_setImmediate(){Browser.setImmediate(Browser.mainLoop.runner)};Browser.mainLoop.method="immediate"}return 0};var setMainLoop=(browserIterationFunc,fps,simulateInfiniteLoop,arg,noSetTiming)=>{assert(!Browser.mainLoop.func,"emscripten_set_main_loop: there can only be one main loop function at once: call emscripten_cancel_main_loop to cancel the previous one before setting a new one with different parameters.");Browser.mainLoop.func=browserIterationFunc;Browser.mainLoop.arg=arg;var thisMainLoopId=Browser.mainLoop.currentlyRunningMainloop;function 
checkIsRunning(){if(thisMainLoopId<Browser.mainLoop.currentlyRunningMainloop){return false}return true}Browser.mainLoop.running=false;Browser.mainLoop.runner=function Browser_mainLoop_runner(){if(ABORT)return;if(Browser.mainLoop.queue.length>0){var start=Date.now();var blocker=Browser.mainLoop.queue.shift();blocker.func(blocker.arg);if(Browser.mainLoop.remainingBlockers){var remaining=Browser.mainLoop.remainingBlockers;var next=remaining%1==0?remaining-1:Math.floor(remaining);if(blocker.counted){Browser.mainLoop.remainingBlockers=next}else{next=next+.5;Browser.mainLoop.remainingBlockers=(8*remaining+next)/9}}Browser.mainLoop.updateStatus();if(!checkIsRunning())return;setTimeout(Browser.mainLoop.runner,0);return}if(!checkIsRunning())return;Browser.mainLoop.currentFrameNumber=Browser.mainLoop.currentFrameNumber+1|0;if(Browser.mainLoop.timingMode==1&&Browser.mainLoop.timingValue>1&&Browser.mainLoop.currentFrameNumber%Browser.mainLoop.timingValue!=0){Browser.mainLoop.scheduler();return}else if(Browser.mainLoop.timingMode==0){Browser.mainLoop.tickStartTime=_emscripten_get_now()}if(Browser.mainLoop.method==="timeout"&&Module.ctx){warnOnce("Looks like you are rendering without using requestAnimationFrame for the main loop. 
You should use 0 for the frame rate in emscripten_set_main_loop in order to use requestAnimationFrame, as that can greatly improve your frame rates!");Browser.mainLoop.method=""}Browser.mainLoop.runIter(browserIterationFunc);checkStackCookie();if(!checkIsRunning())return;if(typeof SDL=="object"&&SDL.audio&&SDL.audio.queueNewAudioData)SDL.audio.queueNewAudioData();Browser.mainLoop.scheduler()};if(!noSetTiming){if(fps&&fps>0){_emscripten_set_main_loop_timing(0,1e3/fps)}else{_emscripten_set_main_loop_timing(1,1)}Browser.mainLoop.scheduler()}if(simulateInfiniteLoop){throw"unwind"}};var _emscripten_set_main_loop=(func,fps,simulateInfiniteLoop)=>{var browserIterationFunc=()=>dynCall_v.call(null,func);setMainLoop(browserIterationFunc,fps,simulateInfiniteLoop)};var _emscripten_sleep=ms=>Asyncify.handleSleep(wakeUp=>safeSetTimeout(wakeUp,ms));_emscripten_sleep.isAsync=true;var _wasmWorkersID=1;var _EmAudioDispatchProcessorCallback=e=>{let data=e.data,wasmCall=data["_wsc"];wasmCall&&getWasmTableEntry(wasmCall)(...data["x"])};var _emscripten_start_wasm_audio_worklet_thread_async=(contextHandle,stackLowestAddress,stackSize,callback,userData)=>{assert(contextHandle,`Called emscripten_start_wasm_audio_worklet_thread_async() with a null Web Audio Context handle!`);assert(EmAudio[contextHandle],`Called emscripten_start_wasm_audio_worklet_thread_async() with a nonexisting/already freed Web Audio Context handle ${contextHandle}!`);assert(EmAudio[contextHandle]instanceof(window.AudioContext||window.webkitAudioContext),`Called emscripten_start_wasm_audio_worklet_thread_async() on a context handle ${contextHandle} that is not an AudioContext, but of type ${typeof EmAudio[contextHandle]}`);let audioContext=EmAudio[contextHandle],audioWorklet=audioContext.audioWorklet;assert(stackLowestAddress!=0,"AudioWorklets require a dedicated stack space for audio data marshalling between Wasm and JS!");assert(stackLowestAddress%16==0,`AudioWorklet stack should be aligned to 16 bytes! 
(was ${stackLowestAddress} == ${stackLowestAddress%16} mod 16) Use e.g. memalign(16, stackSize) to align the stack!`);assert(stackSize!=0,"AudioWorklets require a dedicated stack space for audio data marshalling between Wasm and JS!");assert(stackSize%16==0,`AudioWorklet stack size should be a multiple of 16 bytes! (was ${stackSize} == ${stackSize%16} mod 16)`);assert(!audioContext.audioWorkletInitialized,"emscripten_create_wasm_audio_worklet() was already called for AudioContext "+contextHandle+"! Only call this function once per AudioContext!");audioContext.audioWorkletInitialized=1;let audioWorkletCreationFailed=()=>{((a1,a2,a3)=>dynCall_viii.apply(null,[callback,a1,a2,a3]))(contextHandle,0,userData)};if(!audioWorklet){return audioWorkletCreationFailed()}audioWorklet.addModule("amy.aw.js").then(()=>{audioWorklet.bootstrapMessage=new AudioWorkletNode(audioContext,"message",{processorOptions:{"$ww":_wasmWorkersID++,"wasm":wasmModule,"wasmMemory":wasmMemory,"sb":stackLowestAddress,"sz":stackSize}});audioWorklet.bootstrapMessage.port.onmessage=_EmAudioDispatchProcessorCallback;return audioWorklet.addModule(Module["mainScriptUrlOrBlob"]||_scriptDir)}).then(()=>{((a1,a2,a3)=>dynCall_viii.apply(null,[callback,a1,a2,a3]))(contextHandle,1,userData)}).catch(audioWorkletCreationFailed)};var _fd_close=fd=>{abort("fd_close called without SYSCALLS_REQUIRE_FILESYSTEM")};var convertI32PairToI53Checked=(lo,hi)=>{assert(lo==lo>>>0||lo==(lo|0));assert(hi===(hi|0));return hi+2097152>>>0<4194305-!!lo?(lo>>>0)+hi*4294967296:NaN};function _fd_seek(fd,offset_low,offset_high,whence,newOffset){var offset=convertI32PairToI53Checked(offset_low,offset_high);return 70}var printCharBuffers=[null,[],[]];var printChar=(stream,curr)=>{var buffer=printCharBuffers[stream];assert(buffer);if(curr===0||curr===10){(stream===1?out:err)(UTF8ArrayToString(buffer,0));buffer.length=0}else{buffer.push(curr)}};var 
flush_NO_FILESYSTEM=()=>{_fflush(0);if(printCharBuffers[1].length)printChar(1,10);if(printCharBuffers[2].length)printChar(2,10)};var _fd_write=(fd,iov,iovcnt,pnum)=>{var num=0;for(var i=0;i<iovcnt;i++){var ptr=GROWABLE_HEAP_U32()[iov>>2];var len=GROWABLE_HEAP_U32()[iov+4>>2];iov+=8;for(var j=0;j<len;j++){printChar(fd,GROWABLE_HEAP_U8()[ptr+j])}num+=len}GROWABLE_HEAP_U32()[pnum>>2]=num;return 0};var runAndAbortIfError=func=>{try{return func()}catch(e){abort(e)}};var runtimeKeepalivePush=()=>{runtimeKeepaliveCounter+=1};var runtimeKeepalivePop=()=>{assert(runtimeKeepaliveCounter>0);runtimeKeepaliveCounter-=1};var Asyncify={instrumentWasmImports(imports){var importPattern=/^(invoke_.*|__asyncjs__.*)$/;for(var x in imports){(function(x){var original=imports[x];var sig=original.sig;if(typeof original=="function"){var isAsyncifyImport=original.isAsync||importPattern.test(x);imports[x]=function(){var originalAsyncifyState=Asyncify.state;try{return original.apply(null,arguments)}finally{var changedToDisabled=originalAsyncifyState===Asyncify.State.Normal&&Asyncify.state===Asyncify.State.Disabled;var ignoredInvoke=x.startsWith("invoke_")&&true;if(Asyncify.state!==originalAsyncifyState&&!isAsyncifyImport&&!changedToDisabled&&!ignoredInvoke){throw new Error(`import ${x} was not in ASYNCIFY_IMPORTS, but changed the state`)}}}}})(x)}},instrumentWasmExports(exports){var ret={};for(var x in exports){(function(x){var original=exports[x];if(typeof original=="function"){ret[x]=function(){Asyncify.exportCallStack.push(x);try{return original.apply(null,arguments)}finally{if(!ABORT){var y=Asyncify.exportCallStack.pop();assert(y===x);Asyncify.maybeStopUnwind()}}}}else{ret[x]=original}})(x)}return ret},State:{Normal:0,Unwinding:1,Rewinding:2,Disabled:3},state:0,StackSize:128e3,currData:null,handleSleepReturnValue:0,exportCallStack:[],callStackNameToId:{},callStackIdToName:{},callStackId:0,asyncPromiseHandlers:null,sleepCallbacks:[],getCallStackId(funcName){var 
id=Asyncify.callStackNameToId[funcName];if(id===undefined){id=Asyncify.callStackId++;Asyncify.callStackNameToId[funcName]=id;Asyncify.callStackIdToName[id]=funcName}return id},maybeStopUnwind(){if(Asyncify.currData&&Asyncify.state===Asyncify.State.Unwinding&&Asyncify.exportCallStack.length===0){Asyncify.state=Asyncify.State.Normal;runAndAbortIfError(_asyncify_stop_unwind);if(typeof Fibers!="undefined"){Fibers.trampoline()}}},whenDone(){assert(Asyncify.currData,"Tried to wait for an async operation when none is in progress.");assert(!Asyncify.asyncPromiseHandlers,"Cannot have multiple async operations in flight at once");return new Promise((resolve,reject)=>{Asyncify.asyncPromiseHandlers={resolve:resolve,reject:reject}})},allocateData(){var ptr=_malloc(12+Asyncify.StackSize);Asyncify.setDataHeader(ptr,ptr+12,Asyncify.StackSize);Asyncify.setDataRewindFunc(ptr);return ptr},setDataHeader(ptr,stack,stackSize){GROWABLE_HEAP_U32()[ptr>>2]=stack;GROWABLE_HEAP_U32()[ptr+4>>2]=stack+stackSize},setDataRewindFunc(ptr){var bottomOfCallStack=Asyncify.exportCallStack[0];var rewindId=Asyncify.getCallStackId(bottomOfCallStack);GROWABLE_HEAP_I32()[ptr+8>>2]=rewindId},getDataRewindFunc(ptr){var id=GROWABLE_HEAP_I32()[ptr+8>>2];var name=Asyncify.callStackIdToName[id];var func=wasmExports[name];return func},doRewind(ptr){var start=Asyncify.getDataRewindFunc(ptr);return start()},handleSleep(startAsync){assert(Asyncify.state!==Asyncify.State.Disabled,"Asyncify cannot be done during or after the runtime exits");if(ABORT)return;if(Asyncify.state===Asyncify.State.Normal){var reachedCallback=false;var reachedAfterCallback=false;startAsync((handleSleepReturnValue=0)=>{assert(!handleSleepReturnValue||typeof handleSleepReturnValue=="number"||typeof handleSleepReturnValue=="boolean");if(ABORT)return;Asyncify.handleSleepReturnValue=handleSleepReturnValue;reachedCallback=true;if(!reachedAfterCallback){return}assert(!Asyncify.exportCallStack.length,"Waking up (starting to rewind) must be done from 
JS, without compiled code on the stack.");Asyncify.state=Asyncify.State.Rewinding;runAndAbortIfError(()=>_asyncify_start_rewind(Asyncify.currData));if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.resume()}var asyncWasmReturnValue,isError=false;try{asyncWasmReturnValue=Asyncify.doRewind(Asyncify.currData)}catch(err){asyncWasmReturnValue=err;isError=true}var handled=false;if(!Asyncify.currData){var asyncPromiseHandlers=Asyncify.asyncPromiseHandlers;if(asyncPromiseHandlers){Asyncify.asyncPromiseHandlers=null;(isError?asyncPromiseHandlers.reject:asyncPromiseHandlers.resolve)(asyncWasmReturnValue);handled=true}}if(isError&&!handled){throw asyncWasmReturnValue}});reachedAfterCallback=true;if(!reachedCallback){Asyncify.state=Asyncify.State.Unwinding;Asyncify.currData=Asyncify.allocateData();if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.pause()}runAndAbortIfError(()=>_asyncify_start_unwind(Asyncify.currData))}}else if(Asyncify.state===Asyncify.State.Rewinding){Asyncify.state=Asyncify.State.Normal;runAndAbortIfError(_asyncify_stop_rewind);_free(Asyncify.currData);Asyncify.currData=null;Asyncify.sleepCallbacks.forEach(func=>callUserCallback(func))}else{abort(`invalid state: ${Asyncify.state}`)}return Asyncify.handleSleepReturnValue},handleAsync(startAsync){return Asyncify.handleSleep(wakeUp=>{startAsync().then(wakeUp)})}};var getCFunc=ident=>{var func=Module["_"+ident];assert(func,"Cannot call unknown function "+ident+", make sure it is exported");return func};var writeArrayToMemory=(array,buffer)=>{assert(array.length>=0,"writeArrayToMemory array must have a length (should be an array or typed array)");GROWABLE_HEAP_I8().set(array,buffer)};var lengthBytesUTF8=str=>{var len=0;for(var i=0;i<str.length;++i){var c=str.charCodeAt(i);if(c<=127){len++}else if(c<=2047){len+=2}else if(c>=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{assert(typeof 
str==="string",`stringToUTF8Array expects a string (got ${typeof str})`);if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i<str.length;++i){var u=str.charCodeAt(i);if(u>=55296&&u<=57343){var u1=str.charCodeAt(++i);u=65536+((u&1023)<<10)|u1&1023}if(u<=127){if(outIdx>=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;if(u>1114111)warnOnce("Invalid Unicode code point "+ptrToString(u)+" encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x10FFFF).");heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}}heap[outIdx]=0;return outIdx-startIdx};var stringToUTF8=(str,outPtr,maxBytesToWrite)=>{assert(typeof maxBytesToWrite=="number","stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!");return stringToUTF8Array(str,GROWABLE_HEAP_U8(),outPtr,maxBytesToWrite)};var stringToUTF8OnStack=str=>{var size=lengthBytesUTF8(str)+1;var ret=stackAlloc(size);stringToUTF8(str,ret,size);return ret};var ccall=(ident,returnType,argTypes,args,opts)=>{var toC={"string":str=>{var ret=0;if(str!==null&&str!==undefined&&str!==0){ret=stringToUTF8OnStack(str)}return ret},"array":arr=>{var ret=stackAlloc(arr.length);writeArrayToMemory(arr,ret);return ret}};function convertReturnValue(ret){if(returnType==="string"){return UTF8ToString(ret)}if(returnType==="boolean")return Boolean(ret);return ret}var func=getCFunc(ident);var cArgs=[];var stack=0;assert(returnType!=="array",'Return type should not be "array".');if(args){for(var i=0;i<args.length;i++){var 
converter=toC[argTypes[i]];if(converter){if(stack===0)stack=stackSave();cArgs[i]=converter(args[i])}else{cArgs[i]=args[i]}}}var previousAsync=Asyncify.currData;var ret=func.apply(null,cArgs);function onDone(ret){runtimeKeepalivePop();if(stack!==0)stackRestore(stack);return convertReturnValue(ret)}var asyncMode=opts&&opts.async;runtimeKeepalivePush();if(Asyncify.currData!=previousAsync){assert(!(previousAsync&&Asyncify.currData),"We cannot start an async operation when one is already flight");assert(!(previousAsync&&!Asyncify.currData),"We cannot stop an async operation in flight");assert(asyncMode,"The call to "+ident+" is running asynchronously. If this was intended, add the async option to the ccall/cwrap call.");return Asyncify.whenDone().then(onDone)}ret=onDone(ret);if(asyncMode)return Promise.resolve(ret);return ret};var cwrap=(ident,returnType,argTypes,opts)=>function(){return ccall(ident,returnType,argTypes,arguments,opts)};Module["requestFullscreen"]=Browser.requestFullscreen;Module["requestFullScreen"]=Browser.requestFullScreen;Module["requestAnimationFrame"]=Browser.requestAnimationFrame;Module["setCanvasSize"]=Browser.setCanvasSize;Module["pauseMainLoop"]=Browser.mainLoop.pause;Module["resumeMainLoop"]=Browser.mainLoop.resume;Module["getUserMedia"]=Browser.getUserMedia;Module["createContext"]=Browser.createContext;var preloadedImages={};var preloadedAudios={};function checkIncomingModuleAPI(){ignoredModuleProp("fetchSettings")}var 
wasmImports={__assert_fail:___assert_fail,_emscripten_get_now_is_monotonic:__emscripten_get_now_is_monotonic,abort:_abort,emscripten_asm_const_int:_emscripten_asm_const_int,emscripten_create_audio_context:_emscripten_create_audio_context,emscripten_create_wasm_audio_worklet_node:_emscripten_create_wasm_audio_worklet_node,emscripten_create_wasm_audio_worklet_processor_async:_emscripten_create_wasm_audio_worklet_processor_async,emscripten_date_now:_emscripten_date_now,emscripten_destroy_audio_context:_emscripten_destroy_audio_context,emscripten_destroy_web_audio_node:_emscripten_destroy_web_audio_node,emscripten_get_now:_emscripten_get_now,emscripten_resize_heap:_emscripten_resize_heap,emscripten_set_main_loop:_emscripten_set_main_loop,emscripten_sleep:_emscripten_sleep,emscripten_start_wasm_audio_worklet_thread_async:_emscripten_start_wasm_audio_worklet_thread_async,exit:_exit,fd_close:_fd_close,fd_seek:_fd_seek,fd_write:_fd_write,memory:wasmMemory};Asyncify.instrumentWasmImports(wasmImports);var wasmExports=createWasm();var ___wasm_call_ctors=createExportWrapper("__wasm_call_ctors");var _free=Module["_free"]=createExportWrapper("free");var _malloc=Module["_malloc"]=createExportWrapper("malloc");var _amy_start=Module["_amy_start"]=createExportWrapper("amy_start");var _amy_reset_sysclock=Module["_amy_reset_sysclock"]=createExportWrapper("amy_reset_sysclock");var _amy_play_message=Module["_amy_play_message"]=createExportWrapper("amy_play_message");var _sequencer_ticks=Module["_sequencer_ticks"]=createExportWrapper("sequencer_ticks");var ___errno_location=createExportWrapper("__errno_location");var _ma_device__on_notification_unlocked=Module["_ma_device__on_notification_unlocked"]=createExportWrapper("ma_device__on_notification_unlocked");var _ma_malloc_emscripten=Module["_ma_malloc_emscripten"]=createExportWrapper("ma_malloc_emscripten");var _ma_free_emscripten=Module["_ma_free_emscripten"]=createExportWrapper("ma_free_emscripten");var 
_ma_device_process_pcm_frames_capture__webaudio=Module["_ma_device_process_pcm_frames_capture__webaudio"]=createExportWrapper("ma_device_process_pcm_frames_capture__webaudio");var _ma_device_process_pcm_frames_playback__webaudio=Module["_ma_device_process_pcm_frames_playback__webaudio"]=createExportWrapper("ma_device_process_pcm_frames_playback__webaudio");var _amy_live_start=Module["_amy_live_start"]=createExportWrapper("amy_live_start");var _fflush=Module["_fflush"]=createExportWrapper("fflush");var _emscripten_stack_init=()=>(_emscripten_stack_init=wasmExports["emscripten_stack_init"])();var _emscripten_stack_get_free=()=>(_emscripten_stack_get_free=wasmExports["emscripten_stack_get_free"])();var _emscripten_stack_get_base=()=>(_emscripten_stack_get_base=wasmExports["emscripten_stack_get_base"])();var _emscripten_stack_get_end=()=>(_emscripten_stack_get_end=wasmExports["emscripten_stack_get_end"])();var _emscripten_wasm_worker_initialize=Module["_emscripten_wasm_worker_initialize"]=createExportWrapper("emscripten_wasm_worker_initialize");var stackSave=createExportWrapper("stackSave");var stackRestore=createExportWrapper("stackRestore");var stackAlloc=createExportWrapper("stackAlloc");var _emscripten_stack_get_current=()=>(_emscripten_stack_get_current=wasmExports["emscripten_stack_get_current"])();var dynCall_ii=Module["dynCall_ii"]=createExportWrapper("dynCall_ii");var dynCall_vii=Module["dynCall_vii"]=createExportWrapper("dynCall_vii");var dynCall_iiii=Module["dynCall_iiii"]=createExportWrapper("dynCall_iiii");var dynCall_iii=Module["dynCall_iii"]=createExportWrapper("dynCall_iii");var dynCall_iiiii=Module["dynCall_iiiii"]=createExportWrapper("dynCall_iiiii");var dynCall_viii=Module["dynCall_viii"]=createExportWrapper("dynCall_viii");var dynCall_viiii=Module["dynCall_viiii"]=createExportWrapper("dynCall_viiii");var dynCall_v=Module["dynCall_v"]=createExportWrapper("dynCall_v");var 
dynCall_iiiiiiii=Module["dynCall_iiiiiiii"]=createExportWrapper("dynCall_iiiiiiii");var dynCall_iiiji=Module["dynCall_iiiji"]=createExportWrapper("dynCall_iiiji");var dynCall_iiiiiii=Module["dynCall_iiiiiii"]=createExportWrapper("dynCall_iiiiiii");var dynCall_jii=Module["dynCall_jii"]=createExportWrapper("dynCall_jii");var dynCall_jiji=Module["dynCall_jiji"]=createExportWrapper("dynCall_jiji");var dynCall_iidiiii=Module["dynCall_iidiiii"]=createExportWrapper("dynCall_iidiiii");var _asyncify_start_unwind=createExportWrapper("asyncify_start_unwind");var _asyncify_stop_unwind=createExportWrapper("asyncify_stop_unwind");var _asyncify_start_rewind=createExportWrapper("asyncify_start_rewind");var _asyncify_stop_rewind=createExportWrapper("asyncify_stop_rewind");Module["stackAlloc"]=stackAlloc;Module["stackSave"]=stackSave;Module["stackRestore"]=stackRestore;Module["ccall"]=ccall;Module["cwrap"]=cwrap;var missingLibrarySymbols=["writeI53ToI64","writeI53ToI64Clamped","writeI53ToI64Signaling","writeI53ToU64Clamped","writeI53ToU64Signaling","readI53FromI64","readI53FromU64","convertI32PairToI53","convertU32PairToI53","zeroMemory","isLeapYear","ydayFromDate","arraySum","addDays","setErrNo","inetPton4","inetNtop4","inetPton6","inetNtop6","readSockaddr","writeSockaddr","getHostByName","initRandomFill","randomFill","getCallstack","emscriptenLog","convertPCtoSourceLocation","runMainThreadEmAsm","jstoi_q","jstoi_s","getExecutableName","listenOnce","autoResumeAudioContext","dynCallLegacy","getDynCaller","dynCall","asmjsMangle","asyncLoad","alignMemory","mmapAlloc","handleAllocatorInit","HandleAllocator","getNativeTypeSize","STACK_SIZE","STACK_ALIGN","POINTER_SIZE","ASSERTIONS","uleb128Encode","generateFuncType","convertJsFunctionToWasm","getEmptyTableSlot","updateTableMap","getFunctionAddress","addFunction","removeFunction","reallyNegative","unSign","strLen","reSign","formatString","intArrayFromString","intArrayToString","AsciiToString","stringToAscii","UTF16ToString","stringToUTF16
","lengthBytesUTF16","UTF32ToString","stringToUTF32","lengthBytesUTF32","stringToNewUTF8","registerKeyEventCallback","maybeCStringToJsString","findEventTarget","findCanvasEventTarget","getBoundingClientRect","fillMouseEventData","registerMouseEventCallback","registerWheelEventCallback","registerUiEventCallback","registerFocusEventCallback","fillDeviceOrientationEventData","registerDeviceOrientationEventCallback","fillDeviceMotionEventData","registerDeviceMotionEventCallback","screenOrientation","fillOrientationChangeEventData","registerOrientationChangeEventCallback","fillFullscreenChangeEventData","registerFullscreenChangeEventCallback","JSEvents_requestFullscreen","JSEvents_resizeCanvasForFullscreen","registerRestoreOldStyle","hideEverythingExceptGivenElement","restoreHiddenElements","setLetterbox","softFullscreenResizeWebGLRenderTarget","doRequestFullscreen","fillPointerlockChangeEventData","registerPointerlockChangeEventCallback","registerPointerlockErrorEventCallback","requestPointerLock","fillVisibilityChangeEventData","registerVisibilityChangeEventCallback","registerTouchEventCallback","fillGamepadEventData","registerGamepadEventCallback","registerBeforeUnloadEventCallback","fillBatteryEventData","battery","registerBatteryEventCallback","setCanvasElementSize","getCanvasElementSize","demangle","demangleAll","jsStackTrace","stackTrace","getEnvStrings","checkWasiClock","wasiRightsToMuslOFlags","wasiOFlagsToMuslOFlags","createDyncallWrapper","setImmediateWrapped","clearImmediateWrapped","polyfillSetImmediate","getPromise","makePromise","idsToPromises","makePromiseCallback","ExceptionInfo","findMatchingCatch","getSocketFromFD","getSocketAddress","FS_createPreloadedFile","FS_modeStringToFlags","FS_getMode","FS_stdin_getChar","FS_createDataFile","FS_unlink","FS_mkdirTree","_setNetworkCallback","heapObjectForWebGLType","heapAccessShiftForWebGLHeap","webgl_enable_ANGLE_instanced_arrays","webgl_enable_OES_vertex_array_object","webgl_enable_WEBGL_draw_buffers","webgl_en
able_WEBGL_multi_draw","emscriptenWebGLGet","computeUnpackAlignedImageSize","colorChannelsInGlTextureFormat","emscriptenWebGLGetTexPixelData","__glGenObject","emscriptenWebGLGetUniform","webglGetUniformLocation","webglPrepareUniformLocationsBeforeFirstUse","webglGetLeftBracePos","emscriptenWebGLGetVertexAttrib","__glGetActiveAttribOrUniform","writeGLArray","registerWebGlEventCallback","SDL_unicode","SDL_ttfContext","SDL_audio","ALLOC_NORMAL","ALLOC_STACK","allocate","writeStringToMemory","writeAsciiToMemory","_wasmWorkerPostFunction1","_wasmWorkerPostFunction2","_wasmWorkerPostFunction3","emscripten_audio_worklet_post_function_1","emscripten_audio_worklet_post_function_2","emscripten_audio_worklet_post_function_3"];missingLibrarySymbols.forEach(missingLibrarySymbol);var unexportedSymbols=["run","addOnPreRun","addOnInit","addOnPreMain","addOnExit","addOnPostRun","addRunDependency","removeRunDependency","FS_createFolder","FS_createPath","FS_createLazyFile","FS_createLink","FS_createDevice","FS_readFile","out","err","callMain","abort","wasmMemory","wasmExports","getTempRet0","setTempRet0","writeStackCookie","checkStackCookie","convertI32PairToI53Checked","ptrToString","exitJS","getHeapMax","growMemory","ENV","MONTH_DAYS_REGULAR","MONTH_DAYS_LEAP","MONTH_DAYS_REGULAR_CUMULATIVE","MONTH_DAYS_LEAP_CUMULATIVE","ERRNO_CODES","ERRNO_MESSAGES","DNS","Protocols","Sockets","timers","warnOnce","UNWIND_CACHE","readEmAsmArgsArray","readEmAsmArgs","runEmAsmFunction","handleException","keepRuntimeAlive","runtimeKeepalivePush","runtimeKeepalivePop","callUserCallback","maybeExit","wasmTable","noExitRuntime","getCFunc","sigToWasmTypes","freeTableIndexes","functionsInTableMap","setValue","getValue","PATH","PATH_FS","UTF8Decoder","UTF8ArrayToString","UTF8ToString","stringToUTF8Array","stringToUTF8","lengthBytesUTF8","UTF16Decoder","stringToUTF8OnStack","writeArrayToMemory","JSEvents","specialHTMLTargets","currentFullscreenStrategy","restoreOldWindowedStyle","ExitStatus","flush_NO_FILESYS
TEM","safeSetTimeout","promiseMap","uncaughtExceptionCount","exceptionLast","exceptionCaught","Browser","setMainLoop","wget","SYSCALLS","preloadPlugins","FS_stdin_getChar_buffer","FS","MEMFS","TTY","PIPEFS","SOCKFS","tempFixedLengthArray","miniTempWebGLFloatBuffers","miniTempWebGLIntBuffers","GL","emscripten_webgl_power_preferences","AL","GLUT","EGL","GLEW","IDBStore","runAndAbortIfError","Asyncify","Fibers","SDL","SDL_gfx","allocateUTF8","allocateUTF8OnStack","_wasmWorkers","_wasmWorkersID","_wasmWorkerDelayedMessageQueue","_wasmWorkerAppendToQueue","_wasmWorkerRunPostMessage","_wasmWorkerInitializeRuntime","EmAudio","EmAudioCounter","emscriptenRegisterAudioObject","emscriptenDestroyAudioContext","emscriptenGetAudioObject","_EmAudioDispatchProcessorCallback"];unexportedSymbols.forEach(unexportedRuntimeSymbol);var calledRun;dependenciesFulfilled=function runCaller(){if(!calledRun)run();if(!calledRun)dependenciesFulfilled=runCaller};function stackCheckInit(){_emscripten_stack_init();writeStackCookie()}function run(){if(runDependencies>0){return}stackCheckInit();if(ENVIRONMENT_IS_WASM_WORKER){readyPromiseResolve(Module);return initRuntime()}preRun();if(runDependencies>0){return}function doRun(){if(calledRun)return;calledRun=true;Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve(Module);if(Module["onRuntimeInitialized"])Module["onRuntimeInitialized"]();assert(!Module["_main"],'compiled without a main, but one is present. if you added it from JS, use Module["onRuntimeInitialized"]');postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(function(){setTimeout(function(){Module["setStatus"]("")},1);doRun()},1)}else{doRun()}checkStackCookie()}function checkUnflushedContent(){var oldOut=out;var oldErr=err;var has=false;out=err=x=>{has=true};try{flush_NO_FILESYSTEM()}catch(e){}out=oldOut;err=oldErr;if(has){warnOnce("stdio streams had content in them that was not flushed. 
you should set EXIT_RUNTIME to 1 (see the Emscripten FAQ), or make sure to emit a newline when you printf etc.");warnOnce("(this may also be due to not including full filesystem support - try building with -sFORCE_FILESYSTEM)")}}if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}run();
 
 
-  return moduleRtn;
+  return moduleArg.ready
 }
 );
 })();
diff --git a/www/run/amy.wasm b/www/run/amy.wasm
index f5c07383c..ee79c5dc0 100755
Binary files a/www/run/amy.wasm and b/www/run/amy.wasm differ
diff --git a/www/run/index.html b/www/run/index.html
index 7f292f399..021195227 100644
--- a/www/run/index.html
+++ b/www/run/index.html
@@ -34,7 +34,7 @@
       <script src="https://cdn.jsdelivr.net/npm/webmidi@latest/dist/iife/webmidi.iife.js"></script>
       <script src="micropython.mjs" type="module"></script>
       <script type="text/javascript" src="enable-threads.js"></script>
-      <script type="text/javascript" src="amy.js"></script>
+      <script type="text/javascript" src="amy-audioin.js"></script>
       <script type="text/javascript" src="spss.js"></script>
   </head>
 
diff --git a/www/run/micropython.data b/www/run/micropython.data
index 87b600337..5ff98cb52 100644
Binary files a/www/run/micropython.data and b/www/run/micropython.data differ
diff --git a/www/run/micropython.mjs b/www/run/micropython.mjs
index 26c896e09..1763b14ae 100644
--- a/www/run/micropython.mjs
+++ b/www/run/micropython.mjs
@@ -1,16 +1,15 @@
 
 var _createMicroPythonModule = (() => {
-  var _scriptName = import.meta.url;
+  var _scriptDir = import.meta.url;
   
   return (
 async function(moduleArg = {}) {
-  var moduleRtn;
 
 // include: shell.js
 // The Module object: Our interface to the outside world. We import
 // and export values on it. There are various ways Module can be used:
 // 1. Not defined. We create it here
-// 2. A function parameter, function(moduleArg) => Promise<Module>
+// 2. A function parameter, function(Module) { ..generated code.. }
 // 3. pre-run appended it, var Module = {}; ..generated code..
 // 4. External script tag defines var Module.
 // We need to check if Module already exists (e.g. case 3 above).
@@ -24,60 +23,31 @@ var Module = moduleArg;
 
 // Set up the promise that indicates the Module is initialized
 var readyPromiseResolve, readyPromiseReject;
-var readyPromise = new Promise((resolve, reject) => {
+Module['ready'] = new Promise((resolve, reject) => {
   readyPromiseResolve = resolve;
   readyPromiseReject = reject;
 });
-["_free","_malloc","_mp_js_init","_mp_js_repl_init","_mp_js_repl_process_char","_mp_hal_get_interrupt_char","_mp_handle_pending","_process_single_midi_byte","_mp_sched_keyboard_interrupt","_mp_js_do_exec","_mp_js_do_exec_async","_mp_js_frozen_exec","_mp_js_do_import","_mp_js_register_js_module","_proxy_c_free_obj","_proxy_c_init","_proxy_c_to_js_call","_proxy_c_to_js_delete_attr","_proxy_c_to_js_dir","_proxy_c_to_js_get_array","_proxy_c_to_js_get_dict","_proxy_c_to_js_get_iter","_proxy_c_to_js_get_type","_proxy_c_to_js_has_attr","_proxy_c_to_js_iternext","_proxy_c_to_js_lookup_attr","_proxy_c_to_js_resume","_proxy_c_to_js_store_attr","_proxy_convert_mp_to_js_obj_cside","_tulip_tick","_memory","___indirect_function_table","_proxy_convert_mp_to_js_then_js_to_mp_obj_jsside","_proxy_convert_mp_to_js_then_js_to_js_then_js_to_mp_obj_jsside","_js_get_proxy_js_ref_info","_has_attr","_lookup_attr","_store_attr","_call0","_call1","_call2","_calln","_call0_kwarg","_call1_kwarg","_js_reflect_construct","_js_get_iter","_js_iter_next","_js_subscr_load","_js_subscr_store","_proxy_js_free_obj","_js_check_existing","_js_get_error_info","_js_then_resolve","_js_then_reject","_js_then_continue","_create_promise","___em_lib_deps_sdlaudio","___em_lib_deps_sdlmouse","onRuntimeInitialized"].forEach((prop) => {
-  if (!Object.getOwnPropertyDescriptor(readyPromise, prop)) {
-    Object.defineProperty(readyPromise, prop, {
+["_free","_malloc","_mp_js_init","_mp_js_repl_init","_mp_js_repl_process_char","_mp_hal_get_interrupt_char","_mp_handle_pending","_process_single_midi_byte","_mp_sched_keyboard_interrupt","_mp_js_do_exec","_mp_js_do_exec_async","_mp_js_frozen_exec","_mp_js_do_import","_mp_js_register_js_module","_proxy_c_free_obj","_proxy_c_init","_proxy_c_to_js_call","_proxy_c_to_js_delete_attr","_proxy_c_to_js_dir","_proxy_c_to_js_get_array","_proxy_c_to_js_get_dict","_proxy_c_to_js_get_iter","_proxy_c_to_js_get_type","_proxy_c_to_js_has_attr","_proxy_c_to_js_iternext","_proxy_c_to_js_lookup_attr","_proxy_c_to_js_resume","_proxy_c_to_js_store_attr","_proxy_convert_mp_to_js_obj_cside","_tulip_tick","_memory","___indirect_function_table","_proxy_convert_mp_to_js_then_js_to_mp_obj_jsside","_proxy_convert_mp_to_js_then_js_to_js_then_js_to_mp_obj_jsside","_js_get_proxy_js_ref_info","_has_attr","_lookup_attr","_store_attr","_call0","_call1","_call2","_calln","_call0_kwarg","_call1_kwarg","_js_reflect_construct","_js_get_iter","_js_iter_next","_js_subscr_load","_js_subscr_store","_proxy_js_free_obj","_js_check_existing","_js_get_error_info","_js_then_resolve","_js_then_reject","_js_then_continue","_create_promise","_fflush","___start_em_asm","___stop_em_asm","___start_em_js","___stop_em_js","onRuntimeInitialized"].forEach((prop) => {
+  if (!Object.getOwnPropertyDescriptor(Module['ready'], prop)) {
+    Object.defineProperty(Module['ready'], prop, {
       get: () => abort('You are getting ' + prop + ' on the Promise object, instead of the instance. Use .then() to get called back with the instance, see the MODULARIZE docs in src/settings.js'),
       set: () => abort('You are setting ' + prop + ' on the Promise object, instead of the instance. Use .then() to get called back with the instance, see the MODULARIZE docs in src/settings.js'),
     });
   }
 });
 
-// Determine the runtime environment we are in. You can customize this by
-// setting the ENVIRONMENT setting at compile time (see settings.js).
-
-// Attempt to auto-detect the environment
-var ENVIRONMENT_IS_WEB = typeof window == 'object';
-var ENVIRONMENT_IS_WORKER = typeof importScripts == 'function';
-// N.b. Electron.js environment is simultaneously a NODE-environment, but
-// also a web environment.
-var ENVIRONMENT_IS_NODE = typeof process == 'object' && typeof process.versions == 'object' && typeof process.versions.node == 'string';
-var ENVIRONMENT_IS_SHELL = !ENVIRONMENT_IS_WEB && !ENVIRONMENT_IS_NODE && !ENVIRONMENT_IS_WORKER;
-
-if (Module['ENVIRONMENT']) {
-  throw new Error('Module.ENVIRONMENT has been deprecated. To force the environment, use the ENVIRONMENT compile-time option (for example, -sENVIRONMENT=web or -sENVIRONMENT=node)');
-}
-
-if (ENVIRONMENT_IS_NODE) {
-  // `require()` is no-op in an ESM module, use `createRequire()` to construct
-  // the require()` function.  This is only necessary for multi-environment
-  // builds, `-sENVIRONMENT=node` emits a static import declaration instead.
-  // TODO: Swap all `require()`'s with `import()`'s?
-  const { createRequire } = await import('module');
-  /** @suppress{duplicate} */
-  var require = createRequire(import.meta.url);
-
-}
-
 // --pre-jses are emitted after the Module integration code, so that they can
 // refer to Module (if they choose; they can also define Module)
-// include: /var/folders/ys/g3zjs1s13z3chzx5zwnyk1bw0000gn/T/tmpub4_unxs.js
 
-  if (!Module['expectedDataFileDownloads']) {
-    Module['expectedDataFileDownloads'] = 0;
+  if (!Module.expectedDataFileDownloads) {
+    Module.expectedDataFileDownloads = 0;
   }
 
-  Module['expectedDataFileDownloads']++;
-  (() => {
+  Module.expectedDataFileDownloads++;
+  (function() {
     // Do not attempt to redownload the virtual filesystem data when in a pthread or a Wasm Worker context.
-    var isPthread = typeof ENVIRONMENT_IS_PTHREAD != 'undefined' && ENVIRONMENT_IS_PTHREAD;
-    var isWasmWorker = typeof ENVIRONMENT_IS_WASM_WORKER != 'undefined' && ENVIRONMENT_IS_WASM_WORKER;
-    if (isPthread || isWasmWorker) return;
-    function loadPackage(metadata) {
+    if (Module['ENVIRONMENT_IS_PTHREAD'] || Module['$ww']) return;
+    var loadPackage = function(metadata) {
 
       var PACKAGE_PATH = '';
       if (typeof window === 'object') {
@@ -97,7 +67,7 @@ var REMOTE_PACKAGE_SIZE = metadata['remote_package_size'];
 
       function fetchRemotePackage(packageName, packageSize, callback, errback) {
         if (typeof process === 'object' && typeof process.versions === 'object' && typeof process.versions.node === 'string') {
-          require('fs').readFile(packageName, (err, contents) => {
+          require('fs').readFile(packageName, function(err, contents) {
             if (err) {
               errback(err);
             } else {
@@ -109,40 +79,40 @@ var REMOTE_PACKAGE_SIZE = metadata['remote_package_size'];
         var xhr = new XMLHttpRequest();
         xhr.open('GET', packageName, true);
         xhr.responseType = 'arraybuffer';
-        xhr.onprogress = (event) => {
+        xhr.onprogress = function(event) {
           var url = packageName;
           var size = packageSize;
           if (event.total) size = event.total;
           if (event.loaded) {
             if (!xhr.addedTotal) {
               xhr.addedTotal = true;
-              if (!Module['dataFileDownloads']) Module['dataFileDownloads'] = {};
-              Module['dataFileDownloads'][url] = {
+              if (!Module.dataFileDownloads) Module.dataFileDownloads = {};
+              Module.dataFileDownloads[url] = {
                 loaded: event.loaded,
                 total: size
               };
             } else {
-              Module['dataFileDownloads'][url].loaded = event.loaded;
+              Module.dataFileDownloads[url].loaded = event.loaded;
             }
             var total = 0;
             var loaded = 0;
             var num = 0;
-            for (var download in Module['dataFileDownloads']) {
-            var data = Module['dataFileDownloads'][download];
+            for (var download in Module.dataFileDownloads) {
+            var data = Module.dataFileDownloads[download];
               total += data.total;
               loaded += data.loaded;
               num++;
             }
-            total = Math.ceil(total * Module['expectedDataFileDownloads']/num);
-            Module['setStatus']?.(`Downloading data... (${loaded}/${total})`);
-          } else if (!Module['dataFileDownloads']) {
-            Module['setStatus']?.('Downloading data...');
+            total = Math.ceil(total * Module.expectedDataFileDownloads/num);
+            if (Module['setStatus']) Module['setStatus'](`Downloading data... (${loaded}/${total})`);
+          } else if (!Module.dataFileDownloads) {
+            if (Module['setStatus']) Module['setStatus']('Downloading data...');
           }
         };
-        xhr.onerror = (event) => {
+        xhr.onerror = function(event) {
           throw new Error("NetworkError for: " + packageName);
         }
-        xhr.onload = (event) => {
+        xhr.onload = function(event) {
           if (xhr.status == 200 || xhr.status == 304 || xhr.status == 206 || (xhr.status == 0 && xhr.response)) { // file URLs can return 0
             var packageData = xhr.response;
             callback(packageData);
@@ -160,7 +130,7 @@ var REMOTE_PACKAGE_SIZE = metadata['remote_package_size'];
       var fetchedCallback = null;
       var fetched = Module['getPreloadedPackage'] ? Module['getPreloadedPackage'](REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE) : null;
 
-      if (!fetched) fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE, (data) => {
+      if (!fetched) fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE, function(data) {
         if (fetchedCallback) {
           fetchedCallback(data);
           fetchedCallback = null;
@@ -169,7 +139,7 @@ var REMOTE_PACKAGE_SIZE = metadata['remote_package_size'];
         }
       }, handleError);
 
-    function runWithFS(Module) {
+    function runWithFS() {
 
       function assert(check, msg) {
         if (!check) throw msg + new Error().stack;
@@ -232,9 +202,9 @@ Module['FS_createPath']("/tulip4/sys/im", "tiny_town", true, true);
       };
       Module['addRunDependency']('datafile_build-standard/tulip/obj/micropython.data');
 
-      if (!Module['preloadResults']) Module['preloadResults'] = {};
+      if (!Module.preloadResults) Module.preloadResults = {};
 
-      Module['preloadResults'][PACKAGE_NAME] = {fromCache: false};
+      Module.preloadResults[PACKAGE_NAME] = {fromCache: false};
       if (fetched) {
         processPackageData(fetched);
         fetched = null;
@@ -244,33 +214,28 @@ Module['FS_createPath']("/tulip4/sys/im", "tiny_town", true, true);
 
     }
     if (Module['calledRun']) {
-      runWithFS(Module);
+      runWithFS();
     } else {
       if (!Module['preRun']) Module['preRun'] = [];
       Module["preRun"].push(runWithFS); // FS is not initialized yet, wait for it
     }
 
     }
-    loadPackage({"files": [{"filename": "/tulip4/sys/ex/.DS_Store", "start": 0, "end": 8196}, {"filename": "/tulip4/sys/ex/ansi.py", "start": 8196, "end": 8407}, {"filename": "/tulip4/sys/ex/bcla3.wav", "start": 8407, "end": 594189, "audio": 1}, {"filename": "/tulip4/sys/ex/bunny_bounce/bunny_bounce.py", "start": 594189, "end": 601964}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_l_0.png", "start": 601964, "end": 604374}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_l_1.png", "start": 604374, "end": 606569}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_l_2.png", "start": 606569, "end": 608847}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_l_3.png", "start": 608847, "end": 611145}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_r_0.png", "start": 611145, "end": 613439}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_r_1.png", "start": 613439, "end": 615715}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_r_2.png", "start": 615715, "end": 617923}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_r_3.png", "start": 617923, "end": 620343}, {"filename": "/tulip4/sys/ex/buttons.py", "start": 620343, "end": 621959}, {"filename": "/tulip4/sys/ex/calibrate.py", "start": 621959, "end": 623326}, {"filename": "/tulip4/sys/ex/fonts.py", "start": 623326, "end": 623497}, {"filename": "/tulip4/sys/ex/g/brick.png", "start": 623497, "end": 623790}, {"filename": "/tulip4/sys/ex/g/cave.png", "start": 623790, "end": 624392}, {"filename": "/tulip4/sys/ex/g/clouds.png", "start": 624392, "end": 625222}, {"filename": "/tulip4/sys/ex/g/clouds2.png", "start": 625222, "end": 626019}, {"filename": "/tulip4/sys/ex/g/colorbars.png", "start": 626019, "end": 635860}, {"filename": "/tulip4/sys/ex/g/desert.png", "start": 635860, "end": 636498}, {"filename": "/tulip4/sys/ex/g/earth.png", "start": 636498, "end": 637341}, {"filename": "/tulip4/sys/ex/g/earth2.png", "start": 637341, "end": 638187}, {"filename": "/tulip4/sys/ex/g/fire.png", "start": 
638187, "end": 639177}, {"filename": "/tulip4/sys/ex/g/flowers.png", "start": 639177, "end": 639939}, {"filename": "/tulip4/sys/ex/g/grass.png", "start": 639939, "end": 641110}, {"filename": "/tulip4/sys/ex/g/meadow.png", "start": 641110, "end": 641616}, {"filename": "/tulip4/sys/ex/g/mountain-bg.png", "start": 641616, "end": 646219}, {"filename": "/tulip4/sys/ex/g/mountain-far.png", "start": 646219, "end": 648634}, {"filename": "/tulip4/sys/ex/g/mountain.png", "start": 648634, "end": 653516}, {"filename": "/tulip4/sys/ex/g/rabbit_l_0.png", "start": 653516, "end": 655926}, {"filename": "/tulip4/sys/ex/g/rabbit_l_1.png", "start": 655926, "end": 658121}, {"filename": "/tulip4/sys/ex/g/rabbit_l_2.png", "start": 658121, "end": 660399}, {"filename": "/tulip4/sys/ex/g/rabbit_l_3.png", "start": 660399, "end": 662697}, {"filename": "/tulip4/sys/ex/g/rabbit_r_0.png", "start": 662697, "end": 664991}, {"filename": "/tulip4/sys/ex/g/rabbit_r_1.png", "start": 664991, "end": 667267}, {"filename": "/tulip4/sys/ex/g/rabbit_r_2.png", "start": 667267, "end": 669475}, {"filename": "/tulip4/sys/ex/g/rabbit_r_3.png", "start": 669475, "end": 671895}, {"filename": "/tulip4/sys/ex/g/trees-far.png", "start": 671895, "end": 677610}, {"filename": "/tulip4/sys/ex/g/trees.png", "start": 677610, "end": 684536}, {"filename": "/tulip4/sys/ex/g/tulip3.png", "start": 684536, "end": 719654}, {"filename": "/tulip4/sys/ex/g/tulipbw.png", "start": 719654, "end": 722533}, {"filename": "/tulip4/sys/ex/g/water.png", "start": 722533, "end": 723117}, {"filename": "/tulip4/sys/ex/joy.py", "start": 723117, "end": 724118}, {"filename": "/tulip4/sys/ex/my_drums.py", "start": 724118, "end": 733715}, {"filename": "/tulip4/sys/ex/my_juno6.py", "start": 733715, "end": 758495}, {"filename": "/tulip4/sys/ex/my_voices.py", "start": 758495, "end": 771784}, {"filename": "/tulip4/sys/ex/my_worldui.py", "start": 771784, "end": 775795}, {"filename": "/tulip4/sys/ex/parallax.py", "start": 775795, "end": 780000}, 
{"filename": "/tulip4/sys/ex/planet_boing/pix/bang_texture_03.png", "start": 780000, "end": 781945}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_00.png", "start": 781945, "end": 783785}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_01.png", "start": 783785, "end": 787290}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_02.png", "start": 787290, "end": 789500}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_03.png", "start": 789500, "end": 791580}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_04.png", "start": 791580, "end": 793768}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_05.png", "start": 793768, "end": 795763}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_06.png", "start": 795763, "end": 797879}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_07.png", "start": 797879, "end": 800024}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_08.png", "start": 800024, "end": 802176}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_09.png", "start": 802176, "end": 804289}, {"filename": "/tulip4/sys/ex/planet_boing/pix/wormhole.png", "start": 804289, "end": 805051}, {"filename": "/tulip4/sys/ex/planet_boing/planet_boing.py", "start": 805051, "end": 820957}, {"filename": "/tulip4/sys/ex/rgb332.py", "start": 820957, "end": 821295}, {"filename": "/tulip4/sys/ex/screensaver.py", "start": 821295, "end": 822311}, {"filename": "/tulip4/sys/ex/vlng3.wav", "start": 822311, "end": 1125101, "audio": 1}, {"filename": "/tulip4/sys/ex/vlsa3.wav", "start": 1125101, "end": 1583675, "audio": 1}, {"filename": "/tulip4/sys/ex/woodpiano.txt", "start": 1583675, "end": 1584258}, {"filename": "/tulip4/sys/ex/wordpad.py", "start": 1584258, "end": 1584934}, {"filename": "/tulip4/sys/ex/xanadu.py", "start": 1584934, "end": 1590525}, {"filename": "/tulip4/sys/im/tiny_town/tile_0000.png", "start": 1590525, "end": 1590624}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0001.png", "start": 1590624, "end": 1590768}, {"filename": "/tulip4/sys/im/tiny_town/tile_0002.png", "start": 1590768, "end": 1590942}, {"filename": "/tulip4/sys/im/tiny_town/tile_0003.png", "start": 1590942, "end": 1591118}, {"filename": "/tulip4/sys/im/tiny_town/tile_0004.png", "start": 1591118, "end": 1591293}, {"filename": "/tulip4/sys/im/tiny_town/tile_0005.png", "start": 1591293, "end": 1591490}, {"filename": "/tulip4/sys/im/tiny_town/tile_0006.png", "start": 1591490, "end": 1591670}, {"filename": "/tulip4/sys/im/tiny_town/tile_0007.png", "start": 1591670, "end": 1591827}, {"filename": "/tulip4/sys/im/tiny_town/tile_0008.png", "start": 1591827, "end": 1592012}, {"filename": "/tulip4/sys/im/tiny_town/tile_0009.png", "start": 1592012, "end": 1592192}, {"filename": "/tulip4/sys/im/tiny_town/tile_0010.png", "start": 1592192, "end": 1592349}, {"filename": "/tulip4/sys/im/tiny_town/tile_0011.png", "start": 1592349, "end": 1592534}, {"filename": "/tulip4/sys/im/tiny_town/tile_0012.png", "start": 1592534, "end": 1592695}, {"filename": "/tulip4/sys/im/tiny_town/tile_0013.png", "start": 1592695, "end": 1592848}, {"filename": "/tulip4/sys/im/tiny_town/tile_0014.png", "start": 1592848, "end": 1593009}, {"filename": "/tulip4/sys/im/tiny_town/tile_0015.png", "start": 1593009, "end": 1593202}, {"filename": "/tulip4/sys/im/tiny_town/tile_0016.png", "start": 1593202, "end": 1593394}, {"filename": "/tulip4/sys/im/tiny_town/tile_0017.png", "start": 1593394, "end": 1593571}, {"filename": "/tulip4/sys/im/tiny_town/tile_0018.png", "start": 1593571, "end": 1593734}, {"filename": "/tulip4/sys/im/tiny_town/tile_0019.png", "start": 1593734, "end": 1593934}, {"filename": "/tulip4/sys/im/tiny_town/tile_0020.png", "start": 1593934, "end": 1594100}, {"filename": "/tulip4/sys/im/tiny_town/tile_0021.png", "start": 1594100, "end": 1594263}, {"filename": "/tulip4/sys/im/tiny_town/tile_0022.png", "start": 1594263, "end": 1594463}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0023.png", "start": 1594463, "end": 1594629}, {"filename": "/tulip4/sys/im/tiny_town/tile_0024.png", "start": 1594629, "end": 1594764}, {"filename": "/tulip4/sys/im/tiny_town/tile_0025.png", "start": 1594764, "end": 1594863}, {"filename": "/tulip4/sys/im/tiny_town/tile_0026.png", "start": 1594863, "end": 1595003}, {"filename": "/tulip4/sys/im/tiny_town/tile_0027.png", "start": 1595003, "end": 1595194}, {"filename": "/tulip4/sys/im/tiny_town/tile_0028.png", "start": 1595194, "end": 1595385}, {"filename": "/tulip4/sys/im/tiny_town/tile_0029.png", "start": 1595385, "end": 1595582}, {"filename": "/tulip4/sys/im/tiny_town/tile_0030.png", "start": 1595582, "end": 1595763}, {"filename": "/tulip4/sys/im/tiny_town/tile_0031.png", "start": 1595763, "end": 1595920}, {"filename": "/tulip4/sys/im/tiny_town/tile_0032.png", "start": 1595920, "end": 1596101}, {"filename": "/tulip4/sys/im/tiny_town/tile_0033.png", "start": 1596101, "end": 1596282}, {"filename": "/tulip4/sys/im/tiny_town/tile_0034.png", "start": 1596282, "end": 1596439}, {"filename": "/tulip4/sys/im/tiny_town/tile_0035.png", "start": 1596439, "end": 1596620}, {"filename": "/tulip4/sys/im/tiny_town/tile_0036.png", "start": 1596620, "end": 1596774}, {"filename": "/tulip4/sys/im/tiny_town/tile_0037.png", "start": 1596774, "end": 1596908}, {"filename": "/tulip4/sys/im/tiny_town/tile_0038.png", "start": 1596908, "end": 1597057}, {"filename": "/tulip4/sys/im/tiny_town/tile_0039.png", "start": 1597057, "end": 1597186}, {"filename": "/tulip4/sys/im/tiny_town/tile_0040.png", "start": 1597186, "end": 1597325}, {"filename": "/tulip4/sys/im/tiny_town/tile_0041.png", "start": 1597325, "end": 1597451}, {"filename": "/tulip4/sys/im/tiny_town/tile_0042.png", "start": 1597451, "end": 1597588}, {"filename": "/tulip4/sys/im/tiny_town/tile_0043.png", "start": 1597588, "end": 1597754}, {"filename": "/tulip4/sys/im/tiny_town/tile_0044.png", "start": 1597754, "end": 1597918}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0045.png", "start": 1597918, "end": 1598096}, {"filename": "/tulip4/sys/im/tiny_town/tile_0046.png", "start": 1598096, "end": 1598260}, {"filename": "/tulip4/sys/im/tiny_town/tile_0047.png", "start": 1598260, "end": 1598404}, {"filename": "/tulip4/sys/im/tiny_town/tile_0048.png", "start": 1598404, "end": 1598570}, {"filename": "/tulip4/sys/im/tiny_town/tile_0049.png", "start": 1598570, "end": 1598731}, {"filename": "/tulip4/sys/im/tiny_town/tile_0050.png", "start": 1598731, "end": 1598902}, {"filename": "/tulip4/sys/im/tiny_town/tile_0051.png", "start": 1598902, "end": 1599069}, {"filename": "/tulip4/sys/im/tiny_town/tile_0052.png", "start": 1599069, "end": 1599249}, {"filename": "/tulip4/sys/im/tiny_town/tile_0053.png", "start": 1599249, "end": 1599427}, {"filename": "/tulip4/sys/im/tiny_town/tile_0054.png", "start": 1599427, "end": 1599608}, {"filename": "/tulip4/sys/im/tiny_town/tile_0055.png", "start": 1599608, "end": 1599796}, {"filename": "/tulip4/sys/im/tiny_town/tile_0056.png", "start": 1599796, "end": 1599942}, {"filename": "/tulip4/sys/im/tiny_town/tile_0057.png", "start": 1599942, "end": 1600155}, {"filename": "/tulip4/sys/im/tiny_town/tile_0058.png", "start": 1600155, "end": 1600302}, {"filename": "/tulip4/sys/im/tiny_town/tile_0059.png", "start": 1600302, "end": 1600447}, {"filename": "/tulip4/sys/im/tiny_town/tile_0060.png", "start": 1600447, "end": 1600624}, {"filename": "/tulip4/sys/im/tiny_town/tile_0061.png", "start": 1600624, "end": 1600790}, {"filename": "/tulip4/sys/im/tiny_town/tile_0062.png", "start": 1600790, "end": 1600974}, {"filename": "/tulip4/sys/im/tiny_town/tile_0063.png", "start": 1600974, "end": 1601190}, {"filename": "/tulip4/sys/im/tiny_town/tile_0064.png", "start": 1601190, "end": 1601367}, {"filename": "/tulip4/sys/im/tiny_town/tile_0065.png", "start": 1601367, "end": 1601533}, {"filename": "/tulip4/sys/im/tiny_town/tile_0066.png", "start": 1601533, "end": 1601717}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0067.png", "start": 1601717, "end": 1601933}, {"filename": "/tulip4/sys/im/tiny_town/tile_0068.png", "start": 1601933, "end": 1602097}, {"filename": "/tulip4/sys/im/tiny_town/tile_0069.png", "start": 1602097, "end": 1602268}, {"filename": "/tulip4/sys/im/tiny_town/tile_0070.png", "start": 1602268, "end": 1602433}, {"filename": "/tulip4/sys/im/tiny_town/tile_0071.png", "start": 1602433, "end": 1602572}, {"filename": "/tulip4/sys/im/tiny_town/tile_0072.png", "start": 1602572, "end": 1602709}, {"filename": "/tulip4/sys/im/tiny_town/tile_0073.png", "start": 1602709, "end": 1602838}, {"filename": "/tulip4/sys/im/tiny_town/tile_0074.png", "start": 1602838, "end": 1602993}, {"filename": "/tulip4/sys/im/tiny_town/tile_0075.png", "start": 1602993, "end": 1603136}, {"filename": "/tulip4/sys/im/tiny_town/tile_0076.png", "start": 1603136, "end": 1603273}, {"filename": "/tulip4/sys/im/tiny_town/tile_0077.png", "start": 1603273, "end": 1603402}, {"filename": "/tulip4/sys/im/tiny_town/tile_0078.png", "start": 1603402, "end": 1603557}, {"filename": "/tulip4/sys/im/tiny_town/tile_0079.png", "start": 1603557, "end": 1603700}, {"filename": "/tulip4/sys/im/tiny_town/tile_0080.png", "start": 1603700, "end": 1603860}, {"filename": "/tulip4/sys/im/tiny_town/tile_0081.png", "start": 1603860, "end": 1603980}, {"filename": "/tulip4/sys/im/tiny_town/tile_0082.png", "start": 1603980, "end": 1604142}, {"filename": "/tulip4/sys/im/tiny_town/tile_0083.png", "start": 1604142, "end": 1604331}, {"filename": "/tulip4/sys/im/tiny_town/tile_0084.png", "start": 1604331, "end": 1604501}, {"filename": "/tulip4/sys/im/tiny_town/tile_0085.png", "start": 1604501, "end": 1604664}, {"filename": "/tulip4/sys/im/tiny_town/tile_0086.png", "start": 1604664, "end": 1604825}, {"filename": "/tulip4/sys/im/tiny_town/tile_0087.png", "start": 1604825, "end": 1604986}, {"filename": "/tulip4/sys/im/tiny_town/tile_0088.png", "start": 1604986, "end": 1605153}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0089.png", "start": 1605153, "end": 1605316}, {"filename": "/tulip4/sys/im/tiny_town/tile_0090.png", "start": 1605316, "end": 1605477}, {"filename": "/tulip4/sys/im/tiny_town/tile_0091.png", "start": 1605477, "end": 1605639}, {"filename": "/tulip4/sys/im/tiny_town/tile_0092.png", "start": 1605639, "end": 1605797}, {"filename": "/tulip4/sys/im/tiny_town/tile_0093.png", "start": 1605797, "end": 1605959}, {"filename": "/tulip4/sys/im/tiny_town/tile_0094.png", "start": 1605959, "end": 1606144}, {"filename": "/tulip4/sys/im/tiny_town/tile_0095.png", "start": 1606144, "end": 1606339}, {"filename": "/tulip4/sys/im/tiny_town/tile_0096.png", "start": 1606339, "end": 1606528}, {"filename": "/tulip4/sys/im/tiny_town/tile_0097.png", "start": 1606528, "end": 1606667}, {"filename": "/tulip4/sys/im/tiny_town/tile_0098.png", "start": 1606667, "end": 1606848}, {"filename": "/tulip4/sys/im/tiny_town/tile_0099.png", "start": 1606848, "end": 1607019}, {"filename": "/tulip4/sys/im/tiny_town/tile_0100.png", "start": 1607019, "end": 1607151}, {"filename": "/tulip4/sys/im/tiny_town/tile_0101.png", "start": 1607151, "end": 1607325}, {"filename": "/tulip4/sys/im/tiny_town/tile_0102.png", "start": 1607325, "end": 1607500}, {"filename": "/tulip4/sys/im/tiny_town/tile_0103.png", "start": 1607500, "end": 1607696}, {"filename": "/tulip4/sys/im/tiny_town/tile_0104.png", "start": 1607696, "end": 1607938}, {"filename": "/tulip4/sys/im/tiny_town/tile_0105.png", "start": 1607938, "end": 1608136}, {"filename": "/tulip4/sys/im/tiny_town/tile_0106.png", "start": 1608136, "end": 1608314}, {"filename": "/tulip4/sys/im/tiny_town/tile_0107.png", "start": 1608314, "end": 1608516}, {"filename": "/tulip4/sys/im/tiny_town/tile_0108.png", "start": 1608516, "end": 1608638}, {"filename": "/tulip4/sys/im/tiny_town/tile_0109.png", "start": 1608638, "end": 1608737}, {"filename": "/tulip4/sys/im/tiny_town/tile_0110.png", "start": 1608737, "end": 1608858}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0111.png", "start": 1608858, "end": 1609061}, {"filename": "/tulip4/sys/im/tiny_town/tile_0112.png", "start": 1609061, "end": 1609264}, {"filename": "/tulip4/sys/im/tiny_town/tile_0113.png", "start": 1609264, "end": 1609464}, {"filename": "/tulip4/sys/im/tiny_town/tile_0114.png", "start": 1609464, "end": 1609664}, {"filename": "/tulip4/sys/im/tiny_town/tile_0115.png", "start": 1609664, "end": 1609850}, {"filename": "/tulip4/sys/im/tiny_town/tile_0116.png", "start": 1609850, "end": 1610034}, {"filename": "/tulip4/sys/im/tiny_town/tile_0117.png", "start": 1610034, "end": 1610206}, {"filename": "/tulip4/sys/im/tiny_town/tile_0118.png", "start": 1610206, "end": 1610389}, {"filename": "/tulip4/sys/im/tiny_town/tile_0119.png", "start": 1610389, "end": 1610572}, {"filename": "/tulip4/sys/im/tiny_town/tile_0120.png", "start": 1610572, "end": 1610730}, {"filename": "/tulip4/sys/im/tiny_town/tile_0121.png", "start": 1610730, "end": 1610871}, {"filename": "/tulip4/sys/im/tiny_town/tile_0122.png", "start": 1610871, "end": 1611034}, {"filename": "/tulip4/sys/im/tiny_town/tile_0123.png", "start": 1611034, "end": 1611179}, {"filename": "/tulip4/sys/im/tiny_town/tile_0124.png", "start": 1611179, "end": 1611325}, {"filename": "/tulip4/sys/im/tiny_town/tile_0125.png", "start": 1611325, "end": 1611511}, {"filename": "/tulip4/sys/im/tiny_town/tile_0126.png", "start": 1611511, "end": 1611669}, {"filename": "/tulip4/sys/im/tiny_town/tile_0127.png", "start": 1611669, "end": 1611853}, {"filename": "/tulip4/sys/im/tiny_town/tile_0128.png", "start": 1611853, "end": 1612053}, {"filename": "/tulip4/sys/im/tiny_town/tile_0129.png", "start": 1612053, "end": 1612253}, {"filename": "/tulip4/sys/im/tiny_town/tile_0130.png", "start": 1612253, "end": 1612459}, {"filename": "/tulip4/sys/im/tiny_town/tile_0131.png", "start": 1612459, "end": 1612679}], "remote_package_size": 1612679});
+    loadPackage({"files": [{"filename": "/tulip4/sys/ex/ansi.py", "start": 0, "end": 211}, {"filename": "/tulip4/sys/ex/bcla3.wav", "start": 211, "end": 585993, "audio": 1}, {"filename": "/tulip4/sys/ex/bunny_bounce/bunny_bounce.py", "start": 585993, "end": 593768}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_l_0.png", "start": 593768, "end": 596178}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_l_1.png", "start": 596178, "end": 598373}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_l_2.png", "start": 598373, "end": 600651}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_l_3.png", "start": 600651, "end": 602949}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_r_0.png", "start": 602949, "end": 605243}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_r_1.png", "start": 605243, "end": 607519}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_r_2.png", "start": 607519, "end": 609727}, {"filename": "/tulip4/sys/ex/bunny_bounce/pix/rabbit_r_3.png", "start": 609727, "end": 612147}, {"filename": "/tulip4/sys/ex/buttons.py", "start": 612147, "end": 613763}, {"filename": "/tulip4/sys/ex/calibrate.py", "start": 613763, "end": 615130}, {"filename": "/tulip4/sys/ex/fonts.py", "start": 615130, "end": 615301}, {"filename": "/tulip4/sys/ex/g/brick.png", "start": 615301, "end": 615594}, {"filename": "/tulip4/sys/ex/g/cave.png", "start": 615594, "end": 616196}, {"filename": "/tulip4/sys/ex/g/clouds.png", "start": 616196, "end": 617026}, {"filename": "/tulip4/sys/ex/g/clouds2.png", "start": 617026, "end": 617823}, {"filename": "/tulip4/sys/ex/g/colorbars.png", "start": 617823, "end": 627664}, {"filename": "/tulip4/sys/ex/g/desert.png", "start": 627664, "end": 628302}, {"filename": "/tulip4/sys/ex/g/earth.png", "start": 628302, "end": 629145}, {"filename": "/tulip4/sys/ex/g/earth2.png", "start": 629145, "end": 629991}, {"filename": "/tulip4/sys/ex/g/fire.png", "start": 629991, "end": 630981}, {"filename": "/tulip4/sys/ex/g/flowers.png", 
"start": 630981, "end": 631743}, {"filename": "/tulip4/sys/ex/g/grass.png", "start": 631743, "end": 632914}, {"filename": "/tulip4/sys/ex/g/meadow.png", "start": 632914, "end": 633420}, {"filename": "/tulip4/sys/ex/g/mountain-bg.png", "start": 633420, "end": 638023}, {"filename": "/tulip4/sys/ex/g/mountain-far.png", "start": 638023, "end": 640438}, {"filename": "/tulip4/sys/ex/g/mountain.png", "start": 640438, "end": 645320}, {"filename": "/tulip4/sys/ex/g/rabbit_l_0.png", "start": 645320, "end": 647730}, {"filename": "/tulip4/sys/ex/g/rabbit_l_1.png", "start": 647730, "end": 649925}, {"filename": "/tulip4/sys/ex/g/rabbit_l_2.png", "start": 649925, "end": 652203}, {"filename": "/tulip4/sys/ex/g/rabbit_l_3.png", "start": 652203, "end": 654501}, {"filename": "/tulip4/sys/ex/g/rabbit_r_0.png", "start": 654501, "end": 656795}, {"filename": "/tulip4/sys/ex/g/rabbit_r_1.png", "start": 656795, "end": 659071}, {"filename": "/tulip4/sys/ex/g/rabbit_r_2.png", "start": 659071, "end": 661279}, {"filename": "/tulip4/sys/ex/g/rabbit_r_3.png", "start": 661279, "end": 663699}, {"filename": "/tulip4/sys/ex/g/trees-far.png", "start": 663699, "end": 669414}, {"filename": "/tulip4/sys/ex/g/trees.png", "start": 669414, "end": 676340}, {"filename": "/tulip4/sys/ex/g/tulip3.png", "start": 676340, "end": 711458}, {"filename": "/tulip4/sys/ex/g/tulipbw.png", "start": 711458, "end": 714337}, {"filename": "/tulip4/sys/ex/g/water.png", "start": 714337, "end": 714921}, {"filename": "/tulip4/sys/ex/joy.py", "start": 714921, "end": 715922}, {"filename": "/tulip4/sys/ex/my_drums.py", "start": 715922, "end": 724860}, {"filename": "/tulip4/sys/ex/my_juno6.py", "start": 724860, "end": 749640}, {"filename": "/tulip4/sys/ex/my_voices.py", "start": 749640, "end": 762929}, {"filename": "/tulip4/sys/ex/my_worldui.py", "start": 762929, "end": 766940}, {"filename": "/tulip4/sys/ex/parallax.py", "start": 766940, "end": 771145}, {"filename": "/tulip4/sys/ex/planet_boing/pix/bang_texture_03.png", "start": 
771145, "end": 773090}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_00.png", "start": 773090, "end": 774930}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_01.png", "start": 774930, "end": 778435}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_02.png", "start": 778435, "end": 780645}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_03.png", "start": 780645, "end": 782725}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_04.png", "start": 782725, "end": 784913}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_05.png", "start": 784913, "end": 786908}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_06.png", "start": 786908, "end": 789024}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_07.png", "start": 789024, "end": 791169}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_08.png", "start": 791169, "end": 793321}, {"filename": "/tulip4/sys/ex/planet_boing/pix/blob_texture_09.png", "start": 793321, "end": 795434}, {"filename": "/tulip4/sys/ex/planet_boing/pix/wormhole.png", "start": 795434, "end": 796196}, {"filename": "/tulip4/sys/ex/planet_boing/planet_boing.py", "start": 796196, "end": 812102}, {"filename": "/tulip4/sys/ex/rgb332.py", "start": 812102, "end": 812440}, {"filename": "/tulip4/sys/ex/screensaver.py", "start": 812440, "end": 813456}, {"filename": "/tulip4/sys/ex/vlng3.wav", "start": 813456, "end": 1116246, "audio": 1}, {"filename": "/tulip4/sys/ex/vlsa3.wav", "start": 1116246, "end": 1574820, "audio": 1}, {"filename": "/tulip4/sys/ex/woodpiano.txt", "start": 1574820, "end": 1575403}, {"filename": "/tulip4/sys/ex/wordpad.py", "start": 1575403, "end": 1576079}, {"filename": "/tulip4/sys/ex/xanadu.py", "start": 1576079, "end": 1581670}, {"filename": "/tulip4/sys/im/tiny_town/tile_0000.png", "start": 1581670, "end": 1581769}, {"filename": "/tulip4/sys/im/tiny_town/tile_0001.png", "start": 1581769, "end": 1581913}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0002.png", "start": 1581913, "end": 1582087}, {"filename": "/tulip4/sys/im/tiny_town/tile_0003.png", "start": 1582087, "end": 1582263}, {"filename": "/tulip4/sys/im/tiny_town/tile_0004.png", "start": 1582263, "end": 1582438}, {"filename": "/tulip4/sys/im/tiny_town/tile_0005.png", "start": 1582438, "end": 1582635}, {"filename": "/tulip4/sys/im/tiny_town/tile_0006.png", "start": 1582635, "end": 1582815}, {"filename": "/tulip4/sys/im/tiny_town/tile_0007.png", "start": 1582815, "end": 1582972}, {"filename": "/tulip4/sys/im/tiny_town/tile_0008.png", "start": 1582972, "end": 1583157}, {"filename": "/tulip4/sys/im/tiny_town/tile_0009.png", "start": 1583157, "end": 1583337}, {"filename": "/tulip4/sys/im/tiny_town/tile_0010.png", "start": 1583337, "end": 1583494}, {"filename": "/tulip4/sys/im/tiny_town/tile_0011.png", "start": 1583494, "end": 1583679}, {"filename": "/tulip4/sys/im/tiny_town/tile_0012.png", "start": 1583679, "end": 1583840}, {"filename": "/tulip4/sys/im/tiny_town/tile_0013.png", "start": 1583840, "end": 1583993}, {"filename": "/tulip4/sys/im/tiny_town/tile_0014.png", "start": 1583993, "end": 1584154}, {"filename": "/tulip4/sys/im/tiny_town/tile_0015.png", "start": 1584154, "end": 1584347}, {"filename": "/tulip4/sys/im/tiny_town/tile_0016.png", "start": 1584347, "end": 1584539}, {"filename": "/tulip4/sys/im/tiny_town/tile_0017.png", "start": 1584539, "end": 1584716}, {"filename": "/tulip4/sys/im/tiny_town/tile_0018.png", "start": 1584716, "end": 1584879}, {"filename": "/tulip4/sys/im/tiny_town/tile_0019.png", "start": 1584879, "end": 1585079}, {"filename": "/tulip4/sys/im/tiny_town/tile_0020.png", "start": 1585079, "end": 1585245}, {"filename": "/tulip4/sys/im/tiny_town/tile_0021.png", "start": 1585245, "end": 1585408}, {"filename": "/tulip4/sys/im/tiny_town/tile_0022.png", "start": 1585408, "end": 1585608}, {"filename": "/tulip4/sys/im/tiny_town/tile_0023.png", "start": 1585608, "end": 1585774}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0024.png", "start": 1585774, "end": 1585909}, {"filename": "/tulip4/sys/im/tiny_town/tile_0025.png", "start": 1585909, "end": 1586008}, {"filename": "/tulip4/sys/im/tiny_town/tile_0026.png", "start": 1586008, "end": 1586148}, {"filename": "/tulip4/sys/im/tiny_town/tile_0027.png", "start": 1586148, "end": 1586339}, {"filename": "/tulip4/sys/im/tiny_town/tile_0028.png", "start": 1586339, "end": 1586530}, {"filename": "/tulip4/sys/im/tiny_town/tile_0029.png", "start": 1586530, "end": 1586727}, {"filename": "/tulip4/sys/im/tiny_town/tile_0030.png", "start": 1586727, "end": 1586908}, {"filename": "/tulip4/sys/im/tiny_town/tile_0031.png", "start": 1586908, "end": 1587065}, {"filename": "/tulip4/sys/im/tiny_town/tile_0032.png", "start": 1587065, "end": 1587246}, {"filename": "/tulip4/sys/im/tiny_town/tile_0033.png", "start": 1587246, "end": 1587427}, {"filename": "/tulip4/sys/im/tiny_town/tile_0034.png", "start": 1587427, "end": 1587584}, {"filename": "/tulip4/sys/im/tiny_town/tile_0035.png", "start": 1587584, "end": 1587765}, {"filename": "/tulip4/sys/im/tiny_town/tile_0036.png", "start": 1587765, "end": 1587919}, {"filename": "/tulip4/sys/im/tiny_town/tile_0037.png", "start": 1587919, "end": 1588053}, {"filename": "/tulip4/sys/im/tiny_town/tile_0038.png", "start": 1588053, "end": 1588202}, {"filename": "/tulip4/sys/im/tiny_town/tile_0039.png", "start": 1588202, "end": 1588331}, {"filename": "/tulip4/sys/im/tiny_town/tile_0040.png", "start": 1588331, "end": 1588470}, {"filename": "/tulip4/sys/im/tiny_town/tile_0041.png", "start": 1588470, "end": 1588596}, {"filename": "/tulip4/sys/im/tiny_town/tile_0042.png", "start": 1588596, "end": 1588733}, {"filename": "/tulip4/sys/im/tiny_town/tile_0043.png", "start": 1588733, "end": 1588899}, {"filename": "/tulip4/sys/im/tiny_town/tile_0044.png", "start": 1588899, "end": 1589063}, {"filename": "/tulip4/sys/im/tiny_town/tile_0045.png", "start": 1589063, "end": 1589241}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0046.png", "start": 1589241, "end": 1589405}, {"filename": "/tulip4/sys/im/tiny_town/tile_0047.png", "start": 1589405, "end": 1589549}, {"filename": "/tulip4/sys/im/tiny_town/tile_0048.png", "start": 1589549, "end": 1589715}, {"filename": "/tulip4/sys/im/tiny_town/tile_0049.png", "start": 1589715, "end": 1589876}, {"filename": "/tulip4/sys/im/tiny_town/tile_0050.png", "start": 1589876, "end": 1590047}, {"filename": "/tulip4/sys/im/tiny_town/tile_0051.png", "start": 1590047, "end": 1590214}, {"filename": "/tulip4/sys/im/tiny_town/tile_0052.png", "start": 1590214, "end": 1590394}, {"filename": "/tulip4/sys/im/tiny_town/tile_0053.png", "start": 1590394, "end": 1590572}, {"filename": "/tulip4/sys/im/tiny_town/tile_0054.png", "start": 1590572, "end": 1590753}, {"filename": "/tulip4/sys/im/tiny_town/tile_0055.png", "start": 1590753, "end": 1590941}, {"filename": "/tulip4/sys/im/tiny_town/tile_0056.png", "start": 1590941, "end": 1591087}, {"filename": "/tulip4/sys/im/tiny_town/tile_0057.png", "start": 1591087, "end": 1591300}, {"filename": "/tulip4/sys/im/tiny_town/tile_0058.png", "start": 1591300, "end": 1591447}, {"filename": "/tulip4/sys/im/tiny_town/tile_0059.png", "start": 1591447, "end": 1591592}, {"filename": "/tulip4/sys/im/tiny_town/tile_0060.png", "start": 1591592, "end": 1591769}, {"filename": "/tulip4/sys/im/tiny_town/tile_0061.png", "start": 1591769, "end": 1591935}, {"filename": "/tulip4/sys/im/tiny_town/tile_0062.png", "start": 1591935, "end": 1592119}, {"filename": "/tulip4/sys/im/tiny_town/tile_0063.png", "start": 1592119, "end": 1592335}, {"filename": "/tulip4/sys/im/tiny_town/tile_0064.png", "start": 1592335, "end": 1592512}, {"filename": "/tulip4/sys/im/tiny_town/tile_0065.png", "start": 1592512, "end": 1592678}, {"filename": "/tulip4/sys/im/tiny_town/tile_0066.png", "start": 1592678, "end": 1592862}, {"filename": "/tulip4/sys/im/tiny_town/tile_0067.png", "start": 1592862, "end": 1593078}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0068.png", "start": 1593078, "end": 1593242}, {"filename": "/tulip4/sys/im/tiny_town/tile_0069.png", "start": 1593242, "end": 1593413}, {"filename": "/tulip4/sys/im/tiny_town/tile_0070.png", "start": 1593413, "end": 1593578}, {"filename": "/tulip4/sys/im/tiny_town/tile_0071.png", "start": 1593578, "end": 1593717}, {"filename": "/tulip4/sys/im/tiny_town/tile_0072.png", "start": 1593717, "end": 1593854}, {"filename": "/tulip4/sys/im/tiny_town/tile_0073.png", "start": 1593854, "end": 1593983}, {"filename": "/tulip4/sys/im/tiny_town/tile_0074.png", "start": 1593983, "end": 1594138}, {"filename": "/tulip4/sys/im/tiny_town/tile_0075.png", "start": 1594138, "end": 1594281}, {"filename": "/tulip4/sys/im/tiny_town/tile_0076.png", "start": 1594281, "end": 1594418}, {"filename": "/tulip4/sys/im/tiny_town/tile_0077.png", "start": 1594418, "end": 1594547}, {"filename": "/tulip4/sys/im/tiny_town/tile_0078.png", "start": 1594547, "end": 1594702}, {"filename": "/tulip4/sys/im/tiny_town/tile_0079.png", "start": 1594702, "end": 1594845}, {"filename": "/tulip4/sys/im/tiny_town/tile_0080.png", "start": 1594845, "end": 1595005}, {"filename": "/tulip4/sys/im/tiny_town/tile_0081.png", "start": 1595005, "end": 1595125}, {"filename": "/tulip4/sys/im/tiny_town/tile_0082.png", "start": 1595125, "end": 1595287}, {"filename": "/tulip4/sys/im/tiny_town/tile_0083.png", "start": 1595287, "end": 1595476}, {"filename": "/tulip4/sys/im/tiny_town/tile_0084.png", "start": 1595476, "end": 1595646}, {"filename": "/tulip4/sys/im/tiny_town/tile_0085.png", "start": 1595646, "end": 1595809}, {"filename": "/tulip4/sys/im/tiny_town/tile_0086.png", "start": 1595809, "end": 1595970}, {"filename": "/tulip4/sys/im/tiny_town/tile_0087.png", "start": 1595970, "end": 1596131}, {"filename": "/tulip4/sys/im/tiny_town/tile_0088.png", "start": 1596131, "end": 1596298}, {"filename": "/tulip4/sys/im/tiny_town/tile_0089.png", "start": 1596298, "end": 1596461}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0090.png", "start": 1596461, "end": 1596622}, {"filename": "/tulip4/sys/im/tiny_town/tile_0091.png", "start": 1596622, "end": 1596784}, {"filename": "/tulip4/sys/im/tiny_town/tile_0092.png", "start": 1596784, "end": 1596942}, {"filename": "/tulip4/sys/im/tiny_town/tile_0093.png", "start": 1596942, "end": 1597104}, {"filename": "/tulip4/sys/im/tiny_town/tile_0094.png", "start": 1597104, "end": 1597289}, {"filename": "/tulip4/sys/im/tiny_town/tile_0095.png", "start": 1597289, "end": 1597484}, {"filename": "/tulip4/sys/im/tiny_town/tile_0096.png", "start": 1597484, "end": 1597673}, {"filename": "/tulip4/sys/im/tiny_town/tile_0097.png", "start": 1597673, "end": 1597812}, {"filename": "/tulip4/sys/im/tiny_town/tile_0098.png", "start": 1597812, "end": 1597993}, {"filename": "/tulip4/sys/im/tiny_town/tile_0099.png", "start": 1597993, "end": 1598164}, {"filename": "/tulip4/sys/im/tiny_town/tile_0100.png", "start": 1598164, "end": 1598296}, {"filename": "/tulip4/sys/im/tiny_town/tile_0101.png", "start": 1598296, "end": 1598470}, {"filename": "/tulip4/sys/im/tiny_town/tile_0102.png", "start": 1598470, "end": 1598645}, {"filename": "/tulip4/sys/im/tiny_town/tile_0103.png", "start": 1598645, "end": 1598841}, {"filename": "/tulip4/sys/im/tiny_town/tile_0104.png", "start": 1598841, "end": 1599083}, {"filename": "/tulip4/sys/im/tiny_town/tile_0105.png", "start": 1599083, "end": 1599281}, {"filename": "/tulip4/sys/im/tiny_town/tile_0106.png", "start": 1599281, "end": 1599459}, {"filename": "/tulip4/sys/im/tiny_town/tile_0107.png", "start": 1599459, "end": 1599661}, {"filename": "/tulip4/sys/im/tiny_town/tile_0108.png", "start": 1599661, "end": 1599783}, {"filename": "/tulip4/sys/im/tiny_town/tile_0109.png", "start": 1599783, "end": 1599882}, {"filename": "/tulip4/sys/im/tiny_town/tile_0110.png", "start": 1599882, "end": 1600003}, {"filename": "/tulip4/sys/im/tiny_town/tile_0111.png", "start": 1600003, "end": 1600206}, {"filename": 
"/tulip4/sys/im/tiny_town/tile_0112.png", "start": 1600206, "end": 1600409}, {"filename": "/tulip4/sys/im/tiny_town/tile_0113.png", "start": 1600409, "end": 1600609}, {"filename": "/tulip4/sys/im/tiny_town/tile_0114.png", "start": 1600609, "end": 1600809}, {"filename": "/tulip4/sys/im/tiny_town/tile_0115.png", "start": 1600809, "end": 1600995}, {"filename": "/tulip4/sys/im/tiny_town/tile_0116.png", "start": 1600995, "end": 1601179}, {"filename": "/tulip4/sys/im/tiny_town/tile_0117.png", "start": 1601179, "end": 1601351}, {"filename": "/tulip4/sys/im/tiny_town/tile_0118.png", "start": 1601351, "end": 1601534}, {"filename": "/tulip4/sys/im/tiny_town/tile_0119.png", "start": 1601534, "end": 1601717}, {"filename": "/tulip4/sys/im/tiny_town/tile_0120.png", "start": 1601717, "end": 1601875}, {"filename": "/tulip4/sys/im/tiny_town/tile_0121.png", "start": 1601875, "end": 1602016}, {"filename": "/tulip4/sys/im/tiny_town/tile_0122.png", "start": 1602016, "end": 1602179}, {"filename": "/tulip4/sys/im/tiny_town/tile_0123.png", "start": 1602179, "end": 1602324}, {"filename": "/tulip4/sys/im/tiny_town/tile_0124.png", "start": 1602324, "end": 1602470}, {"filename": "/tulip4/sys/im/tiny_town/tile_0125.png", "start": 1602470, "end": 1602656}, {"filename": "/tulip4/sys/im/tiny_town/tile_0126.png", "start": 1602656, "end": 1602814}, {"filename": "/tulip4/sys/im/tiny_town/tile_0127.png", "start": 1602814, "end": 1602998}, {"filename": "/tulip4/sys/im/tiny_town/tile_0128.png", "start": 1602998, "end": 1603198}, {"filename": "/tulip4/sys/im/tiny_town/tile_0129.png", "start": 1603198, "end": 1603398}, {"filename": "/tulip4/sys/im/tiny_town/tile_0130.png", "start": 1603398, "end": 1603604}, {"filename": "/tulip4/sys/im/tiny_town/tile_0131.png", "start": 1603604, "end": 1603824}], "remote_package_size": 1603824});
 
   })();
 
-// end include: /var/folders/ys/g3zjs1s13z3chzx5zwnyk1bw0000gn/T/tmpub4_unxs.js
-// include: /var/folders/ys/g3zjs1s13z3chzx5zwnyk1bw0000gn/T/tmpozo9hvlk.js
 
     // All the pre-js content up to here must remain later on, we need to run
     // it.
-    if (Module['$ww'] || (typeof ENVIRONMENT_IS_PTHREAD != 'undefined' && ENVIRONMENT_IS_PTHREAD)) Module['preRun'] = [];
+    if (Module['ENVIRONMENT_IS_PTHREAD'] || Module['$ww']) Module['preRun'] = [];
     var necessaryPreJSTasks = Module['preRun'].slice();
-  // end include: /var/folders/ys/g3zjs1s13z3chzx5zwnyk1bw0000gn/T/tmpozo9hvlk.js
-// include: /var/folders/ys/g3zjs1s13z3chzx5zwnyk1bw0000gn/T/tmpwi308vqv.js
-
+  
     if (!Module['preRun']) throw 'Module.preRun should exist because file support used it; did a pre-js delete it?';
-    necessaryPreJSTasks.forEach((task) => {
+    necessaryPreJSTasks.forEach(function(task) {
       if (Module['preRun'].indexOf(task) < 0) throw 'All preRun tasks that exist before user pre-js code should remain after; did you replace Module or modify Module.preRun?';
     });
-  // end include: /var/folders/ys/g3zjs1s13z3chzx5zwnyk1bw0000gn/T/tmpwi308vqv.js
-
+  
 
 // Sometimes an existing Module object exists with properties
 // meant to overwrite the default module functionality. Here
@@ -285,6 +250,21 @@ var quit_ = (status, toThrow) => {
   throw toThrow;
 };
 
+// Determine the runtime environment we are in. You can customize this by
+// setting the ENVIRONMENT setting at compile time (see settings.js).
+
+// Attempt to auto-detect the environment
+var ENVIRONMENT_IS_WEB = typeof window == 'object';
+var ENVIRONMENT_IS_WORKER = typeof importScripts == 'function';
+// N.b. Electron.js environment is simultaneously a NODE-environment, but
+// also a web environment.
+var ENVIRONMENT_IS_NODE = typeof process == 'object' && typeof process.versions == 'object' && typeof process.versions.node == 'string';
+var ENVIRONMENT_IS_SHELL = !ENVIRONMENT_IS_WEB && !ENVIRONMENT_IS_NODE && !ENVIRONMENT_IS_WORKER;
+
+if (Module['ENVIRONMENT']) {
+  throw new Error('Module.ENVIRONMENT has been deprecated. To force the environment, use the ENVIRONMENT compile-time option (for example, -sENVIRONMENT=web or -sENVIRONMENT=node)');
+}
+
 // `/` should be present at the end if `scriptDirectory` is not empty
 var scriptDirectory = '';
 function locateFile(path) {
@@ -295,7 +275,9 @@ function locateFile(path) {
 }
 
 // Hooks that are implemented differently in different runtime environments.
-var readAsync, readBinary;
+var read_,
+    readAsync,
+    readBinary;
 
 if (ENVIRONMENT_IS_NODE) {
   if (typeof process == 'undefined' || !process.release || process.release.name !== 'node') throw new Error('not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)');
@@ -308,34 +290,50 @@ if (ENVIRONMENT_IS_NODE) {
     throw new Error('This emscripten-generated code requires node v16.0.0 (detected v' + nodeVersion + ')');
   }
 
+  // `require()` is no-op in an ESM module, use `createRequire()` to construct
+  // the require()` function.  This is only necessary for multi-environment
+  // builds, `-sENVIRONMENT=node` emits a static import declaration instead.
+  // TODO: Swap all `require()`'s with `import()`'s?
+  const { createRequire } = await import('module');
+  /** @suppress{duplicate} */
+  var require = createRequire(import.meta.url);
   // These modules will usually be used on Node.js. Load them eagerly to avoid
   // the complexity of lazy-loading.
   var fs = require('fs');
   var nodePath = require('path');
 
-  // EXPORT_ES6 + ENVIRONMENT_IS_NODE always requires use of import.meta.url,
-  // since there's no way getting the current absolute path of the module when
-  // support for that is not available.
-  scriptDirectory = require('url').fileURLToPath(new URL('./', import.meta.url)); // includes trailing slash
+  if (ENVIRONMENT_IS_WORKER) {
+    scriptDirectory = nodePath.dirname(scriptDirectory) + '/';
+  } else {
+    // EXPORT_ES6 + ENVIRONMENT_IS_NODE always requires use of import.meta.url,
+    // since there's no way getting the current absolute path of the module when
+    // support for that is not available.
+    scriptDirectory = require('url').fileURLToPath(new URL('./', import.meta.url)); // includes trailing slash
+  }
 
 // include: node_shell_read.js
-readBinary = (filename) => {
+read_ = (filename, binary) => {
   // We need to re-wrap `file://` strings to URLs. Normalizing isn't
   // necessary in that case, the path should already be absolute.
   filename = isFileURI(filename) ? new URL(filename) : nodePath.normalize(filename);
-  var ret = fs.readFileSync(filename);
+  return fs.readFileSync(filename, binary ? undefined : 'utf8');
+};
+
+readBinary = (filename) => {
+  var ret = read_(filename, true);
+  if (!ret.buffer) {
+    ret = new Uint8Array(ret);
+  }
   assert(ret.buffer);
   return ret;
 };
 
-readAsync = (filename, binary = true) => {
-  // See the comment in the `readBinary` function.
+readAsync = (filename, onload, onerror, binary = true) => {
+  // See the comment in the `read_` function.
   filename = isFileURI(filename) ? new URL(filename) : nodePath.normalize(filename);
-  return new Promise((resolve, reject) => {
-    fs.readFile(filename, binary ? undefined : 'utf8', (err, data) => {
-      if (err) reject(err);
-      else resolve(binary ? data.buffer : data);
-    });
+  fs.readFile(filename, binary ? undefined : 'utf8', (err, data) => {
+    if (err) onerror(err);
+    else onload(binary ? data.buffer : data);
   });
 };
 // end include: node_shell_read.js
@@ -352,11 +350,77 @@ readAsync = (filename, binary = true) => {
     throw toThrow;
   };
 
+  Module['inspect'] = () => '[Emscripten Module object]';
+
 } else
 if (ENVIRONMENT_IS_SHELL) {
 
   if ((typeof process == 'object' && typeof require === 'function') || typeof window == 'object' || typeof importScripts == 'function') throw new Error('not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)');
 
+  if (typeof read != 'undefined') {
+    read_ = read;
+  }
+
+  readBinary = (f) => {
+    if (typeof readbuffer == 'function') {
+      return new Uint8Array(readbuffer(f));
+    }
+    let data = read(f, 'binary');
+    assert(typeof data == 'object');
+    return data;
+  };
+
+  readAsync = (f, onload, onerror) => {
+    setTimeout(() => onload(readBinary(f)));
+  };
+
+  if (typeof clearTimeout == 'undefined') {
+    globalThis.clearTimeout = (id) => {};
+  }
+
+  if (typeof setTimeout == 'undefined') {
+    // spidermonkey lacks setTimeout but we use it above in readAsync.
+    globalThis.setTimeout = (f) => (typeof f == 'function') ? f() : abort();
+  }
+
+  if (typeof scriptArgs != 'undefined') {
+    arguments_ = scriptArgs;
+  } else if (typeof arguments != 'undefined') {
+    arguments_ = arguments;
+  }
+
+  if (typeof quit == 'function') {
+    quit_ = (status, toThrow) => {
+      // Unlike node which has process.exitCode, d8 has no such mechanism. So we
+      // have no way to set the exit code and then let the program exit with
+      // that code when it naturally stops running (say, when all setTimeouts
+      // have completed). For that reason, we must call `quit` - the only way to
+      // set the exit code - but quit also halts immediately.  To increase
+      // consistency with node (and the web) we schedule the actual quit call
+      // using a setTimeout to give the current stack and any exception handlers
+      // a chance to run.  This enables features such as addOnPostRun (which
+      // expected to be able to run code after main returns).
+      setTimeout(() => {
+        if (!(toThrow instanceof ExitStatus)) {
+          let toLog = toThrow;
+          if (toThrow && typeof toThrow == 'object' && toThrow.stack) {
+            toLog = [toThrow, toThrow.stack];
+          }
+          err(`exiting due to exception: ${toLog}`);
+        }
+        quit(status);
+      });
+      throw toThrow;
+    };
+  }
+
+  if (typeof print != 'undefined') {
+    // Prefer to use print/printErr where they exist, as they usually work better.
+    if (typeof console == 'undefined') console = /** @type{!Console} */({});
+    console.log = /** @type{!function(this:Console, ...*): undefined} */ (print);
+    console.warn = console.error = /** @type{!function(this:Console, ...*): undefined} */ (typeof printErr != 'undefined' ? printErr : print);
+  }
+
 } else
 
 // Note that this includes Node.js workers when relevant (pthreads is enabled).
@@ -370,8 +434,8 @@ if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) {
   }
   // When MODULARIZE, this JS may be executed later, after document.currentScript
   // is gone, so we saved it, and we use it here instead of any other info.
-  if (_scriptName) {
-    scriptDirectory = _scriptName;
+  if (_scriptDir) {
+    scriptDirectory = _scriptDir;
   }
   // blob urls look like blob:http://site.com/etc/etc and we cannot infer anything from them.
   // otherwise, slice off the final part of the url to find the script directory.
@@ -379,17 +443,26 @@ if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) {
   // and scriptDirectory will correctly be replaced with an empty string.
   // If scriptDirectory contains a query (starting with ?) or a fragment (starting with #),
   // they are removed because they could contain a slash.
-  if (scriptDirectory.startsWith('blob:')) {
-    scriptDirectory = '';
+  if (scriptDirectory.indexOf('blob:') !== 0) {
+    scriptDirectory = scriptDirectory.substr(0, scriptDirectory.replace(/[?#].*/, "").lastIndexOf('/')+1);
   } else {
-    scriptDirectory = scriptDirectory.substr(0, scriptDirectory.replace(/[?#].*/, '').lastIndexOf('/')+1);
+    scriptDirectory = '';
   }
 
   if (!(typeof window == 'object' || typeof importScripts == 'function')) throw new Error('not compiled for this environment (did you build to HTML and try to run it not on the web, or set ENVIRONMENT to something - like node - and run it someplace else - like on the web?)');
 
+  // Differentiate the Web Worker from the Node Worker case, as reading must
+  // be done differently.
   {
 // include: web_or_worker_shell_read.js
-if (ENVIRONMENT_IS_WORKER) {
+read_ = (url) => {
+    var xhr = new XMLHttpRequest();
+    xhr.open('GET', url, false);
+    xhr.send(null);
+    return xhr.responseText;
+  }
+
+  if (ENVIRONMENT_IS_WORKER) {
     readBinary = (url) => {
       var xhr = new XMLHttpRequest();
       xhr.open('GET', url, false);
@@ -399,35 +472,21 @@ if (ENVIRONMENT_IS_WORKER) {
     };
   }
 
-  readAsync = (url) => {
-    // Fetch has some additional restrictions over XHR, like it can't be used on a file:// url.
-    // See https://github.com/github/fetch/pull/92#issuecomment-140665932
-    // Cordova or Electron apps are typically loaded from a file:// url.
-    // So use XHR on webview if URL is a file URL.
-    if (isFileURI(url)) {
-      return new Promise((resolve, reject) => {
-        var xhr = new XMLHttpRequest();
-        xhr.open('GET', url, true);
-        xhr.responseType = 'arraybuffer';
-        xhr.onload = () => {
-          if (xhr.status == 200 || (xhr.status == 0 && xhr.response)) { // file URLs can return 0
-            resolve(xhr.response);
-            return;
-          }
-          reject(xhr.status);
-        };
-        xhr.onerror = reject;
-        xhr.send(null);
-      });
-    }
-    return fetch(url, { credentials: 'same-origin' })
-      .then((response) => {
-        if (response.ok) {
-          return response.arrayBuffer();
-        }
-        return Promise.reject(new Error(response.status + ' : ' + response.url));
-      })
-  };
+  readAsync = (url, onload, onerror) => {
+    var xhr = new XMLHttpRequest();
+    xhr.open('GET', url, true);
+    xhr.responseType = 'arraybuffer';
+    xhr.onload = () => {
+      if (xhr.status == 200 || (xhr.status == 0 && xhr.response)) { // file URLs can return 0
+        onload(xhr.response);
+        return;
+      }
+      onerror();
+    };
+    xhr.onerror = onerror;
+    xhr.send(null);
+  }
+
 // end include: web_or_worker_shell_read.js
   }
 } else
@@ -441,7 +500,7 @@ var err = Module['printErr'] || console.error.bind(console);
 // Merge back in the overrides
 Object.assign(Module, moduleOverrides);
 // Free the object hierarchy contained in the overrides, this lets the GC
-// reclaim data used.
+// reclaim data used e.g. in memoryInitializerRequest, which is a large typed array.
 moduleOverrides = null;
 checkIncomingModuleAPI();
 
@@ -454,18 +513,21 @@ if (Module['arguments']) arguments_ = Module['arguments'];legacyModuleProp('argu
 
 if (Module['thisProgram']) thisProgram = Module['thisProgram'];legacyModuleProp('thisProgram', 'thisProgram');
 
+if (Module['quit']) quit_ = Module['quit'];legacyModuleProp('quit', 'quit_');
+
 // perform assertions in shell.js after we set up out() and err(), as otherwise if an assertion fails it cannot print the message
 // Assertions on removed incoming Module JS APIs.
 assert(typeof Module['memoryInitializerPrefixURL'] == 'undefined', 'Module.memoryInitializerPrefixURL option was removed, use Module.locateFile instead');
 assert(typeof Module['pthreadMainPrefixURL'] == 'undefined', 'Module.pthreadMainPrefixURL option was removed, use Module.locateFile instead');
 assert(typeof Module['cdInitializerPrefixURL'] == 'undefined', 'Module.cdInitializerPrefixURL option was removed, use Module.locateFile instead');
 assert(typeof Module['filePackagePrefixURL'] == 'undefined', 'Module.filePackagePrefixURL option was removed, use Module.locateFile instead');
-assert(typeof Module['read'] == 'undefined', 'Module.read option was removed');
+assert(typeof Module['read'] == 'undefined', 'Module.read option was removed (modify read_ in JS)');
 assert(typeof Module['readAsync'] == 'undefined', 'Module.readAsync option was removed (modify readAsync in JS)');
 assert(typeof Module['readBinary'] == 'undefined', 'Module.readBinary option was removed (modify readBinary in JS)');
 assert(typeof Module['setWindowTitle'] == 'undefined', 'Module.setWindowTitle option was removed (modify emscripten_set_window_title in JS)');
 assert(typeof Module['TOTAL_MEMORY'] == 'undefined', 'Module.TOTAL_MEMORY has been renamed Module.INITIAL_MEMORY');
 legacyModuleProp('asm', 'wasmExports');
+legacyModuleProp('read', 'read_');
 legacyModuleProp('readAsync', 'readAsync');
 legacyModuleProp('readBinary', 'readBinary');
 legacyModuleProp('setWindowTitle', 'setWindowTitle');
@@ -479,10 +541,10 @@ var OPFS = 'OPFS is no longer included by default; build with -lopfs.js';
 
 var NODEFS = 'NODEFS is no longer included by default; build with -lnodefs.js';
 
-assert(!ENVIRONMENT_IS_SHELL, 'shell environment detected but not enabled at build time.  Add `shell` to `-sENVIRONMENT` to enable.');
+assert(!ENVIRONMENT_IS_SHELL, "shell environment detected but not enabled at build time.  Add 'shell' to `-sENVIRONMENT` to enable.");
 
-// end include: shell.js
 
+// end include: shell.js
 // include: preamble.js
 // === Preamble library stuff ===
 
@@ -494,10 +556,11 @@ assert(!ENVIRONMENT_IS_SHELL, 'shell environment detected but not enabled at bui
 // An online HTML version (which may be of a different version of Emscripten)
 //    is up at http://kripken.github.io/emscripten-site/docs/api_reference/preamble.js.html
 
-var wasmBinary = Module['wasmBinary'];legacyModuleProp('wasmBinary', 'wasmBinary');
+var wasmBinary; 
+if (Module['wasmBinary']) wasmBinary = Module['wasmBinary'];legacyModuleProp('wasmBinary', 'wasmBinary');
 
 if (typeof WebAssembly != 'object') {
-  err('no native wasm support detected');
+  abort('no native wasm support detected');
 }
 
 // include: base64Utils.js
@@ -577,7 +640,6 @@ var HEAP,
 /** @type {!Float64Array} */
   HEAPF64;
 
-// include: runtime_shared.js
 function updateMemoryViews() {
   var b = wasmMemory.buffer;
   Module['HEAP8'] = HEAP8 = new Int8Array(b);
@@ -590,7 +652,6 @@ function updateMemoryViews() {
   Module['HEAPF64'] = HEAPF64 = new Float64Array(b);
 }
 
-// end include: runtime_shared.js
 assert(!Module['STACK_SIZE'], 'STACK_SIZE can no longer be set at runtime.  Use -sSTACK_SIZE at link time')
 
 assert(typeof Int32Array != 'undefined' && typeof Float64Array !== 'undefined' && Int32Array.prototype.subarray != undefined && Int32Array.prototype.set != undefined,
@@ -638,6 +699,16 @@ function checkStackCookie() {
   }
 }
 // end include: runtime_stack_check.js
+// include: runtime_assertions.js
+// Endianness check
+(function() {
+  var h16 = new Int16Array(1);
+  var h8 = new Int8Array(h16.buffer);
+  h16[0] = 0x6373;
+  if (h8[0] !== 0x73 || h8[1] !== 0x63) throw 'Runtime error: expected the system to be little-endian! (Run with -sSUPPORT_BIG_ENDIAN to bypass)';
+})();
+
+// end include: runtime_assertions.js
 var __ATPRERUN__  = []; // functions called before the runtime is initialized
 var __ATINIT__    = []; // functions called during startup
 var __ATEXIT__    = []; // functions called during shutdown
@@ -662,7 +733,7 @@ function initRuntime() {
   checkStackCookie();
 
   
-if (!Module['noFSInit'] && !FS.initialized)
+if (!Module["noFSInit"] && !FS.init.initialized)
   FS.init();
 FS.ignorePermissions = false;
 
@@ -735,7 +806,9 @@ function getUniqueRunDependency(id) {
 function addRunDependency(id) {
   runDependencies++;
 
-  Module['monitorRunDependencies']?.(runDependencies);
+  if (Module['monitorRunDependencies']) {
+    Module['monitorRunDependencies'](runDependencies);
+  }
 
   if (id) {
     assert(!runDependencyTracking[id]);
@@ -769,7 +842,9 @@ function addRunDependency(id) {
 function removeRunDependency(id) {
   runDependencies--;
 
-  Module['monitorRunDependencies']?.(runDependencies);
+  if (Module['monitorRunDependencies']) {
+    Module['monitorRunDependencies'](runDependencies);
+  }
 
   if (id) {
     assert(runDependencyTracking[id]);
@@ -792,7 +867,9 @@ function removeRunDependency(id) {
 
 /** @param {string|number=} what */
 function abort(what) {
-  Module['onAbort']?.(what);
+  if (Module['onAbort']) {
+    Module['onAbort'](what);
+  }
 
   what = 'Aborted(' + what + ')';
   // TODO(sbc): Should we remove printing and leave it up to whoever
@@ -800,6 +877,7 @@ function abort(what) {
   err(what);
 
   ABORT = true;
+  EXITSTATUS = 1;
 
   if (what.indexOf('RuntimeError: unreachable') >= 0) {
     what += '. "unreachable" may be due to ASYNCIFY_STACK_SIZE not being large enough (try increasing it)';
@@ -815,7 +893,7 @@ function abort(what) {
   // allows this in the wasm spec.
 
   // Suppress closure compiler warning here. Closure compiler's builtin extern
-  // definition for WebAssembly.RuntimeError claims it takes no arguments even
+  // defintion for WebAssembly.RuntimeError claims it takes no arguments even
   // though it can.
   // TODO(https://github.com/google/closure-compiler/pull/3913): Remove if/when upstream closure gets fixed.
   /** @suppress {checkTypes} */
@@ -846,33 +924,28 @@ var isDataURI = (filename) => filename.startsWith(dataURIPrefix);
  */
 var isFileURI = (filename) => filename.startsWith('file://');
 // end include: URIUtils.js
-function createExportWrapper(name, nargs) {
-  return (...args) => {
+function createExportWrapper(name) {
+  return function() {
     assert(runtimeInitialized, `native function \`${name}\` called before runtime initialization`);
     var f = wasmExports[name];
     assert(f, `exported native function \`${name}\` not found`);
-    // Only assert for too many arguments. Too few can be valid since the missing arguments will be zero filled.
-    assert(args.length <= nargs, `native function \`${name}\` called with ${args.length} args but expects ${nargs}`);
-    return f(...args);
+    return f.apply(null, arguments);
   };
 }
 
 // include: runtime_exceptions.js
 // end include: runtime_exceptions.js
-function findWasmBinary() {
-  if (Module['locateFile']) {
-    var f = 'micropython.wasm';
-    if (!isDataURI(f)) {
-      return locateFile(f);
-    }
-    return f;
+var wasmBinaryFile;
+if (Module['locateFile']) {
+  wasmBinaryFile = 'micropython.wasm';
+  if (!isDataURI(wasmBinaryFile)) {
+    wasmBinaryFile = locateFile(wasmBinaryFile);
   }
+} else {
   // Use bundler-friendly `new URL(..., import.meta.url)` pattern; works in browsers too.
-  return new URL('micropython.wasm', import.meta.url).href;
+  wasmBinaryFile = new URL('micropython.wasm', import.meta.url).href;
 }
 
-var wasmBinaryFile;
-
 function getBinarySync(file) {
   if (file == wasmBinaryFile && wasmBinary) {
     return new Uint8Array(wasmBinary);
@@ -880,19 +953,33 @@ function getBinarySync(file) {
   if (readBinary) {
     return readBinary(file);
   }
-  throw 'both async and sync fetching of the wasm failed';
+  throw "both async and sync fetching of the wasm failed";
 }
 
 function getBinaryPromise(binaryFile) {
-  // If we don't have the binary yet, load it asynchronously using readAsync.
+  // If we don't have the binary yet, try to load it asynchronously.
+  // Fetch has some additional restrictions over XHR, like it can't be used on a file:// url.
+  // See https://github.com/github/fetch/pull/92#issuecomment-140665932
+  // Cordova or Electron apps are typically loaded from a file:// url.
+  // So use fetch if it is available and the url is not a file, otherwise fall back to XHR.
   if (!wasmBinary
-      ) {
-    // Fetch the binary using readAsync
-    return readAsync(binaryFile).then(
-      (response) => new Uint8Array(/** @type{!ArrayBuffer} */(response)),
-      // Fall back to getBinarySync if readAsync fails
-      () => getBinarySync(binaryFile)
-    );
+      && (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER)) {
+    if (typeof fetch == 'function'
+      && !isFileURI(binaryFile)
+    ) {
+      return fetch(binaryFile, { credentials: 'same-origin' }).then((response) => {
+        if (!response['ok']) {
+          throw "failed to load wasm binary file at '" + binaryFile + "'";
+        }
+        return response['arrayBuffer']();
+      }).catch(() => getBinarySync(binaryFile));
+    }
+    else if (readAsync) {
+      // fetch is not available or url is file => try XHR (readAsync uses XHR internally)
+      return new Promise((resolve, reject) => {
+        readAsync(binaryFile, (response) => resolve(new Uint8Array(/** @type{!ArrayBuffer} */(response))), reject)
+      });
+    }
   }
 
   // Otherwise, getBinarySync should be able to get it synchronously
@@ -902,6 +989,8 @@ function getBinaryPromise(binaryFile) {
 function instantiateArrayBuffer(binaryFile, imports, receiver) {
   return getBinaryPromise(binaryFile).then((binary) => {
     return WebAssembly.instantiate(binary, imports);
+  }).then((instance) => {
+    return instance;
   }).then(receiver, (reason) => {
     err(`failed to asynchronously prepare wasm: ${reason}`);
 
@@ -949,22 +1038,14 @@ function instantiateAsync(binary, binaryFile, imports, callback) {
   return instantiateArrayBuffer(binaryFile, imports, callback);
 }
 
-function getWasmImports() {
-  // instrumenting imports is used in asyncify in two ways: to add assertions
-  // that check for proper import use, and for ASYNCIFY=2 we use them to set up
-  // the Promise API on the import side.
-  Asyncify.instrumentWasmImports(wasmImports);
-  // prepare imports
-  return {
-    'env': wasmImports,
-    'wasi_snapshot_preview1': wasmImports,
-  }
-}
-
 // Create the wasm instance.
 // Receives the wasm imports, returns the exports.
 function createWasm() {
-  var info = getWasmImports();
+  // prepare imports
+  var info = {
+    'env': wasmImports,
+    'wasi_snapshot_preview1': wasmImports,
+  };
   // Load the wasm module and create an instance of using native support in the JS engine.
   // handle a generated wasm instance, receiving its exports and
   // performing other necessary setup
@@ -978,12 +1059,16 @@ function createWasm() {
 
     wasmMemory = wasmExports['memory'];
     
-    assert(wasmMemory, 'memory not found in wasm exports');
+    assert(wasmMemory, "memory not found in wasm exports");
+    // This assertion doesn't hold when emscripten is run in --post-link
+    // mode.
+    // TODO(sbc): Read INITIAL_MEMORY out of the wasm file in post-link mode.
+    //assert(wasmMemory.buffer.byteLength === 134217728);
     updateMemoryViews();
 
     wasmTable = wasmExports['__indirect_function_table'];
     
-    assert(wasmTable, 'table not found in wasm exports');
+    assert(wasmTable, "table not found in wasm exports");
 
     addOnInit(wasmExports['__wasm_call_ctors']);
 
@@ -1015,6 +1100,7 @@ function createWasm() {
   // Also pthreads and wasm workers initialize the wasm instance through this
   // path.
   if (Module['instantiateWasm']) {
+
     try {
       return Module['instantiateWasm'](info, receiveInstance);
     } catch(e) {
@@ -1024,8 +1110,6 @@ function createWasm() {
     }
   }
 
-  if (!wasmBinaryFile) wasmBinaryFile = findWasmBinary();
-
   // If instantiation fails, reject the module ready promise.
   instantiateAsync(wasmBinary, wasmBinaryFile, info, receiveInstantiationResult).catch(readyPromiseReject);
   return {}; // no exports yet; we'll fill them in later
@@ -1036,20 +1120,12 @@ var tempDouble;
 var tempI64;
 
 // include: runtime_debug.js
-// Endianness check
-(() => {
-  var h16 = new Int16Array(1);
-  var h8 = new Int8Array(h16.buffer);
-  h16[0] = 0x6373;
-  if (h8[0] !== 0x73 || h8[1] !== 0x63) throw 'Runtime error: expected the system to be little-endian! (Run with -sSUPPORT_BIG_ENDIAN to bypass)';
-})();
-
-function legacyModuleProp(prop, newName, incoming=true) {
+function legacyModuleProp(prop, newName, incomming=true) {
   if (!Object.getOwnPropertyDescriptor(Module, prop)) {
     Object.defineProperty(Module, prop, {
       configurable: true,
       get() {
-        let extra = incoming ? ' (the initial value can be provided on Module, but after startup the value is only looked for on a local variable of that name)' : '';
+        let extra = incomming ? ' (the initial value can be provided on Module, but after startup the value is only looked for on a local variable of that name)' : '';
         abort(`\`Module.${prop}\` has been replaced by \`${newName}\`` + extra);
 
       }
@@ -1077,7 +1153,7 @@ function isExportedByForceFilesystem(name) {
 }
 
 function missingGlobal(sym, msg) {
-  if (typeof globalThis != 'undefined') {
+  if (typeof globalThis !== 'undefined') {
     Object.defineProperty(globalThis, sym, {
       configurable: true,
       get() {
@@ -1092,7 +1168,7 @@ missingGlobal('buffer', 'Please use HEAP8.buffer or wasmMemory.buffer');
 missingGlobal('asm', 'Please use wasmExports instead');
 
 function missingLibrarySymbol(sym) {
-  if (typeof globalThis != 'undefined' && !Object.getOwnPropertyDescriptor(globalThis, sym)) {
+  if (typeof globalThis !== 'undefined' && !Object.getOwnPropertyDescriptor(globalThis, sym)) {
     Object.defineProperty(globalThis, sym, {
       configurable: true,
       get() {
@@ -1115,7 +1191,7 @@ function missingLibrarySymbol(sym) {
       }
     });
   }
-  // Any symbol that is not included from the JS library is also (by definition)
+  // Any symbol that is not included from the JS libary is also (by definition)
   // not exported on the Module object.
   unexportedRuntimeSymbol(sym);
 }
@@ -1136,33 +1212,33 @@ function unexportedRuntimeSymbol(sym) {
 }
 
 // Used by XXXXX_DEBUG settings to output debug messages.
-function dbg(...args) {
+function dbg(text) {
   // TODO(sbc): Make this configurable somehow.  Its not always convenient for
   // logging to show up as warnings.
-  console.warn(...args);
+  console.warn.apply(console, arguments);
 }
 // end include: runtime_debug.js
 // === Body ===
 
 var ASM_CONSTS = {
-  67601776: () => { try { FS.mkdir('/tulip4/user'); } catch (err) { console.log('tulip4/user already exist'); } FS.mount(IDBFS, {autoPersist:true}, '/tulip4/user'); FS.syncfs(true, function (err) { }); },  
- 67601958: ($0, $1) => { if(midiOutputDevice != null) { midiOutputDevice.send(HEAPU8.subarray($0, $0 + $1)); } },  
- 67602044: ($0) => { var str = UTF8ToString($0) + '\n\n' + 'Abort/Retry/Ignore/AlwaysIgnore? [ariA] :'; var reply = window.prompt(str, "i"); if (reply === null) { reply = "i"; } return allocate(intArrayFromString(reply), 'i8', ALLOC_NORMAL); },  
- 67602269: () => { if (typeof(AudioContext) !== 'undefined') { return true; } else if (typeof(webkitAudioContext) !== 'undefined') { return true; } return false; },  
- 67602416: () => { if ((typeof(navigator.mediaDevices) !== 'undefined') && (typeof(navigator.mediaDevices.getUserMedia) !== 'undefined')) { return true; } else if (typeof(navigator.webkitGetUserMedia) !== 'undefined') { return true; } return false; },  
- 67602650: ($0) => { if(typeof(Module['SDL2']) === 'undefined') { Module['SDL2'] = {}; } var SDL2 = Module['SDL2']; if (!$0) { SDL2.audio = {}; } else { SDL2.capture = {}; } if (!SDL2.audioContext) { if (typeof(AudioContext) !== 'undefined') { SDL2.audioContext = new AudioContext(); } else if (typeof(webkitAudioContext) !== 'undefined') { SDL2.audioContext = new webkitAudioContext(); } if (SDL2.audioContext) { autoResumeAudioContext(SDL2.audioContext); } } return SDL2.audioContext === undefined ? -1 : 0; },  
- 67603143: () => { var SDL2 = Module['SDL2']; return SDL2.audioContext.sampleRate; },  
- 67603211: ($0, $1, $2, $3) => { var SDL2 = Module['SDL2']; var have_microphone = function(stream) { if (SDL2.capture.silenceTimer !== undefined) { clearTimeout(SDL2.capture.silenceTimer); SDL2.capture.silenceTimer = undefined; } SDL2.capture.mediaStreamNode = SDL2.audioContext.createMediaStreamSource(stream); SDL2.capture.scriptProcessorNode = SDL2.audioContext.createScriptProcessor($1, $0, 1); SDL2.capture.scriptProcessorNode.onaudioprocess = function(audioProcessingEvent) { if ((SDL2 === undefined) || (SDL2.capture === undefined)) { return; } audioProcessingEvent.outputBuffer.getChannelData(0).fill(0.0); SDL2.capture.currentCaptureBuffer = audioProcessingEvent.inputBuffer; dynCall('vi', $2, [$3]); }; SDL2.capture.mediaStreamNode.connect(SDL2.capture.scriptProcessorNode); SDL2.capture.scriptProcessorNode.connect(SDL2.audioContext.destination); SDL2.capture.stream = stream; }; var no_microphone = function(error) { }; SDL2.capture.silenceBuffer = SDL2.audioContext.createBuffer($0, $1, SDL2.audioContext.sampleRate); SDL2.capture.silenceBuffer.getChannelData(0).fill(0.0); var silence_callback = function() { SDL2.capture.currentCaptureBuffer = SDL2.capture.silenceBuffer; dynCall('vi', $2, [$3]); }; SDL2.capture.silenceTimer = setTimeout(silence_callback, ($1 / SDL2.audioContext.sampleRate) * 1000); if ((navigator.mediaDevices !== undefined) && (navigator.mediaDevices.getUserMedia !== undefined)) { navigator.mediaDevices.getUserMedia({ audio: true, video: false }).then(have_microphone).catch(no_microphone); } else if (navigator.webkitGetUserMedia !== undefined) { navigator.webkitGetUserMedia({ audio: true, video: false }, have_microphone, no_microphone); } },  
- 67604863: ($0, $1, $2, $3) => { var SDL2 = Module['SDL2']; SDL2.audio.scriptProcessorNode = SDL2.audioContext['createScriptProcessor']($1, 0, $0); SDL2.audio.scriptProcessorNode['onaudioprocess'] = function (e) { if ((SDL2 === undefined) || (SDL2.audio === undefined)) { return; } SDL2.audio.currentOutputBuffer = e['outputBuffer']; dynCall('vi', $2, [$3]); }; SDL2.audio.scriptProcessorNode['connect'](SDL2.audioContext['destination']); },  
- 67605273: ($0, $1) => { var SDL2 = Module['SDL2']; var numChannels = SDL2.capture.currentCaptureBuffer.numberOfChannels; for (var c = 0; c < numChannels; ++c) { var channelData = SDL2.capture.currentCaptureBuffer.getChannelData(c); if (channelData.length != $1) { throw 'Web Audio capture buffer length mismatch! Destination size: ' + channelData.length + ' samples vs expected ' + $1 + ' samples!'; } if (numChannels == 1) { for (var j = 0; j < $1; ++j) { setValue($0 + (j * 4), channelData[j], 'float'); } } else { for (var j = 0; j < $1; ++j) { setValue($0 + (((j * numChannels) + c) * 4), channelData[j], 'float'); } } } },  
- 67605878: ($0, $1) => { var SDL2 = Module['SDL2']; var numChannels = SDL2.audio.currentOutputBuffer['numberOfChannels']; for (var c = 0; c < numChannels; ++c) { var channelData = SDL2.audio.currentOutputBuffer['getChannelData'](c); if (channelData.length != $1) { throw 'Web Audio output buffer length mismatch! Destination size: ' + channelData.length + ' samples vs expected ' + $1 + ' samples!'; } for (var j = 0; j < $1; ++j) { channelData[j] = HEAPF32[$0 + ((j*numChannels + c) << 2) >> 2]; } } },  
- 67606358: ($0) => { var SDL2 = Module['SDL2']; if ($0) { if (SDL2.capture.silenceTimer !== undefined) { clearTimeout(SDL2.capture.silenceTimer); } if (SDL2.capture.stream !== undefined) { var tracks = SDL2.capture.stream.getAudioTracks(); for (var i = 0; i < tracks.length; i++) { SDL2.capture.stream.removeTrack(tracks[i]); } SDL2.capture.stream = undefined; } if (SDL2.capture.scriptProcessorNode !== undefined) { SDL2.capture.scriptProcessorNode.onaudioprocess = function(audioProcessingEvent) {}; SDL2.capture.scriptProcessorNode.disconnect(); SDL2.capture.scriptProcessorNode = undefined; } if (SDL2.capture.mediaStreamNode !== undefined) { SDL2.capture.mediaStreamNode.disconnect(); SDL2.capture.mediaStreamNode = undefined; } if (SDL2.capture.silenceBuffer !== undefined) { SDL2.capture.silenceBuffer = undefined } SDL2.capture = undefined; } else { if (SDL2.audio.scriptProcessorNode != undefined) { SDL2.audio.scriptProcessorNode.disconnect(); SDL2.audio.scriptProcessorNode = undefined; } SDL2.audio = undefined; } if ((SDL2.audioContext !== undefined) && (SDL2.audio === undefined) && (SDL2.capture === undefined)) { SDL2.audioContext.close(); SDL2.audioContext = undefined; } },  
- 67607530: ($0, $1, $2) => { var w = $0; var h = $1; var pixels = $2; if (!Module['SDL2']) Module['SDL2'] = {}; var SDL2 = Module['SDL2']; if (SDL2.ctxCanvas !== Module['canvas']) { SDL2.ctx = Module['createContext'](Module['canvas'], false, true); SDL2.ctxCanvas = Module['canvas']; } if (SDL2.w !== w || SDL2.h !== h || SDL2.imageCtx !== SDL2.ctx) { SDL2.image = SDL2.ctx.createImageData(w, h); SDL2.w = w; SDL2.h = h; SDL2.imageCtx = SDL2.ctx; } var data = SDL2.image.data; var src = pixels >> 2; var dst = 0; var num; if (typeof CanvasPixelArray !== 'undefined' && data instanceof CanvasPixelArray) { num = data.length; while (dst < num) { var val = HEAP32[src]; data[dst ] = val & 0xff; data[dst+1] = (val >> 8) & 0xff; data[dst+2] = (val >> 16) & 0xff; data[dst+3] = 0xff; src++; dst += 4; } } else { if (SDL2.data32Data !== data) { SDL2.data32 = new Int32Array(data.buffer); SDL2.data8 = new Uint8Array(data.buffer); SDL2.data32Data = data; } var data32 = SDL2.data32; num = data32.length; data32.set(HEAP32.subarray(src, src + num)); var data8 = SDL2.data8; var i = 3; var j = i + 4*num; if (num % 8 == 0) { while (i < j) { data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; } } else { while (i < j) { data8[i] = 0xff; i = i + 4 | 0; } } } SDL2.ctx.putImageData(SDL2.image, 0, 0); },  
- 67608999: ($0, $1, $2, $3, $4) => { var w = $0; var h = $1; var hot_x = $2; var hot_y = $3; var pixels = $4; var canvas = document.createElement("canvas"); canvas.width = w; canvas.height = h; var ctx = canvas.getContext("2d"); var image = ctx.createImageData(w, h); var data = image.data; var src = pixels >> 2; var dst = 0; var num; if (typeof CanvasPixelArray !== 'undefined' && data instanceof CanvasPixelArray) { num = data.length; while (dst < num) { var val = HEAP32[src]; data[dst ] = val & 0xff; data[dst+1] = (val >> 8) & 0xff; data[dst+2] = (val >> 16) & 0xff; data[dst+3] = (val >> 24) & 0xff; src++; dst += 4; } } else { var data32 = new Int32Array(data.buffer); num = data32.length; data32.set(HEAP32.subarray(src, src + num)); } ctx.putImageData(image, 0, 0); var url = hot_x === 0 && hot_y === 0 ? "url(" + canvas.toDataURL() + "), auto" : "url(" + canvas.toDataURL() + ") " + hot_x + " " + hot_y + ", auto"; var urlBuf = _malloc(url.length + 1); stringToUTF8(url, urlBuf, url.length + 1); return urlBuf; },  
- 67609988: ($0) => { if (Module['canvas']) { Module['canvas'].style['cursor'] = UTF8ToString($0); } },  
- 67610071: () => { if (Module['canvas']) { Module['canvas'].style['cursor'] = 'none'; } },  
- 67610140: () => { return window.innerWidth; },  
- 67610170: () => { return window.innerHeight; }
+  67663616: () => { try { FS.mkdir('/tulip4/user'); } catch (err) { console.log('tulip4/user already exist'); } FS.mount(IDBFS, {autoPersist:true}, '/tulip4/user'); FS.syncfs(true, function (err) { }); },  
+ 67663798: ($0, $1) => { if(midiOutputDevice != null) { midiOutputDevice.send(HEAPU8.subarray($0, $0 + $1)); } },  
+ 67663884: ($0) => { var str = UTF8ToString($0) + '\n\n' + 'Abort/Retry/Ignore/AlwaysIgnore? [ariA] :'; var reply = window.prompt(str, "i"); if (reply === null) { reply = "i"; } return allocate(intArrayFromString(reply), 'i8', ALLOC_NORMAL); },  
+ 67664109: () => { if (typeof(AudioContext) !== 'undefined') { return true; } else if (typeof(webkitAudioContext) !== 'undefined') { return true; } return false; },  
+ 67664256: () => { if ((typeof(navigator.mediaDevices) !== 'undefined') && (typeof(navigator.mediaDevices.getUserMedia) !== 'undefined')) { return true; } else if (typeof(navigator.webkitGetUserMedia) !== 'undefined') { return true; } return false; },  
+ 67664490: ($0) => { if(typeof(Module['SDL2']) === 'undefined') { Module['SDL2'] = {}; } var SDL2 = Module['SDL2']; if (!$0) { SDL2.audio = {}; } else { SDL2.capture = {}; } if (!SDL2.audioContext) { if (typeof(AudioContext) !== 'undefined') { SDL2.audioContext = new AudioContext(); } else if (typeof(webkitAudioContext) !== 'undefined') { SDL2.audioContext = new webkitAudioContext(); } if (SDL2.audioContext) { autoResumeAudioContext(SDL2.audioContext); } } return SDL2.audioContext === undefined ? -1 : 0; },  
+ 67664983: () => { var SDL2 = Module['SDL2']; return SDL2.audioContext.sampleRate; },  
+ 67665051: ($0, $1, $2, $3) => { var SDL2 = Module['SDL2']; var have_microphone = function(stream) { if (SDL2.capture.silenceTimer !== undefined) { clearTimeout(SDL2.capture.silenceTimer); SDL2.capture.silenceTimer = undefined; } SDL2.capture.mediaStreamNode = SDL2.audioContext.createMediaStreamSource(stream); SDL2.capture.scriptProcessorNode = SDL2.audioContext.createScriptProcessor($1, $0, 1); SDL2.capture.scriptProcessorNode.onaudioprocess = function(audioProcessingEvent) { if ((SDL2 === undefined) || (SDL2.capture === undefined)) { return; } audioProcessingEvent.outputBuffer.getChannelData(0).fill(0.0); SDL2.capture.currentCaptureBuffer = audioProcessingEvent.inputBuffer; dynCall('vi', $2, [$3]); }; SDL2.capture.mediaStreamNode.connect(SDL2.capture.scriptProcessorNode); SDL2.capture.scriptProcessorNode.connect(SDL2.audioContext.destination); SDL2.capture.stream = stream; }; var no_microphone = function(error) { }; SDL2.capture.silenceBuffer = SDL2.audioContext.createBuffer($0, $1, SDL2.audioContext.sampleRate); SDL2.capture.silenceBuffer.getChannelData(0).fill(0.0); var silence_callback = function() { SDL2.capture.currentCaptureBuffer = SDL2.capture.silenceBuffer; dynCall('vi', $2, [$3]); }; SDL2.capture.silenceTimer = setTimeout(silence_callback, ($1 / SDL2.audioContext.sampleRate) * 1000); if ((navigator.mediaDevices !== undefined) && (navigator.mediaDevices.getUserMedia !== undefined)) { navigator.mediaDevices.getUserMedia({ audio: true, video: false }).then(have_microphone).catch(no_microphone); } else if (navigator.webkitGetUserMedia !== undefined) { navigator.webkitGetUserMedia({ audio: true, video: false }, have_microphone, no_microphone); } },  
+ 67666703: ($0, $1, $2, $3) => { var SDL2 = Module['SDL2']; SDL2.audio.scriptProcessorNode = SDL2.audioContext['createScriptProcessor']($1, 0, $0); SDL2.audio.scriptProcessorNode['onaudioprocess'] = function (e) { if ((SDL2 === undefined) || (SDL2.audio === undefined)) { return; } SDL2.audio.currentOutputBuffer = e['outputBuffer']; dynCall('vi', $2, [$3]); }; SDL2.audio.scriptProcessorNode['connect'](SDL2.audioContext['destination']); },  
+ 67667113: ($0, $1) => { var SDL2 = Module['SDL2']; var numChannels = SDL2.capture.currentCaptureBuffer.numberOfChannels; for (var c = 0; c < numChannels; ++c) { var channelData = SDL2.capture.currentCaptureBuffer.getChannelData(c); if (channelData.length != $1) { throw 'Web Audio capture buffer length mismatch! Destination size: ' + channelData.length + ' samples vs expected ' + $1 + ' samples!'; } if (numChannels == 1) { for (var j = 0; j < $1; ++j) { setValue($0 + (j * 4), channelData[j], 'float'); } } else { for (var j = 0; j < $1; ++j) { setValue($0 + (((j * numChannels) + c) * 4), channelData[j], 'float'); } } } },  
+ 67667718: ($0, $1) => { var SDL2 = Module['SDL2']; var numChannels = SDL2.audio.currentOutputBuffer['numberOfChannels']; for (var c = 0; c < numChannels; ++c) { var channelData = SDL2.audio.currentOutputBuffer['getChannelData'](c); if (channelData.length != $1) { throw 'Web Audio output buffer length mismatch! Destination size: ' + channelData.length + ' samples vs expected ' + $1 + ' samples!'; } for (var j = 0; j < $1; ++j) { channelData[j] = HEAPF32[$0 + ((j*numChannels + c) << 2) >> 2]; } } },  
+ 67668198: ($0) => { var SDL2 = Module['SDL2']; if ($0) { if (SDL2.capture.silenceTimer !== undefined) { clearTimeout(SDL2.capture.silenceTimer); } if (SDL2.capture.stream !== undefined) { var tracks = SDL2.capture.stream.getAudioTracks(); for (var i = 0; i < tracks.length; i++) { SDL2.capture.stream.removeTrack(tracks[i]); } SDL2.capture.stream = undefined; } if (SDL2.capture.scriptProcessorNode !== undefined) { SDL2.capture.scriptProcessorNode.onaudioprocess = function(audioProcessingEvent) {}; SDL2.capture.scriptProcessorNode.disconnect(); SDL2.capture.scriptProcessorNode = undefined; } if (SDL2.capture.mediaStreamNode !== undefined) { SDL2.capture.mediaStreamNode.disconnect(); SDL2.capture.mediaStreamNode = undefined; } if (SDL2.capture.silenceBuffer !== undefined) { SDL2.capture.silenceBuffer = undefined } SDL2.capture = undefined; } else { if (SDL2.audio.scriptProcessorNode != undefined) { SDL2.audio.scriptProcessorNode.disconnect(); SDL2.audio.scriptProcessorNode = undefined; } SDL2.audio = undefined; } if ((SDL2.audioContext !== undefined) && (SDL2.audio === undefined) && (SDL2.capture === undefined)) { SDL2.audioContext.close(); SDL2.audioContext = undefined; } },  
+ 67669370: ($0, $1, $2) => { var w = $0; var h = $1; var pixels = $2; if (!Module['SDL2']) Module['SDL2'] = {}; var SDL2 = Module['SDL2']; if (SDL2.ctxCanvas !== Module['canvas']) { SDL2.ctx = Module['createContext'](Module['canvas'], false, true); SDL2.ctxCanvas = Module['canvas']; } if (SDL2.w !== w || SDL2.h !== h || SDL2.imageCtx !== SDL2.ctx) { SDL2.image = SDL2.ctx.createImageData(w, h); SDL2.w = w; SDL2.h = h; SDL2.imageCtx = SDL2.ctx; } var data = SDL2.image.data; var src = pixels >> 2; var dst = 0; var num; if (typeof CanvasPixelArray !== 'undefined' && data instanceof CanvasPixelArray) { num = data.length; while (dst < num) { var val = HEAP32[src]; data[dst ] = val & 0xff; data[dst+1] = (val >> 8) & 0xff; data[dst+2] = (val >> 16) & 0xff; data[dst+3] = 0xff; src++; dst += 4; } } else { if (SDL2.data32Data !== data) { SDL2.data32 = new Int32Array(data.buffer); SDL2.data8 = new Uint8Array(data.buffer); SDL2.data32Data = data; } var data32 = SDL2.data32; num = data32.length; data32.set(HEAP32.subarray(src, src + num)); var data8 = SDL2.data8; var i = 3; var j = i + 4*num; if (num % 8 == 0) { while (i < j) { data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; data8[i] = 0xff; i = i + 4 | 0; } } else { while (i < j) { data8[i] = 0xff; i = i + 4 | 0; } } } SDL2.ctx.putImageData(SDL2.image, 0, 0); },  
+ 67670839: ($0, $1, $2, $3, $4) => { var w = $0; var h = $1; var hot_x = $2; var hot_y = $3; var pixels = $4; var canvas = document.createElement("canvas"); canvas.width = w; canvas.height = h; var ctx = canvas.getContext("2d"); var image = ctx.createImageData(w, h); var data = image.data; var src = pixels >> 2; var dst = 0; var num; if (typeof CanvasPixelArray !== 'undefined' && data instanceof CanvasPixelArray) { num = data.length; while (dst < num) { var val = HEAP32[src]; data[dst ] = val & 0xff; data[dst+1] = (val >> 8) & 0xff; data[dst+2] = (val >> 16) & 0xff; data[dst+3] = (val >> 24) & 0xff; src++; dst += 4; } } else { var data32 = new Int32Array(data.buffer); num = data32.length; data32.set(HEAP32.subarray(src, src + num)); } ctx.putImageData(image, 0, 0); var url = hot_x === 0 && hot_y === 0 ? "url(" + canvas.toDataURL() + "), auto" : "url(" + canvas.toDataURL() + ") " + hot_x + " " + hot_y + ", auto"; var urlBuf = _malloc(url.length + 1); stringToUTF8(url, urlBuf, url.length + 1); return urlBuf; },  
+ 67671828: ($0) => { if (Module['canvas']) { Module['canvas'].style['cursor'] = UTF8ToString($0); } },  
+ 67671911: () => { if (Module['canvas']) { Module['canvas'].style['cursor'] = 'none'; } },  
+ 67671980: () => { return window.innerWidth; },  
+ 67672010: () => { return window.innerHeight; }
 };
 function proxy_convert_mp_to_js_then_js_to_mp_obj_jsside(out) { const ret = proxy_convert_mp_to_js_obj_jsside(out); proxy_convert_js_to_mp_obj_jsside_force_double_proxy(ret, out); }
 function proxy_convert_mp_to_js_then_js_to_js_then_js_to_mp_obj_jsside(out) { const ret = proxy_convert_mp_to_js_obj_jsside(out); const js_obj = PyProxy.toJs(ret); proxy_convert_js_to_mp_obj_jsside(js_obj, out); }
@@ -1189,8 +1265,8 @@ function js_then_reject(ret_value,reject) { let ret_value_js; try { ret_value_js
 function js_then_continue(jsref,py_resume,resolve,reject,out) { const py_resume_js = proxy_convert_mp_to_js_obj_jsside(py_resume); const resolve_js = proxy_convert_mp_to_js_obj_jsside(resolve); const reject_js = proxy_convert_mp_to_js_obj_jsside(reject); const ret = proxy_js_ref[jsref].then( (result) => { py_resume_js(result, null, resolve_js, reject_js); }, (reason) => { py_resume_js(null, reason, resolve_js, reject_js); }, ); proxy_convert_js_to_mp_obj_jsside(ret, out); }
 function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_mp_to_js_obj_jsside(out_set); const promise = new Promise(out_set_js); proxy_convert_js_to_mp_obj_jsside(promise, out_promise); }
 
-// end include: preamble.js
 
+// end include: preamble.js
 
   /** @constructor */
   function ExitStatus(status) {
@@ -1199,6 +1275,25 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       this.status = status;
     }
 
+  var listenOnce = (object, event, func) => { // attach func to object as a listener for event, registered with the 'once' option
+      object.addEventListener(event, func, { 'once': true }); // 'once': the listener is removed automatically after its first invocation
+    };
+  /** Registers one-shot keydown/mousedown/touchstart listeners on each element that call ctx.resume() when ctx.state is 'suspended'. @param {Object=} elements optional list of listener targets; when falsy, [document, the element with id 'canvas'] is used */
+  var autoResumeAudioContext = (ctx, elements) => {
+      if (!elements) { // no targets supplied: fall back to the document and the 'canvas' element
+        elements = [document, document.getElementById('canvas')];
+      }
+      ['keydown', 'mousedown', 'touchstart'].forEach((event) => { // one listener per (event type, element) pair
+        elements.forEach((element) => {
+          if (element) { // skip falsy entries, e.g. when getElementById('canvas') found nothing
+            listenOnce(element, event, () => {
+              if (ctx.state === 'suspended') ctx.resume(); // only resume a context that is currently suspended
+            });
+          }
+        });
+      });
+    };
+
   var callRuntimeCallbacks = (callbacks) => {
       while (callbacks.length > 0) {
         // Pass the module as the first argument.
@@ -1206,6 +1301,36 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       }
     };
 
+  var dynCallLegacy = (sig, ptr, args) => { // calls Module['dynCall_' + sig] with ptr followed by the entries of args, and returns its result
+      assert(('dynCall_' + sig) in Module, `bad function pointer type - dynCall function not found for sig '${sig}'`);
+      if (args && args.length) {
+        // j (64-bit integer) must be passed in as two numbers [low 32, high 32], so each 'j' after the first signature character counts as two expected args.
+        assert(args.length === sig.substring(1).replace(/j/g, '--').length);
+      } else {
+        assert(sig.length == 1); // no args given: the signature must consist of a single character only
+      }
+      var f = Module['dynCall_' + sig]; // the per-signature helper looked up on Module
+      return args && args.length ? f.apply(null, [ptr].concat(args)) : f.call(null, ptr); // ptr is always the first argument passed to the helper
+    };
+  
+  var wasmTableMirror = [];
+  
+  var wasmTable;
+  var getWasmTableEntry = (funcPtr) => { // returns the function stored at index funcPtr, using wasmTableMirror as a cache in front of wasmTable.get
+      var func = wasmTableMirror[funcPtr];
+      if (!func) { // cache miss: fetch from wasmTable and remember the result
+        if (funcPtr >= wasmTableMirror.length) wasmTableMirror.length = funcPtr + 1; // grow the mirror array so index funcPtr exists
+        wasmTableMirror[funcPtr] = func = wasmTable.get(funcPtr);
+      }
+      assert(wasmTable.get(funcPtr) == func, "JavaScript-side Wasm function table mirror is out of date!"); // verifies the cached entry still matches the table
+      return func;
+    };
+  /** Calls the function identified by ptr with signature string sig by delegating to dynCallLegacy, and returns its result. @param {Object=} args optional array of arguments forwarded to the call */
+  var dynCall = (sig, ptr, args) => {
+      var rtn = dynCallLegacy(sig, ptr, args); // all calls are forwarded unchanged to dynCallLegacy
+      return rtn;
+    };
+
   
     /**
      * @param {number} ptr
@@ -1214,8 +1339,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   function getValue(ptr, type = 'i8') {
     if (type.endsWith('*')) type = '*';
     switch (type) {
-      case 'i1': return HEAP8[ptr];
-      case 'i8': return HEAP8[ptr];
+      case 'i1': return HEAP8[((ptr)>>0)];
+      case 'i8': return HEAP8[((ptr)>>0)];
       case 'i16': return HEAP16[((ptr)>>1)];
       case 'i32': return HEAP32[((ptr)>>2)];
       case 'i64': abort('to do getValue(i64) use WASM_BIGINT');
@@ -1244,8 +1369,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   function setValue(ptr, value, type = 'i8') {
     if (type.endsWith('*')) type = '*';
     switch (type) {
-      case 'i1': HEAP8[ptr] = value; break;
-      case 'i8': HEAP8[ptr] = value; break;
+      case 'i1': HEAP8[((ptr)>>0)] = value; break;
+      case 'i8': HEAP8[((ptr)>>0)] = value; break;
       case 'i16': HEAP16[((ptr)>>1)] = value; break;
       case 'i32': HEAP32[((ptr)>>2)] = value; break;
       case 'i64': abort('to do setValue(i64) use WASM_BIGINT');
@@ -1256,12 +1381,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
     }
   }
 
-  var stackRestore = (val) => __emscripten_stack_restore(val);
-
-  var stackSave = () => _emscripten_stack_get_current();
-
   var warnOnce = (text) => {
-      warnOnce.shown ||= {};
+      if (!warnOnce.shown) warnOnce.shown = {};
       if (!warnOnce.shown[text]) {
         warnOnce.shown[text] = 1;
         if (ENVIRONMENT_IS_NODE) text = 'warning: ' + text;
@@ -1334,7 +1455,10 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         if (lastSlash === -1) return path;
         return path.substr(lastSlash+1);
       },
-  join:(...paths) => PATH.normalize(paths.join('/')),
+  join:function() {
+        var paths = Array.prototype.slice.call(arguments);
+        return PATH.normalize(paths.join('/'));
+      },
   join2:(l, r) => PATH.normalize(l + '/' + r),
   };
   
@@ -1364,7 +1488,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
       }
       // we couldn't find a proper implementation, as Math.random() is not suitable for /dev/random, see emscripten-core/emscripten/pull/7096
-      abort('no cryptographic support found for randomDevice. consider polyfilling it if you want to use something insecure like Math.random(), e.g. put this in a --pre-js: var crypto = { getRandomValues: (array) => { for (var i = 0; i < array.length; i++) array[i] = (Math.random()*256)|0 } };');
+      abort("no cryptographic support found for randomDevice. consider polyfilling it if you want to use something insecure like Math.random(), e.g. put this in a --pre-js: var crypto = { getRandomValues: (array) => { for (var i = 0; i < array.length; i++) array[i] = (Math.random()*256)|0 } };");
     };
   var randomFill = (view) => {
       // Lazily init on the first invocation.
@@ -1374,11 +1498,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   var PATH_FS = {
-  resolve:(...args) => {
+  resolve:function() {
         var resolvedPath = '',
           resolvedAbsolute = false;
-        for (var i = args.length - 1; i >= -1 && !resolvedAbsolute; i--) {
-          var path = (i >= 0) ? args[i] : FS.cwd();
+        for (var i = arguments.length - 1; i >= -1 && !resolvedAbsolute; i--) {
+          var path = (i >= 0) ? arguments[i] : FS.cwd();
           // Skip empty and invalid entries
           if (typeof path != 'string') {
             throw new TypeError('Arguments to path.resolve must be strings');
@@ -1428,7 +1552,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   };
   
   
-  var UTF8Decoder = typeof TextDecoder != 'undefined' ? new TextDecoder() : undefined;
+  var UTF8Decoder = typeof TextDecoder != 'undefined' ? new TextDecoder('utf8') : undefined;
   
     /**
      * Given a pointer 'idx' to a null-terminated UTF8-encoded string in the given
@@ -1579,17 +1703,18 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           var fd = process.stdin.fd;
   
           try {
-            bytesRead = fs.readSync(fd, buf, 0, BUFSIZE);
+            bytesRead = fs.readSync(fd, buf);
           } catch(e) {
-            // Cross-platform differences: on Windows, reading EOF throws an
-            // exception, but on other OSes, reading EOF returns 0. Uniformize
-            // behavior by treating the EOF exception to return 0.
+            // Cross-platform differences: on Windows, reading EOF throws an exception, but on other OSes,
+            // reading EOF returns 0. Uniformize behavior by treating the EOF exception to return 0.
             if (e.toString().includes('EOF')) bytesRead = 0;
             else throw e;
           }
   
           if (bytesRead > 0) {
             result = buf.slice(0, bytesRead).toString('utf-8');
+          } else {
+            result = null;
           }
         } else
         if (typeof window != 'undefined' &&
@@ -1599,8 +1724,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           if (result !== null) {
             result += '\n';
           }
-        } else
-        {}
+        } else if (typeof readline == 'function') {
+          // Command line.
+          result = readline();
+          if (result !== null) {
+            result += '\n';
+          }
+        }
         if (!result) {
           return null;
         }
@@ -1773,53 +1903,55 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           // no supported
           throw new FS.ErrnoError(63);
         }
-        MEMFS.ops_table ||= {
-          dir: {
-            node: {
-              getattr: MEMFS.node_ops.getattr,
-              setattr: MEMFS.node_ops.setattr,
-              lookup: MEMFS.node_ops.lookup,
-              mknod: MEMFS.node_ops.mknod,
-              rename: MEMFS.node_ops.rename,
-              unlink: MEMFS.node_ops.unlink,
-              rmdir: MEMFS.node_ops.rmdir,
-              readdir: MEMFS.node_ops.readdir,
-              symlink: MEMFS.node_ops.symlink
-            },
-            stream: {
-              llseek: MEMFS.stream_ops.llseek
-            }
-          },
-          file: {
-            node: {
-              getattr: MEMFS.node_ops.getattr,
-              setattr: MEMFS.node_ops.setattr
+        if (!MEMFS.ops_table) {
+          MEMFS.ops_table = {
+            dir: {
+              node: {
+                getattr: MEMFS.node_ops.getattr,
+                setattr: MEMFS.node_ops.setattr,
+                lookup: MEMFS.node_ops.lookup,
+                mknod: MEMFS.node_ops.mknod,
+                rename: MEMFS.node_ops.rename,
+                unlink: MEMFS.node_ops.unlink,
+                rmdir: MEMFS.node_ops.rmdir,
+                readdir: MEMFS.node_ops.readdir,
+                symlink: MEMFS.node_ops.symlink
+              },
+              stream: {
+                llseek: MEMFS.stream_ops.llseek
+              }
             },
-            stream: {
-              llseek: MEMFS.stream_ops.llseek,
-              read: MEMFS.stream_ops.read,
-              write: MEMFS.stream_ops.write,
-              allocate: MEMFS.stream_ops.allocate,
-              mmap: MEMFS.stream_ops.mmap,
-              msync: MEMFS.stream_ops.msync
-            }
-          },
-          link: {
-            node: {
-              getattr: MEMFS.node_ops.getattr,
-              setattr: MEMFS.node_ops.setattr,
-              readlink: MEMFS.node_ops.readlink
+            file: {
+              node: {
+                getattr: MEMFS.node_ops.getattr,
+                setattr: MEMFS.node_ops.setattr
+              },
+              stream: {
+                llseek: MEMFS.stream_ops.llseek,
+                read: MEMFS.stream_ops.read,
+                write: MEMFS.stream_ops.write,
+                allocate: MEMFS.stream_ops.allocate,
+                mmap: MEMFS.stream_ops.mmap,
+                msync: MEMFS.stream_ops.msync
+              }
             },
-            stream: {}
-          },
-          chrdev: {
-            node: {
-              getattr: MEMFS.node_ops.getattr,
-              setattr: MEMFS.node_ops.setattr
+            link: {
+              node: {
+                getattr: MEMFS.node_ops.getattr,
+                setattr: MEMFS.node_ops.setattr,
+                readlink: MEMFS.node_ops.readlink
+              },
+              stream: {}
             },
-            stream: FS.chrdev_stream_ops
-          }
-        };
+            chrdev: {
+              node: {
+                getattr: MEMFS.node_ops.getattr,
+                setattr: MEMFS.node_ops.setattr
+              },
+              stream: FS.chrdev_stream_ops
+            }
+          };
+        }
         var node = FS.createNode(parent, name, mode, dev);
         if (FS.isDir(node.mode)) {
           node.node_ops = MEMFS.ops_table.dir.node;
@@ -1946,6 +2078,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           old_node.name = new_name;
           new_dir.contents[new_name] = old_node;
           new_dir.timestamp = old_node.parent.timestamp;
+          old_node.parent = new_dir;
         },
   unlink(parent, name) {
           delete parent.contents[name];
@@ -1961,7 +2094,10 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         },
   readdir(node) {
           var entries = ['.', '..'];
-          for (var key of Object.keys(node.contents)) {
+          for (var key in node.contents) {
+            if (!node.contents.hasOwnProperty(key)) {
+              continue;
+            }
             entries.push(key);
           }
           return entries;
@@ -2061,28 +2197,26 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           var allocated;
           var contents = stream.node.contents;
           // Only make a new copy when MAP_PRIVATE is specified.
-          if (!(flags & 2) && contents && contents.buffer === HEAP8.buffer) {
+          if (!(flags & 2) && contents.buffer === HEAP8.buffer) {
             // We can't emulate MAP_SHARED when the file is not backed by the
             // buffer we're mapping to (e.g. the HEAP buffer).
             allocated = false;
             ptr = contents.byteOffset;
           } else {
+            // Try to avoid unnecessary slices.
+            if (position > 0 || position + length < contents.length) {
+              if (contents.subarray) {
+                contents = contents.subarray(position, position + length);
+              } else {
+                contents = Array.prototype.slice.call(contents, position, position + length);
+              }
+            }
             allocated = true;
             ptr = mmapAlloc(length);
             if (!ptr) {
               throw new FS.ErrnoError(48);
             }
-            if (contents) {
-              // Try to avoid unnecessary slices.
-              if (position > 0 || position + length < contents.length) {
-                if (contents.subarray) {
-                  contents = contents.subarray(position, position + length);
-                } else {
-                  contents = Array.prototype.slice.call(contents, position, position + length);
-                }
-              }
-              HEAP8.set(contents, ptr);
-            }
+            HEAP8.set(contents, ptr);
           }
           return { ptr, allocated };
         },
@@ -2097,20 +2231,17 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   /** @param {boolean=} noRunDep */
   var asyncLoad = (url, onload, onerror, noRunDep) => {
       var dep = !noRunDep ? getUniqueRunDependency(`al ${url}`) : '';
-      readAsync(url).then(
-        (arrayBuffer) => {
-          assert(arrayBuffer, `Loading data file "${url}" failed (no arrayBuffer).`);
-          onload(new Uint8Array(arrayBuffer));
-          if (dep) removeRunDependency(dep);
-        },
-        (err) => {
-          if (onerror) {
-            onerror();
-          } else {
-            throw `Loading data file "${url}" failed.`;
-          }
+      readAsync(url, (arrayBuffer) => {
+        assert(arrayBuffer, `Loading data file "${url}" failed (no arrayBuffer).`);
+        onload(new Uint8Array(arrayBuffer));
+        if (dep) removeRunDependency(dep);
+      }, (event) => {
+        if (onerror) {
+          onerror();
+        } else {
+          throw `Loading data file "${url}" failed.`;
         }
-      );
+      });
       if (dep) addRunDependency(dep);
     };
   
@@ -2141,15 +2272,15 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       var dep = getUniqueRunDependency(`cp ${fullname}`); // might have several active requests for the same fullname
       function processData(byteArray) {
         function finish(byteArray) {
-          preFinish?.();
+          if (preFinish) preFinish();
           if (!dontCreateFile) {
             FS_createDataFile(parent, name, byteArray, canRead, canWrite, canOwn);
           }
-          onload?.();
+          if (onload) onload();
           removeRunDependency(dep);
         }
         if (FS_handledByPreloadPlugin(byteArray, fullname, finish, () => {
-          onerror?.();
+          if (onerror) onerror();
           removeRunDependency(dep);
         })) {
           return;
@@ -2158,7 +2289,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       }
       addRunDependency(dep);
       if (typeof url == 'string') {
-        asyncLoad(url, processData, onerror);
+        asyncLoad(url, (byteArray) => processData(byteArray), onerror);
       } else {
         processData(url);
       }
@@ -2204,78 +2335,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       },
   DB_VERSION:21,
   DB_STORE_NAME:"FILE_DATA",
-  queuePersist:(mount) => {
-        function onPersistComplete() {
-          if (mount.idbPersistState === 'again') startPersist(); // If a new sync request has appeared in between, kick off a new sync
-          else mount.idbPersistState = 0; // Otherwise reset sync state back to idle to wait for a new sync later
-        }
-        function startPersist() {
-          mount.idbPersistState = 'idb'; // Mark that we are currently running a sync operation
-          IDBFS.syncfs(mount, /*populate:*/false, onPersistComplete);
-        }
-  
-        if (!mount.idbPersistState) {
-          // Programs typically write/copy/move multiple files in the in-memory
-          // filesystem within a single app frame, so when a filesystem sync
-          // command is triggered, do not start it immediately, but only after
-          // the current frame is finished. This way all the modified files
-          // inside the main loop tick will be batched up to the same sync.
-          mount.idbPersistState = setTimeout(startPersist, 0);
-        } else if (mount.idbPersistState === 'idb') {
-          // There is an active IndexedDB sync operation in-flight, but we now
-          // have accumulated more files to sync. We should therefore queue up
-          // a new sync after the current one finishes so that all writes
-          // will be properly persisted.
-          mount.idbPersistState = 'again';
-        }
-      },
-  mount:(mount) => {
-        // reuse core MEMFS functionality
-        var mnt = MEMFS.mount(mount);
-        // If the automatic IDBFS persistence option has been selected, then automatically persist
-        // all modifications to the filesystem as they occur.
-        if (mount?.opts?.autoPersist) {
-          mnt.idbPersistState = 0; // IndexedDB sync starts in idle state
-          var memfs_node_ops = mnt.node_ops;
-          mnt.node_ops = Object.assign({}, mnt.node_ops); // Clone node_ops to inject write tracking
-          mnt.node_ops.mknod = (parent, name, mode, dev) => {
-            var node = memfs_node_ops.mknod(parent, name, mode, dev);
-            // Propagate injected node_ops to the newly created child node
-            node.node_ops = mnt.node_ops;
-            // Remember for each IDBFS node which IDBFS mount point they came from so we know which mount to persist on modification.
-            node.idbfs_mount = mnt.mount;
-            // Remember original MEMFS stream_ops for this node
-            node.memfs_stream_ops = node.stream_ops;
-            // Clone stream_ops to inject write tracking
-            node.stream_ops = Object.assign({}, node.stream_ops);
-  
-            // Track all file writes
-            node.stream_ops.write = (stream, buffer, offset, length, position, canOwn) => {
-              // This file has been modified, we must persist IndexedDB when this file closes
-              stream.node.isModified = true;
-              return node.memfs_stream_ops.write(stream, buffer, offset, length, position, canOwn);
-            };
-  
-            // Persist IndexedDB on file close
-            node.stream_ops.close = (stream) => {
-              var n = stream.node;
-              if (n.isModified) {
-                IDBFS.queuePersist(n.idbfs_mount);
-                n.isModified = false;
-              }
-              if (n.memfs_stream_ops.close) return n.memfs_stream_ops.close(stream);
-            };
-  
-            return node;
-          };
-          // Also kick off persisting the filesystem on other operations that modify the filesystem.
-          mnt.node_ops.mkdir   = (...args) => (IDBFS.queuePersist(mnt.mount), memfs_node_ops.mkdir(...args));
-          mnt.node_ops.rmdir   = (...args) => (IDBFS.queuePersist(mnt.mount), memfs_node_ops.rmdir(...args));
-          mnt.node_ops.symlink = (...args) => (IDBFS.queuePersist(mnt.mount), memfs_node_ops.symlink(...args));
-          mnt.node_ops.unlink  = (...args) => (IDBFS.queuePersist(mnt.mount), memfs_node_ops.unlink(...args));
-          mnt.node_ops.rename  = (...args) => (IDBFS.queuePersist(mnt.mount), memfs_node_ops.rename(...args));
-        }
-        return mnt;
+  mount:function(mount) {
+        // reuse all of the core MEMFS functionality
+        return MEMFS.mount.apply(null, arguments);
       },
   syncfs:(mount, populate, callback) => {
         IDBFS.getLocalSet(mount, (err, local) => {
@@ -2362,7 +2424,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           }
   
           if (FS.isDir(stat.mode)) {
-            check.push(...FS.readdir(path).filter(isRealDir).map(toAbsolute(path)));
+            check.push.apply(check, FS.readdir(path).filter(isRealDir).map(toAbsolute(path)));
           }
   
           entries[path] = { 'timestamp': stat.mtime };
@@ -2490,7 +2552,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         var total = 0;
   
         var create = [];
-        Object.keys(src.entries).forEach((key) => {
+        Object.keys(src.entries).forEach(function (key) {
           var e = src.entries[key];
           var e2 = dst.entries[key];
           if (!e2 || e['timestamp'].getTime() != e2['timestamp'].getTime()) {
@@ -2500,7 +2562,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         });
   
         var remove = [];
-        Object.keys(dst.entries).forEach((key) => {
+        Object.keys(dst.entries).forEach(function (key) {
           if (!src.entries[key]) {
             remove.push(key);
             total++;
@@ -2523,9 +2585,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           }
         };
   
-        // transaction may abort if (for example) there is a QuotaExceededError
-        transaction.onerror = transaction.onabort = (e) => {
-          done(e.target.error);
+        transaction.onerror = (e) => {
+          done(this.error);
           e.preventDefault();
         };
   
@@ -2563,31 +2624,127 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       },
   };
   
-  
-  
-    /**
-     * Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the
-     * emscripten HEAP, returns a copy of that string as a Javascript String object.
-     *
-     * @param {number} ptr
-     * @param {number=} maxBytesToRead - An optional length that specifies the
-     *   maximum number of bytes to read. You can omit this parameter to scan the
-     *   string until the first 0 byte. If maxBytesToRead is passed, and the string
-     *   at [ptr, ptr+maxBytesToReadr[ contains a null byte in the middle, then the
-     *   string will cut short at that byte index (i.e. maxBytesToRead will not
-     *   produce a string of exact length [ptr, ptr+maxBytesToRead[) N.B. mixing
-     *   frequent uses of UTF8ToString() with and without maxBytesToRead may throw
-     *   JS JIT optimizations off, so it is worth to consider consistently using one
-     * @return {string}
-     */
-  var UTF8ToString = (ptr, maxBytesToRead) => {
-      assert(typeof ptr == 'number', `UTF8ToString expects a number (got ${typeof ptr})`);
-      return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
-    };
-  
-  var strError = (errno) => {
-      return UTF8ToString(_strerror(errno));
-    };
+  var ERRNO_MESSAGES = {
+  0:"Success",
+  1:"Arg list too long",
+  2:"Permission denied",
+  3:"Address already in use",
+  4:"Address not available",
+  5:"Address family not supported by protocol family",
+  6:"No more processes",
+  7:"Socket already connected",
+  8:"Bad file number",
+  9:"Trying to read unreadable message",
+  10:"Mount device busy",
+  11:"Operation canceled",
+  12:"No children",
+  13:"Connection aborted",
+  14:"Connection refused",
+  15:"Connection reset by peer",
+  16:"File locking deadlock error",
+  17:"Destination address required",
+  18:"Math arg out of domain of func",
+  19:"Quota exceeded",
+  20:"File exists",
+  21:"Bad address",
+  22:"File too large",
+  23:"Host is unreachable",
+  24:"Identifier removed",
+  25:"Illegal byte sequence",
+  26:"Connection already in progress",
+  27:"Interrupted system call",
+  28:"Invalid argument",
+  29:"I/O error",
+  30:"Socket is already connected",
+  31:"Is a directory",
+  32:"Too many symbolic links",
+  33:"Too many open files",
+  34:"Too many links",
+  35:"Message too long",
+  36:"Multihop attempted",
+  37:"File or path name too long",
+  38:"Network interface is not configured",
+  39:"Connection reset by network",
+  40:"Network is unreachable",
+  41:"Too many open files in system",
+  42:"No buffer space available",
+  43:"No such device",
+  44:"No such file or directory",
+  45:"Exec format error",
+  46:"No record locks available",
+  47:"The link has been severed",
+  48:"Not enough core",
+  49:"No message of desired type",
+  50:"Protocol not available",
+  51:"No space left on device",
+  52:"Function not implemented",
+  53:"Socket is not connected",
+  54:"Not a directory",
+  55:"Directory not empty",
+  56:"State not recoverable",
+  57:"Socket operation on non-socket",
+  59:"Not a typewriter",
+  60:"No such device or address",
+  61:"Value too large for defined data type",
+  62:"Previous owner died",
+  63:"Not super-user",
+  64:"Broken pipe",
+  65:"Protocol error",
+  66:"Unknown protocol",
+  67:"Protocol wrong type for socket",
+  68:"Math result not representable",
+  69:"Read only file system",
+  70:"Illegal seek",
+  71:"No such process",
+  72:"Stale file handle",
+  73:"Connection timed out",
+  74:"Text file busy",
+  75:"Cross-device link",
+  100:"Device not a stream",
+  101:"Bad font file fmt",
+  102:"Invalid slot",
+  103:"Invalid request code",
+  104:"No anode",
+  105:"Block device required",
+  106:"Channel number out of range",
+  107:"Level 3 halted",
+  108:"Level 3 reset",
+  109:"Link number out of range",
+  110:"Protocol driver not attached",
+  111:"No CSI structure available",
+  112:"Level 2 halted",
+  113:"Invalid exchange",
+  114:"Invalid request descriptor",
+  115:"Exchange full",
+  116:"No data (for no delay io)",
+  117:"Timer expired",
+  118:"Out of streams resources",
+  119:"Machine is not on the network",
+  120:"Package not installed",
+  121:"The object is remote",
+  122:"Advertise error",
+  123:"Srmount error",
+  124:"Communication error on send",
+  125:"Cross mount point (not really error)",
+  126:"Given log. name not unique",
+  127:"f.d. invalid for this operation",
+  128:"Remote address changed",
+  129:"Can   access a needed shared lib",
+  130:"Accessing a corrupted shared lib",
+  131:".lib section in a.out corrupted",
+  132:"Attempting to link in too many libs",
+  133:"Attempting to exec a shared library",
+  135:"Streams pipe error",
+  136:"Too many users",
+  137:"Socket type not supported",
+  138:"Not supported",
+  139:"Protocol family not supported",
+  140:"Can't send after socket shutdown",
+  141:"Too many references",
+  142:"Host is down",
+  148:"No medium (in tape drive)",
+  156:"Level 2 not synchronized",
+  };
   
   var ERRNO_CODES = {
       'EPERM': 63,
@@ -2712,6 +2869,20 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       'EOWNERDEAD': 62,
       'ESTRPIPE': 135,
     };
+  
+  var demangle = (func) => {
+      warnOnce('warning: build with -sDEMANGLE_SUPPORT to link in libcxxabi demangling');
+      return func;
+    };
+  var demangleAll = (text) => {
+      var regex =
+        /\b_Z[\w\d_]+/g;
+      return text.replace(regex,
+        function(x) {
+          var y = demangle(x);
+          return x === y ? x : (y + ' [' + x + ']');
+        });
+    };
   var FS = {
   root:null,
   mounts:[],
@@ -2723,101 +2894,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   currentPath:"/",
   initialized:false,
   ignorePermissions:true,
-  ErrnoError:class extends Error {
-        // We set the `name` property to be able to identify `FS.ErrnoError`
-        // - the `name` is a standard ECMA-262 property of error objects. Kind of good to have it anyway.
-        // - when using PROXYFS, an error can come from an underlying FS
-        // as different FS objects have their own FS.ErrnoError each,
-        // the test `err instanceof FS.ErrnoError` won't detect an error coming from another filesystem, causing bugs.
-        // we'll use the reliable test `err.name == "ErrnoError"` instead
-        constructor(errno) {
-          super(runtimeInitialized ? strError(errno) : '');
-          // TODO(sbc): Use the inline member declaration syntax once we
-          // support it in acorn and closure.
-          this.name = 'ErrnoError';
-          this.errno = errno;
-          for (var key in ERRNO_CODES) {
-            if (ERRNO_CODES[key] === errno) {
-              this.code = key;
-              break;
-            }
-          }
-        }
-      },
+  ErrnoError:null,
   genericErrors:{
   },
   filesystems:null,
   syncFSRequests:0,
-  FSStream:class {
-        constructor() {
-          // TODO(https://github.com/emscripten-core/emscripten/issues/21414):
-          // Use inline field declarations.
-          this.shared = {};
-        }
-        get object() {
-          return this.node;
-        }
-        set object(val) {
-          this.node = val;
-        }
-        get isRead() {
-          return (this.flags & 2097155) !== 1;
-        }
-        get isWrite() {
-          return (this.flags & 2097155) !== 0;
-        }
-        get isAppend() {
-          return (this.flags & 1024);
-        }
-        get flags() {
-          return this.shared.flags;
-        }
-        set flags(val) {
-          this.shared.flags = val;
-        }
-        get position() {
-          return this.shared.position;
-        }
-        set position(val) {
-          this.shared.position = val;
-        }
-      },
-  FSNode:class {
-        constructor(parent, name, mode, rdev) {
-          if (!parent) {
-            parent = this;  // root node sets parent to itself
-          }
-          this.parent = parent;
-          this.mount = parent.mount;
-          this.mounted = null;
-          this.id = FS.nextInode++;
-          this.name = name;
-          this.mode = mode;
-          this.node_ops = {};
-          this.stream_ops = {};
-          this.rdev = rdev;
-          this.readMode = 292 | 73;
-          this.writeMode = 146;
-        }
-        get read() {
-          return (this.mode & this.readMode) === this.readMode;
-        }
-        set read(val) {
-          val ? this.mode |= this.readMode : this.mode &= ~this.readMode;
-        }
-        get write() {
-          return (this.mode & this.writeMode) === this.writeMode;
-        }
-        set write(val) {
-          val ? this.mode |= this.writeMode : this.mode &= ~this.writeMode;
-        }
-        get isFolder() {
-          return FS.isDir(this.mode);
-        }
-        get isDevice() {
-          return FS.isChrdev(this.mode);
-        }
-      },
   lookupPath(path, opts = {}) {
         path = PATH_FS.resolve(path);
   
@@ -2920,7 +3001,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   lookupNode(parent, name) {
         var errCode = FS.mayLookup(parent);
         if (errCode) {
-          throw new FS.ErrnoError(errCode);
+          throw new FS.ErrnoError(errCode, parent);
         }
         var hash = FS.hashName(parent.id, name);
         for (var node = FS.nameTable[hash]; node; node = node.name_next) {
@@ -2992,7 +3073,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         return 0;
       },
   mayLookup(dir) {
-        if (!FS.isDir(dir.mode)) return 54;
         var errCode = FS.nodePermissions(dir, 'x');
         if (errCode) return errCode;
         if (!dir.node_ops.lookup) return 2;
@@ -3063,8 +3143,44 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       },
   getStream:(fd) => FS.streams[fd],
   createStream(stream, fd = -1) {
-        assert(fd >= -1);
-  
+        if (!FS.FSStream) {
+          FS.FSStream = /** @constructor */ function() {
+            this.shared = { };
+          };
+          FS.FSStream.prototype = {};
+          Object.defineProperties(FS.FSStream.prototype, {
+            object: {
+              /** @this {FS.FSStream} */
+              get() { return this.node; },
+              /** @this {FS.FSStream} */
+              set(val) { this.node = val; }
+            },
+            isRead: {
+              /** @this {FS.FSStream} */
+              get() { return (this.flags & 2097155) !== 1; }
+            },
+            isWrite: {
+              /** @this {FS.FSStream} */
+              get() { return (this.flags & 2097155) !== 0; }
+            },
+            isAppend: {
+              /** @this {FS.FSStream} */
+              get() { return (this.flags & 1024); }
+            },
+            flags: {
+              /** @this {FS.FSStream} */
+              get() { return this.shared.flags; },
+              /** @this {FS.FSStream} */
+              set(val) { this.shared.flags = val; },
+            },
+            position : {
+              /** @this {FS.FSStream} */
+              get() { return this.shared.position; },
+              /** @this {FS.FSStream} */
+              set(val) { this.shared.position = val; },
+            },
+          });
+        }
         // clone it, so we can return an instance of FSStream
         stream = Object.assign(new FS.FSStream(), stream);
         if (fd == -1) {
@@ -3077,18 +3193,15 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   closeStream(fd) {
         FS.streams[fd] = null;
       },
-  dupStream(origStream, fd = -1) {
-        var stream = FS.createStream(origStream, fd);
-        stream.stream_ops?.dup?.(stream);
-        return stream;
-      },
   chrdev_stream_ops:{
   open(stream) {
           var device = FS.getDevice(stream.node.rdev);
           // override node's stream ops with the device's
           stream.stream_ops = device.stream_ops;
           // forward the open call
-          stream.stream_ops.open?.(stream);
+          if (stream.stream_ops.open) {
+            stream.stream_ops.open(stream);
+          }
         },
   llseek() {
           throw new FS.ErrnoError(70);
@@ -3110,7 +3223,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
           mounts.push(m);
   
-          check.push(...m.mounts);
+          check.push.apply(check, m.mounts);
         }
   
         return mounts;
@@ -3323,7 +3436,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // parents must exist
         var lookup, old_dir, new_dir;
   
-        // let the errors from non existent directories percolate up
+        // let the errors from non existant directories percolate up
         lookup = FS.lookupPath(old_path, { parent: true });
         old_dir = lookup.node;
         lookup = FS.lookupPath(new_path, { parent: true });
@@ -3389,9 +3502,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // do the underlying fs rename
         try {
           old_dir.node_ops.rename(old_node, new_dir, new_name);
-          // update old node (we do this here to avoid each backend 
-          // needing to)
-          old_node.parent = new_dir;
         } catch (e) {
           throw e;
         } finally {
@@ -3569,8 +3679,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           throw new FS.ErrnoError(44);
         }
         flags = typeof flags == 'string' ? FS_modeStringToFlags(flags) : flags;
+        mode = typeof mode == 'undefined' ? 438 /* 0666 */ : mode;
         if ((flags & 64)) {
-          mode = typeof mode == 'undefined' ? 438 /* 0666 */ : mode;
           mode = (mode & 4095) | 32768;
         } else {
           mode = 0;
@@ -3781,9 +3891,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         if (!stream.stream_ops.mmap) {
           throw new FS.ErrnoError(43);
         }
-        if (!length) {
-          throw new FS.ErrnoError(28);
-        }
         return stream.stream_ops.mmap(stream, length, position, prot, flags);
       },
   msync(stream, buffer, offset, length, mmapFlags) {
@@ -3793,6 +3900,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
         return stream.stream_ops.msync(stream, buffer, offset, length, mmapFlags);
       },
+  munmap:(stream) => 0,
   ioctl(stream, cmd, arg) {
         if (!stream.stream_ops.ioctl) {
           throw new FS.ErrnoError(59);
@@ -3911,7 +4019,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           }
         }, {}, '/proc/self/fd');
       },
-  createStandardStreams(input, output, error) {
+  createStandardStreams() {
         // TODO deprecate the old functionality of a single
         // input / output callback and that utilizes FS.createDevice
         // and instead require a unique set of stream ops
@@ -3920,18 +4028,18 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // default tty devices. however, if the standard streams
         // have been overwritten we create a unique device for
         // them instead.
-        if (input) {
-          FS.createDevice('/dev', 'stdin', input);
+        if (Module['stdin']) {
+          FS.createDevice('/dev', 'stdin', Module['stdin']);
         } else {
           FS.symlink('/dev/tty', '/dev/stdin');
         }
-        if (output) {
-          FS.createDevice('/dev', 'stdout', null, output);
+        if (Module['stdout']) {
+          FS.createDevice('/dev', 'stdout', null, Module['stdout']);
         } else {
           FS.symlink('/dev/tty', '/dev/stdout');
         }
-        if (error) {
-          FS.createDevice('/dev', 'stderr', null, error);
+        if (Module['stderr']) {
+          FS.createDevice('/dev', 'stderr', null, Module['stderr']);
         } else {
           FS.symlink('/dev/tty1', '/dev/stderr');
         }
@@ -3944,12 +4052,47 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         assert(stdout.fd === 1, `invalid handle for stdout (${stdout.fd})`);
         assert(stderr.fd === 2, `invalid handle for stderr (${stderr.fd})`);
       },
-  staticInit() {
+  ensureErrnoError() {
+        if (FS.ErrnoError) return;
+        FS.ErrnoError = /** @this{Object} */ function ErrnoError(errno, node) {
+          // We set the `name` property to be able to identify `FS.ErrnoError`
+          // - the `name` is a standard ECMA-262 property of error objects. Kind of good to have it anyway.
+          // - when using PROXYFS, an error can come from an underlying FS
+          // as different FS objects have their own FS.ErrnoError each,
+          // the test `err instanceof FS.ErrnoError` won't detect an error coming from another filesystem, causing bugs.
+          // we'll use the reliable test `err.name == "ErrnoError"` instead
+          this.name = 'ErrnoError';
+          this.node = node;
+          this.setErrno = /** @this{Object} */ function(errno) {
+            this.errno = errno;
+            for (var key in ERRNO_CODES) {
+              if (ERRNO_CODES[key] === errno) {
+                this.code = key;
+                break;
+              }
+            }
+          };
+          this.setErrno(errno);
+          this.message = ERRNO_MESSAGES[errno];
+  
+          // Try to get a maximally helpful stack trace. On Node.js, getting Error.stack
+          // now ensures it shows what we want.
+          if (this.stack) {
+            // Define the stack property for Node.js 4, which otherwise errors on the next line.
+            Object.defineProperty(this, "stack", { value: (new Error).stack, writable: true });
+            this.stack = demangleAll(this.stack);
+          }
+        };
+        FS.ErrnoError.prototype = new Error();
+        FS.ErrnoError.prototype.constructor = FS.ErrnoError;
         // Some errors may happen quite a bit, to avoid overhead we reuse them (and suffer a lack of stack info)
         [44].forEach((code) => {
           FS.genericErrors[code] = new FS.ErrnoError(code);
           FS.genericErrors[code].stack = '<generic error, no stack>';
         });
+      },
+  staticInit() {
+        FS.ensureErrnoError();
   
         FS.nameTable = new Array(4096);
   
@@ -3965,18 +4108,20 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         };
       },
   init(input, output, error) {
-        assert(!FS.initialized, 'FS.init was previously called. If you want to initialize later with custom parameters, remove any earlier calls (note that one is automatically added to the generated code)');
-        FS.initialized = true;
+        assert(!FS.init.initialized, 'FS.init was previously called. If you want to initialize later with custom parameters, remove any earlier calls (note that one is automatically added to the generated code)');
+        FS.init.initialized = true;
+  
+        FS.ensureErrnoError();
   
         // Allow Module.stdin etc. to provide defaults, if none explicitly passed to us here
-        input ??= Module['stdin'];
-        output ??= Module['stdout'];
-        error ??= Module['stderr'];
+        Module['stdin'] = input || Module['stdin'];
+        Module['stdout'] = output || Module['stdout'];
+        Module['stderr'] = error || Module['stderr'];
   
-        FS.createStandardStreams(input, output, error);
+        FS.createStandardStreams();
       },
   quit() {
-        FS.initialized = false;
+        FS.init.initialized = false;
         // force-flush all streams, so we get musl std streams printed out
         _fflush(0);
         // close all of our streams
@@ -4079,7 +4224,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           },
           close(stream) {
             // flush any pending line data
-            if (output?.buffer?.length) {
+            if (output && output.buffer && output.buffer.length) {
               output(10);
             }
           },
@@ -4124,113 +4269,122 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         if (obj.isDevice || obj.isFolder || obj.link || obj.contents) return true;
         if (typeof XMLHttpRequest != 'undefined') {
           throw new Error("Lazy loading should have been performed (contents set) in createLazyFile, but it was not. Lazy loading only works in web workers. Use --embed-file or --preload-file in emcc on the main thread.");
-        } else { // Command-line.
+        } else if (read_) {
+          // Command-line.
           try {
-            obj.contents = readBinary(obj.url);
+            // WARNING: Can't read binary files in V8's d8 or tracemonkey's js, as
+            //          read() will try to parse UTF8.
+            obj.contents = intArrayFromString(read_(obj.url), true);
             obj.usedBytes = obj.contents.length;
           } catch (e) {
             throw new FS.ErrnoError(29);
           }
+        } else {
+          throw new Error('Cannot load without read() or XMLHttpRequest.');
         }
       },
   createLazyFile(parent, name, url, canRead, canWrite) {
-        // Lazy chunked Uint8Array (implements get and length from Uint8Array).
-        // Actual getting is abstracted away for eventual reuse.
-        class LazyUint8Array {
-          constructor() {
-            this.lengthKnown = false;
-            this.chunks = []; // Loaded chunks. Index is the chunk number
-          }
-          get(idx) {
-            if (idx > this.length-1 || idx < 0) {
-              return undefined;
-            }
-            var chunkOffset = idx % this.chunkSize;
-            var chunkNum = (idx / this.chunkSize)|0;
-            return this.getter(chunkNum)[chunkOffset];
-          }
-          setDataGetter(getter) {
-            this.getter = getter;
-          }
-          cacheLength() {
-            // Find length
+        // Lazy chunked Uint8Array (implements get and length from Uint8Array). Actual getting is abstracted away for eventual reuse.
+        /** @constructor */
+        function LazyUint8Array() {
+          this.lengthKnown = false;
+          this.chunks = []; // Loaded chunks. Index is the chunk number
+        }
+        LazyUint8Array.prototype.get = /** @this{Object} */ function LazyUint8Array_get(idx) {
+          if (idx > this.length-1 || idx < 0) {
+            return undefined;
+          }
+          var chunkOffset = idx % this.chunkSize;
+          var chunkNum = (idx / this.chunkSize)|0;
+          return this.getter(chunkNum)[chunkOffset];
+        };
+        LazyUint8Array.prototype.setDataGetter = function LazyUint8Array_setDataGetter(getter) {
+          this.getter = getter;
+        };
+        LazyUint8Array.prototype.cacheLength = function LazyUint8Array_cacheLength() {
+          // Find length
+          var xhr = new XMLHttpRequest();
+          xhr.open('HEAD', url, false);
+          xhr.send(null);
+          if (!(xhr.status >= 200 && xhr.status < 300 || xhr.status === 304)) throw new Error("Couldn't load " + url + ". Status: " + xhr.status);
+          var datalength = Number(xhr.getResponseHeader("Content-length"));
+          var header;
+          var hasByteServing = (header = xhr.getResponseHeader("Accept-Ranges")) && header === "bytes";
+          var usesGzip = (header = xhr.getResponseHeader("Content-Encoding")) && header === "gzip";
+  
+          var chunkSize = 1024*1024; // Chunk size in bytes
+  
+          if (!hasByteServing) chunkSize = datalength;
+  
+          // Function to get a range from the remote URL.
+          var doXHR = (from, to) => {
+            if (from > to) throw new Error("invalid range (" + from + ", " + to + ") or no bytes requested!");
+            if (to > datalength-1) throw new Error("only " + datalength + " bytes available! programmer error!");
+  
+            // TODO: Use mozResponseArrayBuffer, responseStream, etc. if available.
             var xhr = new XMLHttpRequest();
-            xhr.open('HEAD', url, false);
-            xhr.send(null);
-            if (!(xhr.status >= 200 && xhr.status < 300 || xhr.status === 304)) throw new Error("Couldn't load " + url + ". Status: " + xhr.status);
-            var datalength = Number(xhr.getResponseHeader("Content-length"));
-            var header;
-            var hasByteServing = (header = xhr.getResponseHeader("Accept-Ranges")) && header === "bytes";
-            var usesGzip = (header = xhr.getResponseHeader("Content-Encoding")) && header === "gzip";
-  
-            var chunkSize = 1024*1024; // Chunk size in bytes
-  
-            if (!hasByteServing) chunkSize = datalength;
-  
-            // Function to get a range from the remote URL.
-            var doXHR = (from, to) => {
-              if (from > to) throw new Error("invalid range (" + from + ", " + to + ") or no bytes requested!");
-              if (to > datalength-1) throw new Error("only " + datalength + " bytes available! programmer error!");
-  
-              // TODO: Use mozResponseArrayBuffer, responseStream, etc. if available.
-              var xhr = new XMLHttpRequest();
-              xhr.open('GET', url, false);
-              if (datalength !== chunkSize) xhr.setRequestHeader("Range", "bytes=" + from + "-" + to);
-  
-              // Some hints to the browser that we want binary data.
-              xhr.responseType = 'arraybuffer';
-              if (xhr.overrideMimeType) {
-                xhr.overrideMimeType('text/plain; charset=x-user-defined');
-              }
+            xhr.open('GET', url, false);
+            if (datalength !== chunkSize) xhr.setRequestHeader("Range", "bytes=" + from + "-" + to);
   
-              xhr.send(null);
-              if (!(xhr.status >= 200 && xhr.status < 300 || xhr.status === 304)) throw new Error("Couldn't load " + url + ". Status: " + xhr.status);
-              if (xhr.response !== undefined) {
-                return new Uint8Array(/** @type{Array<number>} */(xhr.response || []));
-              }
-              return intArrayFromString(xhr.responseText || '', true);
-            };
-            var lazyArray = this;
-            lazyArray.setDataGetter((chunkNum) => {
-              var start = chunkNum * chunkSize;
-              var end = (chunkNum+1) * chunkSize - 1; // including this byte
-              end = Math.min(end, datalength-1); // if datalength-1 is selected, this is the last block
-              if (typeof lazyArray.chunks[chunkNum] == 'undefined') {
-                lazyArray.chunks[chunkNum] = doXHR(start, end);
-              }
-              if (typeof lazyArray.chunks[chunkNum] == 'undefined') throw new Error('doXHR failed!');
-              return lazyArray.chunks[chunkNum];
-            });
-  
-            if (usesGzip || !datalength) {
-              // if the server uses gzip or doesn't supply the length, we have to download the whole file to get the (uncompressed) length
-              chunkSize = datalength = 1; // this will force getter(0)/doXHR do download the whole file
-              datalength = this.getter(0).length;
-              chunkSize = datalength;
-              out("LazyFiles on gzip forces download of the whole file when length is accessed");
+            // Some hints to the browser that we want binary data.
+            xhr.responseType = 'arraybuffer';
+            if (xhr.overrideMimeType) {
+              xhr.overrideMimeType('text/plain; charset=x-user-defined');
             }
   
-            this._length = datalength;
-            this._chunkSize = chunkSize;
-            this.lengthKnown = true;
-          }
-          get length() {
-            if (!this.lengthKnown) {
-              this.cacheLength();
+            xhr.send(null);
+            if (!(xhr.status >= 200 && xhr.status < 300 || xhr.status === 304)) throw new Error("Couldn't load " + url + ". Status: " + xhr.status);
+            if (xhr.response !== undefined) {
+              return new Uint8Array(/** @type{Array<number>} */(xhr.response || []));
             }
-            return this._length;
-          }
-          get chunkSize() {
-            if (!this.lengthKnown) {
-              this.cacheLength();
+            return intArrayFromString(xhr.responseText || '', true);
+          };
+          var lazyArray = this;
+          lazyArray.setDataGetter((chunkNum) => {
+            var start = chunkNum * chunkSize;
+            var end = (chunkNum+1) * chunkSize - 1; // including this byte
+            end = Math.min(end, datalength-1); // if datalength-1 is selected, this is the last block
+            if (typeof lazyArray.chunks[chunkNum] == 'undefined') {
+              lazyArray.chunks[chunkNum] = doXHR(start, end);
             }
-            return this._chunkSize;
+            if (typeof lazyArray.chunks[chunkNum] == 'undefined') throw new Error('doXHR failed!');
+            return lazyArray.chunks[chunkNum];
+          });
+  
+          if (usesGzip || !datalength) {
+            // if the server uses gzip or doesn't supply the length, we have to download the whole file to get the (uncompressed) length
+            chunkSize = datalength = 1; // this will force getter(0)/doXHR do download the whole file
+            datalength = this.getter(0).length;
+            chunkSize = datalength;
+            out("LazyFiles on gzip forces download of the whole file when length is accessed");
           }
-        }
   
+          this._length = datalength;
+          this._chunkSize = chunkSize;
+          this.lengthKnown = true;
+        };
         if (typeof XMLHttpRequest != 'undefined') {
           if (!ENVIRONMENT_IS_WORKER) throw 'Cannot do synchronous binary XHRs outside webworkers in modern browsers. Use --embed-file or --preload-file in emcc';
           var lazyArray = new LazyUint8Array();
+          Object.defineProperties(lazyArray, {
+            length: {
+              get: /** @this{Object} */ function() {
+                if (!this.lengthKnown) {
+                  this.cacheLength();
+                }
+                return this._length;
+              }
+            },
+            chunkSize: {
+              get: /** @this{Object} */ function() {
+                if (!this.lengthKnown) {
+                  this.cacheLength();
+                }
+                return this._chunkSize;
+              }
+            }
+          });
+  
           var properties = { isDevice: false, contents: lazyArray };
         } else {
           var properties = { isDevice: false, url: url };
@@ -4249,7 +4403,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // Add a function that defers querying the file size until it is asked the first time.
         Object.defineProperties(node, {
           usedBytes: {
-            get: function() { return this.contents.length; }
+            get: /** @this {FSNode} */ function() { return this.contents.length; }
           }
         });
         // override each stream op with one that tries to force load the lazy file first
@@ -4257,9 +4411,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         var keys = Object.keys(node.stream_ops);
         keys.forEach((key) => {
           var fn = node.stream_ops[key];
-          stream_ops[key] = (...args) => {
+          stream_ops[key] = function forceLoadLazyFile() {
             FS.forceLoadFile(node);
-            return fn(...args);
+            return fn.apply(null, arguments);
           };
         });
         function writeChunks(stream, buffer, offset, length, position) {
@@ -4317,6 +4471,26 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       },
   };
   
+  
+    /**
+     * Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the
+     * emscripten HEAP, returns a copy of that string as a Javascript String object.
+     *
+     * @param {number} ptr
+     * @param {number=} maxBytesToRead - An optional length that specifies the
+     *   maximum number of bytes to read. You can omit this parameter to scan the
+     *   string until the first 0 byte. If maxBytesToRead is passed, and the string
+     *   at [ptr, ptr+maxBytesToReadr[ contains a null byte in the middle, then the
+     *   string will cut short at that byte index (i.e. maxBytesToRead will not
+     *   produce a string of exact length [ptr, ptr+maxBytesToRead[) N.B. mixing
+     *   frequent uses of UTF8ToString() with and without maxBytesToRead may throw
+     *   JS JIT optimizations off, so it is worth to consider consistently using one
+     * @return {string}
+     */
+  var UTF8ToString = (ptr, maxBytesToRead) => {
+      assert(typeof ptr == 'number', `UTF8ToString expects a number (got ${typeof ptr})`);
+      return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
+    };
   var SYSCALLS = {
   DEFAULT_POLLMASK:5,
   calculateAt(dirfd, path, allowEmpty) {
@@ -4340,7 +4514,15 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         return PATH.join2(dir, path);
       },
   doStat(func, path, buf) {
-        var stat = func(path);
+        try {
+          var stat = func(path);
+        } catch (e) {
+          if (e && e.node && PATH.normalize(path) !== PATH.normalize(FS.getPath(e.node))) {
+            // an error occurred while trying to look up the path; we should just report ENOTDIR
+            return -54;
+          }
+          throw e;
+        }
         HEAP32[((buf)>>2)] = stat.dev;
         HEAP32[(((buf)+(4))>>2)] = stat.mode;
         HEAPU32[(((buf)+(8))>>2)] = stat.nlink;
@@ -4354,11 +4536,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         var mtime = stat.mtime.getTime();
         var ctime = stat.ctime.getTime();
         (tempI64 = [Math.floor(atime / 1000)>>>0,(tempDouble = Math.floor(atime / 1000),(+(Math.abs(tempDouble))) >= 1.0 ? (tempDouble > 0.0 ? (+(Math.floor((tempDouble)/4294967296.0)))>>>0 : (~~((+(Math.ceil((tempDouble - +(((~~(tempDouble)))>>>0))/4294967296.0)))))>>>0) : 0)], HEAP32[(((buf)+(40))>>2)] = tempI64[0],HEAP32[(((buf)+(44))>>2)] = tempI64[1]);
-        HEAPU32[(((buf)+(48))>>2)] = (atime % 1000) * 1000 * 1000;
+        HEAPU32[(((buf)+(48))>>2)] = (atime % 1000) * 1000;
         (tempI64 = [Math.floor(mtime / 1000)>>>0,(tempDouble = Math.floor(mtime / 1000),(+(Math.abs(tempDouble))) >= 1.0 ? (tempDouble > 0.0 ? (+(Math.floor((tempDouble)/4294967296.0)))>>>0 : (~~((+(Math.ceil((tempDouble - +(((~~(tempDouble)))>>>0))/4294967296.0)))))>>>0) : 0)], HEAP32[(((buf)+(56))>>2)] = tempI64[0],HEAP32[(((buf)+(60))>>2)] = tempI64[1]);
-        HEAPU32[(((buf)+(64))>>2)] = (mtime % 1000) * 1000 * 1000;
+        HEAPU32[(((buf)+(64))>>2)] = (mtime % 1000) * 1000;
         (tempI64 = [Math.floor(ctime / 1000)>>>0,(tempDouble = Math.floor(ctime / 1000),(+(Math.abs(tempDouble))) >= 1.0 ? (tempDouble > 0.0 ? (+(Math.floor((tempDouble)/4294967296.0)))>>>0 : (~~((+(Math.ceil((tempDouble - +(((~~(tempDouble)))>>>0))/4294967296.0)))))>>>0) : 0)], HEAP32[(((buf)+(72))>>2)] = tempI64[0],HEAP32[(((buf)+(76))>>2)] = tempI64[1]);
-        HEAPU32[(((buf)+(80))>>2)] = (ctime % 1000) * 1000 * 1000;
+        HEAPU32[(((buf)+(80))>>2)] = (ctime % 1000) * 1000;
         (tempI64 = [stat.ino>>>0,(tempDouble = stat.ino,(+(Math.abs(tempDouble))) >= 1.0 ? (tempDouble > 0.0 ? (+(Math.floor((tempDouble)/4294967296.0)))>>>0 : (~~((+(Math.ceil((tempDouble - +(((~~(tempDouble)))>>>0))/4294967296.0)))))>>>0) : 0)], HEAP32[(((buf)+(88))>>2)] = tempI64[0],HEAP32[(((buf)+(92))>>2)] = tempI64[1]);
         return 0;
       },
@@ -4373,15 +4555,23 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         var buffer = HEAPU8.slice(addr, addr + len);
         FS.msync(stream, buffer, offset, len, flags);
       },
-  getStreamFromFD(fd) {
-        var stream = FS.getStreamChecked(fd);
-        return stream;
-      },
   varargs:undefined,
+  get() {
+        assert(SYSCALLS.varargs != undefined);
+        // the `+` prepended here is necessary to convince the JSCompiler that varargs is indeed a number.
+        var ret = HEAP32[((+SYSCALLS.varargs)>>2)];
+        SYSCALLS.varargs += 4;
+        return ret;
+      },
+  getp() { return SYSCALLS.get() },
   getStr(ptr) {
         var ret = UTF8ToString(ptr);
         return ret;
       },
+  getStreamFromFD(fd) {
+        var stream = FS.getStreamChecked(fd);
+        return stream;
+      },
   };
   function ___syscall_chdir(path) {
   try {
@@ -4395,16 +4585,10 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   }
   }
 
-  /** @suppress {duplicate } */
-  function syscallGetVarargI() {
-      assert(SYSCALLS.varargs != undefined);
-      // the `+` prepended here is necessary to convince the JSCompiler that varargs is indeed a number.
-      var ret = HEAP32[((+SYSCALLS.varargs)>>2)];
-      SYSCALLS.varargs += 4;
-      return ret;
-    }
-  var syscallGetVarargP = syscallGetVarargI;
-  
+  var setErrNo = (value) => {
+      HEAP32[((___errno_location())>>2)] = value;
+      return value;
+    };
   
   function ___syscall_fcntl64(fd, cmd, varargs) {
   SYSCALLS.varargs = varargs;
@@ -4413,7 +4597,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       var stream = SYSCALLS.getStreamFromFD(fd);
       switch (cmd) {
         case 0: {
-          var arg = syscallGetVarargI();
+          var arg = SYSCALLS.get();
           if (arg < 0) {
             return -28;
           }
@@ -4421,7 +4605,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
             arg++;
           }
           var newStream;
-          newStream = FS.dupStream(stream, arg);
+          newStream = FS.createStream(stream, arg);
           return newStream.fd;
         }
         case 1:
@@ -4430,22 +4614,31 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         case 3:
           return stream.flags;
         case 4: {
-          var arg = syscallGetVarargI();
+          var arg = SYSCALLS.get();
           stream.flags |= arg;
           return 0;
         }
-        case 12: {
-          var arg = syscallGetVarargP();
+        case 5: {
+          var arg = SYSCALLS.getp();
           var offset = 0;
           // We're always unlocked.
           HEAP16[(((arg)+(offset))>>1)] = 2;
           return 0;
         }
-        case 13:
-        case 14:
+        case 6:
+        case 7:
           return 0; // Pretend that the locking is successful.
+        case 16:
+        case 8:
+          return -28; // These are for sockets. We don't have them fully implemented yet.
+        case 9:
+          // musl trusts getown return values, due to a bug where they must be, as they overlap with errors. just return -1 here, so fcntl() returns that, and we set errno ourselves.
+          setErrNo(28);
+          return -1;
+        default: {
+          return -28;
+        }
       }
-      return -28;
     } catch (e) {
     if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e;
     return -e.errno;
@@ -4468,6 +4661,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
       return stringToUTF8Array(str, HEAPU8, outPtr, maxBytesToWrite);
     };
+  
   function ___syscall_getcwd(buf, size) {
   try {
   
@@ -4488,7 +4682,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   try {
   
       var stream = SYSCALLS.getStreamFromFD(fd)
-      stream.getdents ||= FS.readdir(stream.path);
+      if (!stream.getdents) {
+        stream.getdents = FS.readdir(stream.path);
+      }
   
       var struct_size = 280;
       var pos = 0;
@@ -4521,7 +4717,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         (tempI64 = [id>>>0,(tempDouble = id,(+(Math.abs(tempDouble))) >= 1.0 ? (tempDouble > 0.0 ? (+(Math.floor((tempDouble)/4294967296.0)))>>>0 : (~~((+(Math.ceil((tempDouble - +(((~~(tempDouble)))>>>0))/4294967296.0)))))>>>0) : 0)], HEAP32[((dirp + pos)>>2)] = tempI64[0],HEAP32[(((dirp + pos)+(4))>>2)] = tempI64[1]);
         (tempI64 = [(idx + 1) * struct_size>>>0,(tempDouble = (idx + 1) * struct_size,(+(Math.abs(tempDouble))) >= 1.0 ? (tempDouble > 0.0 ? (+(Math.floor((tempDouble)/4294967296.0)))>>>0 : (~~((+(Math.ceil((tempDouble - +(((~~(tempDouble)))>>>0))/4294967296.0)))))>>>0) : 0)], HEAP32[(((dirp + pos)+(8))>>2)] = tempI64[0],HEAP32[(((dirp + pos)+(12))>>2)] = tempI64[1]);
         HEAP16[(((dirp + pos)+(16))>>1)] = 280;
-        HEAP8[(dirp + pos)+(18)] = type;
+        HEAP8[(((dirp + pos)+(18))>>0)] = type;
         stringToUTF8(name, dirp + pos + 19, 256);
         pos += struct_size;
         idx += 1;
@@ -4534,7 +4730,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   }
   }
 
-  
   function ___syscall_ioctl(fd, op, varargs) {
   SYSCALLS.varargs = varargs;
   try {
@@ -4549,13 +4744,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           if (!stream.tty) return -59;
           if (stream.tty.ops.ioctl_tcgets) {
             var termios = stream.tty.ops.ioctl_tcgets(stream);
-            var argp = syscallGetVarargP();
+            var argp = SYSCALLS.getp();
             HEAP32[((argp)>>2)] = termios.c_iflag || 0;
             HEAP32[(((argp)+(4))>>2)] = termios.c_oflag || 0;
             HEAP32[(((argp)+(8))>>2)] = termios.c_cflag || 0;
             HEAP32[(((argp)+(12))>>2)] = termios.c_lflag || 0;
             for (var i = 0; i < 32; i++) {
-              HEAP8[(argp + i)+(17)] = termios.c_cc[i] || 0;
+              HEAP8[(((argp + i)+(17))>>0)] = termios.c_cc[i] || 0;
             }
             return 0;
           }
@@ -4572,14 +4767,14 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         case 21508: {
           if (!stream.tty) return -59;
           if (stream.tty.ops.ioctl_tcsets) {
-            var argp = syscallGetVarargP();
+            var argp = SYSCALLS.getp();
             var c_iflag = HEAP32[((argp)>>2)];
             var c_oflag = HEAP32[(((argp)+(4))>>2)];
             var c_cflag = HEAP32[(((argp)+(8))>>2)];
             var c_lflag = HEAP32[(((argp)+(12))>>2)];
             var c_cc = []
             for (var i = 0; i < 32; i++) {
-              c_cc.push(HEAP8[(argp + i)+(17)]);
+              c_cc.push(HEAP8[(((argp + i)+(17))>>0)]);
             }
             return stream.tty.ops.ioctl_tcsets(stream.tty, op, { c_iflag, c_oflag, c_cflag, c_lflag, c_cc });
           }
@@ -4587,7 +4782,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
         case 21519: {
           if (!stream.tty) return -59;
-          var argp = syscallGetVarargP();
+          var argp = SYSCALLS.getp();
           HEAP32[((argp)>>2)] = 0;
           return 0;
         }
@@ -4596,7 +4791,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           return -28; // not supported
         }
         case 21531: {
-          var argp = syscallGetVarargP();
+          var argp = SYSCALLS.getp();
           return FS.ioctl(stream, op, argp);
         }
         case 21523: {
@@ -4605,7 +4800,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           if (!stream.tty) return -59;
           if (stream.tty.ops.ioctl_tiocgwinsz) {
             var winsize = stream.tty.ops.ioctl_tiocgwinsz(stream.tty);
-            var argp = syscallGetVarargP();
+            var argp = SYSCALLS.getp();
             HEAP16[((argp)>>1)] = winsize[0];
             HEAP16[(((argp)+(2))>>1)] = winsize[1];
           }
@@ -4674,14 +4869,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   }
   }
 
-  
   function ___syscall_openat(dirfd, path, flags, varargs) {
   SYSCALLS.varargs = varargs;
   try {
   
       path = SYSCALLS.getStr(path);
       path = SYSCALLS.calculateAt(dirfd, path);
-      var mode = varargs ? syscallGetVarargI() : 0;
+      var mode = varargs ? SYSCALLS.get() : 0;
       return FS.open(path, flags, mode).fd;
     } catch (e) {
     if (typeof FS == 'undefined' || !(e.name === 'ErrnoError')) throw e;
@@ -4800,8 +4994,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var nowIsMonotonic = 1;
   var __emscripten_get_now_is_monotonic = () => nowIsMonotonic;
 
-  var __emscripten_memcpy_js = (dest, src, num) => HEAPU8.copyWithin(dest, src, src + num);
-
   var __emscripten_throw_longjmp = () => {
       throw Infinity;
     };
@@ -4846,10 +5038,10 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
               }
             };
             addEventListener("message", Browser_setImmediate_messageHandler, true);
-            Browser.setImmediate = /** @type{function(function(): ?, ...?): number} */((func) => {
+            Browser.setImmediate = /** @type{function(function(): ?, ...?): number} */(function Browser_emulated_setImmediate(func) {
               setImmediates.push(func);
               if (ENVIRONMENT_IS_WORKER) {
-                Module['setImmediates'] ??= [];
+                if (Module['setImmediates'] === undefined) Module['setImmediates'] = [];
                 Module['setImmediates'].push(func);
                 postMessage({target: emscriptenMainLoopMessageId}); // In --proxy-to-worker, route the message via proxyClient.js
               } else postMessage(emscriptenMainLoopMessageId, "*"); // On the main thread, can just send the message to itself.
@@ -4874,70 +5066,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   ;
   
   
-  var runtimeKeepaliveCounter = 0;
-  var keepRuntimeAlive = () => noExitRuntime || runtimeKeepaliveCounter > 0;
-  var _proc_exit = (code) => {
-      EXITSTATUS = code;
-      if (!keepRuntimeAlive()) {
-        Module['onExit']?.(code);
-        ABORT = true;
-      }
-      quit_(code, new ExitStatus(code));
-    };
-  
-  /** @suppress {duplicate } */
-  /** @param {boolean|number=} implicit */
-  var exitJS = (status, implicit) => {
-      EXITSTATUS = status;
-  
-      checkUnflushedContent();
-  
-      // if exit() was called explicitly, warn the user if the runtime isn't actually being shut down
-      if (keepRuntimeAlive() && !implicit) {
-        var msg = `program exited (with status: ${status}), but keepRuntimeAlive() is set (counter=${runtimeKeepaliveCounter}) due to an async operation, so halting execution but not exiting the runtime or preventing further async execution (you can use emscripten_force_exit, if you want to force a true shutdown)`;
-        readyPromiseReject(msg);
-        err(msg);
-      }
-  
-      _proc_exit(status);
-    };
-  var _exit = exitJS;
-  
-  var handleException = (e) => {
-      // Certain exception types we do not treat as errors since they are used for
-      // internal control flow.
-      // 1. ExitStatus, which is thrown by exit()
-      // 2. "unwind", which is thrown by emscripten_unwind_to_js_event_loop() and others
-      //    that wish to return to JS event loop.
-      if (e instanceof ExitStatus || e == 'unwind') {
-        return EXITSTATUS;
-      }
-      checkStackCookie();
-      if (e instanceof WebAssembly.RuntimeError) {
-        if (_emscripten_stack_get_current() <= 0) {
-          err('Stack overflow detected.  You can try increasing -sSTACK_SIZE (currently set to 67108864)');
-        }
-      }
-      quit_(1, e);
-    };
-  
-  var maybeExit = () => {
-      if (!keepRuntimeAlive()) {
-        try {
-          _exit(EXITSTATUS);
-        } catch (e) {
-          handleException(e);
-        }
-      }
-    };
-  
-  
     /**
      * @param {number=} arg
      * @param {boolean=} noSetTiming
      */
   var setMainLoop = (browserIterationFunc, fps, simulateInfiniteLoop, arg, noSetTiming) => {
       assert(!Browser.mainLoop.func, 'emscripten_set_main_loop: there can only be one main loop function at once: call emscripten_cancel_main_loop to cancel the previous one before setting a new one with different parameters.');
+  
       Browser.mainLoop.func = browserIterationFunc;
       Browser.mainLoop.arg = arg;
   
@@ -4954,7 +5089,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       function checkIsRunning() {
         if (thisMainLoopId < Browser.mainLoop.currentlyRunningMainloop) {
           
-          maybeExit();
           return false;
         }
         return true;
@@ -5024,7 +5158,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // to queue the newest produced audio samples.
         // TODO: Consider adding pre- and post- rAF callbacks so that GL.newRenderingFrameStarted() and SDL.audio.queueNewAudioData()
         //       do not need to be hardcoded into this function, but can be more generic.
-        if (typeof SDL == 'object') SDL.audio?.queueNewAudioData?.();
+        if (typeof SDL == 'object' && SDL.audio && SDL.audio.queueNewAudioData) SDL.audio.queueNewAudioData();
   
         Browser.mainLoop.scheduler();
       }
@@ -5045,7 +5179,65 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       }
     };
   
+  var handleException = (e) => {
+      // Certain exception types we do not treat as errors since they are used for
+      // internal control flow.
+      // 1. ExitStatus, which is thrown by exit()
+      // 2. "unwind", which is thrown by emscripten_unwind_to_js_event_loop() and others
+      //    that wish to return to JS event loop.
+      if (e instanceof ExitStatus || e == 'unwind') {
+        return EXITSTATUS;
+      }
+      checkStackCookie();
+      if (e instanceof WebAssembly.RuntimeError) {
+        if (_emscripten_stack_get_current() <= 0) {
+          err('Stack overflow detected.  You can try increasing -sSTACK_SIZE (currently set to 67108864)');
+        }
+      }
+      quit_(1, e);
+    };
+  
+  
+  var runtimeKeepaliveCounter = 0;
+  var keepRuntimeAlive = () => noExitRuntime || runtimeKeepaliveCounter > 0;
+  
+  var _proc_exit = (code) => {
+      EXITSTATUS = code;
+      if (!keepRuntimeAlive()) {
+        if (Module['onExit']) Module['onExit'](code);
+        ABORT = true;
+      }
+      quit_(code, new ExitStatus(code));
+    };
+  
+  /** @suppress {duplicate } */
+  /** @param {boolean|number=} implicit */
+  var exitJS = (status, implicit) => {
+      EXITSTATUS = status;
+  
+      checkUnflushedContent();
+  
+      // if exit() was called explicitly, warn the user if the runtime isn't actually being shut down
+      if (keepRuntimeAlive() && !implicit) {
+        var msg = `program exited (with status: ${status}), but keepRuntimeAlive() is set (counter=${runtimeKeepaliveCounter}) due to an async operation, so halting execution but not exiting the runtime or preventing further async execution (you can use emscripten_force_exit, if you want to force a true shutdown)`;
+        readyPromiseReject(msg);
+        err(msg);
+      }
+  
+      _proc_exit(status);
+    };
+  var _exit = exitJS;
+  
   
+  var maybeExit = () => {
+      if (!keepRuntimeAlive()) {
+        try {
+          _exit(EXITSTATUS);
+        } catch (e) {
+          handleException(e);
+        }
+      }
+    };
   var callUserCallback = (func) => {
       if (ABORT) {
         err('user callback triggered after runtime exited or application aborted.  Ignoring.');
@@ -5106,7 +5298,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
             var expected = Browser.mainLoop.expectedBlockers;
             if (remaining) {
               if (remaining < expected) {
-                Module['setStatus'](`{message} ({expected - remaining}/{expected})`);
+                Module['setStatus'](message + ' (' + (expected - remaining) + '/' + expected + ')');
               } else {
                 Module['setStatus'](message);
               }
@@ -5124,10 +5316,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
             }
           }
           callUserCallback(func);
-          Module['postMainLoop']?.();
+          if (Module['postMainLoop']) Module['postMainLoop']();
         },
   },
-  useWebGL:false,
   isFullscreen:false,
   pointerLock:false,
   moduleContextCreatedCallbacks:[],
@@ -5146,7 +5337,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
         var imagePlugin = {};
         imagePlugin['canHandle'] = function imagePlugin_canHandle(name) {
-          return !Module['noImageDecoding'] && /\.(jpg|jpeg|png|bmp|webp)$/i.test(name);
+          return !Module.noImageDecoding && /\.(jpg|jpeg|png|bmp)$/i.test(name);
         };
         imagePlugin['handle'] = function imagePlugin_handle(byteArray, name, onload, onerror) {
           var b = new Blob([byteArray], { type: Browser.getMimetype(name) });
@@ -5166,11 +5357,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
             ctx.drawImage(img, 0, 0);
             preloadedImages[name] = canvas;
             URL.revokeObjectURL(url);
-            onload?.(byteArray);
+            if (onload) onload(byteArray);
           };
           img.onerror = (event) => {
             err(`Image ${url} could not be decoded`);
-            onerror?.();
+            if (onerror) onerror();
           };
           img.src = url;
         };
@@ -5178,7 +5369,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
         var audioPlugin = {};
         audioPlugin['canHandle'] = function audioPlugin_canHandle(name) {
-          return !Module['noAudioDecoding'] && name.substr(-4) in { '.ogg': 1, '.wav': 1, '.mp3': 1 };
+          return !Module.noAudioDecoding && name.substr(-4) in { '.ogg': 1, '.wav': 1, '.mp3': 1 };
         };
         audioPlugin['handle'] = function audioPlugin_handle(byteArray, name, onload, onerror) {
           var done = false;
@@ -5186,13 +5377,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
             if (done) return;
             done = true;
             preloadedAudios[name] = audio;
-            onload?.(byteArray);
+            if (onload) onload(byteArray);
           }
           function fail() {
             if (done) return;
             done = true;
             preloadedAudios[name] = new Audio(); // empty shim
-            onerror?.();
+            if (onerror) onerror();
           }
           var b = new Blob([byteArray], { type: Browser.getMimetype(name) });
           var url = URL.createObjectURL(b); // XXX we never revoke this!
@@ -5313,14 +5504,16 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
         if (setInModule) {
           if (!useWebGL) assert(typeof GLctx == 'undefined', 'cannot set in module if GLctx is used, but we are a non-GL context that would replace it');
+  
           Module.ctx = ctx;
           if (useWebGL) GL.makeContextCurrent(contextHandle);
-          Browser.useWebGL = useWebGL;
+          Module.useWebGL = useWebGL;
           Browser.moduleContextCreatedCallbacks.forEach((callback) => callback());
           Browser.init();
         }
         return ctx;
       },
+  destroyContext(canvas, useWebGL, setInModule) {}, // empty stub: accepts the arguments and performs no work
   fullscreenHandlersInstalled:false,
   lockPointer:undefined,
   resizeCanvas:undefined,
@@ -5356,8 +5549,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
               Browser.updateCanvasDimensions(canvas);
             }
           }
-          Module['onFullScreen']?.(Browser.isFullscreen);
-          Module['onFullscreen']?.(Browser.isFullscreen);
+          if (Module['onFullScreen']) Module['onFullScreen'](Browser.isFullscreen);
+          if (Module['onFullscreen']) Module['onFullscreen'](Browser.isFullscreen);
         }
   
         if (!Browser.fullscreenHandlersInstalled) {
@@ -5449,8 +5642,10 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }[name.substr(name.lastIndexOf('.')+1)];
       },
   getUserMedia(func) {
-        window.getUserMedia ||= navigator['getUserMedia'] ||
+        if (!window.getUserMedia) {
+          window.getUserMedia = navigator['getUserMedia'] ||
                                 navigator['mozGetUserMedia'];
+        }
         window.getUserMedia(func);
       },
   getMovementX(event) {
@@ -5508,39 +5703,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   },
   lastTouches:{
   },
-  calculateMouseCoords(pageX, pageY) {
-        // Calculate the movement based on the changes
-        // in the coordinates.
-        var rect = Module["canvas"].getBoundingClientRect();
-        var cw = Module["canvas"].width;
-        var ch = Module["canvas"].height;
-  
-        // Neither .scrollX or .pageXOffset are defined in a spec, but
-        // we prefer .scrollX because it is currently in a spec draft.
-        // (see: http://www.w3.org/TR/2013/WD-cssom-view-20131217/)
-        var scrollX = ((typeof window.scrollX != 'undefined') ? window.scrollX : window.pageXOffset);
-        var scrollY = ((typeof window.scrollY != 'undefined') ? window.scrollY : window.pageYOffset);
-        // If this assert lands, it's likely because the browser doesn't support scrollX or pageXOffset
-        // and we have no viable fallback.
-        assert((typeof scrollX != 'undefined') && (typeof scrollY != 'undefined'), 'Unable to retrieve scroll position, mouse positions likely broken.');
-        var adjustedX = pageX - (scrollX + rect.left);
-        var adjustedY = pageY - (scrollY + rect.top);
-  
-        // the canvas might be CSS-scaled compared to its backbuffer;
-        // SDL-using content will want mouse coordinates in terms
-        // of backbuffer units.
-        adjustedX = adjustedX * (cw / rect.width);
-        adjustedY = adjustedY * (ch / rect.height);
-  
-        return { x: adjustedX, y: adjustedY };
-      },
-  setMouseCoords(pageX, pageY) {
-        const {x, y} = Browser.calculateMouseCoords(pageX, pageY);
-        Browser.mouseMovementX = x - Browser.mouseX;
-        Browser.mouseMovementY = y - Browser.mouseY;
-        Browser.mouseX = x;
-        Browser.mouseY = y;
-      },
   calculateMouseEvent(event) { // event should be mousemove, mousedown or mouseup
         if (Browser.pointerLock) {
           // When the pointer is locked, calculate the coordinates
@@ -5554,31 +5716,71 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
             Browser.mouseMovementY = Browser.getMovementY(event);
           }
   
-          // add the mouse delta to the current absolute mouse position
-          Browser.mouseX += Browser.mouseMovementX;
-          Browser.mouseY += Browser.mouseMovementY;
+          // check if SDL is available
+          if (typeof SDL != "undefined") {
+            Browser.mouseX = SDL.mouseX + Browser.mouseMovementX;
+            Browser.mouseY = SDL.mouseY + Browser.mouseMovementY;
+          } else {
+            // just add the mouse delta to the current absolute mouse position
+            // FIXME: ideally this should be clamped against the canvas size and zero
+            Browser.mouseX += Browser.mouseMovementX;
+            Browser.mouseY += Browser.mouseMovementY;
+          }
         } else {
+          // Otherwise, calculate the movement based on the changes
+          // in the coordinates.
+          var rect = Module["canvas"].getBoundingClientRect();
+          var cw = Module["canvas"].width;
+          var ch = Module["canvas"].height;
+  
+          // Neither .scrollX or .pageXOffset are defined in a spec, but
+          // we prefer .scrollX because it is currently in a spec draft.
+          // (see: http://www.w3.org/TR/2013/WD-cssom-view-20131217/)
+          var scrollX = ((typeof window.scrollX != 'undefined') ? window.scrollX : window.pageXOffset);
+          var scrollY = ((typeof window.scrollY != 'undefined') ? window.scrollY : window.pageYOffset);
+          // If this assert lands, it's likely because the browser doesn't support scrollX or pageXOffset
+          // and we have no viable fallback.
+          assert((typeof scrollX != 'undefined') && (typeof scrollY != 'undefined'), 'Unable to retrieve scroll position, mouse positions likely broken.');
+  
           if (event.type === 'touchstart' || event.type === 'touchend' || event.type === 'touchmove') {
             var touch = event.touch;
             if (touch === undefined) {
               return; // the "touch" property is only defined in SDL
   
             }
-            var coords = Browser.calculateMouseCoords(touch.pageX, touch.pageY);
+            var adjustedX = touch.pageX - (scrollX + rect.left);
+            var adjustedY = touch.pageY - (scrollY + rect.top);
+  
+            adjustedX = adjustedX * (cw / rect.width);
+            adjustedY = adjustedY * (ch / rect.height);
+  
+            var coords = { x: adjustedX, y: adjustedY };
   
             if (event.type === 'touchstart') {
               Browser.lastTouches[touch.identifier] = coords;
               Browser.touches[touch.identifier] = coords;
             } else if (event.type === 'touchend' || event.type === 'touchmove') {
               var last = Browser.touches[touch.identifier];
-              last ||= coords;
+              if (!last) last = coords;
               Browser.lastTouches[touch.identifier] = last;
               Browser.touches[touch.identifier] = coords;
             }
             return;
           }
   
-          Browser.setMouseCoords(event.pageX, event.pageY);
+          var x = event.pageX - (scrollX + rect.left);
+          var y = event.pageY - (scrollY + rect.top);
+  
+          // the canvas might be CSS-scaled compared to its backbuffer;
+          // SDL-using content will want mouse coordinates in terms
+          // of backbuffer units.
+          x = x * (cw / rect.width);
+          y = y * (ch / rect.height);
+  
+          Browser.mouseMovementX = x - Browser.mouseX;
+          Browser.mouseMovementY = y - Browser.mouseY;
+          Browser.mouseX = x;
+          Browser.mouseY = y;
         }
       },
   resizeListeners:[],
@@ -5741,14 +5943,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       return EGL.chooseConfig(display, attrib_list, configs, config_size, numConfigs);
     };
 
-  var GLctx;
-  
   var webgl_enable_ANGLE_instanced_arrays = (ctx) => {
       // Extension available in WebGL 1 from Firefox 26 and Google Chrome 30 onwards. Core feature in WebGL 2.
       var ext = ctx.getExtension('ANGLE_instanced_arrays');
-      // Because this extension is a core function in WebGL 2, assign the extension entry points in place of
-      // where the core functions will reside in WebGL 2. This way the calling code can call these without
-      // having to dynamically branch depending if running against WebGL 1 or WebGL 2.
       if (ext) {
         ctx['vertexAttribDivisor'] = (index, divisor) => ext['vertexAttribDivisorANGLE'](index, divisor);
         ctx['drawArraysInstanced'] = (mode, first, count, primcount) => ext['drawArraysInstancedANGLE'](mode, first, count, primcount);
@@ -5778,71 +5975,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       }
     };
   
-  var webgl_enable_EXT_polygon_offset_clamp = (ctx) => {
-      return !!(ctx.extPolygonOffsetClamp = ctx.getExtension('EXT_polygon_offset_clamp'));
-    };
-  
-  var webgl_enable_EXT_clip_control = (ctx) => {
-      return !!(ctx.extClipControl = ctx.getExtension('EXT_clip_control'));
-    };
-  
-  var webgl_enable_WEBGL_polygon_mode = (ctx) => {
-      return !!(ctx.webglPolygonMode = ctx.getExtension('WEBGL_polygon_mode'));
-    };
-  
   var webgl_enable_WEBGL_multi_draw = (ctx) => {
       // Closure is expected to be allowed to minify the '.multiDrawWebgl' property, so not accessing it quoted.
       return !!(ctx.multiDrawWebgl = ctx.getExtension('WEBGL_multi_draw'));
     };
   
-  var getEmscriptenSupportedExtensions = (ctx) => {
-      // Restrict the list of advertised extensions to those that we actually
-      // support.
-      var supportedExtensions = [
-        // WebGL 1 extensions
-        'ANGLE_instanced_arrays',
-        'EXT_blend_minmax',
-        'EXT_disjoint_timer_query',
-        'EXT_frag_depth',
-        'EXT_shader_texture_lod',
-        'EXT_sRGB',
-        'OES_element_index_uint',
-        'OES_fbo_render_mipmap',
-        'OES_standard_derivatives',
-        'OES_texture_float',
-        'OES_texture_half_float',
-        'OES_texture_half_float_linear',
-        'OES_vertex_array_object',
-        'WEBGL_color_buffer_float',
-        'WEBGL_depth_texture',
-        'WEBGL_draw_buffers',
-        // WebGL 1 and WebGL 2 extensions
-        'EXT_clip_control',
-        'EXT_color_buffer_half_float',
-        'EXT_depth_clamp',
-        'EXT_float_blend',
-        'EXT_polygon_offset_clamp',
-        'EXT_texture_compression_bptc',
-        'EXT_texture_compression_rgtc',
-        'EXT_texture_filter_anisotropic',
-        'KHR_parallel_shader_compile',
-        'OES_texture_float_linear',
-        'WEBGL_blend_func_extended',
-        'WEBGL_compressed_texture_astc',
-        'WEBGL_compressed_texture_etc',
-        'WEBGL_compressed_texture_etc1',
-        'WEBGL_compressed_texture_s3tc',
-        'WEBGL_compressed_texture_s3tc_srgb',
-        'WEBGL_debug_renderer_info',
-        'WEBGL_debug_shaders',
-        'WEBGL_lose_context',
-        'WEBGL_multi_draw',
-        'WEBGL_polygon_mode'
-      ];
-      // .getSupportedExtensions() can return null if context is lost, so coerce to empty array.
-      return (ctx.getSupportedExtensions() || []).filter(ext => supportedExtensions.includes(ext));
-    };
-  
   
   var GL = {
   counter:1,
@@ -5860,8 +5997,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   stringCache:{
   },
   unpackAlignment:4,
-  unpackRowLength:0,
-  recordError:(errorCode) => {
+  recordError:function recordError(errorCode) {
         if (!GL.lastError) {
           GL.lastError = errorCode;
         }
@@ -5873,25 +6009,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
         return ret;
       },
-  genObject:(n, buffers, createFunction, objectTable
-        ) => {
-        for (var i = 0; i < n; i++) {
-          var buffer = GLctx[createFunction]();
-          var id = buffer && GL.getNewId(objectTable);
-          if (buffer) {
-            buffer.name = id;
-            objectTable[id] = buffer;
-          } else {
-            GL.recordError(0x502 /* GL_INVALID_OPERATION */);
-          }
-          HEAP32[(((buffers)+(i*4))>>2)] = id;
-        }
-      },
   getSource:(shader, count, string, length) => {
         var source = '';
         for (var i = 0; i < count; ++i) {
-          var len = length ? HEAPU32[(((length)+(i*4))>>2)] : undefined;
-          source += UTF8ToString(HEAPU32[(((string)+(i*4))>>2)], len);
+          var len = length ? HEAP32[(((length)+(i*4))>>2)] : -1;
+          source += UTF8ToString(HEAP32[(((string)+(i*4))>>2)], len < 0 ? undefined : len);
         }
         return source;
       },
@@ -5953,7 +6075,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // Active Emscripten GL layer context object.
         GL.currentContext = GL.contexts[contextHandle];
         // Active WebGL context object.
-        Module.ctx = GLctx = GL.currentContext?.GLctx;
+        Module.ctx = GLctx = GL.currentContext && GL.currentContext.GLctx;
         return !(contextHandle && !GLctx);
       },
   getContext:(contextHandle) => {
@@ -5978,7 +6100,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   initExtensions:(context) => {
         // If this function is called without a specific context object, init the
         // extensions of the currently active context.
-        context ||= GL.currentContext;
+        if (!context) context = GL.currentContext;
   
         if (context.initExtensionsDone) return;
         context.initExtensionsDone = true;
@@ -5988,21 +6110,22 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // Detect the presence of a few extensions manually, ction GL interop
         // layer itself will need to know if they exist.
   
-        // Extensions that are available in both WebGL 1 and WebGL 2
-        webgl_enable_WEBGL_multi_draw(GLctx);
-        webgl_enable_EXT_polygon_offset_clamp(GLctx);
-        webgl_enable_EXT_clip_control(GLctx);
-        webgl_enable_WEBGL_polygon_mode(GLctx);
         // Extensions that are only available in WebGL 1 (the calls will be no-ops
         // if called on a WebGL 2 context active)
         webgl_enable_ANGLE_instanced_arrays(GLctx);
         webgl_enable_OES_vertex_array_object(GLctx);
         webgl_enable_WEBGL_draw_buffers(GLctx);
+  
         {
           GLctx.disjointTimerQueryExt = GLctx.getExtension("EXT_disjoint_timer_query");
         }
   
-        getEmscriptenSupportedExtensions(GLctx).forEach((ext) => {
+        webgl_enable_WEBGL_multi_draw(GLctx);
+  
+        // .getSupportedExtensions() can return null if context is lost, so coerce
+        // to empty array.
+        var exts = GLctx.getSupportedExtensions() || [];
+        exts.forEach((ext) => {
           // WEBGL_lose_context, WEBGL_debug_renderer_info and WEBGL_debug_shaders
           // are not enabled by default.
           if (!ext.includes('lose_context') && !ext.includes('debug')) {
@@ -6011,6 +6134,12 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           }
         });
       },
+  getExtensions() {
+        // .getSupportedExtensions() can return null if context is lost, so coerce to empty array.
+        var exts = GLctx.getSupportedExtensions() || [];
+        exts = exts.concat(exts.map((e) => "GL_" + e));
+        return exts;
+      },
   };
   
   var _eglCreateContext = (display, config, hmm, contextAttribs) => {
@@ -6050,8 +6179,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
         // Run callbacks so that GL emulation works
         GL.makeContextCurrent(EGL.context);
-        Browser.useWebGL = true;
-        Browser.moduleContextCreatedCallbacks.forEach((callback) => callback());
+        Module.useWebGL = true;
+        Browser.moduleContextCreatedCallbacks.forEach(function(callback) { callback() });
   
         // Note: This function only creates a context, but it shall not make it active.
         GL.makeContextCurrent(null);
@@ -6408,36 +6537,44 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var runEmAsmFunction = (code, sigPtr, argbuf) => {
       var args = readEmAsmArgs(sigPtr, argbuf);
       assert(ASM_CONSTS.hasOwnProperty(code), `No EM_ASM constant found at address ${code}.  The loaded WebAssembly file is likely out of sync with the generated JavaScript.`);
-      return ASM_CONSTS[code](...args);
+      return ASM_CONSTS[code].apply(null, args);
     };
   var _emscripten_asm_const_int = (code, sigPtr, argbuf) => {
       return runEmAsmFunction(code, sigPtr, argbuf);
     };
 
-  var runMainThreadEmAsm = (emAsmAddr, sigPtr, argbuf, sync) => {
+  var runMainThreadEmAsm = (code, sigPtr, argbuf, sync) => {
       var args = readEmAsmArgs(sigPtr, argbuf);
-      assert(ASM_CONSTS.hasOwnProperty(emAsmAddr), `No EM_ASM constant found at address ${emAsmAddr}.  The loaded WebAssembly file is likely out of sync with the generated JavaScript.`);
-      return ASM_CONSTS[emAsmAddr](...args);
+      assert(ASM_CONSTS.hasOwnProperty(code), `No EM_ASM constant found at address ${code}.  The loaded WebAssembly file is likely out of sync with the generated JavaScript.`);
+      return ASM_CONSTS[code].apply(null, args);
     };
-  var _emscripten_asm_const_int_sync_on_main_thread = (emAsmAddr, sigPtr, argbuf) => runMainThreadEmAsm(emAsmAddr, sigPtr, argbuf, 1);
-
-  var _emscripten_asm_const_ptr_sync_on_main_thread = (emAsmAddr, sigPtr, argbuf) => runMainThreadEmAsm(emAsmAddr, sigPtr, argbuf, 1);
-
-  var _emscripten_cancel_main_loop = () => {
-      Browser.mainLoop.pause();
-      Browser.mainLoop.func = null;
+  var _emscripten_asm_const_int_sync_on_main_thread = (code, sigPtr, argbuf) => {
+      return runMainThreadEmAsm(code, sigPtr, argbuf, 1);
     };
 
   var _emscripten_date_now = () => Date.now();
 
+  var withStackSave = (f) => {
+      var stack = stackSave();
+      var ret = f();
+      stackRestore(stack);
+      return ret;
+    };
   var JSEvents = {
+  inEventHandler:0,
   removeAllEventListeners() {
-        while (JSEvents.eventHandlers.length) {
-          JSEvents._removeHandler(JSEvents.eventHandlers.length - 1);
+        for (var i = JSEvents.eventHandlers.length-1; i >= 0; --i) {
+          JSEvents._removeHandler(i);
         }
+        JSEvents.eventHandlers = [];
         JSEvents.deferredCalls = [];
       },
-  inEventHandler:0,
+  registerRemoveEventListeners() {
+        if (!JSEvents.removeEventListenersRegistered) {
+          __ATEXIT__.push(JSEvents.removeAllEventListeners);
+          JSEvents.removeEventListenersRegistered = true;
+        }
+      },
   deferredCalls:[],
   deferCall(targetFunction, precedence, argsList) {
         function arraysHaveEqualContent(arrA, arrB) {
@@ -6449,7 +6586,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           return true;
         }
         // Test if the given call was already queued, and if so, don't add it again.
-        for (var call of JSEvents.deferredCalls) {
+        for (var i in JSEvents.deferredCalls) {
+          var call = JSEvents.deferredCalls[i];
           if (call.targetFunction == targetFunction && arraysHaveEqualContent(call.argsList, argsList)) {
             return;
           }
@@ -6463,7 +6601,12 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         JSEvents.deferredCalls.sort((x,y) => x.precedence < y.precedence);
       },
   removeDeferredCalls(targetFunction) {
-        JSEvents.deferredCalls = JSEvents.deferredCalls.filter((call) => call.targetFunction != targetFunction);
+        for (var i = 0; i < JSEvents.deferredCalls.length; ++i) {
+          if (JSEvents.deferredCalls[i].targetFunction == targetFunction) {
+            JSEvents.deferredCalls.splice(i, 1);
+            --i;
+          }
+        }
       },
   canPerformEventHandlerRequests() {
         if (navigator.userActivation) {
@@ -6481,10 +6624,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         if (!JSEvents.canPerformEventHandlerRequests()) {
           return;
         }
-        var deferredCalls = JSEvents.deferredCalls;
-        JSEvents.deferredCalls = [];
-        for (var call of deferredCalls) {
-          call.targetFunction(...call.argsList);
+        for (var i = 0; i < JSEvents.deferredCalls.length; ++i) {
+          var call = JSEvents.deferredCalls[i];
+          JSEvents.deferredCalls.splice(i, 1);
+          --i;
+          call.targetFunction.apply(null, call.argsList);
         }
       },
   eventHandlers:[],
@@ -6507,25 +6651,25 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           console.dir(eventHandler);
           return -4;
         }
-        if (eventHandler.callbackfunc) {
-          eventHandler.eventListenerFunc = function(event) {
-            // Increment nesting count for the event handler.
-            ++JSEvents.inEventHandler;
-            JSEvents.currentEventHandler = eventHandler;
-            // Process any old deferred calls the user has placed.
-            JSEvents.runDeferredCalls();
-            // Process the actual event, calls back to user C code handler.
-            eventHandler.handlerFunc(event);
-            // Process any new deferred calls that were placed right now from this event handler.
-            JSEvents.runDeferredCalls();
-            // Out of event handler - restore nesting count.
-            --JSEvents.inEventHandler;
-          };
+        var jsEventHandler = function jsEventHandler(event) {
+          // Increment nesting count for the event handler.
+          ++JSEvents.inEventHandler;
+          JSEvents.currentEventHandler = eventHandler;
+          // Process any old deferred calls the user has placed.
+          JSEvents.runDeferredCalls();
+          // Process the actual event, calls back to user C code handler.
+          eventHandler.handlerFunc(event);
+          // Process any new deferred calls that were placed right now from this event handler.
+          JSEvents.runDeferredCalls();
+          // Out of event handler - restore nesting count.
+          --JSEvents.inEventHandler;
+        };
   
-          eventHandler.target.addEventListener(eventHandler.eventTypeString,
-                                               eventHandler.eventListenerFunc,
-                                               eventHandler.useCapture);
+        if (eventHandler.callbackfunc) {
+          eventHandler.eventListenerFunc = jsEventHandler;
+          eventHandler.target.addEventListener(eventHandler.eventTypeString, jsEventHandler, eventHandler.useCapture);
           JSEvents.eventHandlers.push(eventHandler);
+          JSEvents.registerRemoveEventListeners();
         } else {
           for (var i = 0; i < JSEvents.eventHandlers.length; ++i) {
             if (JSEvents.eventHandlers[i].target == eventHandler.target
@@ -6540,7 +6684,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         if (!target) return '';
         if (target == window) return '#window';
         if (target == screen) return '#screen';
-        return target?.nodeName || '';
+        return (target && target.nodeName) ? target.nodeName : '';
       },
   fullscreenEnabled() {
         return document.fullscreenEnabled
@@ -6565,15 +6709,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       return cString > 2 ? UTF8ToString(cString) : cString;
     };
   
-  /** @type {Object} */
   var specialHTMLTargets = [0, typeof document != 'undefined' ? document : 0, typeof window != 'undefined' ? window : 0];
-  /** @suppress {duplicate } */
   var findEventTarget = (target) => {
       target = maybeCStringToJsString(target);
       var domElement = specialHTMLTargets[target] || (typeof document != 'undefined' ? document.querySelector(target) : undefined);
       return domElement;
     };
-  var findCanvasEventTarget = findEventTarget;
+  var findCanvasEventTarget = (target) => findEventTarget(target);
   var _emscripten_get_canvas_element_size = (target, width, height) => {
       var canvas = findCanvasEventTarget(target);
       if (!canvas) return -4;
@@ -6583,26 +6725,21 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   
-  
-  
-  var stackAlloc = (sz) => __emscripten_stack_alloc(sz);
   var stringToUTF8OnStack = (str) => {
       var size = lengthBytesUTF8(str) + 1;
       var ret = stackAlloc(size);
       stringToUTF8(str, ret, size);
       return ret;
     };
-  var getCanvasElementSize = (target) => {
-      var sp = stackSave();
+  var getCanvasElementSize = (target) => withStackSave(() => {
       var w = stackAlloc(8);
       var h = w + 4;
   
       var targetInt = stringToUTF8OnStack(target.id);
       var ret = _emscripten_get_canvas_element_size(targetInt, w, h);
       var size = [HEAP32[((w)>>2)], HEAP32[((h)>>2)]];
-      stackRestore(sp);
       return size;
-    };
+    });
   
   
   var _emscripten_set_canvas_element_size = (target, width, height) => {
@@ -6614,7 +6751,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
     };
   
   
-  
   var setCanvasElementSize = (target, width, height) => {
       if (!target.controlTransferredOffscreen) {
         target.width = width;
@@ -6622,10 +6758,10 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       } else {
         // This function is being called from high-level JavaScript code instead of asm.js/Wasm,
         // and it needs to synchronously proxy over to another thread, so marshal the string onto the heap to do the call.
-        var sp = stackSave();
-        var targetInt = stringToUTF8OnStack(target.id);
-        _emscripten_set_canvas_element_size(targetInt, width, height);
-        stackRestore(sp);
+        withStackSave(() => {
+          var targetInt = stringToUTF8OnStack(target.id);
+          _emscripten_set_canvas_element_size(targetInt, width, height);
+        });
       }
     };
   var registerRestoreOldStyle = (canvas) => {
@@ -6686,7 +6822,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           if (canvas.GLctxObject) canvas.GLctxObject.GLctx.viewport(0, 0, oldWidth, oldHeight);
   
           if (currentFullscreenStrategy.canvasResizedCallback) {
-            ((a1, a2, a3) => dynCall_iiii(currentFullscreenStrategy.canvasResizedCallback, a1, a2, a3))(37, 0, currentFullscreenStrategy.canvasResizedCallbackUserData);
+            ((a1, a2, a3) => dynCall_iiii.apply(null, [currentFullscreenStrategy.canvasResizedCallback, a1, a2, a3]))(37, 0, currentFullscreenStrategy.canvasResizedCallbackUserData);
           }
         }
       }
@@ -6699,9 +6835,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   var setLetterbox = (element, topBottom, leftRight) => {
-      // Cannot use margin to specify letterboxes in FF or Chrome, since those ignore margins in fullscreen mode.
-      element.style.paddingLeft = element.style.paddingRight = leftRight + 'px';
-      element.style.paddingTop = element.style.paddingBottom = topBottom + 'px';
+        // Cannot use margin to specify letterboxes in FF or Chrome, since those ignore margins in fullscreen mode.
+        element.style.paddingLeft = element.style.paddingRight = leftRight + 'px';
+        element.style.paddingTop = element.style.paddingBottom = topBottom + 'px';
     };
   
   
@@ -6779,7 +6915,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       currentFullscreenStrategy = strategy;
   
       if (strategy.canvasResizedCallback) {
-        ((a1, a2, a3) => dynCall_iiii(strategy.canvasResizedCallback, a1, a2, a3))(37, 0, strategy.canvasResizedCallbackUserData);
+        ((a1, a2, a3) => dynCall_iiii.apply(null, [strategy.canvasResizedCallback, a1, a2, a3]))(37, 0, strategy.canvasResizedCallbackUserData);
       }
   
       return 0;
@@ -6829,19 +6965,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       return 0;
     };
 
-  
-  var __emscripten_runtime_keepalive_clear = () => {
-      noExitRuntime = false;
-      runtimeKeepaliveCounter = 0;
-    };
-  
-  
-  var _emscripten_force_exit = (status) => {
-      warnOnce('emscripten_force_exit cannot actually shut down the runtime, as the build does not have EXIT_RUNTIME set');
-      __emscripten_runtime_keepalive_clear();
-      _exit(status);
-    };
-
   var _emscripten_get_device_pixel_ratio = () => {
       return (typeof devicePixelRatio == 'number' && devicePixelRatio) || 1.0;
     };
@@ -6874,22 +6997,23 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       }
       for (var i = 0; i < e.buttons.length; ++i) {
         if (typeof e.buttons[i] == 'object') {
-          HEAP8[(eventStruct+i)+(1040)] = e.buttons[i].pressed;
+          HEAP32[(((eventStruct+i*4)+(1040))>>2)] = e.buttons[i].pressed;
         } else {
           // Assigning a boolean to HEAP32, that's ok, but Closure would like to warn about it:
           /** @suppress {checkTypes} */
-          HEAP8[(eventStruct+i)+(1040)] = e.buttons[i] == 1;
+          HEAP32[(((eventStruct+i*4)+(1040))>>2)] = e.buttons[i] == 1;
         }
       }
-      HEAP8[(eventStruct)+(1104)] = e.connected;
-      HEAP32[(((eventStruct)+(1108))>>2)] = e.index;
+      HEAP32[(((eventStruct)+(1296))>>2)] = e.connected;
+      HEAP32[(((eventStruct)+(1300))>>2)] = e.index;
       HEAP32[(((eventStruct)+(8))>>2)] = e.axes.length;
       HEAP32[(((eventStruct)+(12))>>2)] = e.buttons.length;
-      stringToUTF8(e.id, eventStruct + 1112, 64);
-      stringToUTF8(e.mapping, eventStruct + 1176, 64);
+      stringToUTF8(e.id, eventStruct + 1304, 64);
+      stringToUTF8(e.mapping, eventStruct + 1368, 64);
     };
   var _emscripten_get_gamepad_status = (index, gamepadState) => {
       if (!JSEvents.lastGamepadState) throw 'emscripten_get_gamepad_status() can only be called after having first called emscripten_sample_gamepad_data() and that function has returned EMSCRIPTEN_RESULT_SUCCESS!';
+  
       // INVALID_PARAM is returned on a Gamepad index that never was there.
       if (index < 0 || index >= JSEvents.lastGamepadState.length) return -5;
   
@@ -6917,7 +7041,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
     };
 
   /** @suppress {duplicate } */
-  var _glActiveTexture = (x0) => GLctx.activeTexture(x0);
+  function _glActiveTexture(x0) { GLctx.activeTexture(x0) }
   var _emscripten_glActiveTexture = _glActiveTexture;
 
   /** @suppress {duplicate } */
@@ -6976,33 +7100,33 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glBindVertexArrayOES = _glBindVertexArrayOES;
 
   /** @suppress {duplicate } */
-  var _glBlendColor = (x0, x1, x2, x3) => GLctx.blendColor(x0, x1, x2, x3);
+  function _glBlendColor(x0, x1, x2, x3) { GLctx.blendColor(x0, x1, x2, x3) }
   var _emscripten_glBlendColor = _glBlendColor;
 
   /** @suppress {duplicate } */
-  var _glBlendEquation = (x0) => GLctx.blendEquation(x0);
+  function _glBlendEquation(x0) { GLctx.blendEquation(x0) }
   var _emscripten_glBlendEquation = _glBlendEquation;
 
   /** @suppress {duplicate } */
-  var _glBlendEquationSeparate = (x0, x1) => GLctx.blendEquationSeparate(x0, x1);
+  function _glBlendEquationSeparate(x0, x1) { GLctx.blendEquationSeparate(x0, x1) }
   var _emscripten_glBlendEquationSeparate = _glBlendEquationSeparate;
 
   /** @suppress {duplicate } */
-  var _glBlendFunc = (x0, x1) => GLctx.blendFunc(x0, x1);
+  function _glBlendFunc(x0, x1) { GLctx.blendFunc(x0, x1) }
   var _emscripten_glBlendFunc = _glBlendFunc;
 
   /** @suppress {duplicate } */
-  var _glBlendFuncSeparate = (x0, x1, x2, x3) => GLctx.blendFuncSeparate(x0, x1, x2, x3);
+  function _glBlendFuncSeparate(x0, x1, x2, x3) { GLctx.blendFuncSeparate(x0, x1, x2, x3) }
   var _emscripten_glBlendFuncSeparate = _glBlendFuncSeparate;
 
   /** @suppress {duplicate } */
   var _glBufferData = (target, size, data, usage) => {
   
-      // N.b. here first form specifies a heap subarray, second form an integer
-      // size, so the ?: code here is polymorphic. It is advised to avoid
-      // randomly mixing both uses in calling code, to avoid any potential JS
-      // engine JIT issues.
-      GLctx.bufferData(target, data ? HEAPU8.subarray(data, data+size) : size, usage);
+        // N.b. here first form specifies a heap subarray, second form an integer
+        // size, so the ?: code here is polymorphic. It is advised to avoid
+        // randomly mixing both uses in calling code, to avoid any potential JS
+        // engine JIT issues.
+        GLctx.bufferData(target, data ? HEAPU8.subarray(data, data+size) : size, usage);
     };
   var _emscripten_glBufferData = _glBufferData;
 
@@ -7013,31 +7137,25 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glBufferSubData = _glBufferSubData;
 
   /** @suppress {duplicate } */
-  var _glCheckFramebufferStatus = (x0) => GLctx.checkFramebufferStatus(x0);
+  function _glCheckFramebufferStatus(x0) { return GLctx.checkFramebufferStatus(x0) }
   var _emscripten_glCheckFramebufferStatus = _glCheckFramebufferStatus;
 
   /** @suppress {duplicate } */
-  var _glClear = (x0) => GLctx.clear(x0);
+  function _glClear(x0) { GLctx.clear(x0) }
   var _emscripten_glClear = _glClear;
 
   /** @suppress {duplicate } */
-  var _glClearColor = (x0, x1, x2, x3) => GLctx.clearColor(x0, x1, x2, x3);
+  function _glClearColor(x0, x1, x2, x3) { GLctx.clearColor(x0, x1, x2, x3) }
   var _emscripten_glClearColor = _glClearColor;
 
   /** @suppress {duplicate } */
-  var _glClearDepthf = (x0) => GLctx.clearDepth(x0);
+  function _glClearDepthf(x0) { GLctx.clearDepth(x0) }
   var _emscripten_glClearDepthf = _glClearDepthf;
 
   /** @suppress {duplicate } */
-  var _glClearStencil = (x0) => GLctx.clearStencil(x0);
+  function _glClearStencil(x0) { GLctx.clearStencil(x0) }
   var _emscripten_glClearStencil = _glClearStencil;
 
-  /** @suppress {duplicate } */
-  var _glClipControlEXT = (origin, depth) => {
-      GLctx.extClipControl['clipControlEXT'](origin, depth);
-    };
-  var _emscripten_glClipControlEXT = _glClipControlEXT;
-
   /** @suppress {duplicate } */
   var _glColorMask = (red, green, blue, alpha) => {
       GLctx.colorMask(!!red, !!green, !!blue, !!alpha);
@@ -7052,27 +7170,22 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
 
   /** @suppress {duplicate } */
   var _glCompressedTexImage2D = (target, level, internalFormat, width, height, border, imageSize, data) => {
-      // `data` may be null here, which means "allocate uniniitalized space but
-      // don't upload" in GLES parlance, but `compressedTexImage2D` requires the
-      // final data parameter, so we simply pass a heap view starting at zero
-      // effectively uploading whatever happens to be near address zero.  See
-      // https://github.com/emscripten-core/emscripten/issues/19300.
-      GLctx.compressedTexImage2D(target, level, internalFormat, width, height, border, HEAPU8.subarray((data), data+imageSize));
+      GLctx.compressedTexImage2D(target, level, internalFormat, width, height, border, data ? HEAPU8.subarray((data), (data+imageSize)) : null);
     };
   var _emscripten_glCompressedTexImage2D = _glCompressedTexImage2D;
 
   /** @suppress {duplicate } */
   var _glCompressedTexSubImage2D = (target, level, xoffset, yoffset, width, height, format, imageSize, data) => {
-      GLctx.compressedTexSubImage2D(target, level, xoffset, yoffset, width, height, format, HEAPU8.subarray((data), data+imageSize));
+      GLctx.compressedTexSubImage2D(target, level, xoffset, yoffset, width, height, format, data ? HEAPU8.subarray((data), (data+imageSize)) : null);
     };
   var _emscripten_glCompressedTexSubImage2D = _glCompressedTexSubImage2D;
 
   /** @suppress {duplicate } */
-  var _glCopyTexImage2D = (x0, x1, x2, x3, x4, x5, x6, x7) => GLctx.copyTexImage2D(x0, x1, x2, x3, x4, x5, x6, x7);
+  function _glCopyTexImage2D(x0, x1, x2, x3, x4, x5, x6, x7) { GLctx.copyTexImage2D(x0, x1, x2, x3, x4, x5, x6, x7) }
   var _emscripten_glCopyTexImage2D = _glCopyTexImage2D;
 
   /** @suppress {duplicate } */
-  var _glCopyTexSubImage2D = (x0, x1, x2, x3, x4, x5, x6, x7) => GLctx.copyTexSubImage2D(x0, x1, x2, x3, x4, x5, x6, x7);
+  function _glCopyTexSubImage2D(x0, x1, x2, x3, x4, x5, x6, x7) { GLctx.copyTexSubImage2D(x0, x1, x2, x3, x4, x5, x6, x7) }
   var _emscripten_glCopyTexSubImage2D = _glCopyTexSubImage2D;
 
   /** @suppress {duplicate } */
@@ -7100,7 +7213,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glCreateShader = _glCreateShader;
 
   /** @suppress {duplicate } */
-  var _glCullFace = (x0) => GLctx.cullFace(x0);
+  function _glCullFace(x0) { GLctx.cullFace(x0) }
   var _emscripten_glCullFace = _glCullFace;
 
   /** @suppress {duplicate } */
@@ -7219,7 +7332,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glDeleteVertexArraysOES = _glDeleteVertexArraysOES;
 
   /** @suppress {duplicate } */
-  var _glDepthFunc = (x0) => GLctx.depthFunc(x0);
+  function _glDepthFunc(x0) { GLctx.depthFunc(x0) }
   var _emscripten_glDepthFunc = _glDepthFunc;
 
   /** @suppress {duplicate } */
@@ -7229,7 +7342,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glDepthMask = _glDepthMask;
 
   /** @suppress {duplicate } */
-  var _glDepthRangef = (x0, x1) => GLctx.depthRange(x0, x1);
+  function _glDepthRangef(x0, x1) { GLctx.depthRange(x0, x1) }
   var _emscripten_glDepthRangef = _glDepthRangef;
 
   /** @suppress {duplicate } */
@@ -7239,7 +7352,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glDetachShader = _glDetachShader;
 
   /** @suppress {duplicate } */
-  var _glDisable = (x0) => GLctx.disable(x0);
+  function _glDisable(x0) { GLctx.disable(x0) }
   var _emscripten_glDisable = _glDisable;
 
   /** @suppress {duplicate } */
@@ -7300,7 +7413,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glDrawElementsInstancedANGLE = _glDrawElementsInstancedANGLE;
 
   /** @suppress {duplicate } */
-  var _glEnable = (x0) => GLctx.enable(x0);
+  function _glEnable(x0) { GLctx.enable(x0) }
   var _emscripten_glEnable = _glEnable;
 
   /** @suppress {duplicate } */
@@ -7316,11 +7429,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glEndQueryEXT = _glEndQueryEXT;
 
   /** @suppress {duplicate } */
-  var _glFinish = () => GLctx.finish();
+  function _glFinish() { GLctx.finish() }
   var _emscripten_glFinish = _glFinish;
 
   /** @suppress {duplicate } */
-  var _glFlush = () => GLctx.flush();
+  function _glFlush() { GLctx.flush() }
   var _emscripten_glFlush = _glFlush;
 
   /** @suppress {duplicate } */
@@ -7338,19 +7451,35 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glFramebufferTexture2D = _glFramebufferTexture2D;
 
   /** @suppress {duplicate } */
-  var _glFrontFace = (x0) => GLctx.frontFace(x0);
+  function _glFrontFace(x0) { GLctx.frontFace(x0) }
   var _emscripten_glFrontFace = _glFrontFace;
 
+  var __glGenObject = (n, buffers, createFunction, objectTable
+      ) => {
+      for (var i = 0; i < n; i++) {
+        var buffer = GLctx[createFunction]();
+        var id = buffer && GL.getNewId(objectTable);
+        if (buffer) {
+          buffer.name = id;
+          objectTable[id] = buffer;
+        } else {
+          GL.recordError(0x502 /* GL_INVALID_OPERATION */);
+        }
+        HEAP32[(((buffers)+(i*4))>>2)] = id;
+      }
+    };
+  
   /** @suppress {duplicate } */
   var _glGenBuffers = (n, buffers) => {
-      GL.genObject(n, buffers, 'createBuffer', GL.buffers
+      __glGenObject(n, buffers, 'createBuffer', GL.buffers
         );
     };
   var _emscripten_glGenBuffers = _glGenBuffers;
 
+  
   /** @suppress {duplicate } */
   var _glGenFramebuffers = (n, ids) => {
-      GL.genObject(n, ids, 'createFramebuffer', GL.framebuffers
+      __glGenObject(n, ids, 'createFramebuffer', GL.framebuffers
         );
     };
   var _emscripten_glGenFramebuffers = _glGenFramebuffers;
@@ -7372,32 +7501,35 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
     };
   var _emscripten_glGenQueriesEXT = _glGenQueriesEXT;
 
+  
   /** @suppress {duplicate } */
   var _glGenRenderbuffers = (n, renderbuffers) => {
-      GL.genObject(n, renderbuffers, 'createRenderbuffer', GL.renderbuffers
+      __glGenObject(n, renderbuffers, 'createRenderbuffer', GL.renderbuffers
         );
     };
   var _emscripten_glGenRenderbuffers = _glGenRenderbuffers;
 
+  
   /** @suppress {duplicate } */
   var _glGenTextures = (n, textures) => {
-      GL.genObject(n, textures, 'createTexture', GL.textures
+      __glGenObject(n, textures, 'createTexture', GL.textures
         );
     };
   var _emscripten_glGenTextures = _glGenTextures;
 
   
+  
   /** @suppress {duplicate } */
-  var _glGenVertexArrays = (n, arrays) => {
-      GL.genObject(n, arrays, 'createVertexArray', GL.vaos
+  function _glGenVertexArrays(n, arrays) {
+      __glGenObject(n, arrays, 'createVertexArray', GL.vaos
         );
-    };
+    }
   /** @suppress {duplicate } */
   var _glGenVertexArraysOES = _glGenVertexArrays;
   var _emscripten_glGenVertexArraysOES = _glGenVertexArraysOES;
 
   /** @suppress {duplicate } */
-  var _glGenerateMipmap = (x0) => GLctx.generateMipmap(x0);
+  function _glGenerateMipmap(x0) { GLctx.generateMipmap(x0) }
   var _emscripten_glGenerateMipmap = _glGenerateMipmap;
 
   
@@ -7542,7 +7674,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
                 switch (type) {
                   case 0: HEAP32[(((p)+(i*4))>>2)] = result[i]; break;
                   case 2: HEAPF32[(((p)+(i*4))>>2)] = result[i]; break;
-                  case 4: HEAP8[(p)+(i)] = result[i] ? 1 : 0; break;
+                  case 4: HEAP8[(((p)+(i))>>0)] = result[i] ? 1 : 0; break;
                 }
               }
               return;
@@ -7567,7 +7699,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         case 1: writeI53ToI64(p, ret); break;
         case 0: HEAP32[((p)>>2)] = ret; break;
         case 2:   HEAPF32[((p)>>2)] = ret; break;
-        case 4: HEAP8[p] = ret ? 1 : 0; break;
+        case 4: HEAP8[((p)>>0)] = ret ? 1 : 0; break;
       }
     };
   
@@ -7649,24 +7781,21 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         HEAP32[((p)>>2)] = log.length + 1;
       } else if (pname == 0x8B87 /* GL_ACTIVE_UNIFORM_MAX_LENGTH */) {
         if (!program.maxUniformLength) {
-          var numActiveUniforms = GLctx.getProgramParameter(program, 0x8B86/*GL_ACTIVE_UNIFORMS*/);
-          for (var i = 0; i < numActiveUniforms; ++i) {
+          for (var i = 0; i < GLctx.getProgramParameter(program, 0x8B86/*GL_ACTIVE_UNIFORMS*/); ++i) {
             program.maxUniformLength = Math.max(program.maxUniformLength, GLctx.getActiveUniform(program, i).name.length+1);
           }
         }
         HEAP32[((p)>>2)] = program.maxUniformLength;
       } else if (pname == 0x8B8A /* GL_ACTIVE_ATTRIBUTE_MAX_LENGTH */) {
         if (!program.maxAttributeLength) {
-          var numActiveAttributes = GLctx.getProgramParameter(program, 0x8B89/*GL_ACTIVE_ATTRIBUTES*/);
-          for (var i = 0; i < numActiveAttributes; ++i) {
+          for (var i = 0; i < GLctx.getProgramParameter(program, 0x8B89/*GL_ACTIVE_ATTRIBUTES*/); ++i) {
             program.maxAttributeLength = Math.max(program.maxAttributeLength, GLctx.getActiveAttrib(program, i).name.length+1);
           }
         }
         HEAP32[((p)>>2)] = program.maxAttributeLength;
       } else if (pname == 0x8A35 /* GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH */) {
         if (!program.maxUniformBlockNameLength) {
-          var numActiveUniformBlocks = GLctx.getProgramParameter(program, 0x8A36/*GL_ACTIVE_UNIFORM_BLOCKS*/);
-          for (var i = 0; i < numActiveUniformBlocks; ++i) {
+          for (var i = 0; i < GLctx.getProgramParameter(program, 0x8A36/*GL_ACTIVE_UNIFORM_BLOCKS*/); ++i) {
             program.maxUniformBlockNameLength = Math.max(program.maxUniformBlockNameLength, GLctx.getActiveUniformBlockName(program, i).length+1);
           }
         }
@@ -7814,20 +7943,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glGetShaderiv = _glGetShaderiv;
 
   
-  
-  var webglGetExtensions = function $webglGetExtensions() {
-      var exts = getEmscriptenSupportedExtensions(GLctx);
-      exts = exts.concat(exts.map((e) => "GL_" + e));
-      return exts;
-    };
-  
   /** @suppress {duplicate } */
   var _glGetString = (name_) => {
       var ret = GL.stringCache[name_];
       if (!ret) {
         switch (name_) {
           case 0x1F03 /* GL_EXTENSIONS */:
-            ret = stringToNewUTF8(webglGetExtensions().join(' '));
+            ret = stringToNewUTF8(GL.getExtensions().join(' '));
             break;
           case 0x1F00 /* GL_VENDOR */:
           case 0x1F01 /* GL_RENDERER */:
@@ -7841,9 +7963,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
             break;
   
           case 0x1F02 /* GL_VERSION */:
-            var webGLVersion = GLctx.getParameter(0x1F02 /*GL_VERSION*/);
+            var glVersion = GLctx.getParameter(0x1F02 /*GL_VERSION*/);
             // return GLES version string corresponding to the version of the WebGL context
-            var glVersion = `OpenGL ES 2.0 (${webGLVersion})`;
+            {
+              glVersion = `OpenGL ES 2.0 (${glVersion})`;
+            }
             ret = stringToNewUTF8(glVersion);
             break;
           case 0x8B8C /* GL_SHADING_LANGUAGE_VERSION */:
@@ -7912,8 +8036,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // maps integer locations back to uniform name strings, so that we can lazily fetch uniform array locations
         program.uniformArrayNamesById = {};
   
-        var numActiveUniforms = GLctx.getProgramParameter(program, 0x8B86/*GL_ACTIVE_UNIFORMS*/);
-        for (i = 0; i < numActiveUniforms; ++i) {
+        for (i = 0; i < GLctx.getProgramParameter(program, 0x8B86/*GL_ACTIVE_UNIFORMS*/); ++i) {
           var u = GLctx.getActiveUniform(program, i);
           var nm = u.name;
           var sz = u.size;
@@ -8115,7 +8238,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glGetVertexAttribiv = _glGetVertexAttribiv;
 
   /** @suppress {duplicate } */
-  var _glHint = (x0, x1) => GLctx.hint(x0, x1);
+  function _glHint(x0, x1) { GLctx.hint(x0, x1) }
   var _emscripten_glHint = _glHint;
 
   /** @suppress {duplicate } */
@@ -8127,7 +8250,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glIsBuffer = _glIsBuffer;
 
   /** @suppress {duplicate } */
-  var _glIsEnabled = (x0) => GLctx.isEnabled(x0);
+  function _glIsEnabled(x0) { return GLctx.isEnabled(x0) }
   var _emscripten_glIsEnabled = _glIsEnabled;
 
   /** @suppress {duplicate } */
@@ -8191,7 +8314,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glIsVertexArrayOES = _glIsVertexArrayOES;
 
   /** @suppress {duplicate } */
-  var _glLineWidth = (x0) => GLctx.lineWidth(x0);
+  function _glLineWidth(x0) { GLctx.lineWidth(x0) }
   var _emscripten_glLineWidth = _glLineWidth;
 
   /** @suppress {duplicate } */
@@ -8207,43 +8330,29 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
 
   /** @suppress {duplicate } */
   var _glPixelStorei = (pname, param) => {
-      if (pname == 3317) {
+      if (pname == 0xCF5 /* GL_UNPACK_ALIGNMENT */) {
         GL.unpackAlignment = param;
-      } else if (pname == 3314) {
-        GL.unpackRowLength = param;
       }
       GLctx.pixelStorei(pname, param);
     };
   var _emscripten_glPixelStorei = _glPixelStorei;
 
   /** @suppress {duplicate } */
-  var _glPolygonModeWEBGL = (face, mode) => {
-      GLctx.webglPolygonMode['polygonModeWEBGL'](face, mode);
-    };
-  var _emscripten_glPolygonModeWEBGL = _glPolygonModeWEBGL;
-
-  /** @suppress {duplicate } */
-  var _glPolygonOffset = (x0, x1) => GLctx.polygonOffset(x0, x1);
+  function _glPolygonOffset(x0, x1) { GLctx.polygonOffset(x0, x1) }
   var _emscripten_glPolygonOffset = _glPolygonOffset;
 
-  /** @suppress {duplicate } */
-  var _glPolygonOffsetClampEXT = (factor, units, clamp) => {
-      GLctx.extPolygonOffsetClamp['polygonOffsetClampEXT'](factor, units, clamp);
-    };
-  var _emscripten_glPolygonOffsetClampEXT = _glPolygonOffsetClampEXT;
-
   /** @suppress {duplicate } */
   var _glQueryCounterEXT = (id, target) => {
       GLctx.disjointTimerQueryExt['queryCounterEXT'](GL.queries[id], target);
     };
   var _emscripten_glQueryCounterEXT = _glQueryCounterEXT;
 
-  var computeUnpackAlignedImageSize = (width, height, sizePerPixel) => {
+  var computeUnpackAlignedImageSize = (width, height, sizePerPixel, alignment) => {
       function roundedToNextMultipleOf(x, y) {
         return (x + y - 1) & -y;
       }
-      var plainRowSize = (GL.unpackRowLength || width) * sizePerPixel;
-      var alignedRowSize = roundedToNextMultipleOf(plainRowSize, GL.unpackAlignment);
+      var plainRowSize = width * sizePerPixel;
+      var alignedRowSize = roundedToNextMultipleOf(plainRowSize, alignment);
       return height * alignedRowSize;
     };
   
@@ -8286,14 +8395,15 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       return HEAPU16;
     };
   
-  var toTypedArrayIndex = (pointer, heap) =>
-      pointer >>> (31 - Math.clz32(heap.BYTES_PER_ELEMENT));
+  var heapAccessShiftForWebGLHeap = (heap) => 31 - Math.clz32(heap.BYTES_PER_ELEMENT);
   
   var emscriptenWebGLGetTexPixelData = (type, format, width, height, pixels, internalFormat) => {
       var heap = heapObjectForWebGLType(type);
-      var sizePerPixel = colorChannelsInGlTextureFormat(format) * heap.BYTES_PER_ELEMENT;
-      var bytes = computeUnpackAlignedImageSize(width, height, sizePerPixel);
-      return heap.subarray(toTypedArrayIndex(pixels, heap), toTypedArrayIndex(pixels + bytes, heap));
+      var shift = heapAccessShiftForWebGLHeap(heap);
+      var byteSize = 1<<shift;
+      var sizePerPixel = colorChannelsInGlTextureFormat(format) * byteSize;
+      var bytes = computeUnpackAlignedImageSize(width, height, sizePerPixel, GL.unpackAlignment);
+      return heap.subarray(pixels >> shift, pixels + bytes >> shift);
     };
   
   /** @suppress {duplicate } */
@@ -8314,7 +8424,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glReleaseShaderCompiler = _glReleaseShaderCompiler;
 
   /** @suppress {duplicate } */
-  var _glRenderbufferStorage = (x0, x1, x2, x3) => GLctx.renderbufferStorage(x0, x1, x2, x3);
+  function _glRenderbufferStorage(x0, x1, x2, x3) { GLctx.renderbufferStorage(x0, x1, x2, x3) }
   var _emscripten_glRenderbufferStorage = _glRenderbufferStorage;
 
   /** @suppress {duplicate } */
@@ -8324,7 +8434,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glSampleCoverage = _glSampleCoverage;
 
   /** @suppress {duplicate } */
-  var _glScissor = (x0, x1, x2, x3) => GLctx.scissor(x0, x1, x2, x3);
+  function _glScissor(x0, x1, x2, x3) { GLctx.scissor(x0, x1, x2, x3) }
   var _emscripten_glScissor = _glScissor;
 
   /** @suppress {duplicate } */
@@ -8342,39 +8452,38 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glShaderSource = _glShaderSource;
 
   /** @suppress {duplicate } */
-  var _glStencilFunc = (x0, x1, x2) => GLctx.stencilFunc(x0, x1, x2);
+  function _glStencilFunc(x0, x1, x2) { GLctx.stencilFunc(x0, x1, x2) }
   var _emscripten_glStencilFunc = _glStencilFunc;
 
   /** @suppress {duplicate } */
-  var _glStencilFuncSeparate = (x0, x1, x2, x3) => GLctx.stencilFuncSeparate(x0, x1, x2, x3);
+  function _glStencilFuncSeparate(x0, x1, x2, x3) { GLctx.stencilFuncSeparate(x0, x1, x2, x3) }
   var _emscripten_glStencilFuncSeparate = _glStencilFuncSeparate;
 
   /** @suppress {duplicate } */
-  var _glStencilMask = (x0) => GLctx.stencilMask(x0);
+  function _glStencilMask(x0) { GLctx.stencilMask(x0) }
   var _emscripten_glStencilMask = _glStencilMask;
 
   /** @suppress {duplicate } */
-  var _glStencilMaskSeparate = (x0, x1) => GLctx.stencilMaskSeparate(x0, x1);
+  function _glStencilMaskSeparate(x0, x1) { GLctx.stencilMaskSeparate(x0, x1) }
   var _emscripten_glStencilMaskSeparate = _glStencilMaskSeparate;
 
   /** @suppress {duplicate } */
-  var _glStencilOp = (x0, x1, x2) => GLctx.stencilOp(x0, x1, x2);
+  function _glStencilOp(x0, x1, x2) { GLctx.stencilOp(x0, x1, x2) }
   var _emscripten_glStencilOp = _glStencilOp;
 
   /** @suppress {duplicate } */
-  var _glStencilOpSeparate = (x0, x1, x2, x3) => GLctx.stencilOpSeparate(x0, x1, x2, x3);
+  function _glStencilOpSeparate(x0, x1, x2, x3) { GLctx.stencilOpSeparate(x0, x1, x2, x3) }
   var _emscripten_glStencilOpSeparate = _glStencilOpSeparate;
 
   
   /** @suppress {duplicate } */
   var _glTexImage2D = (target, level, internalFormat, width, height, border, format, type, pixels) => {
-      var pixelData = pixels ? emscriptenWebGLGetTexPixelData(type, format, width, height, pixels, internalFormat) : null;
-      GLctx.texImage2D(target, level, internalFormat, width, height, border, format, type, pixelData);
+      GLctx.texImage2D(target, level, internalFormat, width, height, border, format, type, pixels ? emscriptenWebGLGetTexPixelData(type, format, width, height, pixels, internalFormat) : null);
     };
   var _emscripten_glTexImage2D = _glTexImage2D;
 
   /** @suppress {duplicate } */
-  var _glTexParameterf = (x0, x1, x2) => GLctx.texParameterf(x0, x1, x2);
+  function _glTexParameterf(x0, x1, x2) { GLctx.texParameterf(x0, x1, x2) }
   var _emscripten_glTexParameterf = _glTexParameterf;
 
   /** @suppress {duplicate } */
@@ -8385,7 +8494,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glTexParameterfv = _glTexParameterfv;
 
   /** @suppress {duplicate } */
-  var _glTexParameteri = (x0, x1, x2) => GLctx.texParameteri(x0, x1, x2);
+  function _glTexParameteri(x0, x1, x2) { GLctx.texParameteri(x0, x1, x2) }
   var _emscripten_glTexParameteri = _glTexParameteri;
 
   /** @suppress {duplicate } */
@@ -8398,7 +8507,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   /** @suppress {duplicate } */
   var _glTexSubImage2D = (target, level, xoffset, yoffset, width, height, format, type, pixels) => {
-      var pixelData = pixels ? emscriptenWebGLGetTexPixelData(type, format, width, height, pixels, 0) : null;
+      var pixelData = null;
+      if (pixels) pixelData = emscriptenWebGLGetTexPixelData(type, format, width, height, pixels, 0);
       GLctx.texSubImage2D(target, level, xoffset, yoffset, width, height, format, type, pixelData);
     };
   var _emscripten_glTexSubImage2D = _glTexSubImage2D;
@@ -8418,13 +8528,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 288) {
         // avoid allocation when uploading few enough uniforms
-        var view = miniTempWebGLFloatBuffers[count];
+        var view = miniTempWebGLFloatBuffers[count-1];
         for (var i = 0; i < count; ++i) {
           view[i] = HEAPF32[(((value)+(4*i))>>2)];
         }
       } else
       {
-        var view = HEAPF32.subarray((((value)>>2)), ((value+count*4)>>2));
+        var view = HEAPF32.subarray((value)>>2, (value+count*4)>>2);
       }
       GLctx.uniform1fv(webglGetUniformLocation(location), view);
     };
@@ -8445,13 +8555,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 288) {
         // avoid allocation when uploading few enough uniforms
-        var view = miniTempWebGLIntBuffers[count];
+        var view = miniTempWebGLIntBuffers[count-1];
         for (var i = 0; i < count; ++i) {
           view[i] = HEAP32[(((value)+(4*i))>>2)];
         }
       } else
       {
-        var view = HEAP32.subarray((((value)>>2)), ((value+count*4)>>2));
+        var view = HEAP32.subarray((value)>>2, (value+count*4)>>2);
       }
       GLctx.uniform1iv(webglGetUniformLocation(location), view);
     };
@@ -8471,15 +8581,14 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 144) {
         // avoid allocation when uploading few enough uniforms
-        count *= 2;
-        var view = miniTempWebGLFloatBuffers[count];
-        for (var i = 0; i < count; i += 2) {
+        var view = miniTempWebGLFloatBuffers[2*count-1];
+        for (var i = 0; i < 2*count; i += 2) {
           view[i] = HEAPF32[(((value)+(4*i))>>2)];
           view[i+1] = HEAPF32[(((value)+(4*i+4))>>2)];
         }
       } else
       {
-        var view = HEAPF32.subarray((((value)>>2)), ((value+count*8)>>2));
+        var view = HEAPF32.subarray((value)>>2, (value+count*8)>>2);
       }
       GLctx.uniform2fv(webglGetUniformLocation(location), view);
     };
@@ -8499,15 +8608,14 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 144) {
         // avoid allocation when uploading few enough uniforms
-        count *= 2;
-        var view = miniTempWebGLIntBuffers[count];
-        for (var i = 0; i < count; i += 2) {
+        var view = miniTempWebGLIntBuffers[2*count-1];
+        for (var i = 0; i < 2*count; i += 2) {
           view[i] = HEAP32[(((value)+(4*i))>>2)];
           view[i+1] = HEAP32[(((value)+(4*i+4))>>2)];
         }
       } else
       {
-        var view = HEAP32.subarray((((value)>>2)), ((value+count*8)>>2));
+        var view = HEAP32.subarray((value)>>2, (value+count*8)>>2);
       }
       GLctx.uniform2iv(webglGetUniformLocation(location), view);
     };
@@ -8527,16 +8635,15 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 96) {
         // avoid allocation when uploading few enough uniforms
-        count *= 3;
-        var view = miniTempWebGLFloatBuffers[count];
-        for (var i = 0; i < count; i += 3) {
+        var view = miniTempWebGLFloatBuffers[3*count-1];
+        for (var i = 0; i < 3*count; i += 3) {
           view[i] = HEAPF32[(((value)+(4*i))>>2)];
           view[i+1] = HEAPF32[(((value)+(4*i+4))>>2)];
           view[i+2] = HEAPF32[(((value)+(4*i+8))>>2)];
         }
       } else
       {
-        var view = HEAPF32.subarray((((value)>>2)), ((value+count*12)>>2));
+        var view = HEAPF32.subarray((value)>>2, (value+count*12)>>2);
       }
       GLctx.uniform3fv(webglGetUniformLocation(location), view);
     };
@@ -8556,16 +8663,15 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 96) {
         // avoid allocation when uploading few enough uniforms
-        count *= 3;
-        var view = miniTempWebGLIntBuffers[count];
-        for (var i = 0; i < count; i += 3) {
+        var view = miniTempWebGLIntBuffers[3*count-1];
+        for (var i = 0; i < 3*count; i += 3) {
           view[i] = HEAP32[(((value)+(4*i))>>2)];
           view[i+1] = HEAP32[(((value)+(4*i+4))>>2)];
           view[i+2] = HEAP32[(((value)+(4*i+8))>>2)];
         }
       } else
       {
-        var view = HEAP32.subarray((((value)>>2)), ((value+count*12)>>2));
+        var view = HEAP32.subarray((value)>>2, (value+count*12)>>2);
       }
       GLctx.uniform3iv(webglGetUniformLocation(location), view);
     };
@@ -8585,12 +8691,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 72) {
         // avoid allocation when uploading few enough uniforms
-        var view = miniTempWebGLFloatBuffers[4*count];
+        var view = miniTempWebGLFloatBuffers[4*count-1];
         // hoist the heap out of the loop for size and for pthreads+growth.
         var heap = HEAPF32;
-        value = ((value)>>2);
-        count *= 4;
-        for (var i = 0; i < count; i += 4) {
+        value >>= 2;
+        for (var i = 0; i < 4 * count; i += 4) {
           var dst = value + i;
           view[i] = heap[dst];
           view[i + 1] = heap[dst + 1];
@@ -8599,7 +8704,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
       } else
       {
-        var view = HEAPF32.subarray((((value)>>2)), ((value+count*16)>>2));
+        var view = HEAPF32.subarray((value)>>2, (value+count*16)>>2);
       }
       GLctx.uniform4fv(webglGetUniformLocation(location), view);
     };
@@ -8619,9 +8724,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 72) {
         // avoid allocation when uploading few enough uniforms
-        count *= 4;
-        var view = miniTempWebGLIntBuffers[count];
-        for (var i = 0; i < count; i += 4) {
+        var view = miniTempWebGLIntBuffers[4*count-1];
+        for (var i = 0; i < 4*count; i += 4) {
           view[i] = HEAP32[(((value)+(4*i))>>2)];
           view[i+1] = HEAP32[(((value)+(4*i+4))>>2)];
           view[i+2] = HEAP32[(((value)+(4*i+8))>>2)];
@@ -8629,7 +8733,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
       } else
       {
-        var view = HEAP32.subarray((((value)>>2)), ((value+count*16)>>2));
+        var view = HEAP32.subarray((value)>>2, (value+count*16)>>2);
       }
       GLctx.uniform4iv(webglGetUniformLocation(location), view);
     };
@@ -8642,9 +8746,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 72) {
         // avoid allocation when uploading few enough uniforms
-        count *= 4;
-        var view = miniTempWebGLFloatBuffers[count];
-        for (var i = 0; i < count; i += 4) {
+        var view = miniTempWebGLFloatBuffers[4*count-1];
+        for (var i = 0; i < 4*count; i += 4) {
           view[i] = HEAPF32[(((value)+(4*i))>>2)];
           view[i+1] = HEAPF32[(((value)+(4*i+4))>>2)];
           view[i+2] = HEAPF32[(((value)+(4*i+8))>>2)];
@@ -8652,7 +8755,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
       } else
       {
-        var view = HEAPF32.subarray((((value)>>2)), ((value+count*16)>>2));
+        var view = HEAPF32.subarray((value)>>2, (value+count*16)>>2);
       }
       GLctx.uniformMatrix2fv(webglGetUniformLocation(location), !!transpose, view);
     };
@@ -8665,9 +8768,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 32) {
         // avoid allocation when uploading few enough uniforms
-        count *= 9;
-        var view = miniTempWebGLFloatBuffers[count];
-        for (var i = 0; i < count; i += 9) {
+        var view = miniTempWebGLFloatBuffers[9*count-1];
+        for (var i = 0; i < 9*count; i += 9) {
           view[i] = HEAPF32[(((value)+(4*i))>>2)];
           view[i+1] = HEAPF32[(((value)+(4*i+4))>>2)];
           view[i+2] = HEAPF32[(((value)+(4*i+8))>>2)];
@@ -8680,7 +8782,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
       } else
       {
-        var view = HEAPF32.subarray((((value)>>2)), ((value+count*36)>>2));
+        var view = HEAPF32.subarray((value)>>2, (value+count*36)>>2);
       }
       GLctx.uniformMatrix3fv(webglGetUniformLocation(location), !!transpose, view);
     };
@@ -8693,12 +8795,11 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       if (count <= 18) {
         // avoid allocation when uploading few enough uniforms
-        var view = miniTempWebGLFloatBuffers[16*count];
+        var view = miniTempWebGLFloatBuffers[16*count-1];
         // hoist the heap out of the loop for size and for pthreads+growth.
         var heap = HEAPF32;
-        value = ((value)>>2);
-        count *= 16;
-        for (var i = 0; i < count; i += 16) {
+        value >>= 2;
+        for (var i = 0; i < 16 * count; i += 16) {
           var dst = value + i;
           view[i] = heap[dst];
           view[i + 1] = heap[dst + 1];
@@ -8719,7 +8820,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
       } else
       {
-        var view = HEAPF32.subarray((((value)>>2)), ((value+count*64)>>2));
+        var view = HEAPF32.subarray((value)>>2, (value+count*64)>>2);
       }
       GLctx.uniformMatrix4fv(webglGetUniformLocation(location), !!transpose, view);
     };
@@ -8742,7 +8843,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glValidateProgram = _glValidateProgram;
 
   /** @suppress {duplicate } */
-  var _glVertexAttrib1f = (x0, x1) => GLctx.vertexAttrib1f(x0, x1);
+  function _glVertexAttrib1f(x0, x1) { GLctx.vertexAttrib1f(x0, x1) }
   var _emscripten_glVertexAttrib1f = _glVertexAttrib1f;
 
   /** @suppress {duplicate } */
@@ -8753,7 +8854,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glVertexAttrib1fv = _glVertexAttrib1fv;
 
   /** @suppress {duplicate } */
-  var _glVertexAttrib2f = (x0, x1, x2) => GLctx.vertexAttrib2f(x0, x1, x2);
+  function _glVertexAttrib2f(x0, x1, x2) { GLctx.vertexAttrib2f(x0, x1, x2) }
   var _emscripten_glVertexAttrib2f = _glVertexAttrib2f;
 
   /** @suppress {duplicate } */
@@ -8764,7 +8865,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glVertexAttrib2fv = _glVertexAttrib2fv;
 
   /** @suppress {duplicate } */
-  var _glVertexAttrib3f = (x0, x1, x2, x3) => GLctx.vertexAttrib3f(x0, x1, x2, x3);
+  function _glVertexAttrib3f(x0, x1, x2, x3) { GLctx.vertexAttrib3f(x0, x1, x2, x3) }
   var _emscripten_glVertexAttrib3f = _glVertexAttrib3f;
 
   /** @suppress {duplicate } */
@@ -8775,7 +8876,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glVertexAttrib3fv = _glVertexAttrib3fv;
 
   /** @suppress {duplicate } */
-  var _glVertexAttrib4f = (x0, x1, x2, x3, x4) => GLctx.vertexAttrib4f(x0, x1, x2, x3, x4);
+  function _glVertexAttrib4f(x0, x1, x2, x3, x4) { GLctx.vertexAttrib4f(x0, x1, x2, x3, x4) }
   var _emscripten_glVertexAttrib4f = _glVertexAttrib4f;
 
   /** @suppress {duplicate } */
@@ -8801,11 +8902,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var _emscripten_glVertexAttribPointer = _glVertexAttribPointer;
 
   /** @suppress {duplicate } */
-  var _glViewport = (x0, x1, x2, x3) => GLctx.viewport(x0, x1, x2, x3);
+  function _glViewport(x0, x1, x2, x3) { GLctx.viewport(x0, x1, x2, x3) }
   var _emscripten_glViewport = _glViewport;
 
   var _emscripten_has_asyncify = () => 1;
 
+  var _emscripten_memcpy_js = (dest, src, num) => HEAPU8.copyWithin(dest, src, src + num);
+
   
   
   
@@ -8824,9 +8927,10 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         return -3;
       }
   
-      // Queue this function call if we're not currently in an event handler and
-      // the user saw it appropriate to do so.
-      if (!JSEvents.canPerformEventHandlerRequests()) {
+      var canPerformRequests = JSEvents.canPerformEventHandlerRequests();
+  
+      // Queue this function call if we're not currently in an event handler and the user saw it appropriate to do so.
+      if (!canPerformRequests) {
         if (strategy.deferUntilInEventHandler) {
           JSEvents.deferCall(JSEvents_requestFullscreen, 1 /* priority over pointer lock */, [target, strategy]);
           return 1;
@@ -8861,9 +8965,10 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         return -1;
       }
   
-      // Queue this function call if we're not currently in an event handler and
-      // the user saw it appropriate to do so.
-      if (!JSEvents.canPerformEventHandlerRequests()) {
+      var canPerformRequests = JSEvents.canPerformEventHandlerRequests();
+  
+      // Queue this function call if we're not currently in an event handler and the user saw it appropriate to do so.
+      if (!canPerformRequests) {
         if (deferUntilInEventHandler) {
           JSEvents.deferCall(requestPointerLock, 2 /* priority below fullscreen */, [target]);
           return 1;
@@ -8881,7 +8986,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       // casing all heap size related code to treat 0 specially.
       2147483648;
   
-  
   var growMemory = (size) => {
       var b = wasmMemory.buffer;
       var pages = (size - b.byteLength + 65535) / 65536;
@@ -8929,6 +9033,8 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         return false;
       }
   
+      var alignUp = (x, multiple) => x + (multiple - x % multiple) % multiple;
+  
       // Loop through potential heap size increases. If we attempt a too eager
       // reservation that fails, cut down on the attempted size and reserve a
       // smaller bump instead. (max 3 times, chosen somewhat arbitrarily)
@@ -8937,7 +9043,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // but limit overreserving (default to capping at +96MB overgrowth at most)
         overGrownHeapSize = Math.min(overGrownHeapSize, requestedSize + 100663296 );
   
-        var newSize = Math.min(maxHeapSize, alignMemory(Math.max(requestedSize, overGrownHeapSize), 65536));
+        var newSize = Math.min(maxHeapSize, alignUp(Math.max(requestedSize, overGrownHeapSize), 65536));
   
         var replacement = growMemory(newSize);
         if (replacement) {
@@ -8949,16 +9055,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       return false;
     };
 
-  /** @suppress {checkTypes} */
   var _emscripten_sample_gamepad_data = () => {
-      try {
-        if (navigator.getGamepads) return (JSEvents.lastGamepadState = navigator.getGamepads())
-          ? 0 : -1;
-      } catch(e) {
-        err(`navigator.getGamepads() exists, but failed to execute with exception ${e}. Disabling Gamepad access.`);
-        navigator.getGamepads = null; // Disable getGamepads() so that it won't be attempted to be used again.
-      }
-      return -1;
+      return (JSEvents.lastGamepadState = (navigator.getGamepads ? navigator.getGamepads() : (navigator.webkitGetGamepads ? navigator.webkitGetGamepads() : null)))
+        ? 0 : -1;
     };
 
   var _emscripten_scan_registers = (func) => {
@@ -8970,7 +9069,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         safeSetTimeout(() => {
           var stackBegin = Asyncify.currData + 12;
           var stackEnd = HEAPU32[((Asyncify.currData)>>2)];
-          ((a1, a2) => dynCall_vii(func, a1, a2))(stackBegin, stackEnd);
+          ((a1, a2) => dynCall_vii.apply(null, [func, a1, a2]))(stackBegin, stackEnd);
           wakeUp();
         }, 0);
       });
@@ -8982,7 +9081,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var registerBeforeUnloadEventCallback = (target, userData, useCapture, callbackfunc, eventTypeId, eventTypeString) => {
       var beforeUnloadEventHandlerFunc = (e = event) => {
         // Note: This is always called on the main browser thread, since it needs synchronously return a value!
-        var confirmationMessage = ((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, 0, userData);
+        var confirmationMessage = ((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, 0, userData);
   
         if (confirmationMessage) {
           confirmationMessage = UTF8ToString(confirmationMessage);
@@ -9025,7 +9124,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         stringToUTF8(nodeName, focusEvent + 0, 128);
         stringToUTF8(id, focusEvent + 128, 128);
   
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, focusEvent, userData)) e.preventDefault();
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, focusEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9037,8 +9136,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       };
       return JSEvents.registerOrRemoveHandler(eventHandler);
     };
-  var _emscripten_set_blur_callback_on_thread = (target, userData, useCapture, callbackfunc, targetThread) =>
-      registerFocusEventCallback(target, userData, useCapture, callbackfunc, 12, "blur", targetThread);
+  var _emscripten_set_blur_callback_on_thread = (target, userData, useCapture, callbackfunc, targetThread) => {
+      return registerFocusEventCallback(target, userData, useCapture, callbackfunc, 12, "blur", targetThread);
+    };
 
 
   
@@ -9052,8 +9152,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       return 0;
     };
 
-  var _emscripten_set_focus_callback_on_thread = (target, userData, useCapture, callbackfunc, targetThread) =>
-      registerFocusEventCallback(target, userData, useCapture, callbackfunc, 13, "focus", targetThread);
+  var _emscripten_set_focus_callback_on_thread = (target, userData, useCapture, callbackfunc, targetThread) => {
+      return registerFocusEventCallback(target, userData, useCapture, callbackfunc, 13, "focus", targetThread);
+    };
 
   
   
@@ -9063,19 +9164,19 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       var isFullscreen = !!fullscreenElement;
       // Assigning a boolean to HEAP32 with expected type coercion.
       /** @suppress{checkTypes} */
-      HEAP8[eventStruct] = isFullscreen;
-      HEAP8[(eventStruct)+(1)] = JSEvents.fullscreenEnabled();
+      HEAP32[((eventStruct)>>2)] = isFullscreen;
+      HEAP32[(((eventStruct)+(4))>>2)] = JSEvents.fullscreenEnabled();
       // If transitioning to fullscreen, report info about the element that is now fullscreen.
       // If transitioning to windowed mode, report info about the element that just was fullscreen.
       var reportedElement = isFullscreen ? fullscreenElement : JSEvents.previousFullscreenElement;
       var nodeName = JSEvents.getNodeNameForTarget(reportedElement);
-      var id = reportedElement?.id || '';
-      stringToUTF8(nodeName, eventStruct + 2, 128);
-      stringToUTF8(id, eventStruct + 130, 128);
-      HEAP32[(((eventStruct)+(260))>>2)] = reportedElement ? reportedElement.clientWidth : 0;
-      HEAP32[(((eventStruct)+(264))>>2)] = reportedElement ? reportedElement.clientHeight : 0;
-      HEAP32[(((eventStruct)+(268))>>2)] = screen.width;
-      HEAP32[(((eventStruct)+(272))>>2)] = screen.height;
+      var id = (reportedElement && reportedElement.id) ? reportedElement.id : '';
+      stringToUTF8(nodeName, eventStruct + 8, 128);
+      stringToUTF8(id, eventStruct + 136, 128);
+      HEAP32[(((eventStruct)+(264))>>2)] = reportedElement ? reportedElement.clientWidth : 0;
+      HEAP32[(((eventStruct)+(268))>>2)] = reportedElement ? reportedElement.clientHeight : 0;
+      HEAP32[(((eventStruct)+(272))>>2)] = screen.width;
+      HEAP32[(((eventStruct)+(276))>>2)] = screen.height;
       if (isFullscreen) {
         JSEvents.previousFullscreenElement = fullscreenElement;
       }
@@ -9083,14 +9184,14 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   var registerFullscreenChangeEventCallback = (target, userData, useCapture, callbackfunc, eventTypeId, eventTypeString, targetThread) => {
-      if (!JSEvents.fullscreenChangeEvent) JSEvents.fullscreenChangeEvent = _malloc(276);
+      if (!JSEvents.fullscreenChangeEvent) JSEvents.fullscreenChangeEvent = _malloc(280);
   
       var fullscreenChangeEventhandlerFunc = (e = event) => {
         var fullscreenChangeEvent = JSEvents.fullscreenChangeEvent;
   
         fillFullscreenChangeEventData(fullscreenChangeEvent);
   
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, fullscreenChangeEvent, userData)) e.preventDefault();
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, fullscreenChangeEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9120,13 +9221,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   var registerGamepadEventCallback = (target, userData, useCapture, callbackfunc, eventTypeId, eventTypeString, targetThread) => {
-      if (!JSEvents.gamepadEvent) JSEvents.gamepadEvent = _malloc(1240);
+      if (!JSEvents.gamepadEvent) JSEvents.gamepadEvent = _malloc(1432);
   
       var gamepadEventHandlerFunc = (e = event) => {
         var gamepadEvent = JSEvents.gamepadEvent;
         fillGamepadEventData(gamepadEvent, e["gamepad"]);
   
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, gamepadEvent, userData)) e.preventDefault();
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, gamepadEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9139,15 +9240,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       };
       return JSEvents.registerOrRemoveHandler(eventHandler);
     };
-  
   var _emscripten_set_gamepadconnected_callback_on_thread = (userData, useCapture, callbackfunc, targetThread) => {
-      if (_emscripten_sample_gamepad_data()) return -1;
+      if (!navigator.getGamepads && !navigator.webkitGetGamepads) return -1;
       return registerGamepadEventCallback(2, userData, useCapture, callbackfunc, 26, "gamepadconnected", targetThread);
     };
 
-  
   var _emscripten_set_gamepaddisconnected_callback_on_thread = (userData, useCapture, callbackfunc, targetThread) => {
-      if (_emscripten_sample_gamepad_data()) return -1;
+      if (!navigator.getGamepads && !navigator.webkitGetGamepads) return -1;
       return registerGamepadEventCallback(2, userData, useCapture, callbackfunc, 27, "gamepaddisconnected", targetThread);
     };
 
@@ -9155,7 +9254,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   var registerKeyEventCallback = (target, userData, useCapture, callbackfunc, eventTypeId, eventTypeString, targetThread) => {
-      if (!JSEvents.keyEvent) JSEvents.keyEvent = _malloc(160);
+      if (!JSEvents.keyEvent) JSEvents.keyEvent = _malloc(176);
   
       var keyEventHandlerFunc = (e) => {
         assert(e);
@@ -9163,27 +9262,28 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         var keyEventData = JSEvents.keyEvent;
         HEAPF64[((keyEventData)>>3)] = e.timeStamp;
   
-        var idx = ((keyEventData)>>2);
+        var idx =((keyEventData)>>2);
   
         HEAP32[idx + 2] = e.location;
-        HEAP8[keyEventData + 12] = e.ctrlKey;
-        HEAP8[keyEventData + 13] = e.shiftKey;
-        HEAP8[keyEventData + 14] = e.altKey;
-        HEAP8[keyEventData + 15] = e.metaKey;
-        HEAP8[keyEventData + 16] = e.repeat;
-        HEAP32[idx + 5] = e.charCode;
-        HEAP32[idx + 6] = e.keyCode;
-        HEAP32[idx + 7] = e.which;
-        stringToUTF8(e.key || '', keyEventData + 32, 32);
-        stringToUTF8(e.code || '', keyEventData + 64, 32);
-        stringToUTF8(e.char || '', keyEventData + 96, 32);
-        stringToUTF8(e.locale || '', keyEventData + 128, 32);
-  
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, keyEventData, userData)) e.preventDefault();
+        HEAP32[idx + 3] = e.ctrlKey;
+        HEAP32[idx + 4] = e.shiftKey;
+        HEAP32[idx + 5] = e.altKey;
+        HEAP32[idx + 6] = e.metaKey;
+        HEAP32[idx + 7] = e.repeat;
+        HEAP32[idx + 8] = e.charCode;
+        HEAP32[idx + 9] = e.keyCode;
+        HEAP32[idx + 10] = e.which;
+        stringToUTF8(e.key || '', keyEventData + 44, 32);
+        stringToUTF8(e.code || '', keyEventData + 76, 32);
+        stringToUTF8(e.char || '', keyEventData + 108, 32);
+        stringToUTF8(e.locale || '', keyEventData + 140, 32);
+  
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, keyEventData, userData)) e.preventDefault();
       };
   
       var eventHandler = {
         target: findEventTarget(target),
+        allowsDeferredCalls: true,
         eventTypeString,
         callbackfunc,
         handlerFunc: keyEventHandlerFunc,
@@ -9202,7 +9302,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
 
   
   var _emscripten_set_main_loop = (func, fps, simulateInfiniteLoop) => {
-      var browserIterationFunc = (() => dynCall_v(func));
+      var browserIterationFunc = (() => dynCall_v.call(null, func));
       setMainLoop(browserIterationFunc, fps, simulateInfiniteLoop);
     };
 
@@ -9217,36 +9317,35 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       HEAP32[idx + 3] = e.screenY;
       HEAP32[idx + 4] = e.clientX;
       HEAP32[idx + 5] = e.clientY;
-      HEAP8[eventStruct + 24] = e.ctrlKey;
-      HEAP8[eventStruct + 25] = e.shiftKey;
-      HEAP8[eventStruct + 26] = e.altKey;
-      HEAP8[eventStruct + 27] = e.metaKey;
-      HEAP16[idx*2 + 14] = e.button;
-      HEAP16[idx*2 + 15] = e.buttons;
-  
-      HEAP32[idx + 8] = e["movementX"]
+      HEAP32[idx + 6] = e.ctrlKey;
+      HEAP32[idx + 7] = e.shiftKey;
+      HEAP32[idx + 8] = e.altKey;
+      HEAP32[idx + 9] = e.metaKey;
+      HEAP16[idx*2 + 20] = e.button;
+      HEAP16[idx*2 + 21] = e.buttons;
+  
+      HEAP32[idx + 11] = e["movementX"]
         ;
   
-      HEAP32[idx + 9] = e["movementY"]
+      HEAP32[idx + 12] = e["movementY"]
         ;
   
-      // Note: rect contains doubles (truncated to placate SAFE_HEAP, which is the same behaviour when writing to HEAP32 anyway)
       var rect = getBoundingClientRect(target);
-      HEAP32[idx + 10] = e.clientX - (rect.left | 0);
-      HEAP32[idx + 11] = e.clientY - (rect.top  | 0);
+      HEAP32[idx + 13] = e.clientX - rect.left;
+      HEAP32[idx + 14] = e.clientY - rect.top;
   
     };
   
   
   var registerMouseEventCallback = (target, userData, useCapture, callbackfunc, eventTypeId, eventTypeString, targetThread) => {
-      if (!JSEvents.mouseEvent) JSEvents.mouseEvent = _malloc(64);
+      if (!JSEvents.mouseEvent) JSEvents.mouseEvent = _malloc(72);
       target = findEventTarget(target);
   
       var mouseEventHandlerFunc = (e = event) => {
         // TODO: Make this access thread safe, or this could update live while app is reading it.
         fillMouseEventData(JSEvents.mouseEvent, e, target);
   
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, JSEvents.mouseEvent, userData)) e.preventDefault();
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, JSEvents.mouseEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9282,22 +9381,22 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       var isPointerlocked = !!pointerLockElement;
       // Assigning a boolean to HEAP32 with expected type coercion.
       /** @suppress{checkTypes} */
-      HEAP8[eventStruct] = isPointerlocked;
+      HEAP32[((eventStruct)>>2)] = isPointerlocked;
       var nodeName = JSEvents.getNodeNameForTarget(pointerLockElement);
-      var id = pointerLockElement?.id || '';
-      stringToUTF8(nodeName, eventStruct + 1, 128);
-      stringToUTF8(id, eventStruct + 129, 128);
+      var id = (pointerLockElement && pointerLockElement.id) ? pointerLockElement.id : '';
+      stringToUTF8(nodeName, eventStruct + 4, 128);
+      stringToUTF8(id, eventStruct + 132, 128);
     };
   
   
   var registerPointerlockChangeEventCallback = (target, userData, useCapture, callbackfunc, eventTypeId, eventTypeString, targetThread) => {
-      if (!JSEvents.pointerlockChangeEvent) JSEvents.pointerlockChangeEvent = _malloc(257);
+      if (!JSEvents.pointerlockChangeEvent) JSEvents.pointerlockChangeEvent = _malloc(260);
   
       var pointerlockChangeEventHandlerFunc = (e = event) => {
         var pointerlockChangeEvent = JSEvents.pointerlockChangeEvent;
         fillPointerlockChangeEventData(pointerlockChangeEvent);
   
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, pointerlockChangeEvent, userData)) e.preventDefault();
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, pointerlockChangeEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9347,16 +9446,16 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           return;
         }
         var uiEvent = JSEvents.uiEvent;
-        HEAP32[((uiEvent)>>2)] = 0; // always zero for resize and scroll
+        HEAP32[((uiEvent)>>2)] = e.detail;
         HEAP32[(((uiEvent)+(4))>>2)] = b.clientWidth;
         HEAP32[(((uiEvent)+(8))>>2)] = b.clientHeight;
         HEAP32[(((uiEvent)+(12))>>2)] = innerWidth;
         HEAP32[(((uiEvent)+(16))>>2)] = innerHeight;
         HEAP32[(((uiEvent)+(20))>>2)] = outerWidth;
         HEAP32[(((uiEvent)+(24))>>2)] = outerHeight;
-        HEAP32[(((uiEvent)+(28))>>2)] = pageXOffset | 0; // scroll offsets are float
-        HEAP32[(((uiEvent)+(32))>>2)] = pageYOffset | 0;
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, uiEvent, userData)) e.preventDefault();
+        HEAP32[(((uiEvent)+(28))>>2)] = pageXOffset;
+        HEAP32[(((uiEvent)+(32))>>2)] = pageYOffset;
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, uiEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9375,7 +9474,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   var registerTouchEventCallback = (target, userData, useCapture, callbackfunc, eventTypeId, eventTypeString, targetThread) => {
-      if (!JSEvents.touchEvent) JSEvents.touchEvent = _malloc(1552);
+      if (!JSEvents.touchEvent) JSEvents.touchEvent = _malloc(1696);
   
       target = findEventTarget(target);
   
@@ -9386,46 +9485,49 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         // only changed touches in e.changedTouches, and touches on target at a.targetTouches), mark a boolean in
         // each Touch object so that we can later loop only once over all touches we see to marshall over to Wasm.
   
-        for (let t of et) {
+        for (var i = 0; i < et.length; ++i) {
+          t = et[i];
           // Browser might recycle the generated Touch objects between each frame (Firefox on Android), so reset any
           // changed/target states we may have set from previous frame.
           t.isChanged = t.onTarget = 0;
           touches[t.identifier] = t;
         }
         // Mark which touches are part of the changedTouches list.
-        for (let t of e.changedTouches) {
+        for (var i = 0; i < e.changedTouches.length; ++i) {
+          t = e.changedTouches[i];
           t.isChanged = 1;
           touches[t.identifier] = t;
         }
         // Mark which touches are part of the targetTouches list.
-        for (let t of e.targetTouches) {
-          touches[t.identifier].onTarget = 1;
+        for (var i = 0; i < e.targetTouches.length; ++i) {
+          touches[e.targetTouches[i].identifier].onTarget = 1;
         }
   
         var touchEvent = JSEvents.touchEvent;
         HEAPF64[((touchEvent)>>3)] = e.timeStamp;
-        HEAP8[touchEvent + 12] = e.ctrlKey;
-        HEAP8[touchEvent + 13] = e.shiftKey;
-        HEAP8[touchEvent + 14] = e.altKey;
-        HEAP8[touchEvent + 15] = e.metaKey;
-        var idx = touchEvent + 16;
+        var idx =((touchEvent)>>2);// Pre-shift the ptr to index to HEAP32 to save code size
+        HEAP32[idx + 3] = e.ctrlKey;
+        HEAP32[idx + 4] = e.shiftKey;
+        HEAP32[idx + 5] = e.altKey;
+        HEAP32[idx + 6] = e.metaKey;
+        idx += 7; // Advance to the start of the touch array.
         var targetRect = getBoundingClientRect(target);
         var numTouches = 0;
-        for (let t of Object.values(touches)) {
-          var idx32 = ((idx)>>2); // Pre-shift the ptr to index to HEAP32 to save code size
-          HEAP32[idx32 + 0] = t.identifier;
-          HEAP32[idx32 + 1] = t.screenX;
-          HEAP32[idx32 + 2] = t.screenY;
-          HEAP32[idx32 + 3] = t.clientX;
-          HEAP32[idx32 + 4] = t.clientY;
-          HEAP32[idx32 + 5] = t.pageX;
-          HEAP32[idx32 + 6] = t.pageY;
-          HEAP8[idx + 28] = t.isChanged;
-          HEAP8[idx + 29] = t.onTarget;
-          HEAP32[idx32 + 8] = t.clientX - (targetRect.left | 0);
-          HEAP32[idx32 + 9] = t.clientY - (targetRect.top  | 0);
-  
-          idx += 48;
+        for (var i in touches) {
+          t = touches[i];
+          HEAP32[idx + 0] = t.identifier;
+          HEAP32[idx + 1] = t.screenX;
+          HEAP32[idx + 2] = t.screenY;
+          HEAP32[idx + 3] = t.clientX;
+          HEAP32[idx + 4] = t.clientY;
+          HEAP32[idx + 5] = t.pageX;
+          HEAP32[idx + 6] = t.pageY;
+          HEAP32[idx + 7] = t.isChanged;
+          HEAP32[idx + 8] = t.onTarget;
+          HEAP32[idx + 9] = t.clientX - targetRect.left;
+          HEAP32[idx + 10] = t.clientY - targetRect.top;
+  
+          idx += 13;
   
           if (++numTouches > 31) {
             break;
@@ -9433,7 +9535,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         }
         HEAP32[(((touchEvent)+(8))>>2)] = numTouches;
   
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, touchEvent, userData)) e.preventDefault();
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, touchEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9465,7 +9567,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       // Assigning a boolean to HEAP32 with expected type coercion.
       /** @suppress{checkTypes} */
-      HEAP8[eventStruct] = document.hidden;
+      HEAP32[((eventStruct)>>2)] = document.hidden;
       HEAP32[(((eventStruct)+(4))>>2)] = visibilityState;
     };
   
@@ -9478,7 +9580,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
         fillVisibilityChangeEventData(visibilityChangeEvent);
   
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, visibilityChangeEvent, userData)) e.preventDefault();
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, visibilityChangeEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9503,17 +9605,17 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   var registerWheelEventCallback = (target, userData, useCapture, callbackfunc, eventTypeId, eventTypeString, targetThread) => {
-      if (!JSEvents.wheelEvent) JSEvents.wheelEvent = _malloc(96);
+      if (!JSEvents.wheelEvent) JSEvents.wheelEvent = _malloc(104);
   
       // The DOM Level 3 events spec event 'wheel'
       var wheelHandlerFunc = (e = event) => {
         var wheelEvent = JSEvents.wheelEvent;
         fillMouseEventData(wheelEvent, e, target);
-        HEAPF64[(((wheelEvent)+(64))>>3)] = e["deltaX"];
-        HEAPF64[(((wheelEvent)+(72))>>3)] = e["deltaY"];
-        HEAPF64[(((wheelEvent)+(80))>>3)] = e["deltaZ"];
-        HEAP32[(((wheelEvent)+(88))>>2)] = e["deltaMode"];
-        if (((a1, a2, a3) => dynCall_iiii(callbackfunc, a1, a2, a3))(eventTypeId, wheelEvent, userData)) e.preventDefault();
+        HEAPF64[(((wheelEvent)+(72))>>3)] = e["deltaX"];
+        HEAPF64[(((wheelEvent)+(80))>>3)] = e["deltaY"];
+        HEAPF64[(((wheelEvent)+(88))>>3)] = e["deltaZ"];
+        HEAP32[(((wheelEvent)+(96))>>2)] = e["deltaMode"];
+        if (((a1, a2, a3) => dynCall_iiii.apply(null, [callbackfunc, a1, a2, a3]))(eventTypeId, wheelEvent, userData)) e.preventDefault();
       };
   
       var eventHandler = {
@@ -9589,11 +9691,12 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   var stringToAscii = (str, buffer) => {
       for (var i = 0; i < str.length; ++i) {
         assert(str.charCodeAt(i) === (str.charCodeAt(i) & 0xff));
-        HEAP8[buffer++] = str.charCodeAt(i);
+        HEAP8[((buffer++)>>0)] = str.charCodeAt(i);
       }
       // Null-terminate the string
-      HEAP8[buffer] = 0;
+      HEAP8[((buffer)>>0)] = 0;
     };
+  
   var _environ_get = (__environ, environ_buf) => {
       var bufSize = 0;
       getEnvStrings().forEach((string, i) => {
@@ -9605,6 +9708,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       return 0;
     };
 
+  
   var _environ_sizes_get = (penviron_count, penviron_buf_size) => {
       var strings = getEnvStrings();
       HEAPU32[((penviron_count)>>2)] = strings.length;
@@ -9637,7 +9741,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         if (curr < 0) return -1;
         ret += curr;
         if (curr < len) break; // nothing more to read
-        if (typeof offset != 'undefined') {
+        if (typeof offset !== 'undefined') {
           offset += curr;
         }
       }
@@ -9664,7 +9768,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       return ((hi + 0x200000) >>> 0 < 0x400001 - !!lo) ? (lo >>> 0) + hi * 4294967296 : NaN;
     };
   function _fd_seek(fd,offset_low, offset_high,whence,newOffset) {
-    var offset = convertI32PairToI53Checked(offset_low, offset_high);
+    var offset = convertI32PairToI53Checked(offset_low, offset_high);;
   
     
   try {
@@ -9718,11 +9822,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         var curr = FS.write(stream, HEAP8, ptr, len, offset);
         if (curr < 0) return -1;
         ret += curr;
-        if (curr < len) {
-          // No more space to write.
-          break;
-        }
-        if (typeof offset != 'undefined') {
+        if (typeof offset !== 'undefined') {
           offset += curr;
         }
       }
@@ -9783,58 +9883,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
 
   var _mp_js_time_ms = () => Date.now();
 
-  var listenOnce = (object, event, func) => {
-      object.addEventListener(event, func, { 'once': true });
-    };
-  /** @param {Object=} elements */
-  var autoResumeAudioContext = (ctx, elements) => {
-      if (!elements) {
-        elements = [document, document.getElementById('canvas')];
-      }
-      ['keydown', 'mousedown', 'touchstart'].forEach((event) => {
-        elements.forEach((element) => {
-          if (element) {
-            listenOnce(element, event, () => {
-              if (ctx.state === 'suspended') ctx.resume();
-            });
-          }
-        });
-      });
-    };
-
-  var dynCallLegacy = (sig, ptr, args) => {
-      sig = sig.replace(/p/g, 'i')
-      assert(('dynCall_' + sig) in Module, `bad function pointer type - dynCall function not found for sig '${sig}'`);
-      if (args?.length) {
-        // j (64-bit integer) must be passed in as two numbers [low 32, high 32].
-        assert(args.length === sig.substring(1).replace(/j/g, '--').length);
-      } else {
-        assert(sig.length == 1);
-      }
-      var f = Module['dynCall_' + sig];
-      return f(ptr, ...args);
-    };
-  
-  var wasmTableMirror = [];
-  
-  /** @type {WebAssembly.Table} */
-  var wasmTable;
-  var getWasmTableEntry = (funcPtr) => {
-      var func = wasmTableMirror[funcPtr];
-      if (!func) {
-        if (funcPtr >= wasmTableMirror.length) wasmTableMirror.length = funcPtr + 1;
-        wasmTableMirror[funcPtr] = func = wasmTable.get(funcPtr);
-      }
-      assert(wasmTable.get(funcPtr) == func, 'JavaScript-side Wasm function table mirror is out of date!');
-      return func;
-    };
-  var dynCall = (sig, ptr, args = []) => {
-      var rtn = dynCallLegacy(sig, ptr, args);
-      return rtn;
-    };
-
-
-
 
 
   var runAndAbortIfError = (func) => {
@@ -9881,56 +9929,63 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   instrumentWasmImports(imports) {
         var importPattern = /^(invoke_.*|__asyncjs__.*)$/;
   
-        for (let [x, original] of Object.entries(imports)) {
-          if (typeof original == 'function') {
-            let isAsyncifyImport = original.isAsync || importPattern.test(x);
-            imports[x] = (...args) => {
-              var originalAsyncifyState = Asyncify.state;
-              try {
-                return original(...args);
-              } finally {
-                // Only asyncify-declared imports are allowed to change the
-                // state.
-                // Changing the state from normal to disabled is allowed (in any
-                // function) as that is what shutdown does (and we don't have an
-                // explicit list of shutdown imports).
-                var changedToDisabled =
-                      originalAsyncifyState === Asyncify.State.Normal &&
-                      Asyncify.state        === Asyncify.State.Disabled;
-                // invoke_* functions are allowed to change the state if we do
-                // not ignore indirect calls.
-                var ignoredInvoke = x.startsWith('invoke_') &&
-                                    true;
-                if (Asyncify.state !== originalAsyncifyState &&
-                    !isAsyncifyImport &&
-                    !changedToDisabled &&
-                    !ignoredInvoke) {
-                  throw new Error(`import ${x} was not in ASYNCIFY_IMPORTS, but changed the state`);
+        for (var x in imports) {
+          (function(x) {
+            var original = imports[x];
+            var sig = original.sig;
+            if (typeof original == 'function') {
+              var isAsyncifyImport = original.isAsync || importPattern.test(x);
+              imports[x] = function() {
+                var originalAsyncifyState = Asyncify.state;
+                try {
+                  return original.apply(null, arguments);
+                } finally {
+                  // Only asyncify-declared imports are allowed to change the
+                  // state.
+                  // Changing the state from normal to disabled is allowed (in any
+                  // function) as that is what shutdown does (and we don't have an
+                  // explicit list of shutdown imports).
+                  var changedToDisabled =
+                        originalAsyncifyState === Asyncify.State.Normal &&
+                        Asyncify.state        === Asyncify.State.Disabled;
+                  // invoke_* functions are allowed to change the state if we do
+                  // not ignore indirect calls.
+                  var ignoredInvoke = x.startsWith('invoke_') &&
+                                      true;
+                  if (Asyncify.state !== originalAsyncifyState &&
+                      !isAsyncifyImport &&
+                      !changedToDisabled &&
+                      !ignoredInvoke) {
+                    throw new Error(`import ${x} was not in ASYNCIFY_IMPORTS, but changed the state`);
+                  }
                 }
-              }
-            };
-          }
+              };
+            }
+          })(x);
         }
       },
   instrumentWasmExports(exports) {
         var ret = {};
-        for (let [x, original] of Object.entries(exports)) {
-          if (typeof original == 'function') {
-            ret[x] = (...args) => {
-              Asyncify.exportCallStack.push(x);
-              try {
-                return original(...args);
-              } finally {
-                if (!ABORT) {
-                  var y = Asyncify.exportCallStack.pop();
-                  assert(y === x);
-                  Asyncify.maybeStopUnwind();
+        for (var x in exports) {
+          (function(x) {
+            var original = exports[x];
+            if (typeof original == 'function') {
+              ret[x] = function() {
+                Asyncify.exportCallStack.push(x);
+                try {
+                  return original.apply(null, arguments);
+                } finally {
+                  if (!ABORT) {
+                    var y = Asyncify.exportCallStack.pop();
+                    assert(y === x);
+                    Asyncify.maybeStopUnwind();
+                  }
                 }
-              }
-            };
-          } else {
-            ret[x] = original;
-          }
+              };
+            } else {
+              ret[x] = original;
+            }
+          })(x);
         }
         return ret;
       },
@@ -10009,22 +10064,18 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
         var rewindId = Asyncify.getCallStackId(bottomOfCallStack);
         HEAP32[(((ptr)+(8))>>2)] = rewindId;
       },
-  getDataRewindFuncName(ptr) {
+  getDataRewindFunc(ptr) {
         var id = HEAP32[(((ptr)+(8))>>2)];
         var name = Asyncify.callStackIdToName[id];
-        return name;
-      },
-  getDataRewindFunc(name) {
         var func = wasmExports[name];
         return func;
       },
   doRewind(ptr) {
-        var name = Asyncify.getDataRewindFuncName(ptr);
-        var func = Asyncify.getDataRewindFunc(name);
+        var start = Asyncify.getDataRewindFunc(ptr);
         // Once we have rewound and the stack we no longer need to artificially
         // keep the runtime alive.
         
-        return func();
+        return start();
       },
   handleSleep(startAsync) {
         assert(Asyncify.state !== Asyncify.State.Disabled, 'Asyncify cannot be done during or after the runtime exits');
@@ -10110,7 +10161,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
           _free(Asyncify.currData);
           Asyncify.currData = null;
           // Call all sleep callbacks now that the sleep-resume is all done.
-          Asyncify.sleepCallbacks.forEach(callUserCallback);
+          Asyncify.sleepCallbacks.forEach((func) => callUserCallback(func));
         } else {
           abort(`invalid state: ${Asyncify.state}`);
         }
@@ -10139,9 +10190,6 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
   
   
-  
-  
-  
     /**
      * @param {string|null=} returnType
      * @param {Array=} argTypes
@@ -10168,6 +10216,7 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
   
       function convertReturnValue(ret) {
         if (returnType === 'string') {
+          
           return UTF8ToString(ret);
         }
         if (returnType === 'boolean') return Boolean(ret);
@@ -10191,13 +10240,13 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       }
       // Data for a previous async operation that was in flight before us.
       var previousAsync = Asyncify.currData;
-      var ret = func(...cArgs);
+      var ret = func.apply(null, cArgs);
       function onDone(ret) {
         runtimeKeepalivePop();
         if (stack !== 0) stackRestore(stack);
         return convertReturnValue(ret);
       }
-    var asyncMode = opts?.async;
+    var asyncMode = opts && opts.async;
   
       // Keep the runtime alive through all calls. Note that this call might not be
       // async, but for simplicity we push and pop in all calls.
@@ -10232,7 +10281,9 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
      * @param {Object=} opts
      */
   var cwrap = (ident, returnType, argTypes, opts) => {
-      return (...args) => ccall(ident, returnType, argTypes, args, opts);
+      return function() {
+        return ccall(ident, returnType, argTypes, arguments, opts);
+      }
     };
 
 
@@ -10243,26 +10294,57 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
 
 
 
-  var FS_createPath = FS.createPath;
-
 
 
   var FS_unlink = (path) => FS.unlink(path);
 
-  var FS_createLazyFile = FS.createLazyFile;
-
-  var FS_createDevice = FS.createDevice;
-
+  var FSNode = /** @constructor */ function(parent, name, mode, rdev) {
+    if (!parent) {
+      parent = this;  // root node sets parent to itself
+    }
+    this.parent = parent;
+    this.mount = parent.mount;
+    this.mounted = null;
+    this.id = FS.nextInode++;
+    this.name = name;
+    this.mode = mode;
+    this.node_ops = {};
+    this.stream_ops = {};
+    this.rdev = rdev;
+  };
+  var readMode = 292/*292*/ | 73/*73*/;
+  var writeMode = 146/*146*/;
+  Object.defineProperties(FSNode.prototype, {
+   read: {
+    get: /** @this{FSNode} */function() {
+     return (this.mode & readMode) === readMode;
+    },
+    set: /** @this{FSNode} */function(val) {
+     val ? this.mode |= readMode : this.mode &= ~readMode;
+    }
+   },
+   write: {
+    get: /** @this{FSNode} */function() {
+     return (this.mode & writeMode) === writeMode;
+    },
+    set: /** @this{FSNode} */function(val) {
+     val ? this.mode |= writeMode : this.mode &= ~writeMode;
+    }
+   },
+   isFolder: {
+    get: /** @this{FSNode} */function() {
+     return FS.isDir(this.mode);
+    }
+   },
+   isDevice: {
+    get: /** @this{FSNode} */function() {
+     return FS.isChrdev(this.mode);
+    }
+   }
+  });
+  FS.FSNode = FSNode;
   FS.createPreloadedFile = FS_createPreloadedFile;
-  FS.staticInit();
-  // Set module methods based on EXPORTED_RUNTIME_METHODS
-  Module["FS_createPath"] = FS.createPath;
-  Module["FS_createDataFile"] = FS.createDataFile;
-  Module["FS_createPreloadedFile"] = FS.createPreloadedFile;
-  Module["FS_unlink"] = FS.unlink;
-  Module["FS_createLazyFile"] = FS.createLazyFile;
-  Module["FS_createDevice"] = FS.createDevice;
-  ;
+  FS.staticInit();Module["FS_createPath"] = FS.createPath;Module["FS_createDataFile"] = FS.createDataFile;Module["FS_createPreloadedFile"] = FS.createPreloadedFile;Module["FS_unlink"] = FS.unlink;Module["FS_createLazyFile"] = FS.createLazyFile;Module["FS_createDevice"] = FS.createDevice;;
 
       // exports
       Module["requestFullscreen"] = Browser.requestFullscreen;
@@ -10275,16 +10357,15 @@ function create_promise(out_set,out_promise) { const out_set_js = proxy_convert_
       Module["createContext"] = Browser.createContext;
       var preloadedImages = {};
       var preloadedAudios = {};;
+var GLctx;;
 for (var i = 0; i < 32; ++i) tempFixedLengthArray.push(new Array(i));;
 var miniTempWebGLFloatBuffersStorage = new Float32Array(288);
-  // Create GL_POOL_TEMP_BUFFERS_SIZE+1 temporary buffers, for uploads of size 0 through GL_POOL_TEMP_BUFFERS_SIZE inclusive
-  for (/**@suppress{duplicate}*/var i = 0; i <= 288; ++i) {
-    miniTempWebGLFloatBuffers[i] = miniTempWebGLFloatBuffersStorage.subarray(0, i);
+  for (/**@suppress{duplicate}*/var i = 0; i < 288; ++i) {
+    miniTempWebGLFloatBuffers[i] = miniTempWebGLFloatBuffersStorage.subarray(0, i+1);
   };
 var miniTempWebGLIntBuffersStorage = new Int32Array(288);
-  // Create GL_POOL_TEMP_BUFFERS_SIZE+1 temporary buffers, for uploads of size 0 through GL_POOL_TEMP_BUFFERS_SIZE inclusive
-  for (/**@suppress{duplicate}*/var i = 0; i <= 288; ++i) {
-    miniTempWebGLIntBuffers[i] = miniTempWebGLIntBuffersStorage.subarray(0, i);
+  for (/**@suppress{duplicate}*/var i = 0; i < 288; ++i) {
+    miniTempWebGLIntBuffers[i] = miniTempWebGLIntBuffersStorage.subarray(0, i+1);
   };
 if (globalThis.crypto === undefined) { globalThis.crypto = require('crypto'); };
 var MP_JS_EPOCH = Date.now();
@@ -10327,23 +10408,21 @@ var wasmImports = {
   /** @export */
   _emscripten_get_now_is_monotonic: __emscripten_get_now_is_monotonic,
   /** @export */
-  _emscripten_memcpy_js: __emscripten_memcpy_js,
-  /** @export */
   _emscripten_throw_longjmp: __emscripten_throw_longjmp,
   /** @export */
-  call0,
+  call0: call0,
   /** @export */
-  call0_kwarg,
+  call0_kwarg: call0_kwarg,
   /** @export */
-  call1,
+  call1: call1,
   /** @export */
-  call1_kwarg,
+  call1_kwarg: call1_kwarg,
   /** @export */
-  call2,
+  call2: call2,
   /** @export */
-  calln,
+  calln: calln,
   /** @export */
-  create_promise,
+  create_promise: create_promise,
   /** @export */
   eglBindAPI: _eglBindAPI,
   /** @export */
@@ -10383,18 +10462,12 @@ var wasmImports = {
   /** @export */
   emscripten_asm_const_int_sync_on_main_thread: _emscripten_asm_const_int_sync_on_main_thread,
   /** @export */
-  emscripten_asm_const_ptr_sync_on_main_thread: _emscripten_asm_const_ptr_sync_on_main_thread,
-  /** @export */
-  emscripten_cancel_main_loop: _emscripten_cancel_main_loop,
-  /** @export */
   emscripten_date_now: _emscripten_date_now,
   /** @export */
   emscripten_exit_fullscreen: _emscripten_exit_fullscreen,
   /** @export */
   emscripten_exit_pointerlock: _emscripten_exit_pointerlock,
   /** @export */
-  emscripten_force_exit: _emscripten_force_exit,
-  /** @export */
   emscripten_get_device_pixel_ratio: _emscripten_get_device_pixel_ratio,
   /** @export */
   emscripten_get_element_css_size: _emscripten_get_element_css_size,
@@ -10449,8 +10522,6 @@ var wasmImports = {
   /** @export */
   emscripten_glClearStencil: _emscripten_glClearStencil,
   /** @export */
-  emscripten_glClipControlEXT: _emscripten_glClipControlEXT,
-  /** @export */
   emscripten_glColorMask: _emscripten_glColorMask,
   /** @export */
   emscripten_glCompileShader: _emscripten_glCompileShader,
@@ -10625,12 +10696,8 @@ var wasmImports = {
   /** @export */
   emscripten_glPixelStorei: _emscripten_glPixelStorei,
   /** @export */
-  emscripten_glPolygonModeWEBGL: _emscripten_glPolygonModeWEBGL,
-  /** @export */
   emscripten_glPolygonOffset: _emscripten_glPolygonOffset,
   /** @export */
-  emscripten_glPolygonOffsetClampEXT: _emscripten_glPolygonOffsetClampEXT,
-  /** @export */
   emscripten_glQueryCounterEXT: _emscripten_glQueryCounterEXT,
   /** @export */
   emscripten_glReadPixels: _emscripten_glReadPixels,
@@ -10737,6 +10804,8 @@ var wasmImports = {
   /** @export */
   emscripten_has_asyncify: _emscripten_has_asyncify,
   /** @export */
+  emscripten_memcpy_js: _emscripten_memcpy_js,
+  /** @export */
   emscripten_request_fullscreen_strategy: _emscripten_request_fullscreen_strategy,
   /** @export */
   emscripten_request_pointerlock: _emscripten_request_pointerlock,
@@ -10815,53 +10884,53 @@ var wasmImports = {
   /** @export */
   fd_write: _fd_write,
   /** @export */
-  has_attr,
+  has_attr: has_attr,
   /** @export */
-  invoke_i,
+  invoke_i: invoke_i,
   /** @export */
-  invoke_ii,
+  invoke_ii: invoke_ii,
   /** @export */
-  invoke_iii,
+  invoke_iii: invoke_iii,
   /** @export */
-  invoke_iiii,
+  invoke_iiii: invoke_iiii,
   /** @export */
-  invoke_iiiii,
+  invoke_iiiii: invoke_iiiii,
   /** @export */
-  invoke_iiiiii,
+  invoke_iiiiii: invoke_iiiiii,
   /** @export */
-  invoke_v,
+  invoke_v: invoke_v,
   /** @export */
-  invoke_vi,
+  invoke_vi: invoke_vi,
   /** @export */
-  invoke_vii,
+  invoke_vii: invoke_vii,
   /** @export */
-  invoke_viii,
+  invoke_viii: invoke_viii,
   /** @export */
-  invoke_viiii,
+  invoke_viiii: invoke_viiii,
   /** @export */
-  js_check_existing,
+  js_check_existing: js_check_existing,
   /** @export */
-  js_get_error_info,
+  js_get_error_info: js_get_error_info,
   /** @export */
-  js_get_iter,
+  js_get_iter: js_get_iter,
   /** @export */
-  js_get_proxy_js_ref_info,
+  js_get_proxy_js_ref_info: js_get_proxy_js_ref_info,
   /** @export */
-  js_iter_next,
+  js_iter_next: js_iter_next,
   /** @export */
-  js_reflect_construct,
+  js_reflect_construct: js_reflect_construct,
   /** @export */
-  js_subscr_load,
+  js_subscr_load: js_subscr_load,
   /** @export */
-  js_subscr_store,
+  js_subscr_store: js_subscr_store,
   /** @export */
-  js_then_continue,
+  js_then_continue: js_then_continue,
   /** @export */
-  js_then_reject,
+  js_then_reject: js_then_reject,
   /** @export */
-  js_then_resolve,
+  js_then_resolve: js_then_resolve,
   /** @export */
-  lookup_attr,
+  lookup_attr: lookup_attr,
   /** @export */
   mp_js_hook: _mp_js_hook,
   /** @export */
@@ -10871,98 +10940,103 @@ var wasmImports = {
   /** @export */
   mp_js_time_ms: _mp_js_time_ms,
   /** @export */
-  proxy_convert_mp_to_js_then_js_to_js_then_js_to_mp_obj_jsside,
+  proxy_convert_mp_to_js_then_js_to_js_then_js_to_mp_obj_jsside: proxy_convert_mp_to_js_then_js_to_js_then_js_to_mp_obj_jsside,
   /** @export */
-  proxy_convert_mp_to_js_then_js_to_mp_obj_jsside,
+  proxy_convert_mp_to_js_then_js_to_mp_obj_jsside: proxy_convert_mp_to_js_then_js_to_mp_obj_jsside,
   /** @export */
-  proxy_js_free_obj,
+  proxy_js_free_obj: proxy_js_free_obj,
   /** @export */
-  store_attr
+  store_attr: store_attr
 };
+Asyncify.instrumentWasmImports(wasmImports);
 var wasmExports = createWasm();
-var ___wasm_call_ctors = createExportWrapper('__wasm_call_ctors', 0);
-var _mp_sched_keyboard_interrupt = Module['_mp_sched_keyboard_interrupt'] = createExportWrapper('mp_sched_keyboard_interrupt', 0);
-var _mp_handle_pending = Module['_mp_handle_pending'] = createExportWrapper('mp_handle_pending', 1);
-var _tulip_tick = Module['_tulip_tick'] = createExportWrapper('tulip_tick', 1);
-var _mp_js_init = Module['_mp_js_init'] = createExportWrapper('mp_js_init', 2);
-var _malloc = Module['_malloc'] = createExportWrapper('malloc', 1);
-var _mp_js_register_js_module = Module['_mp_js_register_js_module'] = createExportWrapper('mp_js_register_js_module', 2);
-var _mp_js_do_import = Module['_mp_js_do_import'] = createExportWrapper('mp_js_do_import', 2);
-var _proxy_convert_mp_to_js_obj_cside = Module['_proxy_convert_mp_to_js_obj_cside'] = createExportWrapper('proxy_convert_mp_to_js_obj_cside', 2);
-var _mp_js_do_exec = Module['_mp_js_do_exec'] = createExportWrapper('mp_js_do_exec', 3);
-var _mp_js_frozen_exec = Module['_mp_js_frozen_exec'] = createExportWrapper('mp_js_frozen_exec', 1);
-var _mp_js_do_exec_async = Module['_mp_js_do_exec_async'] = createExportWrapper('mp_js_do_exec_async', 3);
-var _mp_js_repl_init = Module['_mp_js_repl_init'] = createExportWrapper('mp_js_repl_init', 0);
-var _mp_js_repl_process_char = Module['_mp_js_repl_process_char'] = createExportWrapper('mp_js_repl_process_char', 1);
-var _mp_hal_get_interrupt_char = Module['_mp_hal_get_interrupt_char'] = createExportWrapper('mp_hal_get_interrupt_char', 1);
-var _proxy_c_init = Module['_proxy_c_init'] = createExportWrapper('proxy_c_init', 0);
-var _proxy_c_free_obj = Module['_proxy_c_free_obj'] = createExportWrapper('proxy_c_free_obj', 1);
-var _free = Module['_free'] = createExportWrapper('free', 1);
-var _proxy_c_to_js_call = Module['_proxy_c_to_js_call'] = createExportWrapper('proxy_c_to_js_call', 4);
-var _proxy_c_to_js_dir = Module['_proxy_c_to_js_dir'] = createExportWrapper('proxy_c_to_js_dir', 2);
-var _proxy_c_to_js_has_attr = Module['_proxy_c_to_js_has_attr'] = createExportWrapper('proxy_c_to_js_has_attr', 2);
-var _proxy_c_to_js_lookup_attr = Module['_proxy_c_to_js_lookup_attr'] = createExportWrapper('proxy_c_to_js_lookup_attr', 3);
-var _proxy_c_to_js_store_attr = Module['_proxy_c_to_js_store_attr'] = createExportWrapper('proxy_c_to_js_store_attr', 3);
-var _proxy_c_to_js_delete_attr = Module['_proxy_c_to_js_delete_attr'] = createExportWrapper('proxy_c_to_js_delete_attr', 2);
-var _proxy_c_to_js_get_type = Module['_proxy_c_to_js_get_type'] = createExportWrapper('proxy_c_to_js_get_type', 1);
-var _proxy_c_to_js_get_array = Module['_proxy_c_to_js_get_array'] = createExportWrapper('proxy_c_to_js_get_array', 2);
-var _proxy_c_to_js_get_dict = Module['_proxy_c_to_js_get_dict'] = createExportWrapper('proxy_c_to_js_get_dict', 2);
-var _proxy_c_to_js_get_iter = Module['_proxy_c_to_js_get_iter'] = createExportWrapper('proxy_c_to_js_get_iter', 1);
-var _proxy_c_to_js_iternext = Module['_proxy_c_to_js_iternext'] = createExportWrapper('proxy_c_to_js_iternext', 2);
-var _proxy_c_to_js_resume = Module['_proxy_c_to_js_resume'] = createExportWrapper('proxy_c_to_js_resume', 2);
-var _process_single_midi_byte = Module['_process_single_midi_byte'] = createExportWrapper('process_single_midi_byte', 1);
+var ___wasm_call_ctors = createExportWrapper('__wasm_call_ctors');
+var _malloc = Module['_malloc'] = createExportWrapper('malloc');
+var _free = Module['_free'] = createExportWrapper('free');
+var _mp_sched_keyboard_interrupt = Module['_mp_sched_keyboard_interrupt'] = createExportWrapper('mp_sched_keyboard_interrupt');
+var _mp_handle_pending = Module['_mp_handle_pending'] = createExportWrapper('mp_handle_pending');
+var ___errno_location = createExportWrapper('__errno_location');
+var _tulip_tick = Module['_tulip_tick'] = createExportWrapper('tulip_tick');
+var _mp_js_init = Module['_mp_js_init'] = createExportWrapper('mp_js_init');
+var _mp_js_register_js_module = Module['_mp_js_register_js_module'] = createExportWrapper('mp_js_register_js_module');
+var _mp_js_do_import = Module['_mp_js_do_import'] = createExportWrapper('mp_js_do_import');
+var _proxy_convert_mp_to_js_obj_cside = Module['_proxy_convert_mp_to_js_obj_cside'] = createExportWrapper('proxy_convert_mp_to_js_obj_cside');
+var _mp_js_do_exec = Module['_mp_js_do_exec'] = createExportWrapper('mp_js_do_exec');
+var _mp_js_frozen_exec = Module['_mp_js_frozen_exec'] = createExportWrapper('mp_js_frozen_exec');
+var _mp_js_do_exec_async = Module['_mp_js_do_exec_async'] = createExportWrapper('mp_js_do_exec_async');
+var _mp_js_repl_init = Module['_mp_js_repl_init'] = createExportWrapper('mp_js_repl_init');
+var _mp_js_repl_process_char = Module['_mp_js_repl_process_char'] = createExportWrapper('mp_js_repl_process_char');
+var _mp_hal_get_interrupt_char = Module['_mp_hal_get_interrupt_char'] = createExportWrapper('mp_hal_get_interrupt_char');
+var _proxy_c_init = Module['_proxy_c_init'] = createExportWrapper('proxy_c_init');
+var _proxy_c_free_obj = Module['_proxy_c_free_obj'] = createExportWrapper('proxy_c_free_obj');
+var _proxy_c_to_js_call = Module['_proxy_c_to_js_call'] = createExportWrapper('proxy_c_to_js_call');
+var _proxy_c_to_js_dir = Module['_proxy_c_to_js_dir'] = createExportWrapper('proxy_c_to_js_dir');
+var _proxy_c_to_js_has_attr = Module['_proxy_c_to_js_has_attr'] = createExportWrapper('proxy_c_to_js_has_attr');
+var _proxy_c_to_js_lookup_attr = Module['_proxy_c_to_js_lookup_attr'] = createExportWrapper('proxy_c_to_js_lookup_attr');
+var _proxy_c_to_js_store_attr = Module['_proxy_c_to_js_store_attr'] = createExportWrapper('proxy_c_to_js_store_attr');
+var _proxy_c_to_js_delete_attr = Module['_proxy_c_to_js_delete_attr'] = createExportWrapper('proxy_c_to_js_delete_attr');
+var _proxy_c_to_js_get_type = Module['_proxy_c_to_js_get_type'] = createExportWrapper('proxy_c_to_js_get_type');
+var _proxy_c_to_js_get_array = Module['_proxy_c_to_js_get_array'] = createExportWrapper('proxy_c_to_js_get_array');
+var _proxy_c_to_js_get_dict = Module['_proxy_c_to_js_get_dict'] = createExportWrapper('proxy_c_to_js_get_dict');
+var _proxy_c_to_js_get_iter = Module['_proxy_c_to_js_get_iter'] = createExportWrapper('proxy_c_to_js_get_iter');
+var _proxy_c_to_js_iternext = Module['_proxy_c_to_js_iternext'] = createExportWrapper('proxy_c_to_js_iternext');
+var _proxy_c_to_js_resume = Module['_proxy_c_to_js_resume'] = createExportWrapper('proxy_c_to_js_resume');
+var _process_single_midi_byte = Module['_process_single_midi_byte'] = createExportWrapper('process_single_midi_byte');
 var _emscripten_stack_get_base = () => (_emscripten_stack_get_base = wasmExports['emscripten_stack_get_base'])();
 var _emscripten_stack_get_current = () => (_emscripten_stack_get_current = wasmExports['emscripten_stack_get_current'])();
-var _fflush = createExportWrapper('fflush', 1);
-var _strerror = createExportWrapper('strerror', 1);
-var _setThrew = createExportWrapper('setThrew', 2);
+var _fflush = Module['_fflush'] = createExportWrapper('fflush');
+var _setThrew = createExportWrapper('setThrew');
 var _emscripten_stack_init = () => (_emscripten_stack_init = wasmExports['emscripten_stack_init'])();
 var _emscripten_stack_get_free = () => (_emscripten_stack_get_free = wasmExports['emscripten_stack_get_free'])();
 var _emscripten_stack_get_end = () => (_emscripten_stack_get_end = wasmExports['emscripten_stack_get_end'])();
-var __emscripten_stack_restore = (a0) => (__emscripten_stack_restore = wasmExports['_emscripten_stack_restore'])(a0);
-var __emscripten_stack_alloc = (a0) => (__emscripten_stack_alloc = wasmExports['_emscripten_stack_alloc'])(a0);
-var dynCall_viii = Module['dynCall_viii'] = createExportWrapper('dynCall_viii', 4);
-var dynCall_vi = Module['dynCall_vi'] = createExportWrapper('dynCall_vi', 2);
-var dynCall_ii = Module['dynCall_ii'] = createExportWrapper('dynCall_ii', 2);
-var dynCall_vii = Module['dynCall_vii'] = createExportWrapper('dynCall_vii', 3);
-var dynCall_iii = Module['dynCall_iii'] = createExportWrapper('dynCall_iii', 3);
-var dynCall_viiii = Module['dynCall_viiii'] = createExportWrapper('dynCall_viiii', 5);
-var dynCall_iiii = Module['dynCall_iiii'] = createExportWrapper('dynCall_iiii', 4);
-var dynCall_v = Module['dynCall_v'] = createExportWrapper('dynCall_v', 1);
-var dynCall_iiiii = Module['dynCall_iiiii'] = createExportWrapper('dynCall_iiiii', 5);
-var dynCall_i = Module['dynCall_i'] = createExportWrapper('dynCall_i', 1);
-var dynCall_viiiiii = Module['dynCall_viiiiii'] = createExportWrapper('dynCall_viiiiii', 7);
-var dynCall_iiiiii = Module['dynCall_iiiiii'] = createExportWrapper('dynCall_iiiiii', 6);
-var dynCall_viiiii = Module['dynCall_viiiii'] = createExportWrapper('dynCall_viiiii', 6);
-var dynCall_jji = Module['dynCall_jji'] = createExportWrapper('dynCall_jji', 4);
-var dynCall_viiiiiii = Module['dynCall_viiiiiii'] = createExportWrapper('dynCall_viiiiiii', 8);
-var dynCall_viiiiiiii = Module['dynCall_viiiiiiii'] = createExportWrapper('dynCall_viiiiiiii', 9);
-var dynCall_iiiiiiii = Module['dynCall_iiiiiiii'] = createExportWrapper('dynCall_iiiiiiii', 8);
-var dynCall_viiiiiiiiii = Module['dynCall_viiiiiiiiii'] = createExportWrapper('dynCall_viiiiiiiiii', 11);
-var dynCall_iiiiiiiiii = Module['dynCall_iiiiiiiiii'] = createExportWrapper('dynCall_iiiiiiiiii', 10);
-var dynCall_iiiiiiiiiiiiiiff = Module['dynCall_iiiiiiiiiiiiiiff'] = createExportWrapper('dynCall_iiiiiiiiiiiiiiff', 16);
-var dynCall_iiiiiiiii = Module['dynCall_iiiiiiiii'] = createExportWrapper('dynCall_iiiiiiiii', 9);
-var dynCall_viiiiiiiiiii = Module['dynCall_viiiiiiiiiii'] = createExportWrapper('dynCall_viiiiiiiiiii', 12);
-var dynCall_iiiiiidiiff = Module['dynCall_iiiiiidiiff'] = createExportWrapper('dynCall_iiiiiidiiff', 11);
-var dynCall_jiji = Module['dynCall_jiji'] = createExportWrapper('dynCall_jiji', 5);
-var dynCall_ji = Module['dynCall_ji'] = createExportWrapper('dynCall_ji', 2);
-var dynCall_vffff = Module['dynCall_vffff'] = createExportWrapper('dynCall_vffff', 5);
-var dynCall_vf = Module['dynCall_vf'] = createExportWrapper('dynCall_vf', 2);
-var dynCall_viiiiiiiii = Module['dynCall_viiiiiiiii'] = createExportWrapper('dynCall_viiiiiiiii', 10);
-var dynCall_vff = Module['dynCall_vff'] = createExportWrapper('dynCall_vff', 3);
-var dynCall_vfi = Module['dynCall_vfi'] = createExportWrapper('dynCall_vfi', 3);
-var dynCall_viif = Module['dynCall_viif'] = createExportWrapper('dynCall_viif', 4);
-var dynCall_vif = Module['dynCall_vif'] = createExportWrapper('dynCall_vif', 3);
-var dynCall_viff = Module['dynCall_viff'] = createExportWrapper('dynCall_viff', 4);
-var dynCall_vifff = Module['dynCall_vifff'] = createExportWrapper('dynCall_vifff', 5);
-var dynCall_viffff = Module['dynCall_viffff'] = createExportWrapper('dynCall_viffff', 6);
-var dynCall_vfff = Module['dynCall_vfff'] = createExportWrapper('dynCall_vfff', 4);
-var dynCall_iidiiii = Module['dynCall_iidiiii'] = createExportWrapper('dynCall_iidiiii', 7);
-var _asyncify_start_unwind = createExportWrapper('asyncify_start_unwind', 1);
-var _asyncify_stop_unwind = createExportWrapper('asyncify_stop_unwind', 0);
-var _asyncify_start_rewind = createExportWrapper('asyncify_start_rewind', 1);
-var _asyncify_stop_rewind = createExportWrapper('asyncify_stop_rewind', 0);
-
+var stackSave = createExportWrapper('stackSave');
+var stackRestore = createExportWrapper('stackRestore');
+var stackAlloc = createExportWrapper('stackAlloc');
+var dynCall_viii = Module['dynCall_viii'] = createExportWrapper('dynCall_viii');
+var dynCall_vi = Module['dynCall_vi'] = createExportWrapper('dynCall_vi');
+var dynCall_ii = Module['dynCall_ii'] = createExportWrapper('dynCall_ii');
+var dynCall_vii = Module['dynCall_vii'] = createExportWrapper('dynCall_vii');
+var dynCall_iii = Module['dynCall_iii'] = createExportWrapper('dynCall_iii');
+var dynCall_viiii = Module['dynCall_viiii'] = createExportWrapper('dynCall_viiii');
+var dynCall_iiii = Module['dynCall_iiii'] = createExportWrapper('dynCall_iiii');
+var dynCall_v = Module['dynCall_v'] = createExportWrapper('dynCall_v');
+var dynCall_iiiii = Module['dynCall_iiiii'] = createExportWrapper('dynCall_iiiii');
+var dynCall_i = Module['dynCall_i'] = createExportWrapper('dynCall_i');
+var dynCall_viiiiii = Module['dynCall_viiiiii'] = createExportWrapper('dynCall_viiiiii');
+var dynCall_iiiiii = Module['dynCall_iiiiii'] = createExportWrapper('dynCall_iiiiii');
+var dynCall_viiiii = Module['dynCall_viiiii'] = createExportWrapper('dynCall_viiiii');
+var dynCall_jji = Module['dynCall_jji'] = createExportWrapper('dynCall_jji');
+var dynCall_viiiiiii = Module['dynCall_viiiiiii'] = createExportWrapper('dynCall_viiiiiii');
+var dynCall_viiiiiiii = Module['dynCall_viiiiiiii'] = createExportWrapper('dynCall_viiiiiiii');
+var dynCall_iiiiiiii = Module['dynCall_iiiiiiii'] = createExportWrapper('dynCall_iiiiiiii');
+var dynCall_viiiiiiiiii = Module['dynCall_viiiiiiiiii'] = createExportWrapper('dynCall_viiiiiiiiii');
+var dynCall_di = Module['dynCall_di'] = createExportWrapper('dynCall_di');
+var dynCall_vid = Module['dynCall_vid'] = createExportWrapper('dynCall_vid');
+var dynCall_dd = Module['dynCall_dd'] = createExportWrapper('dynCall_dd');
+var dynCall_iiiiiiiiii = Module['dynCall_iiiiiiiiii'] = createExportWrapper('dynCall_iiiiiiiiii');
+var dynCall_iiiiiiiiiiiiiiff = Module['dynCall_iiiiiiiiiiiiiiff'] = createExportWrapper('dynCall_iiiiiiiiiiiiiiff');
+var dynCall_iiiiiiiii = Module['dynCall_iiiiiiiii'] = createExportWrapper('dynCall_iiiiiiiii');
+var dynCall_viiiiiiiiiii = Module['dynCall_viiiiiiiiiii'] = createExportWrapper('dynCall_viiiiiiiiiii');
+var dynCall_iiiiiidiiff = Module['dynCall_iiiiiidiiff'] = createExportWrapper('dynCall_iiiiiidiiff');
+var dynCall_jiji = Module['dynCall_jiji'] = createExportWrapper('dynCall_jiji');
+var dynCall_ji = Module['dynCall_ji'] = createExportWrapper('dynCall_ji');
+var dynCall_vffff = Module['dynCall_vffff'] = createExportWrapper('dynCall_vffff');
+var dynCall_vf = Module['dynCall_vf'] = createExportWrapper('dynCall_vf');
+var dynCall_viiiiiiiii = Module['dynCall_viiiiiiiii'] = createExportWrapper('dynCall_viiiiiiiii');
+var dynCall_vff = Module['dynCall_vff'] = createExportWrapper('dynCall_vff');
+var dynCall_vfi = Module['dynCall_vfi'] = createExportWrapper('dynCall_vfi');
+var dynCall_viif = Module['dynCall_viif'] = createExportWrapper('dynCall_viif');
+var dynCall_vif = Module['dynCall_vif'] = createExportWrapper('dynCall_vif');
+var dynCall_viff = Module['dynCall_viff'] = createExportWrapper('dynCall_viff');
+var dynCall_vifff = Module['dynCall_vifff'] = createExportWrapper('dynCall_vifff');
+var dynCall_viffff = Module['dynCall_viffff'] = createExportWrapper('dynCall_viffff');
+var dynCall_iidiiii = Module['dynCall_iidiiii'] = createExportWrapper('dynCall_iidiiii');
+var _asyncify_start_unwind = createExportWrapper('asyncify_start_unwind');
+var _asyncify_stop_unwind = createExportWrapper('asyncify_stop_unwind');
+var _asyncify_start_rewind = createExportWrapper('asyncify_start_rewind');
+var _asyncify_stop_rewind = createExportWrapper('asyncify_stop_rewind');
+var ___start_em_js = Module['___start_em_js'] = 67672041;
+var ___stop_em_js = Module['___stop_em_js'] = 67677661;
 function invoke_ii(index,a1) {
   var sp = stackSave();
   try {
@@ -11090,6 +11164,9 @@ function invoke_iiiiii(index,a1,a2,a3,a4,a5) {
 
 Module['addRunDependency'] = addRunDependency;
 Module['removeRunDependency'] = removeRunDependency;
+Module['FS_createPath'] = FS.createPath;
+Module['FS_createLazyFile'] = FS.createLazyFile;
+Module['FS_createDevice'] = FS.createDevice;
 Module['ccall'] = ccall;
 Module['cwrap'] = cwrap;
 Module['setValue'] = setValue;
@@ -11099,13 +11176,10 @@ Module['PATH_FS'] = PATH_FS;
 Module['UTF8ToString'] = UTF8ToString;
 Module['stringToUTF8'] = stringToUTF8;
 Module['lengthBytesUTF8'] = lengthBytesUTF8;
-Module['FS_createPreloadedFile'] = FS_createPreloadedFile;
-Module['FS_unlink'] = FS_unlink;
-Module['FS_createPath'] = FS_createPath;
-Module['FS_createDevice'] = FS_createDevice;
+Module['FS_createPreloadedFile'] = FS.createPreloadedFile;
 Module['FS'] = FS;
-Module['FS_createDataFile'] = FS_createDataFile;
-Module['FS_createLazyFile'] = FS_createLazyFile;
+Module['FS_createDataFile'] = FS.createDataFile;
+Module['FS_unlink'] = FS.unlink;
 var missingLibrarySymbols = [
   'writeI53ToI64Clamped',
   'writeI53ToI64Signaling',
@@ -11113,17 +11187,24 @@ var missingLibrarySymbols = [
   'writeI53ToU64Signaling',
   'convertI32PairToI53',
   'convertU32PairToI53',
-  'getTempRet0',
-  'setTempRet0',
+  'isLeapYear',
+  'ydayFromDate',
+  'arraySum',
+  'addDays',
   'inetPton4',
   'inetNtop4',
   'inetPton6',
   'inetNtop6',
   'readSockaddr',
   'writeSockaddr',
+  'getHostByName',
+  'getCallstack',
   'emscriptenLog',
+  'convertPCtoSourceLocation',
+  'jstoi_s',
   'getDynCaller',
   'asmjsMangle',
+  'handleAllocatorInit',
   'HandleAllocator',
   'getNativeTypeSize',
   'STACK_SIZE',
@@ -11166,8 +11247,7 @@ var missingLibrarySymbols = [
   'battery',
   'registerBatteryEventCallback',
   'jsStackTrace',
-  'getCallstack',
-  'convertPCtoSourceLocation',
+  'stackTrace',
   'checkWasiClock',
   'wasiRightsToMuslOFlags',
   'wasiOFlagsToMuslOFlags',
@@ -11181,11 +11261,6 @@ var missingLibrarySymbols = [
   'makePromiseCallback',
   'ExceptionInfo',
   'findMatchingCatch',
-  'Browser_asyncPrepareDataCounter',
-  'isLeapYear',
-  'ydayFromDate',
-  'arraySum',
-  'addDays',
   'getSocketFromFD',
   'getSocketAddress',
   'FS_mkdirTree',
@@ -11197,9 +11272,6 @@ var missingLibrarySymbols = [
   'allocate',
   'writeStringToMemory',
   'writeAsciiToMemory',
-  'setErrNo',
-  'demangle',
-  'stackTrace',
 ];
 missingLibrarySymbols.forEach(missingLibrarySymbol)
 
@@ -11210,29 +11282,39 @@ var unexportedSymbols = [
   'addOnPreMain',
   'addOnExit',
   'addOnPostRun',
+  'FS_createFolder',
+  'FS_createLink',
+  'FS_readFile',
   'out',
   'err',
   'callMain',
   'abort',
   'wasmMemory',
   'wasmExports',
+  'stackAlloc',
+  'stackSave',
+  'stackRestore',
+  'getTempRet0',
+  'setTempRet0',
   'writeStackCookie',
   'checkStackCookie',
   'writeI53ToI64',
   'readI53FromI64',
   'readI53FromU64',
   'convertI32PairToI53Checked',
-  'stackSave',
-  'stackRestore',
-  'stackAlloc',
   'ptrToString',
   'zeroMemory',
   'exitJS',
   'getHeapMax',
   'growMemory',
   'ENV',
+  'MONTH_DAYS_REGULAR',
+  'MONTH_DAYS_LEAP',
+  'MONTH_DAYS_REGULAR_CUMULATIVE',
+  'MONTH_DAYS_LEAP_CUMULATIVE',
   'ERRNO_CODES',
-  'strError',
+  'ERRNO_MESSAGES',
+  'setErrNo',
   'DNS',
   'Protocols',
   'Sockets',
@@ -11240,12 +11322,12 @@ var unexportedSymbols = [
   'randomFill',
   'timers',
   'warnOnce',
+  'UNWIND_CACHE',
   'readEmAsmArgsArray',
   'readEmAsmArgs',
   'runEmAsmFunction',
   'runMainThreadEmAsm',
   'jstoi_q',
-  'jstoi_s',
   'getExecutableName',
   'listenOnce',
   'autoResumeAudioContext',
@@ -11307,7 +11389,8 @@ var unexportedSymbols = [
   'registerBeforeUnloadEventCallback',
   'setCanvasElementSize',
   'getCanvasElementSize',
-  'UNWIND_CACHE',
+  'demangle',
+  'demangleAll',
   'ExitStatus',
   'getEnvStrings',
   'doReadv',
@@ -11319,19 +11402,13 @@ var unexportedSymbols = [
   'exceptionCaught',
   'Browser',
   'setMainLoop',
-  'getPreloadedImageData__data',
   'wget',
-  'MONTH_DAYS_REGULAR',
-  'MONTH_DAYS_LEAP',
-  'MONTH_DAYS_REGULAR_CUMULATIVE',
-  'MONTH_DAYS_LEAP_CUMULATIVE',
   'SYSCALLS',
   'preloadPlugins',
   'FS_modeStringToFlags',
   'FS_getMode',
   'FS_stdin_getChar_buffer',
   'FS_stdin_getChar',
-  'FS_readFile',
   'MEMFS',
   'TTY',
   'PIPEFS',
@@ -11340,25 +11417,24 @@ var unexportedSymbols = [
   'miniTempWebGLFloatBuffers',
   'miniTempWebGLIntBuffers',
   'heapObjectForWebGLType',
-  'toTypedArrayIndex',
+  'heapAccessShiftForWebGLHeap',
   'webgl_enable_ANGLE_instanced_arrays',
   'webgl_enable_OES_vertex_array_object',
   'webgl_enable_WEBGL_draw_buffers',
   'webgl_enable_WEBGL_multi_draw',
-  'webgl_enable_EXT_polygon_offset_clamp',
-  'webgl_enable_EXT_clip_control',
-  'webgl_enable_WEBGL_polygon_mode',
   'GL',
   'emscriptenWebGLGet',
   'computeUnpackAlignedImageSize',
   'colorChannelsInGlTextureFormat',
   'emscriptenWebGLGetTexPixelData',
+  '__glGenObject',
   'emscriptenWebGLGetUniform',
   'webglGetUniformLocation',
   'webglPrepareUniformLocationsBeforeFirstUse',
   'webglGetLeftBracePos',
   'emscriptenWebGLGetVertexAttrib',
   '__glGetActiveAttribOrUniform',
+  'emscripten_webgl_power_preferences',
   'AL',
   'GLUT',
   'EGL',
@@ -11369,8 +11445,6 @@ var unexportedSymbols = [
   'Fibers',
   'allocateUTF8',
   'allocateUTF8OnStack',
-  'print',
-  'printErr',
   'IDBFS',
 ];
 unexportedSymbols.forEach(unexportedRuntimeSymbol);
@@ -11421,7 +11495,7 @@ function run() {
     initRuntime();
 
     readyPromiseResolve(Module);
-    Module['onRuntimeInitialized']?.();
+    if (Module['onRuntimeInitialized']) Module['onRuntimeInitialized']();
 
     assert(!Module['_main'], 'compiled without a main, but one is present. if you added it from JS, use Module["onRuntimeInitialized"]');
 
@@ -11430,8 +11504,10 @@ function run() {
 
   if (Module['setStatus']) {
     Module['setStatus']('Running...');
-    setTimeout(() => {
-      setTimeout(() => Module['setStatus'](''), 1);
+    setTimeout(function() {
+      setTimeout(function() {
+        Module['setStatus']('');
+      }, 1);
       doRun();
     }, 1);
   } else
@@ -11462,13 +11538,13 @@ function checkUnflushedContent() {
   try { // it doesn't matter if it fails
     _fflush(0);
     // also flush in the JS FS layer
-    ['stdout', 'stderr'].forEach((name) => {
+    ['stdout', 'stderr'].forEach(function(name) {
       var info = FS.analyzePath('/dev/' + name);
       if (!info) return;
       var stream = info.object;
       var rdev = stream.rdev;
       var tty = TTY.ttys[rdev];
-      if (tty?.output?.length) {
+      if (tty && tty.output && tty.output.length) {
         has = true;
       }
     });
@@ -11489,42 +11565,16 @@ if (Module['preInit']) {
 
 run();
 
-// end include: postamble.js
-
-// include: postamble_modularize.js
-// In MODULARIZE mode we wrap the generated code in a factory function
-// and return either the Module itself, or a promise of the module.
-//
-// We assign to the `moduleRtn` global here and configure closure to see
-// this as and extern so it won't get minified.
-
-moduleRtn = readyPromise;
-
-// Assertion for attempting to access module properties on the incoming
-// moduleArg.  In the past we used this object as the prototype of the module
-// and assigned properties to it, but now we return a distinct object.  This
-// keeps the instance private until it is ready (i.e the promise has been
-// resolved).
-for (const prop of Object.keys(Module)) {
-  if (!(prop in moduleArg)) {
-    Object.defineProperty(moduleArg, prop, {
-      configurable: true,
-      get() {
-        abort(`Access to module property ('${prop}') is no longer possible via the module constructor argument; Instead, use the result of the module constructor.`)
-      }
-    });
-  }
-}
-// end include: postamble_modularize.js
 
+// end include: postamble.js
 
 
-  return moduleRtn;
+  return moduleArg.ready
 }
 );
 })();
-export default _createMicroPythonModule;
-/*
+;
+export default _createMicroPythonModule;/*
  * This file is part of the MicroPython project, http://micropython.org/
  *
  * The MIT License (MIT)
diff --git a/www/run/micropython.wasm b/www/run/micropython.wasm
index a30350226..e856ea813 100755
Binary files a/www/run/micropython.wasm and b/www/run/micropython.wasm differ